'taggit',
'gadjo',
'panikweb.paniktags',
+ 'panikweb.webstats',
'sorl.thumbnail',
'ckeditor',
'emissions',
RAVEN_CONFIG = None
+ACCESS_LOG_FILENAME = '/var/log/nginx/panikweb-access.log'  # access log parsed for podcast download records
+
DEBUG_TOOLBAR_PANELS = (
'debug_toolbar.panels.version.VersionDebugPanel',
'debug_toolbar.panels.timer.TimerDebugPanel',
--- /dev/null
+from ipaddress import ip_address
+import re
+
+import dateutil.parser
+
+from django.conf import settings
+from django.core.management.base import BaseCommand
+
+from emissions.models import SoundFile
+from panikweb.webstats.models import PodcastLogLine
+
+
class Command(BaseCommand):
    """Record podcast downloads found in the web server access log.

    Each ``GET`` request for an ``.ogg`` or ``.mp3`` file below
    ``/media/sounds/`` is stored as a ``PodcastLogLine`` with an anonymised
    client address.  A request repeated by the same client (same address,
    path and user agent) less than an hour after the last recorded one is
    ignored.  Lines that cannot be parsed, or that do not point to a known
    ``SoundFile``, are skipped instead of aborting the import.
    """

    help = 'Import podcast download requests from the web server access log'

    # Captures, in order: client address, timestamp, path below
    # /media/sounds/, user agent.
    log_line_re = re.compile(
        r'([(a-f\d\.\:)]+) - - \[(.*?)\] "GET /media/sounds/(.*?) HTTP/..." \d+ \d+ ".*?" "(.*?)"'
    )
    sound_extensions = ('.ogg', '.mp3')
    # Minimum delay, in seconds, between two recorded requests made by the
    # same client for the same file.
    min_interval = 3600

    def handle(self, *args, **kwargs):
        """Parse ``settings.ACCESS_LOG_FILENAME`` and store the new requests.

        Records are written with ``get_or_create`` so running the command
        again over the same log does not duplicate them.
        """
        user_agent_max_length = PodcastLogLine._meta.get_field('user_agent').max_length
        # (ip, path, user agent) -> timestamp of the last recorded request
        seen = {}
        with open(settings.ACCESS_LOG_FILENAME) as fd:
            for line in fd:
                match = self.log_line_re.match(line)
                if not match:
                    continue
                ip, date, path, user_agent = match.groups()
                if not path.endswith(self.sound_extensions):
                    continue
                try:
                    # The first ':' is turned into a space before parsing;
                    # presumably it separates the date from the time, as in
                    # "13/Dec/2020:15:18:00 +0100".
                    log_datetime = dateutil.parser.parse(date.replace(':', ' ', 1))
                    anonymised_ip = self.anonymise_ip(ip)
                except (ValueError, OverflowError):
                    # The pattern is loose about addresses and timestamps; a
                    # malformed line must not abort the whole import.
                    continue

                key = (ip, path, user_agent)
                previous_sighting = seen.get(key)
                if (
                    previous_sighting is not None
                    and (log_datetime - previous_sighting).total_seconds() < self.min_interval
                ):
                    # don't record requests that already happened less than
                    # an hour ago.
                    continue
                seen[key] = log_datetime

                try:
                    # The sound file id is the third "_"-separated part
                    # counting from the end of the path.
                    soundfile_id = int(path.split('_')[-3])
                except (IndexError, ValueError):
                    # path does not follow the expected naming scheme
                    continue
                try:
                    soundfile = SoundFile.objects.get(id=soundfile_id)
                except SoundFile.DoesNotExist:
                    continue

                PodcastLogLine.objects.get_or_create(
                    timestamp=log_datetime,
                    ip=anonymised_ip,
                    path=path,
                    soundfile=soundfile,
                    # keep within the column size so overlong user agents do
                    # not make the insert fail
                    user_agent=user_agent[:user_agent_max_length],
                )

    def anonymise_ip(self, ip):
        """Return ``ip`` with its host part zeroed, as a string.

        IPv4 addresses keep their first 3 bytes (255.255.255.0 mask), IPv6
        addresses their first 8 bytes (ffff:ffff:ffff:ffff:: mask).

        Raises ``ValueError`` if ``ip`` is not a valid address.
        """
        address = ip_address(ip)
        if address.version == 4:  # apply 255.255.255.0 mask
            address = ip_address(address.packed[:3] + b'\0')
        elif address.version == 6:  # apply ffff:ffff:ffff:ffff:0000:0000:0000:0000 mask
            address = ip_address(address.packed[:8] + b'\0' * 8)
        return str(address)
--- /dev/null
+# -*- coding: utf-8 -*-
+# Generated by Django 1.11.29 on 2020-12-13 15:18
+from __future__ import unicode_literals
+
+from django.db import migrations, models
+import django.db.models.deletion
+
+
class Migration(migrations.Migration):
    """Initial migration of the webstats app: creates ``PodcastLogLine``."""

    initial = True

    dependencies = [
        ('emissions', '0015_auto_20200404_1510'),
    ]

    operations = [
        migrations.CreateModel(
            name='PodcastLogLine',
            fields=[
                (
                    'id',
                    models.AutoField(
                        auto_created=True,
                        primary_key=True,
                        serialize=False,
                        verbose_name='ID',
                    ),
                ),
                ('timestamp', models.DateTimeField()),
                ('ip', models.GenericIPAddressField()),
                ('path', models.CharField(max_length=255)),
                ('user_agent', models.CharField(max_length=255)),
                ('is_bot', models.NullBooleanField()),
                (
                    'soundfile',
                    models.ForeignKey(
                        null=True,
                        on_delete=django.db.models.deletion.SET_NULL,
                        to='emissions.SoundFile',
                    ),
                ),
            ],
        ),
    ]
--- /dev/null
+from django.db import models
+
+
class PodcastLogLine(models.Model):
    """A single request for a sound file, with its client details."""

    # when the request was made
    timestamp = models.DateTimeField()
    # client address (IPv4 or IPv6)
    ip = models.GenericIPAddressField()
    # requested path
    path = models.CharField(max_length=255)
    # sound file the request refers to; set to NULL if that file is deleted
    soundfile = models.ForeignKey(
        to='emissions.SoundFile',
        null=True,
        on_delete=models.SET_NULL,
    )
    user_agent = models.CharField(max_length=255)
    # NOTE(review): nothing visible here sets this flag; NULL presumably
    # means "not classified yet" -- confirm.
    is_bot = models.NullBooleanField()