git.0d.be Git - panikweb.git/commitdiff
start module to keep podcast stats
author: Frédéric Péters <fpeters@0d.be>
Sun, 13 Dec 2020 13:58:07 +0000 (14:58 +0100)
committer: Frédéric Péters <fpeters@0d.be>
Sun, 13 Dec 2020 14:19:13 +0000 (15:19 +0100)
panikweb/settings.py
panikweb/webstats/__init__.py [new file with mode: 0644]
panikweb/webstats/management/__init__.py [new file with mode: 0644]
panikweb/webstats/management/commands/__init__.py [new file with mode: 0644]
panikweb/webstats/management/commands/read_access_log.py [new file with mode: 0644]
panikweb/webstats/migrations/0001_initial.py [new file with mode: 0644]
panikweb/webstats/migrations/__init__.py [new file with mode: 0644]
panikweb/webstats/models.py [new file with mode: 0644]

index f2a679d4f0bd98d1439152c817effa5da5275080..7cb8da1888a67981e9e8c1ff3fb0417af5671476 100644 (file)
@@ -160,6 +160,7 @@ INSTALLED_APPS = (
     'taggit',
     'gadjo',
     'panikweb.paniktags',
+    'panikweb.webstats',
     'sorl.thumbnail',
     'ckeditor',
     'emissions',
@@ -225,6 +226,8 @@ STATSD_CLIENT = 'django_statsd.clients.null'
 
 RAVEN_CONFIG = None
 
+# access log file opened by the webstats "read_access_log" management command
+ACCESS_LOG_FILENAME = '/var/log/nginx/panikweb-access.log'
+
 DEBUG_TOOLBAR_PANELS = (
     'debug_toolbar.panels.version.VersionDebugPanel',
     'debug_toolbar.panels.timer.TimerDebugPanel',
diff --git a/panikweb/webstats/__init__.py b/panikweb/webstats/__init__.py
new file mode 100644 (file)
index 0000000..e69de29
diff --git a/panikweb/webstats/management/__init__.py b/panikweb/webstats/management/__init__.py
new file mode 100644 (file)
index 0000000..e69de29
diff --git a/panikweb/webstats/management/commands/__init__.py b/panikweb/webstats/management/commands/__init__.py
new file mode 100644 (file)
index 0000000..e69de29
diff --git a/panikweb/webstats/management/commands/read_access_log.py b/panikweb/webstats/management/commands/read_access_log.py
new file mode 100644 (file)
index 0000000..768d2db
--- /dev/null
@@ -0,0 +1,53 @@
+from ipaddress import ip_address
+import re
+
+import dateutil.parser
+
+from django.conf import settings
+from django.core.management.base import BaseCommand
+
+from emissions.models import SoundFile
+from panikweb.webstats.models import PodcastLogLine
+
+
+class Command(BaseCommand):
+    """Record podcast downloads found in the web server access log.
+
+    Every GET request for an ``.ogg`` or ``.mp3`` file below
+    ``/media/sounds/`` that can be linked to a ``SoundFile`` is stored as a
+    ``PodcastLogLine``, with the client address anonymised.
+    """
+
+    def handle(self, *args, **kwargs):
+        """Read ``settings.ACCESS_LOG_FILENAME`` and store matching requests.
+
+        A request is skipped when the same (ip, path, user agent) combination
+        was already kept less than an hour earlier in the log. Requests whose
+        path cannot be mapped to an existing ``SoundFile`` are skipped too.
+        """
+        regex = re.compile(
+            r'([(a-f\d\.\:)]+) - - \[(.*?)\] "GET /media/sounds/(.*?) HTTP/..." \d+ \d+ ".*?" "(.*?)"'
+        )
+        # time of the last kept request for each (ip, path, user agent)
+        seen = {}
+        with open(settings.ACCESS_LOG_FILENAME) as fd:
+            for line in fd:
+                match = regex.match(line)
+                if not match:
+                    continue
+                ip, date, path, user_agent = match.groups()
+                if not path.endswith(('.ogg', '.mp3')):
+                    continue
+                # the first colon is turned into a space before parsing;
+                # presumably the log date looks like
+                # "13/Dec/2020:14:58:07 +0100" and this separates the date
+                # from the time -- confirm against the server log format.
+                log_datetime = dateutil.parser.parse(date.replace(':', ' ', 1))
+                key = (ip, path, user_agent)
+                previous_sighting = seen.get(key)
+                if previous_sighting and (log_datetime - previous_sighting).total_seconds() < 3600:
+                    # don't record requests that already happened less than an
+                    # hour ago.
+                    continue
+                seen[key] = log_datetime
+                try:
+                    # the third-to-last "_"-separated part of the path is
+                    # used as the sound file id.
+                    soundfile = SoundFile.objects.get(id=path.split('_')[-3])
+                except (IndexError, ValueError, SoundFile.DoesNotExist):
+                    # IndexError: fewer than three "_"-separated parts;
+                    # ValueError: the part is not a valid id. Paths come from
+                    # arbitrary client requests, so a malformed one must not
+                    # abort the whole import.
+                    continue
+
+                PodcastLogLine.objects.get_or_create(
+                    timestamp=log_datetime,
+                    ip=self.anonymise_ip(ip),
+                    path=path,
+                    soundfile=soundfile,
+                    user_agent=user_agent,
+                )
+
+    def anonymise_ip(self, ip):
+        """Return ``ip`` as a string with its host part zeroed.
+
+        IPv4 addresses keep their first three bytes (255.255.255.0 mask) and
+        IPv6 addresses keep their first eight bytes
+        (ffff:ffff:ffff:ffff:: mask).
+
+        Raises ``ValueError`` if ``ip`` is not a valid IP address.
+        """
+        address = ip_address(ip)
+        if address.version == 4:  # apply 255.255.255.0 mask
+            address = ip_address(address.packed[:3] + b'\0')
+        elif address.version == 6:  # apply ffff:ffff:ffff:ffff:0000:0000:0000:0000 mask
+            address = ip_address(address.packed[:8] + b'\0' * 8)
+        return str(address)
diff --git a/panikweb/webstats/migrations/0001_initial.py b/panikweb/webstats/migrations/0001_initial.py
new file mode 100644 (file)
index 0000000..c1ed310
--- /dev/null
@@ -0,0 +1,30 @@
+# -*- coding: utf-8 -*-
+# Generated by Django 1.11.29 on 2020-12-13 15:18
+from __future__ import unicode_literals
+
+from django.db import migrations, models
+import django.db.models.deletion
+
+
+class Migration(migrations.Migration):
+    """Initial migration of the webstats app: creates the PodcastLogLine model."""
+
+    initial = True
+
+    # the soundfile foreign key targets emissions.SoundFile
+    dependencies = [
+        ('emissions', '0015_auto_20200404_1510'),
+    ]
+
+    operations = [
+        migrations.CreateModel(
+            name='PodcastLogLine',
+            fields=[
+                (
+                    'id',
+                    models.AutoField(
+                        auto_created=True,
+                        primary_key=True,
+                        serialize=False,
+                        verbose_name='ID',
+                    ),
+                ),
+                ('timestamp', models.DateTimeField()),
+                ('ip', models.GenericIPAddressField()),
+                ('path', models.CharField(max_length=255)),
+                ('user_agent', models.CharField(max_length=255)),
+                ('is_bot', models.NullBooleanField()),
+                (
+                    'soundfile',
+                    models.ForeignKey(
+                        null=True,
+                        on_delete=django.db.models.deletion.SET_NULL,
+                        to='emissions.SoundFile',
+                    ),
+                ),
+            ],
+        ),
+    ]
diff --git a/panikweb/webstats/migrations/__init__.py b/panikweb/webstats/migrations/__init__.py
new file mode 100644 (file)
index 0000000..e69de29
diff --git a/panikweb/webstats/models.py b/panikweb/webstats/models.py
new file mode 100644 (file)
index 0000000..2b19f83
--- /dev/null
@@ -0,0 +1,10 @@
+from django.db import models
+
+
+class PodcastLogLine(models.Model):
+    """A single sound file request taken from the web server access log."""
+
+    # date and time of the request
+    timestamp = models.DateTimeField()
+    # client address (the read_access_log command stores it anonymised)
+    ip = models.GenericIPAddressField()
+    # requested path, relative to /media/sounds/
+    path = models.CharField(max_length=255)
+    # sound file the request was matched to; set to NULL if it gets deleted
+    soundfile = models.ForeignKey(
+        'emissions.SoundFile',
+        null=True,
+        on_delete=models.SET_NULL,
+    )
+    # User-Agent header of the request
+    user_agent = models.CharField(max_length=255)
+    # nullable flag; the read_access_log command does not set it
+    is_bot = models.NullBooleanField()