From 66dab70b93c0acc9787a62a909636cb97d5e0bfe Mon Sep 17 00:00:00 2001 From: =?utf8?q?Fr=C3=A9d=C3=A9ric=20P=C3=A9ters?= Date: Thu, 21 Jul 2022 23:05:01 +0200 Subject: [PATCH] run full text search on a plain text copy of the text attribute --- .../phyll/migrations/0004_note_plain_text.py | 18 ++++++++++++++ .../phyll/migrations/0005_set_plain_text.py | 24 +++++++++++++++++++ chloro/phyll/models.py | 9 +++++++ chloro/phyll/views.py | 2 +- 4 files changed, 52 insertions(+), 1 deletion(-) create mode 100644 chloro/phyll/migrations/0004_note_plain_text.py create mode 100644 chloro/phyll/migrations/0005_set_plain_text.py diff --git a/chloro/phyll/migrations/0004_note_plain_text.py b/chloro/phyll/migrations/0004_note_plain_text.py new file mode 100644 index 0000000..c756cb1 --- /dev/null +++ b/chloro/phyll/migrations/0004_note_plain_text.py @@ -0,0 +1,18 @@ +# Generated by Django 3.2.13 on 2022-07-21 20:57 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ('phyll', '0003_note_published'), + ] + + operations = [ + migrations.AddField( + model_name='note', + name='plain_text', + field=models.TextField(blank=True, null=True), + ), + ] diff --git a/chloro/phyll/migrations/0005_set_plain_text.py b/chloro/phyll/migrations/0005_set_plain_text.py new file mode 100644 index 0000000..0e08f4c --- /dev/null +++ b/chloro/phyll/migrations/0005_set_plain_text.py @@ -0,0 +1,24 @@ +# Generated by Django 3.2.13 on 2022-07-21 20:57 + +import html + +from django.db import migrations +from django.utils.html import strip_tags + + +def set_plain_text(apps, schema_editor): + Note = apps.get_model('phyll', 'Note') + for note in Note.objects.all(): + note.plain_text = html.unescape(strip_tags(note.text)) + note.save(update_fields=['plain_text']) + + +class Migration(migrations.Migration): + + dependencies = [ + ('phyll', '0004_note_plain_text'), + ] + + operations = [ + migrations.RunPython(set_plain_text, 
reverse_code=migrations.RunPython.noop), + ] diff --git a/chloro/phyll/models.py b/chloro/phyll/models.py index 3723f6b..b72bfc5 100644 --- a/chloro/phyll/models.py +++ b/chloro/phyll/models.py @@ -14,7 +14,10 @@ # You should have received a copy of the GNU Affero General Public License # along with this program. If not, see . +import html + from django.db import models, transaction +from django.utils.html import strip_tags from django.utils.translation import ugettext_lazy as _ from taggit.managers import TaggableManager @@ -25,6 +28,7 @@ class Note(models.Model): title = models.CharField(_('Title'), max_length=150) slug = models.SlugField(_('Slug'), max_length=150) text = RichTextField(_('Text'), blank=True, null=True) + plain_text = models.TextField(blank=True, null=True) tags = TaggableManager(_('Tags'), blank=True) published = models.BooleanField(_('Published'), default=True) creation_timestamp = models.DateTimeField(auto_now_add=True) @@ -40,3 +44,8 @@ class Note(models.Model): if self.tags.filter(name='lang-en').exists(): return 'en' return 'fr' + + def save(self, *args, **kwargs): + if kwargs.get('update_fields') is None or 'plain_text' in kwargs.get('update_fields'): + self.plain_text = html.unescape(strip_tags(self.text)) + return super().save(*args, **kwargs) diff --git a/chloro/phyll/views.py b/chloro/phyll/views.py index 829020c..0a7b173 100644 --- a/chloro/phyll/views.py +++ b/chloro/phyll/views.py @@ -191,7 +191,7 @@ def ajax_new_page(request, *args, **kwargs): def ajax_search(request, *args, **kwargs): - vector = SearchVector('title', weight='A') + SearchVector('text', weight='B') + vector = SearchVector('title', weight='A') + SearchVector('plain_text', weight='B') query = SearchQuery(request.GET.get('q', ''), config='french') results = ( Note.objects.annotate(rank=SearchRank(vector, query)).filter(rank__gte=0.3).order_by('-rank')[:10] -- 2.39.2