Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

chore(feat): optimise relatedArticles logic #208

Closed
wants to merge 11 commits into from
Closed
47 changes: 46 additions & 1 deletion server/apps/research/admin/article_admin.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,40 @@
from django.contrib import admin
from django import forms
from apps.research.models import Article, ArticleSlugHistory
from apps.research.models import Article, ArticleSlugHistory, RelatedArticle
from tinymce.widgets import TinyMCE
from .slug_history import current_slug_history



class RelatedArticleInline(admin.TabularInline):
model = RelatedArticle
fk_name = 'from_article'
extra = 1
max_num = 3
verbose_name = 'Related Article'
verbose_name_plural = 'Related Articles'

def formfield_for_foreignkey(self, db_field, request, **kwargs):
if db_field.name == 'to_article':
# Get the parent object (Article) from the request
obj_id = request.resolver_match.kwargs.get('object_id')
# For new articles (when obj_id is None), show all ready articles
base_queryset = Article.objects.filter(status='ready')

if obj_id:
try:
parent_obj = Article.objects.get(pk=obj_id)
# Exclude self-reference and articles that already have a relationship
base_queryset = base_queryset.exclude(
id=parent_obj.id
).exclude(
related_from__to_article=parent_obj
)
except Article.DoesNotExist:
pass

kwargs['queryset'] = base_queryset
return super().formfield_for_foreignkey(db_field, request, **kwargs)
class ArticleForm(forms.ModelForm):
class Meta:
model = Article
Expand All @@ -19,10 +48,26 @@ def __init__(self, *args, **kwargs):
class ArticleAdmin(admin.ModelAdmin):
"""Admin interface for the Article model."""
form = ArticleForm
inlines = [RelatedArticleInline]

def current_slug_history(self, obj):
return current_slug_history(obj)
current_slug_history.short_description = 'Slug Change History'

def get_inlines(self, request, obj):
# Allow inlines for both new and existing articles
return [RelatedArticleInline]

def save_related(self, request, form, formsets, change):
"""Handle saving related articles for both new and existing articles."""
super().save_related(request, form, formsets, change)

# Process related articles from inline formsets
for formset in formsets:
if isinstance(formset, RelatedArticleInline):
# The related articles will be saved automatically through the formset
pass

fieldsets = [
('Article Details', {'fields': ['title', 'slug', 'authors', 'acknowledgement', 'categories', 'thumb', 'content', 'summary', 'status', 'scheduled_publish_time']}),
('Sponsorship Details', {'fields': ['is_sponsored', 'sponsor_color', 'sponsor_text_color']}),
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
# Generated by Django 5.0.8 on 2024-12-12 04:31

import django.db.models.deletion
from django.db import migrations, models


class Migration(migrations.Migration):

dependencies = [
('research', '0014_alter_article_authors'),
]

operations = [
migrations.CreateModel(
name='RelatedArticle',
fields=[
('id', models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
('created_at', models.DateTimeField(auto_now_add=True)),
('from_article', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, related_name='related_from', to='research.article')),
('to_article', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, related_name='related_to', to='research.article')),
],
),
migrations.AddField(
model_name='article',
name='related_articles',
field=models.ManyToManyField(blank=True, related_name='referenced_by', through='research.RelatedArticle', to='research.article'),
),
migrations.AddConstraint(
model_name='relatedarticle',
constraint=models.CheckConstraint(check=models.Q(('from_article', models.F('to_article')), _negated=True), name='prevent_self_reference'),
),
losndu marked this conversation as resolved.
Show resolved Hide resolved
migrations.AlterUniqueTogether(
name='relatedarticle',
unique_together={('from_article', 'to_article')},
),
]
2 changes: 1 addition & 1 deletion server/apps/research/models/__init__.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
from .category import Category
from .author import Author
from .article import Article, ArticleSlugHistory
from .article import Article, ArticleSlugHistory, RelatedArticle
188 changes: 153 additions & 35 deletions server/apps/research/models/article.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from django.db import models
from django.utils.text import slugify
from django.core.exceptions import ValidationError
from apps.common.models import BaseModel
from apps.research.managers import ArticleObjects
from .category import Category
Expand All @@ -12,28 +13,41 @@
import uuid
from django.db import transaction


def get_default_thumb():
return f"{settings.MEDIA_URL}images/2077-Collective.png"


class Article(BaseModel):
"""Model for articles."""

options = (
('draft', 'Draft'),
('ready', 'Ready'),
("draft", "Draft"),
("ready", "Ready"),
)

title = models.TextField()
content = HTMLField(blank=True, null=True)
summary = models.TextField(blank=True)
acknowledgement = HTMLField(blank=True, null=True)
authors = models.ManyToManyField(Author, blank=True, related_name='articles')
authors = models.ManyToManyField(Author, blank=True, related_name="articles")
slug = models.SlugField(max_length=255, blank=True, db_index=True)
categories = models.ManyToManyField(Category, blank=True, related_name='articles')
thumb = models.ImageField(upload_to='images/', default=get_default_thumb, blank=True)
categories = models.ManyToManyField(Category, blank=True, related_name="articles")
related_articles = models.ManyToManyField(
"self",
blank=True,
symmetrical=False,
related_name="referenced_by",
through="RelatedArticle",
)
thumb = models.ImageField(
upload_to="images/", default=get_default_thumb, blank=True
)
views = models.PositiveBigIntegerField(default=0)
status = models.CharField(max_length=10, choices=options, default='draft', db_index=True)
scheduled_publish_time = models.DateTimeField(null=True, blank=True, db_index=True)
status = models.CharField(
max_length=10, choices=options, default="draft", db_index=True
)
scheduled_publish_time = models.DateTimeField(null=True, blank=True, db_index=True)
table_of_contents = models.JSONField(default=list, blank=True)
is_sponsored = models.BooleanField(default=False)
sponsor_color = models.CharField(max_length=7, default="#FF0420")
Expand All @@ -43,8 +57,31 @@ class Article(BaseModel):
post_objects = ArticleObjects()

class Meta:
ordering = ('-scheduled_publish_time',)

ordering = ("-scheduled_publish_time",)

def get_related_articles(self):
"""
Get manually selected related articles for this article.
If none exist, fallback to default frontend logic
"""

manual_related_articles = self.related_articles.filter(status="ready").order_by(
"-scheduled_publish_time"
)[:3]

if manual_related_articles.exists():
return manual_related_articles

# Fallback to last 3 articles in the same category

category_articles = (
Article.objects.filter(categories__in=self.categories.all(), status="ready")
.exclude(id=self.id)
.order_by("-scheduled_publish_time")
.distinct()[:3]
)
return category_articles

def calculate_min_read(self):
word_count = len(self.content.split())
words_per_minute = 300 # Average reading speed (words per minute)
Expand All @@ -58,30 +95,38 @@ def __str__(self):

def build_table_of_contents(self):
"""Build the table of contents from the article content."""
soup = BeautifulSoup(self.content, 'html.parser')
headers = soup.find_all(['h1', 'h2', 'h3'])
soup = BeautifulSoup(self.content, "html.parser")
headers = soup.find_all(["h1", "h2", "h3"])

toc = []
stack = [{'level': 0, 'children': toc}]
stack = [{"level": 0, "children": toc}]

for header in headers:
level = int(header.name[1])
title = header.get_text()
header['id'] = slugify(title)
header["id"] = slugify(title)

while level <= stack[-1]['level']:
while level <= stack[-1]["level"]:
stack.pop()

new_item = {'title': title, 'id': header['id'], 'children': []}
stack[-1]['children'].append(new_item)
stack.append({'level': level, 'children': new_item['children']})
new_item = {"title": title, "id": header["id"], "children": []}

stack[-1]["children"].append(new_item)
stack.append({"level": level, "children": new_item["children"]})

self.table_of_contents = toc
self.content = str(soup)

def save(self, *args, **kwargs):
"""Override the save method to generate a unique slug and build table of contents."""
is_new = self.pk is None
temp_related_articles = []

# If this is a new article and there are related articles in the form
if is_new and hasattr(self, "_temp_related_articles"):
temp_related_articles = self._temp_related_articles
delattr(self, "_temp_related_articles")

if not self.slug or self.title_update():
self.slug = self.generate_unique_slug()

Expand All @@ -92,25 +137,45 @@ def save(self, *args, **kwargs):
# Generate new slug first
if not self.slug or self.title_update():
self.slug = self.generate_unique_slug()

# Then check if we need to create slug history
if old_instance.slug and old_instance.slug != self.slug:
# Only create history if the slug actually changed and isn't empty
with transaction.atomic():
ArticleSlugHistory.objects.create(
article=self,
old_slug=old_instance.slug
article=self, old_slug=old_instance.slug
)
except Article.DoesNotExist:
pass
pass

if self.content:
self.build_table_of_contents()

if self.scheduled_publish_time and self.status == 'draft' and timezone.now() >= self.scheduled_publish_time:
self.status = 'ready'

super().save(*args, **kwargs)
if (
self.scheduled_publish_time
and self.status == "draft"
and timezone.now() >= self.scheduled_publish_time
):
self.status = "ready"

losndu marked this conversation as resolved.
Show resolved Hide resolved
with transaction.atomic():
super().save(*args, **kwargs)

# If this was a new article and we had temporary related articles
if is_new and temp_related_articles:
for related_article in temp_related_articles:
RelatedArticle.objects.create(
from_article=self, to_article=related_article
)

def set_temp_related_articles(self, related_articles):
"""
Store related articles temporarily before the initial save.

Args:
related_articles: List of Article instances to be related
"""
self._temp_related_articles = related_articles

def generate_unique_slug(self):
"""Generate a unique slug for the article."""
Expand All @@ -122,28 +187,81 @@ def generate_unique_slug(self):
slug = f"{base_slug}-{num}"
num += 1
return slug

def title_update(self):
"""Check if the title has changed."""
if self.pk: # Only check if the article exists
original = Article.objects.filter(pk=self.pk).only('title').first()
original = Article.objects.filter(pk=self.pk).only("title").first()
if original:
return original.title != self.title
return False


class RelatedArticle(models.Model):
"""Through model for related articles to prevent circular references."""

from_article = models.ForeignKey(
Article, on_delete=models.CASCADE, related_name="related_from"
)
to_article = models.ForeignKey(
Article, on_delete=models.CASCADE, related_name="related_to"
)
created_at = models.DateTimeField(auto_now_add=True)

class Meta:
unique_together = ("from_article", "to_article")
constraints = [
models.CheckConstraint(
check=~models.Q(from_article=models.F("to_article")),
name="prevent_self_reference",
)
]

def clean(self):
# Prevent direct circular references
if (
self.from_article.pk
and RelatedArticle.objects.filter(
from_article=self.to_article, to_article=self.from_article
).exists()
):
raise ValidationError("Circular references detected.")

# Maximum of 3 related articles
if (
self.from_article.pk
and RelatedArticle.objects.filter(from_article=self.from_article).count()
>= 3
):
raise ValidationError("Maximum of 3 related articles allowed.")

def save(self, *args, **kwargs):
with transaction.atomic():
# Only acquire lock if from_article exists in database
if self.from_article.pk:
RelatedArticle.objects.select_for_update().filter(
from_article=self.from_article
).exists()
self.clean()
super().save(*args, **kwargs)

Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue

Ensure atomicity when enforcing constraints in RelatedArticle.save

Using select_for_update().exists() may not effectively lock the necessary rows to prevent race conditions. There's a potential for exceeding the maximum of 3 related articles between checking the count and saving the new RelatedArticle. To ensure data integrity, consider enforcing this constraint at the database level using a CheckConstraint or revising the transaction handling.

Consider adding a database-level constraint:

constraints = [
    models.CheckConstraint(
        check=models.Q(from_article__related_from__count__lte=3),
        name='max_related_articles_per_article',
    ),
    # Existing constraints...
]

Or adjust the transaction to lock the relevant rows properly.


class ArticleSlugHistory(models.Model):
"""Model to track historical slugs for articles."""

id = models.AutoField(primary_key=True)
article = models.ForeignKey('Article', on_delete=models.CASCADE, related_name='slug_history')
article = models.ForeignKey(
"Article", on_delete=models.CASCADE, related_name="slug_history"
)
old_slug = models.SlugField(max_length=255, db_index=True)
created_at = models.DateTimeField(auto_now_add=True)

class Meta:
unique_together = ('article', 'old_slug')
unique_together = ("article", "old_slug")
indexes = [
models.Index(fields=['old_slug']),
models.Index(fields=["old_slug"]),
]
db_table = 'research_articleslughistory' # explicitly set table name
db_table = "research_articleslughistory" # explicitly set table name

def __str__(self):
return f"{self.old_slug} -> {self.article.slug}"
Loading
Loading