Skip to content

Commit

Permalink
Merge pull request #66 from i-dot-ai/feature/theme_model_changes
Browse files Browse the repository at this point in the history
Feature/theme model changes
  • Loading branch information
duncanjbrown authored Apr 15, 2024
2 parents 0495948 + a0f9cfa commit 39098d1
Show file tree
Hide file tree
Showing 6 changed files with 66 additions and 20 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
# Generated by Django 5.0.4 on 2024-04-05 14:40

from django.db import migrations, models


class Migration(migrations.Migration):
    # Schema migration for the "theme" model: swaps out the
    # "unique_up_to_question" constraint and adds/adjusts the calculated
    # columns ("is_outlier", "keywords").

    # Must run after the previous "consultations" migration.
    dependencies = [
        ("consultations", "0002_alter_theme_options_theme_question_and_more"),
    ]

    # Order matters: the existing constraint is dropped first because the
    # replacement added at the end reuses the same name.
    operations = [
        migrations.RemoveConstraint(
            model_name="theme",
            name="unique_up_to_question",
        ),
        # New non-editable boolean flag; rows default to False.
        migrations.AddField(
            model_name="theme",
            name="is_outlier",
            field=models.BooleanField(default=False, editable=False),
        ),
        # "keywords" stays a JSON list but becomes non-editable.
        migrations.AlterField(
            model_name="theme",
            name="keywords",
            field=models.JSONField(default=list, editable=False),
        ),
        # Re-create the constraint under the same name, covering the
        # (label, question) pair.
        migrations.AddConstraint(
            model_name="theme",
            constraint=models.UniqueConstraint(fields=("label", "question"), name="unique_up_to_question"),
        ),
    ]
5 changes: 2 additions & 3 deletions consultation_analyser/consultations/ml_pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,16 +53,15 @@ def get_answers_and_topics(topic_model: BERTopic, answers_list: List[Dict[str, U
# Assign topics to answers
answers_df = topic_model.get_document_info(free_text_responses)
answers_df["id"] = answers_id_list
answers_df = answers_df[["id", "Topic", "Name", "Representation"]]
answers_df = answers_df[["id", "Name"]]
return answers_df


def save_themes_to_answers(answers_topics_df: pd.DataFrame) -> None:
    """Attach a theme to every answer listed in ``answers_topics_df``.

    Each row is read through ``itertuples()``, so the frame must expose an
    ``id`` column (primary key of an ``Answer``) and a ``Name`` column (the
    topic label, e.g. ``"0_m_n"``). Keywords are no longer passed along:
    only the label is handed to ``Answer.save_theme_to_answer``.

    Args:
        answers_topics_df: One row per answer with ``id`` and ``Name`` columns.
    """
    for row in answers_topics_df.itertuples():
        # One lookup per row; presumably raises Answer.DoesNotExist for an
        # unknown id (standard ``objects.get`` behaviour) - confirm if callers
        # rely on it.
        answer = models.Answer.objects.get(id=row.id)
        answer.save_theme_to_answer(theme_label=row.Name)


def save_themes_for_question(question: models.Question) -> None:
Expand Down
18 changes: 14 additions & 4 deletions consultation_analyser/consultations/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -86,17 +86,28 @@ class Meta(UUIDPrimaryKeyModel.Meta, TimeStampedModel.Meta):


class Theme(UUIDPrimaryKeyModel, TimeStampedModel):
    """A topic produced by the topic model for one question.

    ``label`` is the source of truth; ``keywords`` and ``is_outlier`` are
    derived from it every time the instance is saved.
    """

    # Label summarises a topic from a topic model for a given question,
    # formatted as "<topic number>_<keyword>_<keyword>..." (e.g. "0_key_lock").
    label = models.CharField(max_length=256, blank=True)
    summary = models.TextField(blank=True)
    # Duplicates info in Answer model, but needed for uniqueness constraint.
    question = models.ForeignKey(Question, on_delete=models.CASCADE, null=True)

    # Calculated fields - populated in save(), never edited directly.
    keywords = models.JSONField(default=list, editable=False)
    is_outlier = models.BooleanField(default=False, editable=False)

    class Meta:
        constraints = [
            models.UniqueConstraint(fields=["label", "question"], name="unique_up_to_question"),
        ]

    def save(self, *args, **kwargs):
        """Recompute the calculated fields from ``label`` and persist.

        The part of the label before the first underscore is the topic
        number; the remaining underscore-separated parts are the keywords.
        A topic number of "-1" marks the theme as an outlier.
        """
        topic_number, *keywords = self.label.split("_")
        self.keywords = keywords
        self.is_outlier = topic_number == "-1"

        # When the caller restricts the write to specific fields and "label"
        # is among them, the derived columns must be written too, otherwise
        # the database row keeps stale keywords / outlier flag.
        update_fields = kwargs.get("update_fields")
        if update_fields is not None and "label" in update_fields:
            kwargs["update_fields"] = {"keywords", "is_outlier"}.union(update_fields)

        super().save(*args, **kwargs)


class Answer(UUIDPrimaryKeyModel, TimeStampedModel):
multiple_choice_responses = models.JSONField(null=True) # Multiple choice can have more than one response
Expand All @@ -108,12 +119,11 @@ class Answer(UUIDPrimaryKeyModel, TimeStampedModel):
class Meta(UUIDPrimaryKeyModel.Meta, TimeStampedModel.Meta):
pass

def save_theme_to_answer(self, theme_label):
    """Link this answer to the theme with ``theme_label`` for its question.

    The theme is fetched, or created if it does not exist yet, by the
    (question, label) pair, then stored on ``self.theme`` and the answer
    is saved.

    Args:
        theme_label: Topic label identifying the theme within the question.
    """
    # Look up by question + label only, so repeated calls with the same
    # label reuse the existing theme instead of creating a duplicate.
    theme, _ = Theme.objects.get_or_create(
        question=self.question,
        label=theme_label,
    )
    self.theme = theme
    self.save()
12 changes: 10 additions & 2 deletions tests/factories.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,15 @@
default_multiple_choice_options = ["Yes", "No", "Not sure"]


def generate_dummy_topic_label():
    """Build a fake topic label of the form "<number>_<word>_<word>...".

    The words come from a lower-cased faker sentence with the surrounding
    full stops stripped; the number is a random integer from -1 to 3
    inclusive.
    """
    sentence_body = faker.sentence().lower().strip(".")
    keyword_part = "_".join(sentence_body.split(" "))
    number = random.randint(-1, 3)
    return f"{number}_{keyword_part}"


class FakeConsultationData:
def __init__(self):
with open("./tests/examples/questions.yml", "r") as f:
Expand Down Expand Up @@ -100,9 +109,8 @@ class Meta:
model = models.Theme

# TODO - may need to be changed once ML pipeline is in
label = faker.sentence()
label = factory.LazyAttribute(lambda _o: generate_dummy_topic_label())
summary = f"Summary: {label}"
keywords = label.lower().strip(".").split(" ")


class AnswerFactory(factory.django.DjangoModelFactory):
Expand Down
3 changes: 0 additions & 3 deletions tests/unit/test_ml_pipeline_functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,13 +28,10 @@ def test_save_themes_to_answers():
answers_df = pd.DataFrame(
{
"id": [answer1.id, answer2.id, answer3.id],
"Topic": [-1, 0, 0],
"Name": ["-1_x_y", "0_m_n", "0_m_n"],
"Representation": [["x", "y"], ["m", "n"], ["m", "n"]],
}
)
ml_pipeline.save_themes_to_answers(answers_df)
themes_qs = models.Theme.objects.filter(question=question)
assert themes_qs.count() == 2
assert "-1_x_y" in themes_qs.values_list("label", flat=True)
assert themes_qs.get(label="0_m_n").keywords == ["m", "n"]
18 changes: 10 additions & 8 deletions tests/unit/test_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,21 +4,23 @@
from tests import factories


@pytest.mark.parametrize(
    "label,expected_keywords,is_outlier",
    [
        ("0_key_lock", ["key", "lock"], False),
        ("-1_dog_cat", ["dog", "cat"], True),
    ],
)
@pytest.mark.django_db
def test_save_theme_to_answer(label, expected_keywords, is_outlier):
    """Saving a theme by label derives keywords/outlier flag and is idempotent."""
    question = factories.QuestionFactory(has_free_text=True)
    answer = factories.AnswerFactory(question=question, theme=None)

    # Check theme created and saved to answer
    answer.save_theme_to_answer(theme_label=label)
    theme = models.Theme.objects.get(keywords=expected_keywords, label=label)
    assert theme.keywords == expected_keywords
    assert theme.label == label
    assert theme.is_outlier == is_outlier
    assert answer.theme.label == label

    # Check no duplicate created
    answer.save_theme_to_answer(theme_label=label)
    themes_qs = models.Theme.objects.filter(keywords=expected_keywords, label=label)
    assert themes_qs.count() == 1


Expand Down

0 comments on commit 39098d1

Please sign in to comment.