Merge pull request #74 from i-dot-ai/fix-tests

Fix flaky tests
i-dot-ai · Apr 11, 2024 · b68deff · b68deff
2 parents 47f37e6 + 65be7d6
commit b68deff
Show file tree

Hide file tree

Showing 12 changed files with 155 additions and 119 deletions.
diff --git a/Makefile b/Makefile
@@ -1,9 +1,12 @@
 -include .env
-export
+export AWS_ACCOUNT_ID
+export AWS_REGION
+export ECR_REPO_NAME
+export APP_NAME
 
 .PHONY: help
-help: ## Show this help
-	@grep -E '^[a-zA-Z\.\-\_]+:.*?## .*$$' $(MAKEFILE_LIST) | awk 'BEGIN {FS = ":.*?## "}; {printf "\033[36m%-30s\033[0m %s\n", $$1, $$2}'
+# Print every annotated target. `grep -E` replaces the obsolescent `egrep`
+# wrapper, which newer GNU grep releases warn about on each invocation.
+help:     ## Show this help.
+	@grep -hE '\s##\s' $(MAKEFILE_LIST) | awk 'BEGIN {FS = ":.*?## "}; {printf "\033[36m  %-30s\033[0m %s\n", $$1, $$2}'
 
 ## Schema documentation
 consultation_analyser/consultations/public_schema.py: consultation_analyser/consultations/public_schema/public_schema.yaml

diff --git a/consultation_analyser/consultations/dummy_data.py b/consultation_analyser/consultations/dummy_data.py
@@ -19,7 +19,16 @@ def __init__(self, responses=10, **options):
 
         consultation = ConsultationFactory(**options)
         section = SectionFactory(name="Base section", consultation=consultation)
-        questions = [QuestionFactory(question=q, section=section) for q in FakeConsultationData().all_questions()]
+        questions = [
+            QuestionFactory(
+                text=q["text"],
+                slug=q["slug"],
+                multiple_choice_options=q.get("multiple_choice_options", None),
+                has_free_text=q["has_free_text"],
+                section=section,
+            )
+            for q in FakeConsultationData().all_questions()
+        ]
         for r in range(responses):
             response = ConsultationResponseFactory(consultation=consultation)
             _answers = [AnswerFactory(question=q, consultation_response=response) for q in questions]

diff --git a/consultation_analyser/consultations/jinja2/show_question.html b/consultation_analyser/consultations/jinja2/show_question.html
@@ -10,27 +10,27 @@
       <h1 class="govuk-heading-l">{{ page_title }}</h1>
       <p class="govuk-body">{{ question.text }}</p>
 
-      {% if multiple_choice and total_responses %}
+      {% if question.multiple_choice_options %}
         <dl class="govuk-summary-list">
-          {% for item in multiple_choice %}
+          {% for response in question.multiple_choice_response_counts() %}
             <div class="govuk-summary-list__row">
               <dt class="govuk-summary-list__key">
-                {{ item.label }}
+                {{ response.answer }}
               </dt>
               <dd class="govuk-summary-list__value">
-                {{ ((item.count / total_responses) * 100)|round|int }}%
+                {{ response.percent }}%
               </dd>
             </div>
           {% endfor %}
         </dl>
       {% endif %}
     </div>
 
-    {% if multiple_choice and total_responses %}
+    {% if question.multiple_choice_options %}
       <div class="govuk-grid-column-one-third govuk-!-padding-0">
         <donut-chart>
-          {% for item in multiple_choice %}
-            <chart-item data-label="{{ item.label }}" data-count="{{ ((item.count / total_responses) * 100)|round|int }}"></chart-item>
+          {% for response in question.multiple_choice_response_counts() %}
+            <chart-item data-label="{{ response.answer }}" data-count="{{ response.percent }}"></chart-item>
           {% endfor %}
         </donut-chart>
       </div>

diff --git a/consultation_analyser/consultations/ml_pipeline.py b/consultation_analyser/consultations/ml_pipeline.py
@@ -31,7 +31,9 @@ def get_topic_model(answers_list_with_embeddings: List[Dict[str, Union[UUID, str
     embeddings_list = [answer["embedding"] for answer in answers_list_with_embeddings]
     embeddings = np.array(embeddings_list)
     # Set random_state so that we can reproduce the results
-    umap_model = UMAP(n_neighbors=15, n_components=5, min_dist=0.0, metric="cosine", random_state=RANDOM_STATE)
+    umap_model = UMAP(
+        n_neighbors=15, n_components=5, min_dist=0.0, metric="cosine", n_jobs=1, random_state=RANDOM_STATE
+    )
     hdbscan_model = HDBSCAN(
         min_cluster_size=3, metric="euclidean", cluster_selection_method="eom", prediction_data=True
     )

diff --git a/consultation_analyser/consultations/models.py b/consultation_analyser/consultations/models.py
@@ -1,4 +1,7 @@
 import uuid
+from collections import Counter
+from dataclasses import dataclass
+from functools import reduce
 
 from django.db import models
 
@@ -45,6 +48,30 @@ class Question(UUIDPrimaryKeyModel, TimeStampedModel):
     multiple_choice_options = models.JSONField(null=True)
     section = models.ForeignKey(Section, on_delete=models.CASCADE)
 
+    @dataclass
+    class MultipleChoiceResponseCount:
+        """Tally of one multiple-choice option across a question's answers."""
+
+        answer: str  # the option value as stored on the answers
+        count: int  # how many times the option was selected
+        percent: float  # share of all selections, rounded to a whole number
+
+    def multiple_choice_response_counts(self) -> list[MultipleChoiceResponseCount]:
+        """Count how often each multiple-choice option was selected for this question.
+
+        Returns an empty list when the question has no multiple-choice options.
+        Options that nobody selected do not appear in the result.
+        """
+        if not self.multiple_choice_options:
+            return []
+
+        # An answer may have no selections recorded (None); treat that as an
+        # empty list rather than failing on `list + None`.
+        responses: list = reduce(
+            lambda resps, answer: resps + (answer.multiple_choice_responses or []),
+            self.answer_set.all(),
+            [],
+        )
+        counter = Counter(responses)
+
+        # this does not support more than one choice per response
+        total_response_count = len(responses)
+        return [
+            self.MultipleChoiceResponseCount(
+                answer=answer, count=count, percent=round((count / total_response_count) * 100)
+            )
+            for answer, count in counter.items()
+        ]
+
     class Meta(UUIDPrimaryKeyModel.Meta, TimeStampedModel.Meta):
         constraints = [
             models.UniqueConstraint(fields=["slug", "section"], name="unique_question_section"),

diff --git a/consultation_analyser/consultations/urls.py b/consultation_analyser/consultations/urls.py
@@ -1,6 +1,6 @@
 from django.urls import path
 
-from .views import consultations, pages, questions, schema
+from .views import consultations, pages, questions, responses, schema
 
 urlpatterns = [
     path("", pages.home),
@@ -14,7 +14,8 @@
     ),
     path(
         "consultations/<str:consultation_slug>/sections/<str:section_slug>/responses/<str:question_slug>",
-        questions.show_responses,
+        responses.show,
+        name="show_question_responses",
     ),
     path("batch-example", pages.batch_example, name="batch_example"),
 ]
diff --git a/consultation_analyser/consultations/views/questions.py b/consultation_analyser/consultations/views/questions.py
@@ -16,66 +16,10 @@ def show(request: HttpRequest, consultation_slug: str, section_slug: str, questi
     total_responses = models.Answer.objects.filter(question=question).count()
     highest_theme_count = themes_for_question.aggregate(Max("answer_count"))["answer_count__max"]
 
-    # Get closed question responses (if the question has any)
-    multiple_choice = []
-    if question.multiple_choice_options:
-        multiple_choice = [
-            {
-                "label": option,
-                "count": models.Answer.objects.filter(multiple_choice_responses=option, question=question).count(),
-            }
-            for option in question.multiple_choice_options
-        ]
-
     context = {
         "question": question,
         "themes": themes_for_question,
         "highest_theme_count": highest_theme_count,
         "total_responses": total_responses,
-        "multiple_choice": multiple_choice,
     }
     return render(request, "show_question.html", context)
-
-
-def get_applied_filters(request: HttpRequest):
-    return {
-        "keyword": request.GET.get("keyword", ""),
-        "theme": request.GET.get("theme", "All"),
-        "opinion": request.GET.get("opinion", "All"),
-    }
-
-
-def get_filtered_responses(question: models.Question, applied_filters):
-    queryset = models.Answer.objects.filter(question=question, free_text__icontains=applied_filters["keyword"])
-    if applied_filters["theme"] != "All" and applied_filters["theme"] != "No theme":
-        queryset = queryset.filter(theme=applied_filters["theme"])
-    # TO DO: handle answers with "No theme"
-    if applied_filters["opinion"] != "All":
-        queryset = queryset.filter(multiple_choice_responses=applied_filters["opinion"])
-    return queryset
-
-
-def show_responses(request: HttpRequest, consultation_slug: str, section_slug: str, question_slug: str):
-    question = models.Question.objects.get(
-        slug=question_slug, section__slug=section_slug, section__consultation__slug=consultation_slug
-    )
-    themes_for_question = models.Theme.objects.filter(answer__question=question)
-    total_responses = models.Answer.objects.filter(question=question).count()
-    applied_filters = get_applied_filters(request)
-    responses = get_filtered_responses(question, applied_filters)
-
-    # pagination
-    pagination = Paginator(responses, 5)
-    page_index = request.GET.get("page", "1")
-    current_page = pagination.page(page_index)
-    paginated_responses = current_page.object_list
-
-    context = {
-        "question": question,
-        "responses": paginated_responses,
-        "total_responses": total_responses,
-        "applied_filters": applied_filters,
-        "themes_for_question": themes_for_question,
-        "pagination": current_page,
-    }
-    return render(request, "show_responses.html", context)
diff --git a/consultation_analyser/consultations/views/responses.py b/consultation_analyser/consultations/views/responses.py
@@ -0,0 +1,50 @@
+from django.core.paginator import Paginator
+from django.http import HttpRequest
+from django.shortcuts import render
+
+from .. import models
+
+
+def get_applied_filters(request: HttpRequest):
+    """Read the keyword, theme and opinion filters from the query string, with defaults."""
+    query = request.GET
+    keyword = query.get("keyword", "")
+    theme = query.get("theme", "All")
+    opinion = query.get("opinion", "All")
+    return {"keyword": keyword, "theme": theme, "opinion": opinion}
+
+
+def get_filtered_responses(question: models.Question, applied_filters):
+    """Return the question's answers narrowed by the keyword, theme and opinion filters."""
+    answers = models.Answer.objects.filter(question=question, free_text__icontains=applied_filters["keyword"])
+
+    # "All" and "No theme" both leave the theme unfiltered for now.
+    theme_filter = applied_filters["theme"]
+    if theme_filter not in ("All", "No theme"):
+        answers = answers.filter(theme=theme_filter)
+    # TO DO: handle answers with "No theme"
+
+    opinion_filter = applied_filters["opinion"]
+    if opinion_filter != "All":
+        answers = answers.filter(multiple_choice_responses=opinion_filter)
+    return answers
+
+
+def show(request: HttpRequest, consultation_slug: str, section_slug: str, question_slug: str):
+    """Render a filterable, paginated list of the responses to one question.
+
+    The question is looked up by its slug within the given section and
+    consultation; filters and the page number come from the query string.
+    """
+    question = models.Question.objects.get(
+        slug=question_slug, section__slug=section_slug, section__consultation__slug=consultation_slug
+    )
+    themes_for_question = models.Theme.objects.filter(answer__question=question)
+    total_responses = models.Answer.objects.filter(question=question).count()
+    applied_filters = get_applied_filters(request)
+    responses = get_filtered_responses(question, applied_filters)
+
+    # pagination
+    pagination = Paginator(responses, 5)
+    page_index = request.GET.get("page", "1")
+    # get_page() falls back to the first page for a non-numeric value and to the
+    # last page for an out-of-range one, instead of raising on a bad `page` param.
+    current_page = pagination.get_page(page_index)
+    paginated_responses = current_page.object_list
+
+    context = {
+        "question": question,
+        "responses": paginated_responses,
+        "total_responses": total_responses,
+        "applied_filters": applied_filters,
+        "themes_for_question": themes_for_question,
+        "pagination": current_page,
+    }
+
+    return render(request, "show_responses.html", context)
diff --git a/pyproject.toml b/pyproject.toml
@@ -11,6 +11,10 @@ readme = "README.md"
 
 [tool.pytest.ini_options]
 DJANGO_SETTINGS_MODULE = "consultation_analyser.settings.test"
+filterwarnings = [
+    "ignore::django.utils.deprecation.RemovedInDjango51Warning:.*compressor.*",
+    "ignore::pydantic.warnings.PydanticDeprecatedSince20",
+]
 
 [tool.poetry.dependencies]
 python = "^3.12"

diff --git a/tests/factories.py b/tests/factories.py
@@ -67,8 +67,6 @@ def with_question(section, creation_strategy, value, **kwargs):
             QuestionFactory(
                 section=section,
                 with_answer=kwargs.get("with_answer"),
-                with_multiple_choice=kwargs.get("with_multiple_choice"),
-                with_free_text=kwargs.get("with_free_text"),
             )
 
 
@@ -77,36 +75,18 @@ class Meta:
         model = models.Question
         skip_postgeneration_save = True
 
-    text = factory.LazyAttribute(lambda o: o.question["text"])
-    slug = factory.LazyAttribute(lambda o: o.question["slug"])
-    multiple_choice_options = factory.LazyAttribute(lambda o: o.question["multiple_choice_options"])
-    has_free_text = factory.LazyAttribute(lambda o: o.question["has_free_text"])
+    # Evaluate the fakes lazily so each generated question gets its own text and
+    # slug. Calling faker directly here would run once at class definition and
+    # bake the same value into every instance, so two questions in one section
+    # would share a slug.
+    text = factory.LazyFunction(faker.sentence)
+    slug = factory.LazyFunction(faker.slug)
+    # Build a fresh list per instance rather than sharing one mutable default.
+    multiple_choice_options = factory.LazyFunction(lambda: ["Yes", "No", "Maybe"])
+    has_free_text = True
     section = factory.SubFactory(SectionFactory)
 
-    class Params:
-        question = FakeConsultationData().question()
-
     @factory.post_generation
     def with_answer(question, creation_strategy, value, **kwargs):
         if value is True:
             answer = AnswerFactory(question=question)
             answer.save()
 
-    @factory.post_generation
-    def with_multiple_choice(question, creation_strategy, value, **kwargs):
-        if value is True and question.multiple_choice_options is None:
-            question.multiple_choice_options = default_multiple_choice_options
-            question.save()
-
-    @factory.post_generation
-    def with_free_text(question, creation_strategy, value, **kwargs):
-        if value is True:
-            answer = AnswerFactory(
-                question=question,
-                with_free_text=kwargs.get("with_free_text"),
-            )
-            answer.save()
-
 
 class ConsultationResponseFactory(factory.django.DjangoModelFactory):
     consultation = factory.SubFactory(ConsultationFactory)
@@ -130,20 +110,12 @@ class Meta:
         model = models.Answer
         skip_postgeneration_save = True
 
-    free_text = factory.LazyAttribute(
-        lambda o: FakeConsultationData().get_free_text_answer(o.question.slug) if o.question.has_free_text else None
-    )
+    free_text = factory.LazyAttribute(lambda o: faker.sentence() if o.question.has_free_text else None)
 
     question = factory.SubFactory(QuestionFactory)
     consultation_response = factory.SubFactory(ConsultationResponseFactory)
     theme = factory.LazyAttribute(lambda o: ThemeFactory() if o.free_text else None)
 
     multiple_choice_responses = factory.LazyAttribute(
-        lambda o: random.choice(o.question.multiple_choice_options) if o.question.multiple_choice_options else None
+        lambda o: [random.choice(o.question.multiple_choice_options)] if o.question.multiple_choice_options else None
     )
-
-    @factory.post_generation
-    def with_free_text(answer, creation_strategy, value, **kwargs):
-        if answer.free_text is None:
-            answer.free_text = "This is a sample free-text response"
-            answer.save()
diff --git a/tests/integration/test_question_pages.py b/tests/integration/test_question_pages.py
@@ -3,34 +3,32 @@
 
 import pytest
 
-from tests.factories import ConsultationFactory
+from tests.factories import AnswerFactory, ConsultationFactory
 
 
 @pytest.mark.django_db
 def test_get_question_summary_page(django_app):
-    consultation = ConsultationFactory(
-        with_question=True, with_question__with_answer=True, with_question__with_multiple_choice=True
-    )
+    consultation = ConsultationFactory(with_question=True, with_question__with_free_text=True)
     section = consultation.section_set.first()
     question = section.question_set.first()
-    answer = question.answer_set.first()
+
+    AnswerFactory(multiple_choice_responses=["Yes"], question=question)
+    AnswerFactory(multiple_choice_responses=["Yes"], question=question)
+    AnswerFactory(multiple_choice_responses=["No"], question=question)
+    AnswerFactory(multiple_choice_responses=["Maybe"], question=question)
+
     question_summary_url = f"/consultations/{consultation.slug}/sections/{section.slug}/questions/{question.slug}"
     question_page = django_app.get(question_summary_url)
     page_content = html.unescape(str(question_page.content))
 
     assert question.text in page_content
+
+    answer = question.answer_set.first()
     assert answer.theme.summary in page_content
 
     for keyword in answer.theme.keywords:
         assert keyword in page_content
 
-    if question.multiple_choice_options:
-        for option in question.multiple_choice_options:
-            assert option in page_content
-
-    percentages = re.findall(r"\d+%", page_content)
-    count = 0
-    for percentage in percentages:
-        percentage_num = float(percentage.replace("%", ""))
-        count += percentage_num
-    assert count == 100
+    assert re.search(r"Yes\s+50%", question_page.html.text)
+    assert re.search(r"No\s+25%", question_page.html.text)
+    assert re.search(r"Maybe\s+25%", question_page.html.text)