diff --git a/backend/models.py b/backend/models.py
index c3ab27ba..249638a9 100644
--- a/backend/models.py
+++ b/backend/models.py
@@ -737,9 +737,18 @@ def assign_annotator_task(self, user, doc_type=DocumentType.ANNOTATION):
         Annotation task performs an extra check for remaining annotation tasks (num_annotation_tasks_remaining);
         testing and training do not do this check, as the annotator must annotate all documents.
         """
-        if (DocumentType.ANNOTATION and self.num_annotation_tasks_remaining > 0) or \
-                DocumentType.TEST or DocumentType.TRAINING:
-            for doc in self.documents.filter(doc_type=doc_type).order_by('?'):
+        if (doc_type == DocumentType.ANNOTATION and self.num_annotation_tasks_remaining > 0) or \
+                doc_type == DocumentType.TEST or doc_type == DocumentType.TRAINING:
+            if doc_type == DocumentType.TEST or doc_type == DocumentType.TRAINING:
+                queryset = self.documents.filter(doc_type=doc_type).order_by('?')
+            else:
+                # Prefer documents which have fewer complete or pending annotations, in order to
+                # spread the annotators as evenly as possible across the available documents
+                queryset = self.documents.filter(doc_type=doc_type).alias(
+                    occupied_annotations=Count("annotations", filter=Q(annotations__status=Annotation.COMPLETED)
+                                               | Q(annotations__status=Annotation.PENDING))
+                ).order_by('occupied_annotations', '?')
+            for doc in queryset:
                 # Check that annotator hasn't annotated and that
                 # doc hasn't been fully annotated
                 if doc.user_can_annotate_document(user):
diff --git a/backend/tests/test_rpc_endpoints.py b/backend/tests/test_rpc_endpoints.py
index 096f3e2f..45db135e 100644
--- a/backend/tests/test_rpc_endpoints.py
+++ b/backend/tests/test_rpc_endpoints.py
@@ -8,6 +8,7 @@
 from django.utils import timezone
 
 import json
+import logging
 
 from backend.models import Annotation, Document, DocumentType, Project, AnnotatorProject, UserDocumentFormatPreference
 from backend.rpc import create_project, update_project, add_project_document, add_document_annotation, \
@@ -28,7 +29,7 @@
 from backend.tests.test_rpc_server import TestEndpoint
 
 
-
+LOGGER = logging.getLogger(__name__)
 
 
 class TestUserAuth(TestCase):
@@ -1379,7 +1380,7 @@ def setUp(self):
         self.num_training_docs = 5
         self.training_docs = []
         for i in range(self.num_training_docs):
-            self.docs.append(Document.objects.create(project=self.proj,
+            self.training_docs.append(Document.objects.create(project=self.proj,
                                                      doc_type=DocumentType.TRAINING,
                                                      data={
                                                          "text": f"Document {i}",
@@ -1396,7 +1397,7 @@ def setUp(self):
         self.num_test_docs = 10
         self.test_docs = []
         for i in range(self.num_test_docs):
-            self.docs.append(Document.objects.create(project=self.proj,
+            self.test_docs.append(Document.objects.create(project=self.proj,
                                                      doc_type=DocumentType.TEST,
                                                      data={
                                                          "text": f"Document {i}",
@@ -1609,10 +1610,11 @@ def test_annotations_per_doc_not_enforced_for_training_or_test(self):
         self.proj.save()
 
         docs_annotated_per_user = []
-        for (i, (ann_user, _)) in enumerate(self.annotators):
+        for (ann_user, _) in self.annotators:
             # Add to project
             self.assertTrue(add_project_annotator(self.manager_request, self.proj.id, ann_user.username))
 
+        for (i, (ann_user, _)) in enumerate(self.annotators):
             # Every annotator should be able to complete every training document, even though
             # max annotations per document is less than the total number of annotators
             self.assertEqual(self.num_training_docs,
@@ -1623,6 +1625,7 @@ def test_annotations_per_doc_not_enforced_for_training_or_test(self):
             self.assertEqual(self.num_training_docs, self.proj.get_annotator_document_score(ann_user,
                                                                                             DocumentType.TRAINING))
 
+        for (i, (ann_user, _)) in enumerate(self.annotators):
             # Every annotator should be able to complete every test document, even though
             # max annotations per document is less than the total number of annotators
             self.assertEqual(self.num_test_docs,
@@ -1633,6 +1636,7 @@ def test_annotations_per_doc_not_enforced_for_training_or_test(self):
             self.assertEqual(self.num_training_docs, self.proj.get_annotator_document_score(ann_user,
                                                                                             DocumentType.TRAINING))
 
+        for (i, (ann_user, _)) in enumerate(self.annotators):
             # Now attempt to complete task normally
             num_annotated = self.complete_annotations(self.num_docs, "Annotation", annotator=i)
             docs_annotated_per_user.append(num_annotated)
@@ -1662,15 +1666,30 @@ def complete_annotations(self, num_annotations_to_complete, expected_doc_type_str,
         # Expect to get self.num_training_docs tasks
         num_completed_tasks = 0
+        if expected_doc_type_str == 'Annotation':
+            all_docs = self.docs
+        elif expected_doc_type_str == 'Training':
+            all_docs = self.training_docs
+        else:
+            all_docs = self.test_docs
+
+        annotated_docs = {doc.pk: ' ' for doc in all_docs}
         for i in range(num_annotations_to_complete):
             task_context = get_annotation_task(ann_req)
             if task_context:
                 self.assertEqual(expected_doc_type_str, task_context.get("document_type"),
                                  f"Document type does not match in task {task_context!r}, " +
                                  f"annotator {ann.username}, document {i}")
+                annotated_docs[task_context['document_id']] = "\u2714"
                 complete_annotation_task(ann_req, task_context["annotation_id"], {"sentiment": answer})
                 num_completed_tasks += 1
 
+        # Draw a nice markdown table of exactly which documents each annotator was given
+        if annotator == 0:
+            LOGGER.debug("Annotator | " + (" | ".join(str(i) for i in annotated_docs.keys())))
+            LOGGER.debug(" | ".join(["--"] * (len(annotated_docs) + 1)))
+        LOGGER.debug(ann.username + " | " + (" | ".join(str(v) for v in annotated_docs.values())))
+
         return num_completed_tasks
 
 
 class TestAnnotationChange(TestEndpoint):
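A note on the `models.py` guard: the old condition never inspected `doc_type` at all. `DocumentType.TEST` and `DocumentType.TRAINING` are bare enum members, and any non-zero member is truthy, so `... or DocumentType.TEST or DocumentType.TRAINING` short-circuited to true on every call and the `num_annotation_tasks_remaining` check was effectively dead. A minimal sketch of the difference, assuming an `IntEnum`-style `DocumentType` with the hypothetical values below (the real definition lives in `backend/models.py`):

```python
from enum import IntEnum

# Hypothetical stand-in for backend.models.DocumentType; the real values may
# differ, but any non-zero member is truthy, which is all the old guard tested.
class DocumentType(IntEnum):
    ANNOTATION = 0
    TRAINING = 1
    TEST = 2

doc_type = DocumentType.ANNOTATION
num_annotation_tasks_remaining = 0  # annotator has used up their quota

# Old guard: `DocumentType.TEST` is a truthy constant, so the expression is
# True no matter what doc_type is or how many tasks remain.
old_guard = bool((DocumentType.ANNOTATION and num_annotation_tasks_remaining > 0)
                 or DocumentType.TEST or DocumentType.TRAINING)

# New guard: compares against doc_type explicitly, so the quota is enforced.
new_guard = ((doc_type == DocumentType.ANNOTATION and num_annotation_tasks_remaining > 0)
             or doc_type == DocumentType.TEST or doc_type == DocumentType.TRAINING)

print(old_guard)  # True  -- a task would be handed out with no quota left
print(new_guard)  # False -- correctly refuses a new annotation task
```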
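On the new ordering itself: `alias()` computes `occupied_annotations` (a `Count` of annotations that are `COMPLETED` or `PENDING`) without adding it to the `SELECT` list, and `order_by('occupied_annotations', '?')` visits the least-occupied documents first, breaking ties randomly. A pure-Python sketch of the same policy, with hypothetical document ids and a made-up per-document quota, to illustrate why it spreads annotators evenly:

```python
import random

# Hypothetical state: document id -> number of complete-or-pending annotations.
occupied = {doc_id: 0 for doc_id in range(5)}
MAX_PER_DOC = 2  # stand-in for the project's max-annotations-per-document setting

def assign_task(already_annotated):
    """Mirror of .order_by('occupied_annotations', '?'): least-occupied first,
    random tie-break (shuffle, then stable sort), then the per-doc checks."""
    candidates = list(occupied)
    random.shuffle(candidates)
    candidates.sort(key=occupied.get)
    for doc_id in candidates:
        if doc_id not in already_annotated and occupied[doc_id] < MAX_PER_DOC:
            occupied[doc_id] += 1
            already_annotated.add(doc_id)
            return doc_id
    return None

# Three annotators each take three tasks; per-document counts stay within one
# of each other instead of piling up on whichever documents a purely random
# order happened to serve first.
for annotator in range(3):
    seen = set()
    for _ in range(3):
        assign_task(seen)
print(occupied)  # e.g. {0: 2, 1: 2, 2: 2, 3: 2, 4: 1} -- evenly spread
```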
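Finally, the debug table in `complete_annotations`: each call logs one markdown row per annotator (header and `--` separator once, from annotator 0), with `\u2714` (✔) marking the documents that annotator was served. With debug logging enabled for the test run (e.g. `logging.basicConfig(level=logging.DEBUG)`, depending on how the test runner handles log capture), the accumulated output should look roughly like this; the document ids and usernames here are made up:

```
Annotator | 101 | 102 | 103 | 104 | 105
-- | -- | -- | -- | -- | --
annotator0 | ✔ |   | ✔ | ✔ |  
annotator1 |   | ✔ | ✔ |   | ✔
```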