Skip to content

Commit

Permalink
Merge pull request #384 from GateNLP/spread-docs-evenly
Browse files Browse the repository at this point in the history
Attempt to spread documents more evenly across annotators
  • Loading branch information
ianroberts authored Aug 4, 2023
2 parents 44098c8 + 04118ad commit a9c28d8
Show file tree
Hide file tree
Showing 2 changed files with 35 additions and 7 deletions.
15 changes: 12 additions & 3 deletions backend/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -737,9 +737,18 @@ def assign_annotator_task(self, user, doc_type=DocumentType.ANNOTATION):
Annotation task performs an extra check for remaining annotation task (num_annotation_tasks_remaining),
testing and training do not do this check, as the annotator must annotate all documents.
"""
if (DocumentType.ANNOTATION and self.num_annotation_tasks_remaining > 0) or \
DocumentType.TEST or DocumentType.TRAINING:
for doc in self.documents.filter(doc_type=doc_type).order_by('?'):
if (doc_type == DocumentType.ANNOTATION and self.num_annotation_tasks_remaining > 0) or \
doc_type == DocumentType.TEST or doc_type == DocumentType.TRAINING:
if doc_type == DocumentType.TEST or doc_type == DocumentType.TRAINING:
queryset = self.documents.filter(doc_type=doc_type).order_by('?')
else:
# Prefer documents which have fewer complete or pending annotations, in order to
# spread the annotators as evenly as possible across the available documents
queryset = self.documents.filter(doc_type=doc_type).alias(
occupied_annotations=Count("annotations", filter=Q(annotations__status=Annotation.COMPLETED)
| Q(annotations__status=Annotation.PENDING))
).order_by('occupied_annotations', '?')
for doc in queryset:
# Check that annotator hasn't annotated and that
# doc hasn't been fully annotated
if doc.user_can_annotate_document(user):
Expand Down
27 changes: 23 additions & 4 deletions backend/tests/test_rpc_endpoints.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@

from django.utils import timezone
import json
import logging

from backend.models import Annotation, Document, DocumentType, Project, AnnotatorProject, UserDocumentFormatPreference
from backend.rpc import create_project, update_project, add_project_document, add_document_annotation, \
Expand All @@ -28,7 +29,7 @@
from backend.tests.test_rpc_server import TestEndpoint



LOGGER = logging.getLogger(__name__)

class TestUserAuth(TestCase):

Expand Down Expand Up @@ -1379,7 +1380,7 @@ def setUp(self):
self.num_training_docs = 5
self.training_docs = []
for i in range(self.num_training_docs):
self.docs.append(Document.objects.create(project=self.proj,
self.training_docs.append(Document.objects.create(project=self.proj,
doc_type=DocumentType.TRAINING,
data={
"text": f"Document {i}",
Expand All @@ -1396,7 +1397,7 @@ def setUp(self):
self.num_test_docs = 10
self.test_docs = []
for i in range(self.num_test_docs):
self.docs.append(Document.objects.create(project=self.proj,
self.test_docs.append(Document.objects.create(project=self.proj,
doc_type=DocumentType.TEST,
data={
"text": f"Document {i}",
Expand Down Expand Up @@ -1609,10 +1610,11 @@ def test_annotations_per_doc_not_enforced_for_training_or_test(self):
self.proj.save()

docs_annotated_per_user = []
for (i, (ann_user, _)) in enumerate(self.annotators):
for (ann_user, _) in self.annotators:
# Add to project
self.assertTrue(add_project_annotator(self.manager_request, self.proj.id, ann_user.username))

for (i, (ann_user, _)) in enumerate(self.annotators):
# Every annotator should be able to complete every training document, even though
# max annotations per document is less than the total number of annotators
self.assertEqual(self.num_training_docs,
Expand All @@ -1623,6 +1625,7 @@ def test_annotations_per_doc_not_enforced_for_training_or_test(self):
self.assertEqual(self.num_training_docs,
self.proj.get_annotator_document_score(ann_user, DocumentType.TRAINING))

for (i, (ann_user, _)) in enumerate(self.annotators):
# Every annotator should be able to complete every test document, even though
# max annotations per document is less than the total number of annotators
self.assertEqual(self.num_test_docs,
Expand All @@ -1633,6 +1636,7 @@ def test_annotations_per_doc_not_enforced_for_training_or_test(self):
self.assertEqual(self.num_training_docs,
self.proj.get_annotator_document_score(ann_user, DocumentType.TRAINING))

for (i, (ann_user, _)) in enumerate(self.annotators):
# Now attempt to complete task normally
num_annotated = self.complete_annotations(self.num_docs, "Annotation", annotator=i)
docs_annotated_per_user.append(num_annotated)
Expand Down Expand Up @@ -1662,15 +1666,30 @@ def complete_annotations(self, num_annotations_to_complete, expected_doc_type_st

# Expect to get self.num_training_docs tasks
num_completed_tasks = 0
if expected_doc_type_str == 'Annotation':
all_docs = self.docs
elif expected_doc_type_str == 'Training':
all_docs = self.training_docs
else:
all_docs = self.test_docs

annotated_docs = {doc.pk: ' ' for doc in all_docs}
for i in range(num_annotations_to_complete):
task_context = get_annotation_task(ann_req)
if task_context:
self.assertEqual(expected_doc_type_str, task_context.get("document_type"),
f"Document type does not match in task {task_context!r}, " +
f"annotator {ann.username}, document {i}")
annotated_docs[task_context['document_id']] = "\u2714"
complete_annotation_task(ann_req, task_context["annotation_id"], {"sentiment": answer})
num_completed_tasks += 1

# Draw a nice markdown table of exactly which documents each annotator was given
if annotator == 0:
LOGGER.debug("Annotator | " + (" | ".join(str(i) for i in annotated_docs.keys())))
LOGGER.debug(" | ".join(["--"] * (len(annotated_docs)+1)))
LOGGER.debug(ann.username + " | " + (" | ".join(str(v) for v in annotated_docs.values())))

return num_completed_tasks

class TestAnnotationChange(TestEndpoint):
Expand Down

0 comments on commit a9c28d8

Please sign in to comment.