Skip to content

Commit

Permalink
feat: Try to populate lms_user_id on assignments during creation
Browse files Browse the repository at this point in the history
This is an attempt to cover the case where learners are already
logged in at the moment of assignment.  This case is a hole left by the
work in ENT-7875 which only set out to cover the case where a learner
was logged out at the moment of assignment.

ENT-7874
  • Loading branch information
pwnage101 committed Nov 3, 2023
1 parent 9463b0d commit f075a40
Show file tree
Hide file tree
Showing 4 changed files with 251 additions and 5 deletions.
85 changes: 82 additions & 3 deletions enterprise_access/apps/content_assignments/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,13 +12,26 @@
from opaque_keys.edx.keys import CourseKey
from opaque_keys.edx.locator import CourseLocator

from enterprise_access.apps.core.models import User
from enterprise_access.apps.subsidy_access_policy.content_metadata_api import get_and_cache_content_metadata

from .constants import LearnerContentAssignmentStateChoices
from .models import AssignmentConfiguration, LearnerContentAssignment
from .utils import chunks

logger = logging.getLogger(__name__)

# The number of emails we are allowed to filter on in a single User request.
#
# Batch size derivation inputs:
# * The old MySQL client limit was 1 MB for a long time.
# * 254 is the maximum number of characters in an email.
# * 258 is the length an email plus 4 character delimiter: `', '`
# * Divide result by 10 in case we are off by an order of magnitude.
#
# Batch size derivation formula: ((1 MB) / (258 B)) / 10 ≈ 350
USER_EMAIL_READ_BATCH_SIZE = 350


class AllocationException(Exception):
"""
Expand Down Expand Up @@ -233,6 +246,11 @@ def allocate_assignments(assignment_configuration, learner_emails, content_key,
# new assignments for these.
learner_emails_with_existing_assignments = set()

# Try to populate lms_user_id field on any existing assignments found. We already ran this function on these
# assignments (when they were created in a prior request), but time has passed since then so the outcome might be
# different this time. It's technically possible some learners have registered since the last request.
assignments_with_updated_lms_user_id = _try_populate_assignments_lms_user_id(existing_assignments)

# Split up the existing assignment records by state
for assignment in existing_assignments:
learner_emails_with_existing_assignments.add(assignment.learner_email)
Expand All @@ -244,10 +262,20 @@ def allocate_assignments(assignment_configuration, learner_emails, content_key,
else:
already_allocated_or_accepted.append(assignment)

# These two sets of updated assignments may contain duplicates when combined. Since the duplicates are just
# references to the same assignment object, and django model instances are hashable (on PK), they can be
# de-duplicated using set union.
existing_assignments_to_update = set(cancelled_or_errored_to_update).union(assignments_with_updated_lms_user_id)

# Bulk update and get a list of refreshed objects
updated_assignments = _update_and_refresh_assignments(
cancelled_or_errored_to_update,
['content_quantity', 'state']
existing_assignments_to_update,
[
# `lms_user_id` is updated via the _try_populate_assignments_lms_user_id() function.
'lms_user_id',
# `content_quantity` and `state` are updated via the for-loop above.
'content_quantity', 'state',
]
)

# Narrow down creation list of learner emails
Expand Down Expand Up @@ -294,10 +322,53 @@ def _get_content_title(assignment_configuration, content_key):
return content_metadata.get('title')


def _try_populate_assignments_lms_user_id(assignments):
"""
For all given assignments, try to populate the lms_user_id field based on a matching User.
Notes:
* This function does NOT save() the assignment record.
* This is a best-effort only; most of the time a User will not exist for an assignment, and this function is a no-op
for that assignment.
* If multiple User records match based on email, choice is non-deterministic.
* Performance: This results in one or more reads against the User model. The chunk size has been tuned to minimize
the number of reads while simultaneously avoiding hard limits on statement length.
Args:
assignments (list of LearnerContentAssignment):
The unsaved assignments on which to update the lms_user_id field.
Returns:
list of LearnerContentAssignment: A non-strict subset of the input assignments, only the ones updated.
"""
# only operate on assignments that actually need to be updated.
assignments_with_empty_lms_user_id = [assignment for assignment in assignments if assignment.lms_user_id is None]

assignments_to_save = []

for assignment_chunk in chunks(assignments_with_empty_lms_user_id, USER_EMAIL_READ_BATCH_SIZE):
emails = [assignment.learner_email for assignment in assignment_chunk]
# Construct a list of tuples containing (email, lms_user_id) for every assignment in this chunk.
email_lms_user_id = User.objects.filter(
email__in=emails, # this is the part that could exceed max statement length if batch size is too large.
lms_user_id__isnull=False,
).values_list('email', 'lms_user_id')
# dict() on a list of 2-tuples treats the first elements as keys and second elements as values.
lms_user_id_by_email = dict(email_lms_user_id)
for assignment in assignment_chunk:
lms_user_id = lms_user_id_by_email.get(assignment.learner_email)
if lms_user_id:
assignment.lms_user_id = lms_user_id
assignments_to_save.append(assignment)

return assignments_to_save


def _create_new_assignments(assignment_configuration, learner_emails, content_key, content_quantity):
"""
Helper to bulk save new LearnerContentAssignment instances.
"""
# First, prepare assignment objects using data available in-memory only.
assignments_to_create = []
for learner_email in learner_emails:
content_title = _get_content_title(assignment_configuration, content_key)
Expand All @@ -309,9 +380,17 @@ def _create_new_assignments(assignment_configuration, learner_emails, content_ke
content_quantity=content_quantity,
state=LearnerContentAssignmentStateChoices.ALLOCATED,
)
assignment.full_clean()
assignments_to_create.append(assignment)

# Next, try to populate the lms_user_id field on all assignments to be created (resulting in reads against User).
# Note: This covers the case where an admin assigns content to a learner AFTER they register. For the case where an
# admin assigns content to a learner BEFORE they register, see the User post_save hook implemented in signals.py.
_try_populate_assignments_lms_user_id(assignments_to_create)

# Validate all assignments to be created.
for assignment in assignments_to_create:
assignment.full_clean()

# Do the bulk creation to save these records
return LearnerContentAssignment.bulk_create(assignments_to_create)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ class Meta:

class LearnerContentAssignmentFactory(factory.django.DjangoModelFactory):
"""
Base Test factory for the ``LearnerContentAssisgnment`` model.
Base Test factory for the ``LearnerContentAssignment`` model.
"""
class Meta:
model = LearnerContentAssignment
Expand Down
158 changes: 157 additions & 1 deletion enterprise_access/apps/content_assignments/tests/test_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,18 +6,25 @@
import ddt
from django.test import TestCase

from enterprise_access.apps.core.models import User
from enterprise_access.apps.core.tests.factories import UserFactory

from ..api import (
AllocationException,
_try_populate_assignments_lms_user_id,
allocate_assignments,
cancel_assignments,
get_allocated_quantity_for_configuration,
get_assignment_for_learner,
get_assignments_for_configuration
)
from ..constants import LearnerContentAssignmentStateChoices
from ..models import AssignmentConfiguration
from ..models import AssignmentConfiguration, LearnerContentAssignment
from .factories import LearnerContentAssignmentFactory

# This is normally much larger (350), but that blows up the test duration.
TEST_USER_EMAIL_READ_BATCH_SIZE = 4


@ddt.ddt
class TestContentAssignmentApi(TestCase):
Expand Down Expand Up @@ -299,6 +306,7 @@ def test_allocate_assignments_happy_path(self, mock_get_and_cache_content_metada
created_assignment = allocation_results['created'][0]
self.assertEqual(created_assignment.assignment_configuration, self.assignment_configuration)
self.assertEqual(created_assignment.learner_email, '[email protected]')
self.assertEqual(created_assignment.lms_user_id, None)
self.assertEqual(created_assignment.content_key, content_key)
self.assertEqual(created_assignment.content_title, content_title)
self.assertEqual(created_assignment.content_quantity, -1 * content_price_cents)
Expand Down Expand Up @@ -366,3 +374,151 @@ def test_cancel_assignments_happy_path(self):
self.assertEqual(accepted_assignment.state, LearnerContentAssignmentStateChoices.ACCEPTED)
self.assertEqual(cancelled_assignment.state, LearnerContentAssignmentStateChoices.CANCELLED)
self.assertEqual(errored_assignment.state, LearnerContentAssignmentStateChoices.CANCELLED)

@mock.patch(
'enterprise_access.apps.content_assignments.api.get_and_cache_content_metadata',
return_value=mock.MagicMock(),
)
@ddt.data(
{
'user_exists': True,
'existing_assignment_state': None,
},
{
'user_exists': False,
'existing_assignment_state': None,
},
{
'user_exists': True,
'existing_assignment_state': LearnerContentAssignmentStateChoices.ALLOCATED,
},
{
'user_exists': False,
'existing_assignment_state': LearnerContentAssignmentStateChoices.ALLOCATED,
},
{
'user_exists': True,
'existing_assignment_state': LearnerContentAssignmentStateChoices.ACCEPTED,
},
{
'user_exists': False,
'existing_assignment_state': LearnerContentAssignmentStateChoices.ACCEPTED,
},
{
'user_exists': True,
'existing_assignment_state': LearnerContentAssignmentStateChoices.CANCELLED,
},
{
'user_exists': False,
'existing_assignment_state': LearnerContentAssignmentStateChoices.CANCELLED,
},
{
'user_exists': True,
'existing_assignment_state': LearnerContentAssignmentStateChoices.ERRORED,
},
{
'user_exists': False,
'existing_assignment_state': LearnerContentAssignmentStateChoices.ERRORED,
},
)
@ddt.unpack
def test_allocate_assignments_set_lms_user_id(
self,
mock_get_and_cache_content_metadata,
user_exists,
existing_assignment_state,
):
"""
Tests that allocating assignments correctly sets the lms_user_id when a user pre-exists with a matching email.
"""
content_key = 'demoX'
content_title = 'edx: Demo 101'
content_price_cents = 100
learner_email = '[email protected]'
lms_user_id = 999
mock_get_and_cache_content_metadata.return_value = {
'title': content_title,
}

if user_exists:
UserFactory(username='alice', email=learner_email, lms_user_id=lms_user_id)

assignment = None
if existing_assignment_state:
assignment = LearnerContentAssignmentFactory.create(
assignment_configuration=self.assignment_configuration,
learner_email=learner_email,
lms_user_id=None,
content_key=content_key,
content_title=content_title,
content_quantity=-content_price_cents,
state=existing_assignment_state,
)

allocate_assignments(
self.assignment_configuration,
[learner_email],
content_key,
content_price_cents,
)

# Get the latest assignment from the db.
assignment = LearnerContentAssignment.objects.get(learner_email=learner_email)

# We should have updated the lms_user_id of the assignment IFF a user pre-existed.
if user_exists:
assert assignment.lms_user_id == lms_user_id
else:
assert assignment.lms_user_id is None

@mock.patch(
'enterprise_access.apps.content_assignments.api.USER_EMAIL_READ_BATCH_SIZE',
TEST_USER_EMAIL_READ_BATCH_SIZE,
)
def test_try_populate_assignments_lms_user_id_with_batching(self):
"""
Tests the _try_populate_assignments_lms_user_id() function, especially the read batching feature.
Make 2*N users with unique emails,
Call function on:
2*N assignments with existing emails,
and 0.5*N assignment with new emails,
Where N is the max batch size.
"""
num_users_to_create = 2 * TEST_USER_EMAIL_READ_BATCH_SIZE
existing_users = [UserFactory() for _ in range(num_users_to_create)]

assignments_for_non_existing_users = [
LearnerContentAssignmentFactory.create(
assignment_configuration=self.assignment_configuration,
lms_user_id=None,
state=LearnerContentAssignmentStateChoices.ALLOCATED,
)
for _ in range(int(0.5 * TEST_USER_EMAIL_READ_BATCH_SIZE))
]
assignments_for_existing_users = [
LearnerContentAssignmentFactory.create(
assignment_configuration=self.assignment_configuration,
learner_email=existing_user.email,
lms_user_id=None,
state=LearnerContentAssignmentStateChoices.ALLOCATED,
)
for existing_user in existing_users
]
# Assemble a list of assignments 2.5 longer than the max batch size.
all_assignments = assignments_for_non_existing_users + assignments_for_existing_users

_try_populate_assignments_lms_user_id(all_assignments)

# save() and update all assignments from the db.
for assignment in all_assignments:
assignment.save()
assignment.refresh_from_db()

# We should not have updated the lms_user_id of the assignment if no user existed.
for assignment in assignments_for_non_existing_users:
assert assignment.lms_user_id is None

# We should have updated the lms_user_id of the assignment if a user existed.
for assignment in assignments_for_existing_users:
assert assignment.lms_user_id == User.objects.get(email=assignment.learner_email).lms_user_id
11 changes: 11 additions & 0 deletions enterprise_access/apps/content_assignments/utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
"""
Utility functions for the content_assignments app.
"""


def chunks(a_list, chunk_size):
"""
Helper to break a list up into chunks. Returns a generator of lists.
"""
for i in range(0, len(a_list), chunk_size):
yield a_list[i:i + chunk_size]

0 comments on commit f075a40

Please sign in to comment.