Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: handle dupe course staff mgmt command #294

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
40 changes: 21 additions & 19 deletions edx_exams/apps/core/management/commands/bulk_add_course_staff.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,32 +63,34 @@ def add_course_staff_from_csv(self, csv_file, batch_size, batch_delay):
Add the given set of course staff provided in csv
"""
reader = list(csv.DictReader(csv_file))
users_to_create = []
users_existing = {u.username for u in User.objects.filter(username__in=[r.get('username') for r in reader])}
for row in reader:
if row.get('username') not in users_existing:
users_to_create.append(row)
users_existing.add(row.get('username'))

# bulk create users
for i in range(0, len(users_to_create), batch_size):
for i in range(0, len(reader), batch_size):
User.objects.bulk_create(
User(
username=user.get('username'),
email=user.get('email'),
)
for user in users_to_create[i:i + batch_size]
(User(
username=row.get('username'),
email=row.get('email'),
) for row in reader[i:i + batch_size]),
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Will this line generate indexing errors? For example, the i+batch_size > len(reader) condition?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good call, let me add testing/handling for that.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

So I did add tests to test that this still works. Also, I believe that this works because of this line: for i in range(0, len(reader), batch_size). Since this is incremented by batch_size, the line for row in reader[i:i + batch_size]) won't actually get to the index error. I actually originally repurposed this from some batching examples in the platform, so there are other examples of this pattern being used. See below:

Copy link
Contributor

@zacharis278 zacharis278 Jul 30, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

okay this was throwing me off too. reader[i:i + batch_size] is actually slicing a subset reader into a new list. If the k in list[i:k] is outside the bounds it won't matter it only pulls the subset of indexes that actually exist up to k. eg [1,2,3,4][2:20] evaluates to [3,4]

ignore_conflicts=True,
)
time.sleep(batch_delay)

# bulk create course staff
for i in range(0, len(reader), batch_size):
CourseStaffRole.objects.bulk_create(
CourseStaffRole(
(CourseStaffRole(
user=User.objects.get(username=row.get('username')),
course_id=row.get('course_id'),
role=row.get('role'),
)
for row in reader[i:i + batch_size]
) for row in reader[i:i + batch_size]),
ignore_conflicts=True,
varshamenon4 marked this conversation as resolved.
Show resolved Hide resolved
)
time.sleep(batch_delay)

# bulk create course staff
# for i in range(0, len(reader), batch_size):
# CourseStaffRole.objects.bulk_create(
# (CourseStaffRole(
# user=User.objects.get(username=row.get('username')),
# course_id=row.get('course_id'),
# role=row.get('role'),
# ) for row in reader[i:i + batch_size]),
# ignore_conflicts=True,
# )
# time.sleep(batch_delay)
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
from django.test import TestCase

from edx_exams.apps.core.models import CourseStaffRole, User
from edx_exams.apps.core.test_utils.factories import UserFactory
from edx_exams.apps.core.test_utils.factories import CourseStaffRoleFactory, UserFactory


class TestBulkAddCourseStaff(TestCase):
Expand Down Expand Up @@ -90,9 +90,28 @@ def test_add_course_staff_with_not_default_batch_size(self):
'sam,[email protected],staff,course-v1:edx+test+f20\n']
with NamedTemporaryFile() as csv:
csv = self._write_test_csv(csv, lines)
with self.assertNumQueries(9):
with self.assertNumQueries(8):
call_command(self.command, f'--csv_path={csv.name}', '--batch_size=1')

def test_add_course_staff_with_batch_size_larger_than_list(self):
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@schenedx added this test case as you'd suggested! Thanks for the feedback.

""" Assert that the number of queries is correct given batch size larger than lines """
lines = ['pam,[email protected],staff,course-v1:edx+test+f20\n',
'sam,[email protected],staff,course-v1:edx+test+f20\n']
with NamedTemporaryFile() as csv:
csv = self._write_test_csv(csv, lines)
with self.assertNumQueries(6):
call_command(self.command, f'--csv_path={csv.name}', '--batch_size=3')

def test_add_course_staff_with_batch_size_smaller_than_list(self):
""" Assert that the number of queries is correct given batch size smaller than lines """
lines = ['pam,[email protected],staff,course-v1:edx+test+f20\n',
'sam,[email protected],staff,course-v1:edx+test+f20\n'
'tam,[email protected],staff,course-v1:edx+test+f20\n']
with NamedTemporaryFile() as csv:
csv = self._write_test_csv(csv, lines)
with self.assertNumQueries(9):
call_command(self.command, f'--csv_path={csv.name}', '--batch_size=2')

def test_add_course_staff_with_not_default_batch_delay(self):
username, email = 'pam', '[email protected]'
username2, email2 = 'cam', '[email protected]'
Expand All @@ -106,16 +125,16 @@ def test_add_course_staff_with_not_default_batch_delay(self):

def test_num_queries_correct(self):
"""
Assert the number of queries to be 5 + 1 * number of lines:
2 for savepoint/release savepoint, 1 to get existing usernames,
Assert the number of queries to be 4 + 1 * number of lines:
2 for savepoint/release savepoint
1 to bulk create users, 1 to bulk create course role
1 for each user (to get user)
"""
num_lines = 20
lines = [f'pam{i},pam{i}@pond.com,staff,course-v1:edx+test+f20\n' for i in range(num_lines)]
with NamedTemporaryFile() as csv:
csv = self._write_test_csv(csv, lines)
with self.assertNumQueries(5 + num_lines):
with self.assertNumQueries(4 + num_lines):
call_command(self.command, f'--csv_path={csv.name}')

def test_dupe_user_csv(self):
Expand All @@ -130,3 +149,32 @@ def test_dupe_user_csv(self):
call_command(self.command, f'--csv_path={csv.name}')
self._assert_user_and_role(username, email, self.course_role, self.course_id)
self._assert_user_and_role(username, email, self.course_role, course_id_2)

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could you add a test case where you set the batch size to 2 and create like 3 course_staff users?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Adding two cases, one where the batch size is larger than the number of roles, and one where the batch size is smaller.

def test_existing_course_staff_csv(self):
""" Assert that the course staff role are correctly created given already existing course staff roles in csv """
course_existing = 'course-v1:edx+test+f24'
CourseStaffRoleFactory.create(
user=self.user,
course_id=course_existing,
role=self.course_role,
)
lines = [f'{self.user.username},{self.user.email},{self.course_role},{course_existing}\n']
with NamedTemporaryFile() as csv:
csv = self._write_test_csv(csv, lines)
call_command(self.command, f'--csv_path={csv.name}')
self._assert_user_and_role(self.user.username, self.user.email, self.course_role, course_existing)

def test_dupe_course_staff_csv(self):
""" Assert that the course staff role are correctly created given dupe course staff roles in csv """
course_existing = 'course-v1:edx+test+f24'
CourseStaffRoleFactory.create(
user=self.user,
course_id=course_existing,
role=self.course_role,
)
lines = [f'{self.user.username},{self.user.email},{self.course_role},{course_existing}\n',
f'{self.user.username},{self.user.email},{self.course_role},{course_existing}\n']
with NamedTemporaryFile() as csv:
csv = self._write_test_csv(csv, lines)
call_command(self.command, f'--csv_path={csv.name}')
self._assert_user_and_role(self.user.username, self.user.email, self.course_role, course_existing)
Loading