Skip to content

Commit

Permalink
fix: libraries performance problem
Browse files Browse the repository at this point in the history
This is an attempt to fix a performance problem on the libraries home page. When you go to studio home and click on the libraries tab, on prod it will be quick for admins but extremely slow for course instructors (> 12 seconds) and leads to timeouts. It grows with the number of libraries that are assigned to the instructor.

The Python code for the request to load libraries for a particular user goes through all existing libraries and then checks all of the user's roles for each library, which results in a complexity of O(l*r), l=libraries, r=roles. This PR improves the complexity to O(l).

The BulkRoleCache and RoleCache classes were using a python set to store all roles for a particular user. A user can have a large number of roles, and lookup speed of iterating through a set is slow (O(n)). Most roles don't have the same course id, however. So if you have the course id of the role you're looking for, we can use a dict of course ids that contain related roles. The number of roles per course id is negligible, so we arrive at a lookup speed of O(1) when looking up a user's roles that belong to a specific course id.

The BulkRoleCache now caches and stores user roles in a data structure like this:
    {
        user_id_1: {
            course_id_1: {role1, role2, role3},  # Set of roles associated with course_id_1
            course_id_2: {role4, role5, role6},  # Set of roles associated with course_id_2
            [ROLE_CACHE_UNGROUPED_ROLES_KEY]: {role7, role8}  # Set of roles not tied to any specific course or library. For example, Global Staff roles.
        },
        user_id_2: { ... }  # Similar structure for another user
    }

While this changes the data structure used to store roles under the hood and adds the new property `roles_by_course_id` to the RoleCache,
when initializing the RoleCache will store roles additionally in the previous data structure - as a flat set - in the `_roles` property accessible via `all_roles_set`. This establishes
backwards compatibility.

We are now storing roles twice in the RoleCache (in each of the two data structures), which means this takes twice as much memory, but only in the scope of a request.
  • Loading branch information
jesperhodge committed May 20, 2024
1 parent e7b4db0 commit e95d7e7
Show file tree
Hide file tree
Showing 6 changed files with 157 additions and 20 deletions.
1 change: 1 addition & 0 deletions cms/djangoapps/contentstore/rest_api/v1/views/home.py
Original file line number Diff line number Diff line change
Expand Up @@ -215,4 +215,5 @@ def get(self, request: Request):

library_context = get_library_context(request)
serializer = LibraryTabSerializer(library_context)

return Response(serializer.data)
2 changes: 1 addition & 1 deletion cms/djangoapps/contentstore/views/course.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@
CourseStaffRole,
GlobalStaff,
UserBasedRole,
OrgStaffRole
OrgStaffRole,
)
from common.djangoapps.util.json_request import JsonResponse, JsonResponseBadRequest, expect_json
from common.djangoapps.util.string_utils import _has_non_ascii_characters
Expand Down
3 changes: 2 additions & 1 deletion common/djangoapps/student/auth.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@
OrgContentCreatorRole,
OrgInstructorRole,
OrgLibraryUserRole,
OrgStaffRole
OrgStaffRole,
)

# Studio permissions:
Expand Down Expand Up @@ -106,6 +106,7 @@ def get_user_permissions(user, course_key, org=None):
# Staff have all permissions except EDIT_ROLES:
if OrgStaffRole(org=org).has_user(user) or (course_key and user_has_role(user, CourseStaffRole(course_key))):
return STUDIO_VIEW_USERS | STUDIO_EDIT_CONTENT | STUDIO_VIEW_CONTENT

# Otherwise, for libraries, users can view only:
if course_key and isinstance(course_key, LibraryLocator):
if OrgLibraryUserRole(org=org).has_user(user) or user_has_role(user, LibraryUserRole(course_key)):
Expand Down
2 changes: 1 addition & 1 deletion common/djangoapps/student/role_helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,4 +75,4 @@ def get_course_roles(user: User) -> list[CourseAccessRole]:
"""
# pylint: disable=protected-access
role_cache = get_role_cache(user)
return list(role_cache._roles)
return list(role_cache.all_roles_set)
85 changes: 72 additions & 13 deletions common/djangoapps/student/roles.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,9 @@
"""


from collections import defaultdict
import logging
from abc import ABCMeta, abstractmethod
from collections import defaultdict
from contextlib import contextmanager

from django.contrib.auth.models import User # lint-amnesty, pylint: disable=imported-auth-user
Expand All @@ -23,6 +23,9 @@
# A mapping of roles to the roles that they inherit permissions from.
ACCESS_ROLES_INHERITANCE = {}

# The key used to store roles for a user in the cache that do not belong to a course or do not have a course id.
ROLE_CACHE_UNGROUPED_ROLES__KEY = 'ungrouped'


def register_access_role(cls):
"""
Expand Down Expand Up @@ -60,21 +63,52 @@ def strict_role_checking():
ACCESS_ROLES_INHERITANCE.update(OLD_ACCESS_ROLES_INHERITANCE)


def get_role_cache_key_for_course(course_key=None):
"""
Get the cache key for the course key.
"""
return str(course_key) if course_key else ROLE_CACHE_UNGROUPED_ROLES__KEY


class BulkRoleCache: # lint-amnesty, pylint: disable=missing-class-docstring
"""
This class provides a caching mechanism for roles grouped by users and courses,
using a nested dictionary structure to optimize lookup performance. The cache structure is designed as follows:
{
user_id_1: {
course_id_1: {role1, role2, role3}, # Set of roles associated with course_id_1
course_id_2: {role4, role5, role6}, # Set of roles associated with course_id_2
[ROLE_CACHE_UNGROUPED_ROLES_KEY]: {role7, role8} # Set of roles not tied to any specific course or library
},
user_id_2: { ... } # Similar structure for another user
}
- Each top-level dictionary entry keys by `user_id` to access role data for a specific user.
- Nested within each user's dictionary, entries are keyed by `course_id` grouping roles by course.
- The special key `ROLE_CACHE_UNGROUPED_ROLES_KEY` (a constant defined above)
stores roles that are not associated with any specific course or library.
"""

CACHE_NAMESPACE = "student.roles.BulkRoleCache"
CACHE_KEY = 'roles_by_user'

@classmethod
def prefetch(cls, users): # lint-amnesty, pylint: disable=missing-function-docstring
roles_by_user = defaultdict(set)
roles_by_user = defaultdict(lambda: defaultdict(set))
get_cache(cls.CACHE_NAMESPACE)[cls.CACHE_KEY] = roles_by_user

for role in CourseAccessRole.objects.filter(user__in=users).select_related('user'):
roles_by_user[role.user.id].add(role)
user_id = role.user.id
course_id = get_role_cache_key_for_course(role.course_id)

# Add role to the set in roles_by_user[user_id][course_id]
user_roles_set_for_course = roles_by_user[user_id][course_id]
user_roles_set_for_course.add(role)

users_without_roles = [u for u in users if u.id not in roles_by_user]
for user in users_without_roles:
roles_by_user[user.id] = set()
roles_by_user[user.id] = {}

@classmethod
def get_user_roles(cls, user):
Expand All @@ -83,15 +117,32 @@ def get_user_roles(cls, user):

class RoleCache:
"""
A cache of the CourseAccessRoles held by a particular user
A cache of the CourseAccessRoles held by a particular user.
Internal data structures should be accessed by getter and setter methods;
don't use `_roles_by_course_id` or `_roles` directly.
_roles_by_course_id: This is the data structure as saved in the RequestCache.
It contains all roles for a user as a dict that's keyed by course_id.
The key ROLE_CACHE_UNGROUPED_ROLES__KEY is used for all roles
that are not associated with a course.
_roles: This is a set of all roles for a user, ungrouped. It's used for some types of
lookups and collected from _roles_by_course_id on initialization
so that it doesn't need to be recalculated.
"""
def __init__(self, user):
try:
self._roles = BulkRoleCache.get_user_roles(user)
self._roles_by_course_id = BulkRoleCache.get_user_roles(user)
except KeyError:
self._roles = set(
CourseAccessRole.objects.filter(user=user).all()
)
self._roles_by_course_id = {}
roles = CourseAccessRole.objects.filter(user=user).all()
for role in roles:
course_id = get_role_cache_key_for_course(role.course_id)
if not self._roles_by_course_id.get(course_id):
self._roles_by_course_id[course_id] = set()
self._roles_by_course_id[course_id].add(role)
self._roles = set()
for roles_for_course in self._roles_by_course_id.values():
self._roles.update(roles_for_course)

@staticmethod
def get_roles(role):
Expand All @@ -100,16 +151,24 @@ def get_roles(role):
"""
return ACCESS_ROLES_INHERITANCE.get(role, set()) | {role}

@property
def all_roles_set(self):
return self._roles

@property
def roles_by_course_id(self):
return self._roles_by_course_id

def has_role(self, role, course_id, org):
"""
Return whether this RoleCache contains a role with the specified role
or a role that inherits from the specified role, course_id and org.
"""
course_id_string = get_role_cache_key_for_course(course_id)
course_roles = self._roles_by_course_id.get(course_id_string, [])
return any(
access_role.role in self.get_roles(role) and
access_role.course_id == course_id and
access_role.org == org
for access_role in self._roles
access_role.role in self.get_roles(role) and access_role.org == org
for access_role in course_roles
)


Expand Down
84 changes: 80 additions & 4 deletions common/djangoapps/student/tests/test_roles.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
import ddt
from django.test import TestCase
from opaque_keys.edx.keys import CourseKey
from opaque_keys.edx.locator import LibraryLocator

from common.djangoapps.student.roles import (
CourseAccessRole,
Expand All @@ -22,7 +23,9 @@
OrgContentCreatorRole,
OrgInstructorRole,
OrgStaffRole,
RoleCache
RoleCache,
get_role_cache_key_for_course,
ROLE_CACHE_UNGROUPED_ROLES__KEY
)
from common.djangoapps.student.role_helpers import get_course_roles, has_staff_roles
from common.djangoapps.student.tests.factories import AnonymousUserFactory, InstructorFactory, StaffFactory, UserFactory
Expand All @@ -35,7 +38,7 @@ class RolesTestCase(TestCase):

def setUp(self):
super().setUp()
self.course_key = CourseKey.from_string('edX/toy/2012_Fall')
self.course_key = CourseKey.from_string('course-v1:course-v1:edX+toy+2012_Fall')
self.course_loc = self.course_key.make_usage_key('course', '2012_Fall')
self.anonymous_user = AnonymousUserFactory()
self.student = UserFactory()
Expand Down Expand Up @@ -189,8 +192,9 @@ def test_get_orgs_for_user(self):
@ddt.ddt
class RoleCacheTestCase(TestCase): # lint-amnesty, pylint: disable=missing-class-docstring

IN_KEY = CourseKey.from_string('edX/toy/2012_Fall')
NOT_IN_KEY = CourseKey.from_string('edX/toy/2013_Fall')
IN_KEY_STRING = 'course-v1:edX+toy+2012_Fall'
IN_KEY = CourseKey.from_string(IN_KEY_STRING)
NOT_IN_KEY = CourseKey.from_string('course-v1:edX+toy+2013_Fall')

ROLES = (
(CourseStaffRole(IN_KEY), ('staff', IN_KEY, 'edX')),
Expand Down Expand Up @@ -233,3 +237,75 @@ def test_only_in_role(self, role, target):
def test_empty_cache(self, role, target): # lint-amnesty, pylint: disable=unused-argument
cache = RoleCache(self.user)
assert not cache.has_role(*target)

def test_get_role_cache_key_for_course_for_course_object_gets_string(self):
"""
Given a valid course key object, get_role_cache_key_for_course
should return the string representation of the key.
"""
course_string = 'course-v1:edX+toy+2012_Fall'
key = CourseKey.from_string(course_string)
key = get_role_cache_key_for_course(key)
assert key == course_string

def test_get_role_cache_key_for_course_for_undefined_object_returns_default(self):
"""
Given a value None, get_role_cache_key_for_course
should return the default key for ungrouped courses.
"""
key = get_role_cache_key_for_course(None)
assert key == ROLE_CACHE_UNGROUPED_ROLES__KEY

def test_role_cache_get_roles_set(self):
"""
Test that the RoleCache.all_roles_set getter method returns a flat set of all roles for a user
and that the ._roles attribute is the same as the set to avoid legacy behavior being broken.
"""
lib0 = LibraryLocator.from_string('library-v1:edX+quizzes')
course0 = CourseKey.from_string('course-v1:edX+toy+2012_Summer')
course1 = CourseKey.from_string('course-v1:edX+toy2+2013_Fall')
role_library_v1 = LibraryUserRole(lib0)
role_course_0 = CourseInstructorRole(course0)
role_course_1 = CourseInstructorRole(course1)

role_library_v1.add_users(self.user)
role_course_0.add_users(self.user)
role_course_1.add_users(self.user)

cache = RoleCache(self.user)
assert cache.has_role('library_user', lib0, 'edX')
assert cache.has_role('instructor', course0, 'edX')
assert cache.has_role('instructor', course1, 'edX')

assert len(cache.all_roles_set) == 3
roles_set = cache.all_roles_set
for role in roles_set:
assert role.course_id.course in ('quizzes', 'toy2', 'toy')

assert roles_set == cache._roles # pylint: disable=protected-access

def test_role_cache_roles_by_course_id(self):
"""
Test that the RoleCache.roles_by_course_id getter method returns a dictionary of roles for a user
that are grouped by course_id or if ungrouped by the ROLE_CACHE_UNGROUPED_ROLES__KEY.
"""
lib0 = LibraryLocator.from_string('library-v1:edX+quizzes')
course0 = CourseKey.from_string('course-v1:edX+toy+2012_Summer')
course1 = CourseKey.from_string('course-v1:edX+toy2+2013_Fall')
role_library_v1 = LibraryUserRole(lib0)
role_course_0 = CourseInstructorRole(course0)
role_course_1 = CourseInstructorRole(course1)
role_org_staff = OrgStaffRole('edX')

role_library_v1.add_users(self.user)
role_course_0.add_users(self.user)
role_course_1.add_users(self.user)
role_org_staff.add_users(self.user)

cache = RoleCache(self.user)
roles_dict = cache.roles_by_course_id
assert len(roles_dict) == 4
assert roles_dict.get(ROLE_CACHE_UNGROUPED_ROLES__KEY).pop().role == 'staff'
assert roles_dict.get('library-v1:edX+quizzes').pop().course_id.course == 'quizzes'
assert roles_dict.get('course-v1:edX+toy+2012_Summer').pop().course_id.course == 'toy'
assert roles_dict.get('course-v1:edX+toy2+2013_Fall').pop().course_id.course == 'toy2'

0 comments on commit e95d7e7

Please sign in to comment.