Skip to content

Commit

Permalink
Merge pull request #688 from openedx/pwnage101/ENT-7729
Browse files Browse the repository at this point in the history
feat: programs ONLY inherit UUIDs for catalogs common to ALL content within.
  • Loading branch information
pwnage101 authored Oct 3, 2023
2 parents e5a5bcc + 27b4937 commit 8adc14d
Show file tree
Hide file tree
Showing 3 changed files with 226 additions and 8 deletions.
45 changes: 37 additions & 8 deletions enterprise_catalog/apps/api/tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
from celery import shared_task, states
from celery.exceptions import Ignore
from celery_utils.logged_task import LoggedTask
from django.conf import settings
from django.db import IntegrityError
from django.db.models import Prefetch, Q
from django.db.utils import OperationalError
Expand Down Expand Up @@ -676,6 +677,7 @@ def add_metadata_to_algolia_objects(
_add_in_algolia_products_by_object_id(algolia_products_by_object_id, batched_metadata)


# pylint: disable=too-many-statements
def _get_algolia_products_for_batch(
batch_num,
content_keys_batch,
Expand Down Expand Up @@ -727,6 +729,9 @@ def _get_algolia_products_for_batch(
customer_uuids_by_key = defaultdict(set)
catalog_queries_by_key = defaultdict(set)

catalog_query_uuid_by_catalog_uuid = defaultdict(set)
customer_uuid_by_catalog_uuid = defaultdict(set)

# Create a shared convenience queryset to prefetch catalogs for all metadata lookups below.
all_catalog_queries = CatalogQuery.objects.prefetch_related('enterprise_catalogs')

Expand Down Expand Up @@ -789,18 +794,42 @@ def _get_algolia_products_for_batch(
for catalog in associated_catalogs:
catalog_uuids_by_key[content_key].add(str(catalog.uuid))
customer_uuids_by_key[content_key].add(str(catalog.enterprise_uuid))
# Cache UUIDs related to each catalog.
catalog_query_uuid_by_catalog_uuid[str(catalog.uuid)] = (str(catalog_query.uuid), catalog_query.title)
customer_uuid_by_catalog_uuid[str(catalog.uuid)] = str(catalog.enterprise_uuid)

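# Second pass. This time the goal is to capture indirect relationships on programs:
# * For each program:
# - If ENABLE_ENT_7729_ONLY_SHOW_COMPLETE_PROGRAMS is enabled, absorb only the UUIDs of catalogs that are common to
#   every associated course (plus the catalog query and customer UUIDs of those catalogs).
# - Otherwise (deprecated behavior), absorb all UUIDs associated with every associated course.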
for metadata in content_metadata_to_process:
if metadata.content_type != PROGRAM:
continue
program_content_key = metadata.content_key
for metadata in program_to_courses_mapping[program_content_key]:
catalog_queries_by_key[program_content_key].update(catalog_queries_by_key[metadata.content_key])
catalog_uuids_by_key[program_content_key].update(catalog_uuids_by_key[metadata.content_key])
customer_uuids_by_key[program_content_key].update(customer_uuids_by_key[metadata.content_key])
if settings.ENABLE_ENT_7729_ONLY_SHOW_COMPLETE_PROGRAMS:
for program_metadata in content_metadata_to_process:
if program_metadata.content_type != PROGRAM:
continue
program_content_key = program_metadata.content_key
catalog_uuids_for_courses_of_program = [
catalog_uuids_by_key[course_metadata.content_key]
for course_metadata in program_to_courses_mapping[program_content_key]
]
common_catalogs = set()
if catalog_uuids_for_courses_of_program:
common_catalogs = set.intersection(*catalog_uuids_for_courses_of_program)
for course_metadata in program_to_courses_mapping[program_content_key]:
catalog_queries_by_key[program_content_key].update(
catalog_query_uuid_by_catalog_uuid[catalog_uuid] for catalog_uuid in common_catalogs
)
catalog_uuids_by_key[program_content_key].update(common_catalogs)
customer_uuids_by_key[program_content_key].update(
customer_uuid_by_catalog_uuid[catalog_uuid] for catalog_uuid in common_catalogs
)
else: # Old deprecated code in this else block. Remove as part of ENT-7729.
for metadata in content_metadata_to_process:
if metadata.content_type != PROGRAM:
continue
program_content_key = metadata.content_key
for metadata in program_to_courses_mapping[program_content_key]:
catalog_queries_by_key[program_content_key].update(catalog_queries_by_key[metadata.content_key])
catalog_uuids_by_key[program_content_key].update(catalog_uuids_by_key[metadata.content_key])
customer_uuids_by_key[program_content_key].update(customer_uuids_by_key[metadata.content_key])

# Third pass. This time the goal is to capture indirect relationships on pathways:
# * For each pathway:
Expand Down
182 changes: 182 additions & 0 deletions enterprise_catalog/apps/api/tests/test_tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -719,6 +719,188 @@ def _set_up_factory_data_for_algolia(self):
'course_metadata_unpublished': self.course_metadata_unpublished,
}

@mock.patch('django.conf.settings.ENABLE_ENT_7729_ONLY_SHOW_COMPLETE_PROGRAMS', True)
@mock.patch('enterprise_catalog.apps.api.tasks.get_initialized_algolia_client', return_value=mock.MagicMock())
def test_index_algolia_partial_program(self, mock_search_client):
    """
    With ENT-7729 enabled, a program is indexed only under the UUIDs shared by ALL of its courses.

    Fixture layout (every record below is indexable):

        course     | catalogs containing it
        -----------+-----------------------
        course-1   | 1, 2, 3
        course-2   | 2, 3, 4
        course-3   | 3, 4, 5

    All three courses belong to program-1, so program-1 should inherit catalog-3 only.
    """
    program = ContentMetadataFactory(content_type=PROGRAM, content_key='program-1')
    courses = [
        ContentMetadataFactory(content_type=COURSE, content_key=course_key)
        for course_key in ('test-course-1', 'test-course-2', 'test-course-3')
    ]

    # Every course is a member of the single program.
    for course in courses:
        course.associated_content_metadata.set([program])

    # Five catalog queries, each backing exactly one enterprise catalog.
    queries = [CatalogQueryFactory(uuid=uuid.uuid4()) for _ in range(5)]
    catalogs = [EnterpriseCatalogFactory(catalog_query=query) for query in queries]

    # Slide a window of three queries across the courses: [0:3], [1:4], [2:5].
    for offset, course in enumerate(courses):
        course.catalog_queries.set(queries[offset:offset + 3])
    for course in courses:
        course.save()

    # Stand-in for `replace_all_objects`: drain the products iterable (forcing generator evaluation) and keep the
    # result so that it can be inspected below.
    sent_products = []

    def capture_products(products_iterable):
        sent_products[:] = products_iterable

    mock_search_client().replace_all_objects.side_effect = capture_products

    fields_patch = mock.patch('enterprise_catalog.apps.api.tasks.ALGOLIA_FIELDS', self.ALGOLIA_FIELDS)
    with fields_patch, self.assertLogs(level='INFO') as info_logs:
        tasks.index_enterprise_catalog_in_algolia_task()  # pylint: disable=no-value-for-parameter

    # The first "products found" log line must report the expected product count for this fixture.
    found_records = [line for line in info_logs.output if ' products found.' in line]
    assert ' 15 products found.' in found_records[0]

    # Only the third catalog (and its query/customer) is common to all three courses.
    shared_catalog = catalogs[2]
    shared_query = queries[2]
    program_uuid = program.json_metadata.get('uuid')
    expected_objects = [
        {
            'objectID': f'program-{program_uuid}-catalog-uuids-0',
            'enterprise_catalog_uuids': [str(shared_catalog.uuid)],
        },
        {
            'objectID': f'program-{program_uuid}-customer-uuids-0',
            'enterprise_customer_uuids': [str(shared_catalog.enterprise_uuid)],
        },
        {
            'objectID': f'program-{program_uuid}-catalog-query-uuids-0',
            'enterprise_catalog_query_uuids': [str(shared_query.uuid)],
            'enterprise_catalog_query_titles': [shared_query.title],
        },
    ]

    # Compare only the program's objects, ordered by objectID so the comparison is order-independent.
    by_object_id = itemgetter('objectID')
    expected_program_products = sorted(expected_objects, key=by_object_id)
    actual_program_products = sorted(
        (product for product in sent_products if program_uuid in product['objectID']),
        key=by_object_id,
    )
    assert expected_program_products == actual_program_products

@mock.patch('django.conf.settings.ENABLE_ENT_7729_ONLY_SHOW_COMPLETE_PROGRAMS', False)
@mock.patch('enterprise_catalog.apps.api.tasks.get_initialized_algolia_client', return_value=mock.MagicMock())
def test_index_algolia_partial_program_disabled(self, mock_search_client):
    """
    With ENT-7729 disabled, a program is indexed under the UUIDs of EVERY one of its courses.

    This pins the old behavior prior to ENT-7729. Remove this unit test as part of that ticket.

    Fixture layout (every record below is indexable):

        course     | catalogs containing it
        -----------+-----------------------
        course-1   | 1, 2, 3
        course-2   | 2, 3, 4
        course-3   | 3, 4, 5

    All three courses belong to program-1, so program-1 should inherit all five catalogs.
    """
    program = ContentMetadataFactory(content_type=PROGRAM, content_key='program-1')
    courses = [
        ContentMetadataFactory(content_type=COURSE, content_key=course_key)
        for course_key in ('test-course-1', 'test-course-2', 'test-course-3')
    ]

    # Every course is a member of the single program.
    for course in courses:
        course.associated_content_metadata.set([program])

    # Five catalog queries, each backing exactly one enterprise catalog.
    queries = [CatalogQueryFactory(uuid=uuid.uuid4()) for _ in range(5)]
    catalogs = [EnterpriseCatalogFactory(catalog_query=query) for query in queries]

    # Slide a window of three queries across the courses: [0:3], [1:4], [2:5].
    for offset, course in enumerate(courses):
        course.catalog_queries.set(queries[offset:offset + 3])
    for course in courses:
        course.save()

    # Stand-in for `replace_all_objects`: drain the products iterable (forcing generator evaluation) and keep the
    # result so that it can be inspected below.
    sent_products = []

    def capture_products(products_iterable):
        sent_products[:] = products_iterable

    mock_search_client().replace_all_objects.side_effect = capture_products

    fields_patch = mock.patch('enterprise_catalog.apps.api.tasks.ALGOLIA_FIELDS', self.ALGOLIA_FIELDS)
    with fields_patch, self.assertLogs(level='INFO') as info_logs:
        tasks.index_enterprise_catalog_in_algolia_task()  # pylint: disable=no-value-for-parameter

    # The first "products found" log line must report the expected product count for this fixture.
    found_records = [line for line in info_logs.output if ' products found.' in line]
    assert ' 15 products found.' in found_records[0]

    # The program is expected to carry the (sorted) union of everything attached to its courses.
    program_uuid = program.json_metadata.get('uuid')
    expected_objects = [
        {
            'objectID': f'program-{program_uuid}-catalog-uuids-0',
            'enterprise_catalog_uuids': sorted(str(catalog.uuid) for catalog in catalogs),
        },
        {
            'objectID': f'program-{program_uuid}-customer-uuids-0',
            'enterprise_customer_uuids': sorted(str(catalog.enterprise_uuid) for catalog in catalogs),
        },
        {
            'objectID': f'program-{program_uuid}-catalog-query-uuids-0',
            'enterprise_catalog_query_uuids': sorted(str(query.uuid) for query in queries),
            'enterprise_catalog_query_titles': sorted(query.title for query in queries),
        },
    ]

    # Compare only the program's objects, ordered by objectID so the comparison is order-independent.
    by_object_id = itemgetter('objectID')
    expected_program_products = sorted(expected_objects, key=by_object_id)
    actual_program_products = sorted(
        (product for product in sent_products if program_uuid in product['objectID']),
        key=by_object_id,
    )
    assert expected_program_products == actual_program_products

def test_index_content_keys_in_algolia(self):
"""
Test the _index_content_keys_in_algolia helper function to make sure it creates a generator to support batching
Expand Down
7 changes: 7 additions & 0 deletions enterprise_catalog/settings/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -429,3 +429,10 @@
'VERSION': '1.0.0',
'SERVE_INCLUDE_SCHEMA': False,
}

# (ENT-7729) When indexing programs in Algolia, only attach catalog query/catalog/customer UUIDs common to all content
# within the program. This should have the outcome of only showing completely accessible programs in the catalog search
# page: https://enterprise.edx.org/<customer>/search
#
# When False, the indexing task keeps the deprecated behavior: a program inherits the union of the UUIDs attached to
# all of its courses.
#
# Enable this on stage first.
ENABLE_ENT_7729_ONLY_SHOW_COMPLETE_PROGRAMS = False

0 comments on commit 8adc14d

Please sign in to comment.