Skip to content

Commit

Permalink
Merge pull request #798 from openedx/mkeating/ENT-8602
Browse files Browse the repository at this point in the history
feat: Add --dry-run and --no-async flags to update_content_metadata job
  • Loading branch information
marlonkeating authored May 1, 2024
2 parents 9f38df3 + 30b511a commit 9f3deec
Show file tree
Hide file tree
Showing 6 changed files with 392 additions and 103 deletions.
81 changes: 50 additions & 31 deletions enterprise_catalog/apps/api/tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -233,7 +233,7 @@ class LoggedTaskWithRetry(LoggedTask): # pylint: disable=abstract-method

@shared_task(base=LoggedTaskWithRetry, bind=True, default_retry_delay=UNREADY_TASK_RETRY_COUNTDOWN_SECONDS)
@expiring_task_semaphore()
def update_full_content_metadata_task(self, force=False): # pylint: disable=unused-argument
def update_full_content_metadata_task(self, force=False, dry_run=False): # pylint: disable=unused-argument
"""
Looks up the full metadata from discovery's `/api/v1/courses` and `/api/v1/programs` endpoints to pad all
ContentMetadata objects. The metadata is merged with the existing contents
Expand All @@ -244,9 +244,9 @@ def update_full_content_metadata_task(self, force=False): # pylint: disable=unu
"""

content_keys = [metadata.content_key for metadata in ContentMetadata.objects.filter(content_type=COURSE)]
_update_full_content_metadata_course(content_keys)
_update_full_content_metadata_course(content_keys, dry_run)
content_keys = [metadata.content_key for metadata in ContentMetadata.objects.filter(content_type=PROGRAM)]
_update_full_content_metadata_program(content_keys)
_update_full_content_metadata_program(content_keys, dry_run)


def _find_best_mode_seat(seats):
Expand Down Expand Up @@ -316,7 +316,7 @@ def _normalize_course_metadata(course_metadata_record):
json_meta['normalized_metadata'] = normalized_metadata


def _update_full_content_metadata_course(content_keys):
def _update_full_content_metadata_course(content_keys, dry_run=False):
"""
Given content_keys, finds the associated ContentMetadata records with a type of course and looks up the full
course metadata from discovery's /api/v1/courses endpoint to pad the ContentMetadata objects. The course
Expand Down Expand Up @@ -393,13 +393,20 @@ def _update_full_content_metadata_course(content_keys):
course_metadata_dict.get('programs', []),
metadata_record,
)
_update_full_content_metadata_program(program_content_keys)

ContentMetadata.objects.bulk_update(
modified_content_metadata_records,
['json_metadata'],
batch_size=10,
)
_update_full_content_metadata_program(program_content_keys, dry_run)
if dry_run:
logger.info('[Dry Run] Updated content metadata json for {}: {}'.format(
content_key, json.dumps(metadata_record.json_metadata)
))

if dry_run:
logger.info('dry_run=True, not updating course metadata')
else:
ContentMetadata.objects.bulk_update(
modified_content_metadata_records,
['json_metadata'],
batch_size=10,
)

logger.info(
'Successfully updated %d of %d ContentMetadata records with full metadata from course-discovery.',
Expand All @@ -416,7 +423,7 @@ def _update_full_content_metadata_course(content_keys):
)


def _update_full_content_metadata_program(content_keys):
def _update_full_content_metadata_program(content_keys, dry_run=False):
"""
Given content_keys, finds the associated ContentMetadata records with a type of program and looks up the full
program metadata from discovery's /api/v1/programs endpoint to pad the ContentMetadata objects. The program
Expand Down Expand Up @@ -462,11 +469,14 @@ def _update_full_content_metadata_program(content_keys):
metadata_record.json_metadata.update(program_metadata_dict)
modified_content_metadata_records.append(metadata_record)

ContentMetadata.objects.bulk_update(
modified_content_metadata_records,
['json_metadata'],
batch_size=10,
)
if dry_run:
logger.info('dry_run=true, not updating program metadata')
else:
ContentMetadata.objects.bulk_update(
modified_content_metadata_records,
['json_metadata'],
batch_size=10,
)

logger.info(
'Successfully updated %d of %d ContentMetadata records with full metadata from course-discovery.',
Expand Down Expand Up @@ -1094,7 +1104,7 @@ def _reindex_algolia(indexable_content_keys, nonindexable_content_keys, dry_run=

@shared_task(base=LoggedTaskWithRetry, bind=True)
@expiring_task_semaphore()
def update_catalog_metadata_task(self, catalog_query_id, force=False): # pylint: disable=unused-argument
def update_catalog_metadata_task(self, catalog_query_id, force=False, dry_run=False): # pylint: disable=unused-argument
"""
Associates ContentMetadata objects with the appropriate catalog query by pulling data
from /search/all on discovery.
Expand All @@ -1113,7 +1123,7 @@ def update_catalog_metadata_task(self, catalog_query_id, force=False): # pylint
logger.error('Could not find a CatalogQuery with id %s', catalog_query_id)

try:
associated_content_keys = update_contentmetadata_from_discovery(catalog_query)
associated_content_keys = update_contentmetadata_from_discovery(catalog_query, dry_run)
except Exception as e:
logger.exception(
f'Something went wrong while updating content metadata from discovery using catalog: {catalog_query_id} '
Expand All @@ -1130,7 +1140,7 @@ def update_catalog_metadata_task(self, catalog_query_id, force=False): # pylint

@shared_task(base=LoggedTaskWithRetry, bind=True)
@expiring_task_semaphore()
def fetch_missing_course_metadata_task(self, force=False): # pylint: disable=unused-argument
def fetch_missing_course_metadata_task(self, force=False, dry_run=False): # pylint: disable=unused-argument
"""
Creates a CatalogQuery for all the courses that do not have ContentMetadata instance.
Expand Down Expand Up @@ -1165,7 +1175,7 @@ def fetch_missing_course_metadata_task(self, force=False): # pylint: disable=un
defaults={'content_filter': content_filter, 'title': None},
)

associated_content_keys = update_contentmetadata_from_discovery(catalog_query)
associated_content_keys = update_contentmetadata_from_discovery(catalog_query, dry_run)
logger.info('[FETCH_MISSING_METADATA] Finished fetch_missing_course_metadata_task with {} associated content '
'keys for catalog {}'.format(len(associated_content_keys), catalog_query.id))
else:
Expand All @@ -1174,7 +1184,7 @@ def fetch_missing_course_metadata_task(self, force=False): # pylint: disable=un

@shared_task(base=LoggedTaskWithRetry, bind=True)
@expiring_task_semaphore()
def fetch_missing_pathway_metadata_task(self, force=False): # pylint: disable=unused-argument
def fetch_missing_pathway_metadata_task(self, force=False, dry_run=False): # pylint: disable=unused-argument
"""
Creates ContentMetadata for Learner Pathways and all its associates.
Expand All @@ -1196,7 +1206,7 @@ def fetch_missing_pathway_metadata_task(self, force=False): # pylint: disable=u
content_filter_hash=get_content_filter_hash(content_filter),
defaults={'content_filter': content_filter, 'title': None},
)
associated_content_keys = update_contentmetadata_from_discovery(catalog_query)
associated_content_keys = update_contentmetadata_from_discovery(catalog_query, dry_run)
logger.info(
'[FETCH_MISSING_METADATA] Finished Pathways fetch_missing_pathway_metadata_task with {} associated content '
'keys for catalog {}'.format(
Expand Down Expand Up @@ -1232,7 +1242,7 @@ def fetch_missing_pathway_metadata_task(self, force=False): # pylint: disable=u
defaults={'content_filter': content_filter, 'title': None},
)

associated_content_keys = update_contentmetadata_from_discovery(catalog_query)
associated_content_keys = update_contentmetadata_from_discovery(catalog_query, dry_run)
logger.info(
'[FETCH_MISSING_METADATA] Finished programs fetch_missing_pathway_metadata_task with {} keys for '
'catalog {}'.format(
Expand Down Expand Up @@ -1260,7 +1270,7 @@ def fetch_missing_pathway_metadata_task(self, force=False): # pylint: disable=u
defaults={'content_filter': content_filter, 'title': None},
)

associated_content_keys = update_contentmetadata_from_discovery(catalog_query)
associated_content_keys = update_contentmetadata_from_discovery(catalog_query, dry_run)
logger.info(
'[FETCH_MISSING_METADATA] Finished courses fetch_missing_pathway_metadata_task with {} keys for '
'catalog {}'.format(
Expand All @@ -1276,13 +1286,22 @@ def fetch_missing_pathway_metadata_task(self, force=False): # pylint: disable=u
associated_content_metadata = ContentMetadata.objects.filter(
content_key__in=program_uuids + course_keys
)
pathway.associated_content_metadata.set(associated_content_metadata)
logger.info(
'[FETCH_MISSING_METADATA] Learner Pathway {} associated created. No. of associations: {}'.format(
pathway.content_key,
pathway.associated_content_metadata.count(),
if dry_run:
logger.info(
('[FETCH_MISSING_METADATA][Dry Run] Learner Pathway {} associations created.'
'No. of associations: {}').format(
pathway.content_key,
pathway.associated_content_metadata.count(),
)
)
else:
pathway.associated_content_metadata.set(associated_content_metadata)
logger.info(
'[FETCH_MISSING_METADATA] Learner Pathway {} associated created. No. of associations: {}'.format(
pathway.content_key,
pathway.associated_content_metadata.count(),
)
)
)
else:
pathway.associated_content_metadata.clear()

Expand Down
97 changes: 94 additions & 3 deletions enterprise_catalog/apps/api/tests/test_tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -193,8 +193,8 @@ def test_update_catalog_metadata(self, mock_update_data_from_discovery):
"""
Assert update_catalog_metadata_task is called with correct catalog_query_id
"""
tasks.update_catalog_metadata_task.apply(args=(self.catalog_query.id,))
mock_update_data_from_discovery.assert_called_with(self.catalog_query)
tasks.update_catalog_metadata_task.apply(args=(self.catalog_query.id, False, False))
mock_update_data_from_discovery.assert_called_with(self.catalog_query, False)

@mock.patch('enterprise_catalog.apps.api.tasks.update_contentmetadata_from_discovery')
def test_update_catalog_metadata_no_catalog_query(self, mock_update_data_from_discovery):
Expand Down Expand Up @@ -235,7 +235,7 @@ def test_fetch_missing_course_metadata_task(self, mock_update_data_from_discover
assert catalog_query.content_filter['content_type'] == 'course'
assert catalog_query.content_filter['key'] == [test_course]

mock_update_data_from_discovery.assert_called_with(catalog_query)
mock_update_data_from_discovery.assert_called_with(catalog_query, False)


@ddt.ddt
Expand Down Expand Up @@ -550,6 +550,48 @@ def test_update_full_metadata_program(self, mock_oauth_client, mock_partition_pr
assert metadata_1.json_metadata == program_data_1
assert metadata_2.json_metadata == program_data_2

@mock.patch('enterprise_catalog.apps.api.tasks.partition_program_keys_for_indexing')
@mock.patch('enterprise_catalog.apps.api_client.base_oauth.OAuthAPIClient')
def test_update_full_metadata_program_dry_run(self, mock_oauth_client, mock_partition_program_keys):
"""
Assert that during dry run full program metadata is not merged with original json_metadata
"""
program_key_1 = '02f5edeb-6604-4131-bf45-acd8df91e1f9'
program_data_1 = {'uuid': program_key_1, 'full_program_only_field': 'test_1'}
program_key_2 = 'be810df3-a059-42a7-b11f-d9bfb2877b15'
program_data_2 = {'uuid': program_key_2, 'full_program_only_field': 'test_2'}

# Mock out the data that should be returned from discovery's /api/v1/programs endpoint
mock_oauth_client.return_value.get.return_value.json.return_value = {
'results': [program_data_1, program_data_2],
}
mock_partition_program_keys.return_value = ([], [],)

metadata_1 = ContentMetadataFactory(content_type=PROGRAM, content_key=program_key_1)
metadata_1.catalog_queries.set([self.catalog_query])
metadata_2 = ContentMetadataFactory(content_type=PROGRAM, content_key=program_key_2)
metadata_2.catalog_queries.set([self.catalog_query])

assert metadata_1.json_metadata != program_data_1
assert metadata_2.json_metadata != program_data_2

tasks.update_full_content_metadata_task.apply(kwargs={'dry_run': True}).get()

actual_program_keys_args = mock_partition_program_keys.call_args_list[0][0][0]
self.assertEqual(set(actual_program_keys_args), {metadata_1, metadata_2})

metadata_1 = ContentMetadata.objects.get(content_key='02f5edeb-6604-4131-bf45-acd8df91e1f9')
metadata_2 = ContentMetadata.objects.get(content_key='be810df3-a059-42a7-b11f-d9bfb2877b15')

# Validate original json_metadata still in place after dry run
program_data_1.update(metadata_1.json_metadata)
program_data_2.update(metadata_2.json_metadata)
program_data_1.update({'aggregation_key': 'program:02f5edeb-6604-4131-bf45-acd8df91e1f9'})
program_data_2.update({'aggregation_key': 'program:be810df3-a059-42a7-b11f-d9bfb2877b15'})

assert metadata_1.json_metadata != program_data_1
assert metadata_2.json_metadata != program_data_2

# pylint: disable=unused-argument
@mock.patch('enterprise_catalog.apps.api.tasks.task_recently_run', return_value=False)
@mock.patch('enterprise_catalog.apps.api.tasks.partition_program_keys_for_indexing')
Expand Down Expand Up @@ -2128,3 +2170,52 @@ def test_update_full_content_metadata_course(

self.assertEqual(mock_update_content_metadata_program.call_count, 2)
self.assertEqual(mock_create_course_associated_programs.call_count, 2)

@mock.patch('enterprise_catalog.apps.api.tasks._fetch_courses_by_keys')
@mock.patch('enterprise_catalog.apps.api.tasks.DiscoveryApiClient.get_course_reviews')
@mock.patch('enterprise_catalog.apps.api.tasks.ContentMetadata.objects.filter')
@mock.patch('enterprise_catalog.apps.api.tasks.create_course_associated_programs')
@mock.patch('enterprise_catalog.apps.api.tasks._update_full_content_metadata_program')
def test_update_full_content_metadata_course_dry_run(
self,
mock_update_content_metadata_program,
mock_create_course_associated_programs,
mock_filter,
mock_get_course_reviews,
mock_fetch_courses_by_keys
):
# Mock data
content_keys = ['course1', 'course2']
full_course_dicts = [
{'key': 'course1', 'title': 'Course 1'},
{'key': 'course2', 'title': 'Course 2'}
]
reviews_for_courses_dict = {
'course1': {'reviews_count': 10, 'avg_course_rating': 4.5},
'course2': {'reviews_count': 5, 'avg_course_rating': 3.8}
}
content_metadata_1 = ContentMetadataFactory(content_type=COURSE, content_key='course1')
content_metadata_2 = ContentMetadataFactory(content_type=COURSE, content_key='course2')
metadata_records_for_fetched_keys = [content_metadata_1, content_metadata_2]

# Configure mock objects
mock_fetch_courses_by_keys.return_value = full_course_dicts
mock_get_course_reviews.return_value = reviews_for_courses_dict
mock_filter.return_value = metadata_records_for_fetched_keys

# Call the function
tasks._update_full_content_metadata_course(content_keys, dry_run=True) # pylint: disable=protected-access

mock_fetch_courses_by_keys.assert_called_once_with(content_keys)
mock_get_course_reviews.assert_called_once_with(['course1', 'course2'])
mock_filter.assert_called_once_with(content_key__in=['course1', 'course2'])

content_metadata_1.refresh_from_db()
content_metadata_2.refresh_from_db()
assert content_metadata_1.json_metadata.get('reviews_count') is None
assert content_metadata_1.json_metadata.get('avg_course_rating') is None
assert content_metadata_2.json_metadata.get('reviews_count') is None
assert content_metadata_2.json_metadata.get('avg_course_rating') is None

self.assertEqual(mock_update_content_metadata_program.call_count, 2)
self.assertEqual(mock_create_course_associated_programs.call_count, 2)
Loading

0 comments on commit 9f3deec

Please sign in to comment.