Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: take exec ed course data from course run instead of additional_metadata attempt 4 #988

Merged
merged 1 commit into from
Oct 29, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
37 changes: 37 additions & 0 deletions enterprise_catalog/apps/api/tests/test_tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
from enterprise_catalog.apps.catalog.models import CatalogQuery, ContentMetadata
from enterprise_catalog.apps.catalog.serializers import (
DEFAULT_NORMALIZED_PRICE,
NormalizedContentMetadataSerializer,
_find_best_mode_seat,
)
from enterprise_catalog.apps.catalog.tests.factories import (
Expand Down Expand Up @@ -57,6 +58,37 @@ def mock_task(self, *args, **kwargs): # pylint: disable=unused-argument
mock_task.name = 'mock_task'


def _hydrate_normalized_metadata(metadata_record):
    """
    Fill in the ``normalized_metadata`` and ``normalized_metadata_by_run``
    fields on a ContentMetadata record's ``json_metadata``.
    """
    json_metadata = metadata_record.json_metadata
    json_metadata['normalized_metadata'] = NormalizedContentMetadataSerializer({
        'course_metadata': json_metadata,
    }).data
    # One normalized payload per course run, keyed by the run's key.
    json_metadata['normalized_metadata_by_run'] = {
        run['key']: NormalizedContentMetadataSerializer({
            'course_run_metadata': run,
            'course_metadata': json_metadata,
        }).data
        for run in json_metadata.get('course_runs', [])
    }


def _hydrate_course_normalized_metadata():
    """
    Populate normalized_metadata fields for every course ContentMetadata record.

    Needed for tests that generate test ContentMetadata, which does not have
    normalized_metadata populated by default.
    """
    for course_record in ContentMetadata.objects.filter(content_type=COURSE):
        _hydrate_normalized_metadata(course_record)
        course_record.save()


@ddt.ddt
class TestTaskResultFunctions(TestCase):
"""
Expand Down Expand Up @@ -830,6 +862,8 @@ def setUp(self):
self.course_run_metadata_unpublished.catalog_queries.set([course_run_catalog_query])
self.course_run_metadata_unpublished.save()

_hydrate_course_normalized_metadata()

def _set_up_factory_data_for_algolia(self):
expected_catalog_uuids = sorted([
str(self.enterprise_catalog_courses.uuid),
Expand Down Expand Up @@ -1030,6 +1064,7 @@ def test_index_algolia_program_common_uuids_only(self, mock_search_client):
test_course_1.save()
test_course_2.save()
test_course_3.save()
_hydrate_course_normalized_metadata()

actual_algolia_products_sent = []

Expand Down Expand Up @@ -1129,6 +1164,7 @@ def test_index_algolia_program_unindexable_content(self, mock_search_client):
test_course_1.save()
test_course_2.save()
test_course_3.save()
_hydrate_course_normalized_metadata()

actual_algolia_products_sent = []

Expand Down Expand Up @@ -2328,6 +2364,7 @@ def test_index_algolia_duplicate_content_uuids(self, mock_search_client):
)
course_run_for_duplicate = ContentMetadataFactory(content_type=COURSE_RUN, parent_content_key='duplicateX')
course_run_for_duplicate.catalog_queries.set([self.enterprise_catalog_course_runs.catalog_query])
_hydrate_course_normalized_metadata()

actual_algolia_products_sent_sequence = []

Expand Down
206 changes: 98 additions & 108 deletions enterprise_catalog/apps/api/v1/export_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -170,69 +170,102 @@ def program_hit_to_row(hit):
return csv_row


def course_hit_to_row(hit):
"""
Helper function to construct a CSV row according to a single Algolia result course hit.
"""
csv_row = []
csv_row.append(hit.get('title'))

if hit.get('partners'):
csv_row.append(hit['partners'][0]['name'])
else:
csv_row.append(None)
def _base_csv_row_data(hit):
    """
    Returns the formatted, shared attributes common across all course types.

    Arguments:
        hit (dict): a single Algolia search result ("hit") for a course.

    Returns:
        dict: named values (title, partner name, dates, pricing, effort, etc.)
        ready for the per-course-type CSV row builders to consume.
    """
    title = hit.get('title')
    aggregation_key = hit.get('aggregation_key')
    language = hit.get('language')
    transcript_languages = ', '.join(hit.get('transcript_languages', []))
    marketing_url = hit.get('marketing_url')
    short_description = strip_tags(hit.get('short_description', ''))
    subjects = ', '.join(hit.get('subjects', []))
    skills = ', '.join([skill['name'] for skill in hit.get('skills', [])])
    outcome = strip_tags(hit.get('outcome', ''))  # What You’ll Learn

    # FIXME: currently ignores partner names when a course has multiple partners
    partner_name = hit['partners'][0]['name'] if hit.get('partners') else None

    advertised_course_run = hit.get('advertised_course_run', {})
    advertised_course_run_key = advertised_course_run.get('key')
    min_effort = advertised_course_run.get('min_effort')
    max_effort = advertised_course_run.get('max_effort')
    weeks_to_complete = advertised_course_run.get('weeks_to_complete')  # Length

    # Reformat ISO-style date strings to the export's DATE_FORMAT; when the
    # value is absent/falsy, the walrus leaves the falsy value in place.
    if start_date := advertised_course_run.get('start'):
        start_date = parser.parse(start_date).strftime(DATE_FORMAT)

    if end_date := advertised_course_run.get('end'):
        end_date = parser.parse(end_date).strftime(DATE_FORMAT)

    # NOTE(review): enroll_by appears to be a unix timestamp in the Algolia
    # index (see fromtimestamp usage) — confirm against the indexer.
    if enroll_by := advertised_course_run.get('enroll_by'):
        enroll_by = datetime.datetime.fromtimestamp(enroll_by).strftime(DATE_FORMAT)

    # Truncate fractional cents; stays None when no price is present.
    content_price = None
    if content_price := advertised_course_run.get('content_price'):
        content_price = math.trunc(float(content_price))

    return {
        'title': title,
        'partner_name': partner_name,
        'start_date': start_date,
        'end_date': end_date,
        'enroll_by': enroll_by,
        'aggregation_key': aggregation_key,
        'advertised_course_run_key': advertised_course_run_key,
        'language': language,
        'transcript_languages': transcript_languages,
        'marketing_url': marketing_url,
        'short_description': short_description,
        'subjects': subjects,
        'skills': skills,
        'min_effort': min_effort,
        'max_effort': max_effort,
        'weeks_to_complete': weeks_to_complete,
        'outcome': outcome,
        'advertised_course_run': advertised_course_run,
        'content_price': content_price
    }

csv_row.append(hit.get('language'))
csv_row.append(', '.join(hit.get('transcript_languages', [])))
csv_row.append(hit.get('marketing_url'))
csv_row.append(strip_tags(hit.get('short_description', '')))

csv_row.append(', '.join(hit.get('subjects', [])))
csv_row.append(key)
csv_row.append(hit.get('aggregation_key'))
def course_hit_to_row(hit):
"""
Helper function to construct a CSV row according to a single Algolia result course hit.
"""
row_data = _base_csv_row_data(hit)
csv_row = []
csv_row.append(row_data.get('title'))
csv_row.append(row_data.get('partner_name'))

skills = [skill['name'] for skill in hit.get('skills', [])]
csv_row.append(', '.join(skills))
advertised_course_run = row_data.get('advertised_course_run')

advertised_course_run = hit.get('advertised_course_run', {})
csv_row.append(advertised_course_run.get('min_effort'))
csv_row.append(advertised_course_run.get('max_effort'))
csv_row.append(advertised_course_run.get('weeks_to_complete')) # Length
csv_row.append(row_data.get('start_date'))
csv_row.append(row_data.get('end_date'))

csv_row.append(strip_tags(hit.get('outcome', ''))) # What You’ll Learn
# upgrade_deadline deprecated in favor of enroll_by
if upgrade_deadline := advertised_course_run.get('upgrade_deadline'):
upgrade_deadline = datetime.datetime.fromtimestamp(upgrade_deadline).strftime(DATE_FORMAT)
csv_row.append(upgrade_deadline)
csv_row.append(row_data.get('enroll_by'))
csv_row.append(', '.join(hit.get('programs', [])))
csv_row.append(', '.join(hit.get('program_titles', [])))

pacing_type = advertised_course_run.get('pacing_type')
csv_row.append(pacing_type)

csv_row.append(hit.get('level_type'))
csv_row.append(row_data.get('content_price'))
csv_row.append(row_data.get('language'))
csv_row.append(row_data.get('transcript_languages'))
csv_row.append(row_data.get('marketing_url'))
csv_row.append(row_data.get('short_description'))
csv_row.append(row_data.get('subjects'))
csv_row.append(row_data.get('advertised_course_run_key'))
csv_row.append(row_data.get('aggregation_key'))
csv_row.append(row_data.get('skills'))
csv_row.append(row_data.get('min_effort'))
csv_row.append(row_data.get('max_effort'))
csv_row.append(row_data.get('weeks_to_complete'))
csv_row.append(row_data.get('outcome'))

csv_row.append(strip_tags(hit.get('prerequisites_raw', ''))) # Pre-requisites

Expand All @@ -242,75 +275,32 @@ def course_hit_to_row(hit):
return csv_row


def fetch_and_format_registration_date(obj):
    """
    Reformat an object's ``registration_deadline`` (``YYYY-MM-DD[Thh:mm:ss]``)
    into ``MM-DD-YYYY`` for CSV export.

    Returns None when the deadline is absent or not in the expected format.
    """
    enroll_by_date = obj.get('registration_deadline')
    if not enroll_by_date:
        # Bug fix: previously None.split("T") raised AttributeError when the
        # hit carried no registration_deadline.
        return None
    stripped_enroll_by = enroll_by_date.split("T")[0]
    formatted_enroll_by = None
    try:
        enroll_by_datetime_obj = datetime.datetime.strptime(stripped_enroll_by, '%Y-%m-%d')
        formatted_enroll_by = enroll_by_datetime_obj.strftime('%m-%d-%Y')
    except ValueError as exc:
        logger.info(f"Unable to format registration deadline, failed with error: {exc}")
    return formatted_enroll_by


def exec_ed_course_to_row(hit):
    """
    Helper function to construct a CSV row according to a single executive
    education course hit.

    Column order must stay in sync with the exec-ed CSV header definition.
    """
    row_data = _base_csv_row_data(hit)
    csv_row = []
    csv_row.append(row_data.get('title'))
    # Bug fix: _base_csv_row_data returns 'partner_name', not 'partners';
    # the old lookup always yielded None, dropping the partner column.
    csv_row.append(row_data.get('partner_name'))

    csv_row.append(row_data.get('start_date'))
    csv_row.append(row_data.get('end_date'))
    csv_row.append(row_data.get('enroll_by'))

    csv_row.append(row_data.get('content_price'))
    csv_row.append(row_data.get('language'))
    csv_row.append(row_data.get('transcript_languages'))
    csv_row.append(row_data.get('marketing_url'))
    csv_row.append(row_data.get('short_description'))
    csv_row.append(row_data.get('subjects'))
    csv_row.append(row_data.get('advertised_course_run_key'))
    csv_row.append(row_data.get('aggregation_key'))
    csv_row.append(row_data.get('skills'))
    csv_row.append(row_data.get('min_effort'))
    csv_row.append(row_data.get('max_effort'))
    csv_row.append(row_data.get('weeks_to_complete'))
    csv_row.append(row_data.get('outcome'))
    csv_row.append(strip_tags(hit.get('full_description', '')))

    return csv_row
Expand Down
13 changes: 0 additions & 13 deletions enterprise_catalog/apps/api/v1/tests/test_export_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,16 +15,3 @@ def test_retrieve_available_fields(self):
"""
# assert that ALGOLIA_ATTRIBUTES_TO_RETRIEVE is a SUBSET of ALGOLIA_FIELDS
assert set(export_utils.ALGOLIA_ATTRIBUTES_TO_RETRIEVE) <= set(algolia_utils.ALGOLIA_FIELDS)

def test_fetch_and_format_registration_date(self):
    """
    Test the export properly fetches executive education registration dates
    """
    # expected hit format from algolia, properly reformatted for csv download
    assert export_utils.fetch_and_format_registration_date(
        {'registration_deadline': '2002-02-15T12:12:200'}
    ) == '02-15-2002'
    # some other (month-first) format from algolia, should return None
    assert export_utils.fetch_and_format_registration_date(
        {'registration_deadline': '02-15-2015T12:12:200'}
    ) is None
1 change: 1 addition & 0 deletions enterprise_catalog/apps/api/v1/tests/test_views.py
Original file line number Diff line number Diff line change
Expand Up @@ -959,6 +959,7 @@ class EnterpriseCatalogWorkbookViewTests(APITestMixin):
"weeks_to_complete": 6,
"upgrade_deadline": 32503680000.0,
"enroll_by": 32503680000.0,
"content_price": 2843.00
},
"course_runs": [

Expand Down
Loading
Loading