Skip to content

Commit

Permalink
feat: catalog query filtering without elasticsearch
Browse files Browse the repository at this point in the history
  • Loading branch information
johnnagro committed Sep 28, 2023
1 parent 44b4b17 commit 7290c19
Show file tree
Hide file tree
Showing 5 changed files with 711 additions and 0 deletions.
134 changes: 134 additions & 0 deletions enterprise_catalog/apps/catalog/filters.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,134 @@
"""
Utility functions for catalog query filtering without elasticsearch
"""
import logging


logger = logging.getLogger(__name__)


# Comparison suffixes accepted after the "__" separator of a catalog query
# key (for example "org__exclude").  A key without a suffix is treated as
# 'exact'; any other suffix is rejected with a QueryFilterException.
SUPPORTED_FILTER_COMPARISONS = [
    'exact',  # content value == query value
    'not',  # content value != query value
    'exclude',  # evaluated the same way as 'not'
    'gt',  # the ordered comparisons convert both values with float()
    'gte',
    'lt',
    'lte',
]


class QueryFilterException(Exception):
    """
    Raised when a content catalog query key cannot be parsed or
    asks for a comparison that catalog query filtering does not support.
    """


def fix_common_query_key_mistakes(raw_query_key):
    """
    Translate a known-misspelled query key into the key that was meant.

    Production queries carry a few typos that spread by copy-and-paste.
    Keys that are not one of the recognized typos come back unchanged.
    """
    # (intended key, misspellings seen in the wild)
    known_mistakes = (
        ('aggregation_key', ('aggregration__key', 'aggregation__key')),
        ('org__exclude', ('org__exempt',)),
    )
    return next(
        (intended for intended, misspellings in known_mistakes if raw_query_key in misspellings),
        raw_query_key,
    )


def extract_field_and_comparison_kind(raw_query_key):
    """
    Split a query key into the content_metadata field name and the
    kind of comparison that should be used when matching that field.

    A key of the form "field__kind" yields (field, kind); a key without
    the "__" separator yields (key, 'exact').  More than one separator,
    or a kind outside SUPPORTED_FILTER_COMPARISONS, raises
    QueryFilterException.
    """
    pieces = raw_query_key.split("__")
    if len(pieces) > 2:
        raise QueryFilterException(f'invalid syntax "{raw_query_key}"')

    if len(pieces) == 2:
        field, comparison_kind = pieces
    else:
        # no separator at all: the whole key is the field, matched exactly
        field, comparison_kind = raw_query_key, 'exact'

    if comparison_kind not in SUPPORTED_FILTER_COMPARISONS:
        raise QueryFilterException(f'unsupported action "{comparison_kind}" from query key "{raw_query_key}"')

    logger.debug(f'extract_field_and_action "{raw_query_key}" -> {field}, {comparison_kind}')
    return field, comparison_kind


def field_comparison(query_value, content_value, comparison_kind):
    """
    Compare one content value against one query value.

    Arguments:
        query_value: the value taken from the catalog query.
        content_value: the value taken from the content metadata; may be
            ``None`` when the content has no value for the field.
        comparison_kind (str): 'exact', 'not', 'exclude', 'gt', 'gte',
            'lt' or 'lte'.

    Returns:
        bool: whether ``content_value`` satisfies the comparison.  For the
        ordered comparisons ('gt', 'gte', 'lt', 'lte') a content value that
        is missing or not numeric never matches.

    Raises:
        QueryFilterException: if ``comparison_kind`` is not recognized.
        TypeError, ValueError: if an ordered comparison is given a
            ``query_value`` that ``float()`` cannot convert.
    """
    if comparison_kind == 'exact':
        return content_value == query_value

    # 'not' and 'exclude' are two spellings of the same inequality test
    if comparison_kind in ('not', 'exclude'):
        return content_value != query_value

    if comparison_kind in ('gt', 'gte', 'lt', 'lte'):
        try:
            content_number = float(content_value)
        except (TypeError, ValueError):
            # Content without this field (None) or with non-numeric data
            # cannot satisfy a range condition; report "no match" instead
            # of letting the conversion error abort the whole evaluation.
            return False
        query_number = float(query_value)
        if comparison_kind == 'gt':
            return content_number > query_number
        if comparison_kind == 'gte':
            return content_number >= query_number
        if comparison_kind == 'lt':
            return content_number < query_number
        return content_number <= query_number

    raise QueryFilterException(f'invalid comparison kind "{comparison_kind}"')


def does_query_match_content(query_dict, content_metadata_dict):
    """
    Evaluate a query and a content_metadata object to determine
    if the given content_metadata and query match.
    This is meant to partially emulate Django FieldLookups
    for dictionaries rather than querysets.
    https://docs.djangoproject.com/en/4.2/ref/models/querysets/#field-lookups

    Arguments:
        query_dict (dict): maps query keys ("field" or "field__kind") to a
            single value or a list of values.
        content_metadata_dict (dict): the content metadata to test; a field
            that is absent is compared as ``None``.

    Returns:
        bool: True only when every condition in ``query_dict`` holds (an
        empty query therefore matches everything).  For a list value,
        'exact' means "content value is IN the list"; every other
        comparison kind must hold for all items of the list.

    Raises:
        QueryFilterException: if a query key is malformed or names an
            unsupported comparison.
    """
    results = {}
    for raw_query_key, query_value in query_dict.items():
        query_key = fix_common_query_key_mistakes(raw_query_key)
        # Validates the key as well: malformed keys and unsupported
        # comparison kinds raise QueryFilterException here.
        field, comparison_kind = extract_field_and_comparison_kind(query_key)

        content_value = content_metadata_dict.get(field)
        logger.debug('%s, %s -> %s, %s', query_key, field, query_value, content_value)

        if isinstance(query_value, list):
            field_results = []
            for query_value_item in query_value:
                this_field_result = field_comparison(query_value_item, content_value, comparison_kind)
                logger.debug(
                    '%s, %s, %s -> %s',
                    query_value_item, content_value, comparison_kind, this_field_result,
                )
                field_results.append(this_field_result)
            # "exact" here means "IN" as in "is edx+demo IN ['edx+demo', 'mit+demo']"
            if comparison_kind == 'exact':
                field_result = any(field_results)
            # else here means "NOT IN"
            else:
                field_result = all(field_results)
        else:
            field_result = field_comparison(query_value, content_value, comparison_kind)

        logger.debug(
            '%s, %s %s -> %s, %s, %s',
            query_key, field, comparison_kind, query_value, content_value, field_result,
        )
        # Key by the raw query key, not by field: several conditions may
        # target the same field (e.g. "price__gte" and "price__lte") and
        # each must count, rather than the last one overwriting the others.
        results[raw_query_key] = field_result
    logger.debug(results)
    return all(results.values())
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
import logging

from django.core.management.base import BaseCommand

from enterprise_catalog.apps.catalog import filters
from enterprise_catalog.apps.catalog.models import (
ContentMetadata,
EnterpriseCatalog,
)


logger = logging.getLogger(__name__)


class Command(BaseCommand):
    """
    Log, for every content metadata record paired with every enterprise
    catalog, whether the catalog currently contains the content and
    whether the local catalog query filter says the two match.
    """
    help = (
        'Compare the Enterprise Catalog Query results to our own Catalog Filter'
    )

    def handle(self, *args, **options):
        """
        Run the comparison and write one log line per (content, catalog) pair.

        A catalog query that the filter cannot evaluate is reported with a
        warning and skipped, so one malformed query does not abort the run.
        """
        logger.info('compare_catalog_queries_to_filters starting...')
        # Evaluate the catalog queryset once.  A fresh ``objects.all()``
        # inside the loop would be re-run against the database for every
        # single content metadata record.
        enterprise_catalogs = list(EnterpriseCatalog.objects.all())
        for content_metadata in ContentMetadata.objects.all():
            for enterprise_catalog in enterprise_catalogs:
                discovery_included = content_metadata in enterprise_catalog.content_metadata
                try:
                    match = filters.does_query_match_content(
                        enterprise_catalog.catalog_query.content_filter,
                        content_metadata.json_metadata
                    )
                except filters.QueryFilterException as exc:
                    logger.warning(
                        'compare_catalog_queries_to_filters '
                        'enterprise_catalog=%s, '
                        'content_metadata=%s, '
                        'discovery_included=%s, '
                        'filter_error=%s',
                        enterprise_catalog.uuid,
                        content_metadata.content_key,
                        discovery_included,
                        exc,
                    )
                    continue
                logger.info(
                    'compare_catalog_queries_to_filters '
                    'enterprise_catalog=%s, '
                    'content_metadata=%s, '
                    'discovery_included=%s, '
                    'filter_match=%s',
                    enterprise_catalog.uuid,
                    content_metadata.content_key,
                    discovery_included,
                    match,
                )
        logger.info('compare_catalog_queries_to_filters complete.')
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
from unittest import mock

from django.core.management import call_command
from django.test import TestCase

from enterprise_catalog.apps.catalog.models import (
CatalogQuery,
ContentMetadata,
EnterpriseCatalog,
)
from enterprise_catalog.apps.catalog.tests.factories import (
CatalogQueryFactory,
ContentMetadataFactory,
EnterpriseCatalogFactory,
)


class CompareCatalogQueriesToFiltersCommandTests(TestCase):
    """
    Tests for the ``compare_catalog_queries_to_filters`` management command.
    """
    command_name = 'compare_catalog_queries_to_filters'

    def setUp(self):
        """
        Create a catalog query filtering on courses, an enterprise catalog
        that uses it, and a course created with that catalog query.
        """
        super().setUp()
        course_only_filter = {'content_type': 'course'}
        self.catalog_query_c = CatalogQueryFactory(content_filter=course_only_filter)
        self.enterprise_catalog_c = EnterpriseCatalogFactory(catalog_query=self.catalog_query_c)
        self.course_c = ContentMetadataFactory.create(
            content_type='course',
            catalog_queries=[self.catalog_query_c],
        )

    def tearDown(self):
        """
        Remove every object of the models this test case creates.
        """
        super().tearDown()
        # clean up any stale test objects
        for model in (ContentMetadata, CatalogQuery, EnterpriseCatalog):
            model.objects.all().delete()

    @mock.patch('enterprise_catalog.apps.catalog.filters.does_query_match_content')
    def test_update_content_metadata_for_all_queries(self, mock_does_query_match_content):
        """
        Verify that the job calls the comparison with the test data
        """
        mock_does_query_match_content.return_value = True

        call_command(self.command_name)

        expected_filter = self.catalog_query_c.content_filter
        expected_metadata = self.course_c.json_metadata
        mock_does_query_match_content.assert_called_with(expected_filter, expected_metadata)
10 changes: 10 additions & 0 deletions enterprise_catalog/apps/catalog/tests/factories.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,16 @@ class Meta:
content_type = factory.Iterator([COURSE_RUN, COURSE, PROGRAM, LEARNER_PATHWAY])
parent_content_key = None

@factory.post_generation
def catalog_queries(self, create, extracted, **kwargs):
    """
    Attach the catalog queries passed to the factory call, if any.

    Does nothing when ``create`` is falsy or when no catalog queries
    were supplied.
    """
    # http://web.archive.org/web/20230928174535/https://factoryboy.readthedocs.io/en/latest/recipes.html#simple-many-to-many-relationship
    if create and extracted:
        # Hand the whole iterable of catalog_queries over in one add() call
        self.catalog_queries.add(*extracted)  # pylint: disable=no-member

@factory.lazy_attribute
def json_metadata(self):
json_metadata = {
Expand Down
Loading

0 comments on commit 7290c19

Please sign in to comment.