Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: catalog query filtering without elasticsearch #675

Merged
merged 1 commit into from
Sep 28, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
134 changes: 134 additions & 0 deletions enterprise_catalog/apps/catalog/filters.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,134 @@
"""
Utility functions for catalog query filtering without elasticsearch
"""
import logging


# Module-level logger, named after this module.
logger = logging.getLogger(__name__)


# Comparison suffixes accepted after the "__" separator of a query key
# (for example "org__exclude"). A key with no suffix is compared as 'exact'.
SUPPORTED_FILTER_COMPARISONS = [
    'exact', 'not', 'exclude',  # equality / inequality
    'gt', 'gte', 'lt', 'lte',  # numeric ordering
]


class QueryFilterException(Exception):
    """
    Raised when a content catalog query cannot be evaluated by the
    filtering helpers (malformed query key or unsupported comparison).
    """


def fix_common_query_key_mistakes(raw_query_key):
    """
    Map a known-misspelled query key onto the key that was intended.

    Production queries contain a few typos that appear to have been
    copy/pasted from query to query. Keys that are not a known typo are
    returned unchanged.
    """
    known_typos_by_fix = {
        'aggregation_key': ['aggregration__key', 'aggregation__key'],
        'org__exclude': ['org__exempt'],
    }
    return next(
        (
            fixed_key
            for fixed_key, misspellings in known_typos_by_fix.items()
            if raw_query_key in misspellings
        ),
        # Not a recognized typo: hand the key back untouched.
        raw_query_key,
    )


def extract_field_and_comparison_kind(raw_query_key):
    """
    Split a query key into the content_metadata field name and the kind
    of comparison that should be used for matching.

    A key is either "<field>" or "<field>__<comparison>"; when there is
    no suffix the comparison is "exact". Raises QueryFilterException if
    the key has more than one "__" separator or if the comparison is not
    listed in SUPPORTED_FILTER_COMPARISONS.
    """
    pieces = raw_query_key.split("__")
    if len(pieces) > 2:
        raise QueryFilterException(f'invalid syntax "{raw_query_key}"')
    if len(pieces) == 2:
        field, comparison_kind = pieces
    else:
        # No separator: the whole key is the field, matched exactly.
        field, comparison_kind = raw_query_key, 'exact'
    if comparison_kind not in SUPPORTED_FILTER_COMPARISONS:
        raise QueryFilterException(f'unsupported action "{comparison_kind}" from query key "{raw_query_key}"')
    logger.debug(f'extract_field_and_action "{raw_query_key}" -> {field}, {comparison_kind}')
    return field, comparison_kind


def field_comparison(query_value, content_value, comparison_kind):
    """
    Compare a content value against a query value.

    Arguments:
        query_value: the value taken from the catalog query.
        content_value: the value taken from the content metadata; may be
            None when the content does not carry the field.
        comparison_kind (str): one of 'exact', 'not', 'exclude', 'gt',
            'gte', 'lt', 'lte'.

    Returns:
        bool: whether content_value satisfies the comparison. Ordering
        comparisons return False when content_value is missing or is not
        numeric.

    Raises:
        QueryFilterException: if comparison_kind is not recognized.
        TypeError, ValueError: if an ordering comparison is requested with
            a query_value that cannot be converted to float.
    """
    if comparison_kind == 'exact':
        return content_value == query_value
    # 'not' and 'exclude' both mean plain inequality.
    if comparison_kind in ('not', 'exclude'):
        return content_value != query_value
    if comparison_kind not in ('gt', 'gte', 'lt', 'lte'):
        raise QueryFilterException(f'invalid comparison kind "{comparison_kind}"')

    # A malformed query value is a query error, so float() is allowed to raise.
    query_number = float(query_value)
    try:
        content_number = float(content_value)
    except (TypeError, ValueError):
        # Content with a missing (None) or non-numeric value cannot satisfy
        # an ordering comparison; report "no match" instead of crashing.
        return False

    if comparison_kind == 'gt':
        return content_number > query_number
    if comparison_kind == 'gte':
        return content_number >= query_number
    if comparison_kind == 'lt':
        return content_number < query_number
    return content_number <= query_number

Check warning on line 89 in enterprise_catalog/apps/catalog/filters.py

View check run for this annotation

Codecov / codecov/patch

enterprise_catalog/apps/catalog/filters.py#L89

Added line #L89 was not covered by tests


def does_query_match_content(query_dict, content_metadata_dict):
    """
    Evaluate a query and a content_metadata object to determine
    if the given content_metadata and query match.
    This is meant to partially emulate Django FieldLookups
    for dictionaries rather than querysets.
    https://docs.djangoproject.com/en/4.2/ref/models/querysets/#field-lookups

    Arguments:
        query_dict (dict): maps query keys ("<field>" or
            "<field>__<comparison>") to a single value or a list of values.
        content_metadata_dict (dict): the content metadata to test; a field
            that is absent from it is compared as None.

    Returns:
        bool: True only if every clause of the query matches (an empty
        query matches everything).

    Raises:
        QueryFilterException: if a query key is malformed or uses an
            unsupported comparison.
    """
    # Keyed by the raw query key rather than the field name: several clauses
    # may target the same field (e.g. "price__gte" and "price__lte") and each
    # one must contribute its own result instead of overwriting the last.
    results = {}
    for raw_query_key, query_value in query_dict.items():

        query_key = fix_common_query_key_mistakes(raw_query_key)
        # Also validates the comparison kind, so no second check is needed here.
        field, comparison_kind = extract_field_and_comparison_kind(query_key)

        content_value = content_metadata_dict.get(field)
        logger.debug(f'{query_key}, {field} -> {query_value}, {content_value}')

        if isinstance(query_value, list):
            field_results = []
            for query_value_item in query_value:
                this_field_result = field_comparison(query_value_item, content_value, comparison_kind)
                logger.debug(f'{query_value_item}, {content_value}, {comparison_kind} -> {this_field_result}')
                field_results.append(this_field_result)
            # "exact" here means "IN" as in "is edx+demo IN ['edx+demo', 'mit+demo']"
            if comparison_kind == 'exact':
                field_result = any(field_results)
            # else here means "NOT IN"
            else:
                field_result = all(field_results)
        else:
            field_result = field_comparison(query_value, content_value, comparison_kind)

        logger.debug(f'{query_key}, {field} {comparison_kind} -> {query_value}, {content_value}, {field_result}')
        results[raw_query_key] = field_result
    logger.debug(results)
    return all(results.values())
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
import logging

from django.core.management.base import BaseCommand

from enterprise_catalog.apps.catalog import filters
from enterprise_catalog.apps.catalog.models import (
ContentMetadata,
EnterpriseCatalog,
)


logger = logging.getLogger(__name__)


class Command(BaseCommand):
    """
    Log, for every (content metadata, enterprise catalog) pair, whether the
    content is in the catalog's content_metadata and whether the local
    catalog filter says the catalog query matches it.
    """
    help = (
        'Compare the Enterprise Catalog Query results to our own Catalog Filter'
    )

    def handle(self, *args, **options):
        """
        Run the comparison and write one log line per pair.

        A pair whose catalog query cannot be evaluated by the filter
        (filters.QueryFilterException) is logged as a warning and skipped,
        so one unsupported query does not abort the whole run.
        """
        logger.info('compare_catalog_queries_to_filters starting...')
        # Reuse a single QuerySet so its result cache serves every pass of
        # the inner loop instead of re-querying all catalogs per content item.
        enterprise_catalogs = EnterpriseCatalog.objects.all()
        for content_metadata in ContentMetadata.objects.all():
            for enterprise_catalog in enterprise_catalogs:
                discovery_included = content_metadata in enterprise_catalog.content_metadata
                try:
                    match = filters.does_query_match_content(
                        enterprise_catalog.catalog_query.content_filter,
                        content_metadata.json_metadata
                    )
                except filters.QueryFilterException as exc:
                    logger.warning(
                        'compare_catalog_queries_to_filters '
                        f'enterprise_catalog={enterprise_catalog.uuid}, '
                        f'content_metadata={content_metadata.content_key}, '
                        f'discovery_included={discovery_included}, '
                        f'filter_error={exc}'
                    )
                    continue
                logger.info(
                    'compare_catalog_queries_to_filters '
                    f'enterprise_catalog={enterprise_catalog.uuid}, '
                    f'content_metadata={content_metadata.content_key}, '
                    f'discovery_included={discovery_included}, '
                    f'filter_match={match}'
                )
        logger.info('compare_catalog_queries_to_filters complete.')
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
from unittest import mock

from django.core.management import call_command
from django.test import TestCase

from enterprise_catalog.apps.catalog.models import (
CatalogQuery,
ContentMetadata,
EnterpriseCatalog,
)
from enterprise_catalog.apps.catalog.tests.factories import (
CatalogQueryFactory,
ContentMetadataFactory,
EnterpriseCatalogFactory,
)


class CompareCatalogQueriesToFiltersCommandTests(TestCase):
    """
    Tests for the compare_catalog_queries_to_filters management command.
    """
    command_name = 'compare_catalog_queries_to_filters'

    def setUp(self):
        """
        Create one course attached to a catalog query that backs one
        enterprise catalog.
        """
        super().setUp()
        self.catalog_query_c = CatalogQueryFactory(content_filter={'content_type': 'course'})
        self.enterprise_catalog_c = EnterpriseCatalogFactory(catalog_query=self.catalog_query_c)
        self.course_c = ContentMetadataFactory.create(content_type='course')
        self.course_c.catalog_queries.add(self.catalog_query_c)

    def tearDown(self):
        """
        Remove every object the test may have left behind.
        """
        super().tearDown()
        # clean up any stale test objects
        for model in (ContentMetadata, CatalogQuery, EnterpriseCatalog):
            model.objects.all().delete()

    def test_update_content_metadata_for_all_queries(self):
        """
        Verify that the job calls the comparison with the test data
        """
        patch_target = 'enterprise_catalog.apps.catalog.filters.does_query_match_content'
        with mock.patch(patch_target) as mock_does_query_match_content:
            mock_does_query_match_content.return_value = True
            call_command(self.command_name)
            mock_does_query_match_content.assert_called_with(
                self.catalog_query_c.content_filter,
                self.course_c.json_metadata,
            )
Loading
Loading