From 13a1fca02b5cc1f73b154d4f2b694e7aa53c3a7c Mon Sep 17 00:00:00 2001 From: Pablo Tamarit Date: Mon, 9 Dec 2024 09:10:07 +0100 Subject: [PATCH] subjects: improve search with CompositeSuggestQueryParser --- .../contrib/subjects/config.py | 9 ++--- invenio_vocabularies/services/querystr.py | 5 +++ .../subjects/test_subjects_resource.py | 36 +++++++++++-------- 3 files changed, 31 insertions(+), 19 deletions(-) diff --git a/invenio_vocabularies/contrib/subjects/config.py b/invenio_vocabularies/contrib/subjects/config.py index 888362ff..eb7c3959 100644 --- a/invenio_vocabularies/contrib/subjects/config.py +++ b/invenio_vocabularies/contrib/subjects/config.py @@ -15,10 +15,12 @@ from invenio_i18n import lazy_gettext as _ from invenio_records_resources.services import SearchOptions from invenio_records_resources.services.records.components import DataComponent +from invenio_records_resources.services.records.queryparser import ( + CompositeSuggestQueryParser, +) from werkzeug.local import LocalProxy from ...services.components import PIDComponent -from ...services.querystr import FilteredSuggestQueryParser subject_schemes = LocalProxy( lambda: current_app.config["VOCABULARIES_SUBJECTS_SCHEMES"] @@ -38,9 +40,8 @@ class SubjectsSearchOptions(SearchOptions): """Search options.""" - suggest_parser_cls = FilteredSuggestQueryParser.factory( - filter_field="scheme", - fields=[ # suggest fields + suggest_parser_cls = CompositeSuggestQueryParser.factory( + fields=[ "subject^100", localized_title, "synonyms^20", diff --git a/invenio_vocabularies/services/querystr.py b/invenio_vocabularies/services/querystr.py index b84cf6be..1886f5f4 100644 --- a/invenio_vocabularies/services/querystr.py +++ b/invenio_vocabularies/services/querystr.py @@ -8,6 +8,7 @@ """Querystring parsing.""" +import warnings from functools import partial from invenio_records_resources.services.records.params import SuggestQueryParser @@ -20,6 +21,10 @@ class FilteredSuggestQueryParser(SuggestQueryParser): @classmethod def factory(cls, filter_field=None, **extra_params): """Create a prepared instance of the query parser.""" + warnings.warn( + "FilteredSuggestQueryParser is deprecated, use SuggestQueryParser or CompositeSuggestQueryParser instead", + DeprecationWarning, + ) return partial(cls, filter_field=filter_field, extra_params=extra_params) def __init__(self, identity=None, filter_field=None, extra_params=None): diff --git a/tests/contrib/subjects/test_subjects_resource.py b/tests/contrib/subjects/test_subjects_resource.py index 77abd01b..f1c88592 100644 --- a/tests/contrib/subjects/test_subjects_resource.py +++ b/tests/contrib/subjects/test_subjects_resource.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright (C) 2021 CERN. +# Copyright (C) 2021-2024 CERN. # Copyright (C) 2021 Northwestern University. # # Invenio-Vocabularies is free software; you can redistribute it and/or @@ -129,22 +129,28 @@ def example_subjects(app, db, search_clear, identity, service): def test_suggest(client, h, prefix, example_subjects): - """Test FilteredSuggestParam.""" - # No filter + """Test the subjects query parser.""" + # Full word + res = client.get(f"{prefix}?suggest=abdomen", headers=h) + assert res.json["hits"]["total"] == 4 + # The less specific text appears at the end of the list + assert res.json["hits"]["hits"][3]["subject"] == "Abdomen, Acute" + + # Incomplete word -> finds the same results res = client.get(f"{prefix}?suggest=abdo", headers=h) assert res.json["hits"]["total"] == 4 - # Single filter - res = client.get(f"{prefix}?suggest=MeSH:abdo", headers=h) - assert res.status_code == 200 - assert res.json["hits"]["total"] == 2 + # Multiple words -> narrows down the results + res = client.get(f"{prefix}?suggest=abdomen%20acute", headers=h) + assert res.json["hits"]["total"] == 1 + assert res.json["hits"]["hits"][0]["subject"] == "Abdomen, Acute" - # Multiple filters - res = client.get(f"{prefix}?suggest=MeSH,Other:abdo", headers=h) - assert res.status_code == 200 - assert res.json["hits"]["total"] == 3 + # Multiple words with incomplete last word -> still finds the same result + res = client.get(f"{prefix}?suggest=abdomen%20acu", headers=h) + assert res.json["hits"]["total"] == 1 + assert res.json["hits"]["hits"][0]["subject"] == "Abdomen, Acute" - # Ignore non existing filter - res = client.get(f"{prefix}?suggest=MeSH,Foo:abdo", headers=h) - assert res.status_code == 200 - assert res.json["hits"]["total"] == 2 + # Multiple words with incomplete last word in other order -> still finds the same result + res = client.get(f"{prefix}?suggest=acute%20abdo", headers=h) + assert res.json["hits"]["total"] == 1 + assert res.json["hits"]["hits"][0]["subject"] == "Abdomen, Acute"