Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

search: improve with CompositeSuggestQueryParser #151

Open
wants to merge 2 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 9 additions & 5 deletions invenio_users_resources/services/users/config.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
# -*- coding: utf-8 -*-
#
# Copyright (C) 2022 TU Wien.
# Copyright (C) 2022 CERN.
# Copyright (C) 2022-2024 CERN.
# Copyright (C) 2024 KTH Royal Institute of Technology.
#
# Invenio-Users-Resources is free software; you can redistribute it and/or
Expand All @@ -26,10 +26,10 @@
SortParam,
)
from invenio_records_resources.services.records.queryparser import (
CompositeSuggestQueryParser,
FieldValueMapper,
QueryParser,
SearchFieldTransformer,
SuggestQueryParser,
)
from luqum.tree import Word

Expand Down Expand Up @@ -68,9 +68,14 @@ class UserSearchOptions(SearchOptions, SearchOptionsMixin):
# The user search needs to be highly restricted to avoid leaking
# account information, hence do not edit here unless you are
# absolutely sure what you're doing.
suggest_parser_cls = SuggestQueryParser.factory(
suggest_parser_cls = CompositeSuggestQueryParser.factory(
tree_transformer_cls=SearchFieldTransformer,
fields=["username^2", "email^2", "profile.full_name^3", "profile.affiliations"],
fields=[
"username^2",
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️
Searching for a username that contains a dash is still not working well.
For instance, searching for "one-two" seems to search for usernames starting with "one" and starting with "two", and therefore does not find anything.

Copy link
Contributor

@kpsherva kpsherva Dec 16, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think this is the issue: the text field is split on "-", see https://github.com/inveniosoftware/invenio-users-resources/blob/master/invenio_users_resources/records/mappings/os-v2/users/user-v3.0.0.json#L132
If you search by username.keyword, it should work.

"email.keyword^2",
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

email had to be changed to email.keyword for the tests to pass.

"profile.full_name^3",
"profile.affiliations",
],
# Only public emails because hidden emails are stored in email_hidden field.
allow_list=["username", "email"],
mapping={
Expand All @@ -81,7 +86,6 @@ class UserSearchOptions(SearchOptions, SearchOptionsMixin):
"name": "profile.full_name",
},
type="most_fields", # https://www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl-multi-match-query.html#multi-match-types
fuzziness="AUTO", # https://www.elastic.co/guide/en/elasticsearch/reference/current/common-options.html#fuzziness
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

fuzziness is applied in CompositeSuggestQueryParser.

)

params_interpreters_cls = [
Expand Down
35 changes: 22 additions & 13 deletions tests/services/users/test_service_users.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
#
# Copyright (C) 2022 CERN.
# Copyright (C) 2022-2024 CERN.
#
# Invenio-Users-Resources is free software; you can redistribute it and/or
# modify it under the terms of the MIT License; see LICENSE file for more
Expand Down Expand Up @@ -98,24 +98,33 @@ def test_user_search_field_not_searchable(user_service, user_pub, query):
assert res["hits"]["total"] == 0


USERNAME_BOTH = ["pub", "pubres"]
USERNAME_JOSE = ["pub"]
USERNAME_TIM = ["pubres"]


#
# Read
@pytest.mark.parametrize(
"query",
"query,expected_usernames",
[
"CERN",
"Jose CERN",
"Jose AND CERN",
"Tim",
"Tim CERN",
"Jose",
"Jos",
"[email protected]",
"pub",
("CERN", USERNAME_BOTH),
("Jose", USERNAME_JOSE),
("Jos", USERNAME_JOSE),
("Jose CERN", USERNAME_JOSE),
("Tim", USERNAME_TIM),
("Tim CERN", USERNAME_TIM),
("[email protected]", USERNAME_JOSE),
("[email protected]", USERNAME_JOSE),
("pub@inveniosoft", USERNAME_JOSE),
("pub", USERNAME_BOTH),
],
)
def test_user_search_field(user_service, user_pub, query):
def test_user_search_field(user_service, user_pub, query, expected_usernames):
"""Make sure certain fields ARE searchable."""
res = user_service.search(user_pub.identity, suggest=query).to_dict()
assert res["hits"]["total"] > 0
usernames = [entry["username"] for entry in res["hits"]["hits"]]
assert sorted(usernames) == expected_usernames


#
Expand Down