Skip to content

Commit

Permalink
Fix Lucene query
Browse files Browse the repository at this point in the history
  • Loading branch information
LoayGhreeb committed Oct 27, 2024
1 parent 53cd50c commit 4d437c1
Show file tree
Hide file tree
Showing 5 changed files with 139 additions and 138 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@
import org.jabref.model.search.query.SqlQueryNode;
import org.jabref.search.SearchParser;

import org.apache.lucene.search.Query;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

Expand All @@ -24,9 +23,9 @@ public static String flagsToSearchExpression(SearchQuery searchQuery) {
return new SearchFlagsToExpressionVisitor(searchQuery.getSearchFlags()).visit(searchQuery.getContext());
}

public static Query searchToLucene(SearchQuery searchQuery) {
public static String searchToLucene(SearchQuery searchQuery) {
LOGGER.debug("Converting search expression to Lucene: {}", searchQuery.getSearchExpression());
return new SearchToLuceneVisitor().visit(searchQuery.getContext());
return new SearchToLuceneVisitor(searchQuery.getSearchFlags()).visit(searchQuery.getContext());
}

public static List<SearchQueryNode> extractSearchTerms(SearchQuery searchQuery) {
Expand Down
169 changes: 74 additions & 95 deletions src/main/java/org/jabref/logic/search/query/SearchToLuceneVisitor.java
Original file line number Diff line number Diff line change
@@ -1,152 +1,131 @@
package org.jabref.logic.search.query;

import java.util.EnumSet;
import java.util.List;
import java.util.Locale;

import org.jabref.model.search.LinkedFilesConstants;
import org.jabref.model.search.SearchFlags;
import org.jabref.search.SearchBaseVisitor;
import org.jabref.search.SearchParser;

import org.apache.lucene.index.Term;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.MatchNoDocsQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.RegexpQuery;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.util.QueryBuilder;
import org.apache.lucene.queryparser.classic.QueryParser;

/**
* Tests are located in {@link org.jabref.logic.search.query.SearchQueryLuceneConversionTest}.
*/
public class SearchToLuceneVisitor extends SearchBaseVisitor<Query> {
public class SearchToLuceneVisitor extends SearchBaseVisitor<String> {
private final EnumSet<SearchFlags> searchFlags;

private static final List<String> SEARCH_FIELDS = LinkedFilesConstants.PDF_FIELDS;

private final QueryBuilder queryBuilder;

public SearchToLuceneVisitor() {
this.queryBuilder = new QueryBuilder(LinkedFilesConstants.LINKED_FILES_ANALYZER);
public SearchToLuceneVisitor(EnumSet<SearchFlags> searchFlags) {
this.searchFlags = searchFlags;
}

@Override
public Query visitStart(SearchParser.StartContext ctx) {
public String visitStart(SearchParser.StartContext ctx) {
return visit(ctx.andExpression());
}

@Override
public Query visitImplicitAndExpression(SearchParser.ImplicitAndExpressionContext ctx) {
List<Query> children = ctx.expression().stream().map(this::visit).toList();
if (children.size() == 1) {
return children.getFirst();
}
BooleanQuery.Builder builder = new BooleanQuery.Builder();
for (Query child : children) {
builder.add(child, BooleanClause.Occur.MUST);
}
return builder.build();
public String visitImplicitAndExpression(SearchParser.ImplicitAndExpressionContext ctx) {
List<String> children = ctx.expression().stream().map(this::visit).toList();
return children.size() == 1 ? children.getFirst() : String.join(" ", children);
}

@Override
public Query visitParenExpression(SearchParser.ParenExpressionContext ctx) {
return visit(ctx.andExpression());
public String visitParenExpression(SearchParser.ParenExpressionContext ctx) {
String expr = visit(ctx.andExpression());
return expr.isEmpty() ? "" : "(" + expr + ")";
}

@Override
public Query visitNegatedExpression(SearchParser.NegatedExpressionContext ctx) {
Query innerQuery = visit(ctx.expression());
if (innerQuery instanceof MatchNoDocsQuery) {
return innerQuery;
}
BooleanQuery.Builder builder = new BooleanQuery.Builder();
builder.add(innerQuery, BooleanClause.Occur.MUST_NOT);
return builder.build();
public String visitNegatedExpression(SearchParser.NegatedExpressionContext ctx) {
return "NOT (" + visit(ctx.expression()) + ")";
}

@Override
public Query visitBinaryExpression(SearchParser.BinaryExpressionContext ctx) {
Query left = visit(ctx.left);
Query right = visit(ctx.right);
public String visitBinaryExpression(SearchParser.BinaryExpressionContext ctx) {
String left = visit(ctx.left);
String right = visit(ctx.right);

if (left instanceof MatchNoDocsQuery) {
if (left.isEmpty() && right.isEmpty()) {
return "";
}
if (left.isEmpty()) {
return right;
}
if (right instanceof MatchNoDocsQuery) {
if (right.isEmpty()) {
return left;
}

BooleanQuery.Builder builder = new BooleanQuery.Builder();
String operator = ctx.bin_op.getType() == SearchParser.AND ? " AND " : " OR ";
return left + operator + right;
}

@Override
public String visitComparison(SearchParser.ComparisonContext ctx) {
String term = SearchQueryConversion.unescapeSearchValue(ctx.searchValue());
boolean isQuoted = ctx.searchValue().getStart().getType() == SearchParser.STRING_LITERAL;

// unfielded expression
if (ctx.FIELD() == null) {
if (searchFlags.contains(SearchFlags.REGULAR_EXPRESSION)) {
return "/" + term + "/";
}
return isQuoted ? "\"" + escapeQuotes(term) + "\"" : QueryParser.escape(term);
}

if (ctx.bin_op.getType() == SearchParser.AND) {
builder.add(left, BooleanClause.Occur.MUST);
builder.add(right, BooleanClause.Occur.MUST);
} else if (ctx.bin_op.getType() == SearchParser.OR) {
builder.add(left, BooleanClause.Occur.SHOULD);
builder.add(right, BooleanClause.Occur.SHOULD);
String field = ctx.FIELD().getText().toLowerCase(Locale.ROOT);
if (!isValidField(field)) {
return "";
}

return builder.build();
field = "any".equals(field) || "anyfield".equals(field) ? "" : field + ":";
int operator = ctx.operator().getStart().getType();
return buildFieldExpression(field, term, operator, isQuoted);
}

@Override
public Query visitComparisonExpression(SearchParser.ComparisonExpressionContext ctx) {
return visit(ctx.comparison());
private boolean isValidField(String field) {
return "any".equals(field) || "anyfield".equals(field) || LinkedFilesConstants.PDF_FIELDS.contains(field);
}

@Override
public Query visitComparison(SearchParser.ComparisonContext ctx) {
String field = ctx.FIELD() == null ? null : ctx.FIELD().getText();
String term = SearchQueryConversion.unescapeSearchValue(ctx.searchValue());
private String buildFieldExpression(String field, String term, int operator, boolean isQuoted) {
boolean isRegexOp = isRegexOperator(operator);
boolean isNegationOp = isNegationOperator(operator);

// unfielded expression
if (field == null || "anyfield".equals(field) || "any".equals(field)) {
return createMultiFieldQuery(term, ctx.operator());
} else if (SEARCH_FIELDS.contains(field)) {
return createFieldQuery(field, term, ctx.operator());
if (isRegexOp) {
String expression = field + "/" + term + "/";
return isNegationOp ? "NOT " + expression : expression;
} else {
return new MatchNoDocsQuery();
term = isQuoted ? "\"" + escapeQuotes(term) + "\"" : QueryParser.escape(term);
String expression = field + term;
return isNegationOp ? "NOT " + expression : expression;
}
}

private Query createMultiFieldQuery(String value, SearchParser.OperatorContext operator) {
BooleanQuery.Builder builder = new BooleanQuery.Builder();
for (String field : SEARCH_FIELDS) {
builder.add(createFieldQuery(field, value, operator), BooleanClause.Occur.SHOULD);
}
return builder.build();
private static String escapeQuotes(String term) {
return term.replace("\"", "\\\"");
}

private Query createFieldQuery(String field, String value, SearchParser.OperatorContext operator) {
if (operator == null) {
return createTermOrPhraseQuery(field, value);
}

return switch (operator.getStart().getType()) {
case SearchParser.REQUAL,
SearchParser.CREEQUAL ->
new RegexpQuery(new Term(field, value));
private static boolean isNegationOperator(int operator) {
return switch (operator) {
case SearchParser.NEQUAL,
SearchParser.NCEQUAL,
SearchParser.NEEQUAL,
SearchParser.NCEEQUAL ->
createNegatedQuery(createTermOrPhraseQuery(field, value));
case SearchParser.NREQUAL,
SearchParser.NCREEQUAL ->
createNegatedQuery(new RegexpQuery(new Term(field, value)));
default ->
createTermOrPhraseQuery(field, value);
SearchParser.NCEEQUAL,
SearchParser.NREQUAL,
SearchParser.NCREEQUAL -> true;
default -> false;
};
}

private Query createNegatedQuery(Query query) {
BooleanQuery.Builder negatedQuery = new BooleanQuery.Builder();
negatedQuery.add(query, BooleanClause.Occur.MUST_NOT);
return negatedQuery.build();
}

private Query createTermOrPhraseQuery(String field, String value) {
if (value.contains("*") || value.contains("?")) {
return new TermQuery(new Term(field, value));
}
return queryBuilder.createPhraseQuery(field, value);
private static boolean isRegexOperator(int operator) {
return switch (operator) {
case SearchParser.REQUAL,
SearchParser.CREEQUAL,
SearchParser.NREQUAL,
SearchParser.NCREEQUAL -> true;
default -> false;
};
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,9 @@

import org.apache.lucene.document.Document;
import org.apache.lucene.index.StoredFields;
import org.apache.lucene.queryparser.classic.MultiFieldQueryParser;
import org.apache.lucene.queryparser.classic.ParseException;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
Expand All @@ -39,29 +42,36 @@ public final class LinkedFilesSearcher {
private final FilePreferences filePreferences;
private final BibDatabaseContext databaseContext;
private final SearcherManager searcherManager;
private final MultiFieldQueryParser parser;

public LinkedFilesSearcher(BibDatabaseContext databaseContext, LuceneIndexer linkedFilesIndexer, FilePreferences filePreferences) {
this.searcherManager = linkedFilesIndexer.getSearcherManager();
this.databaseContext = databaseContext;
this.filePreferences = filePreferences;
this.parser = new MultiFieldQueryParser(LinkedFilesConstants.PDF_FIELDS.toArray(new String[0]), LinkedFilesConstants.LINKED_FILES_ANALYZER);
parser.setDefaultOperator(QueryParser.Operator.AND);
}

public SearchResults search(SearchQuery searchQuery) {
if (!searchQuery.isValid()) {
return new SearchResults();
}

Query luceneQuery = SearchQueryConversion.searchToLucene(searchQuery);
Optional<Query> luceneQuery = getLuceneQuery(searchQuery);
if (luceneQuery.isEmpty()) {
return new SearchResults();
}

EnumSet<SearchFlags> searchFlags = searchQuery.getSearchFlags();
boolean shouldSearchInLinkedFiles = searchFlags.contains(SearchFlags.FULLTEXT) && filePreferences.shouldFulltextIndexLinkedFiles();
if (!shouldSearchInLinkedFiles) {
return new SearchResults();
}

LOGGER.debug("Searching in linked files with query: {}", luceneQuery);
LOGGER.debug("Searching in linked files with query: {}", luceneQuery.get());
try {
IndexSearcher linkedFilesIndexSearcher = acquireIndexSearcher(searcherManager);
SearchResults searchResults = search(linkedFilesIndexSearcher, luceneQuery);
SearchResults searchResults = search(linkedFilesIndexSearcher, luceneQuery.get());
releaseIndexSearcher(searcherManager, linkedFilesIndexSearcher);
return searchResults;
} catch (IOException | IndexSearcher.TooManyClauses e) {
Expand All @@ -70,6 +80,16 @@ public SearchResults search(SearchQuery searchQuery) {
return new SearchResults();
}

private Optional<Query> getLuceneQuery(SearchQuery searchQuery) {
String query = SearchQueryConversion.searchToLucene(searchQuery);
try {
return Optional.of(parser.parse(query));
} catch (ParseException e) {
LOGGER.error("Error during query parsing", e);
return Optional.empty();
}
}

private SearchResults search(IndexSearcher indexSearcher, Query searchQuery) throws IOException {
TopDocs topDocs = indexSearcher.search(searchQuery, Integer.MAX_VALUE);
StoredFields storedFields = indexSearcher.storedFields();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,8 @@ public static Stream<Arguments> searchConversion() {
Arguments.of(List.of(), "NOT a"),
Arguments.of(List.of("a", "b", "c"), "(any = a OR any = b) AND NOT (NOT c AND title = d)"),
Arguments.of(List.of("b", "c"), "title != a OR b OR c"),
Arguments.of(List.of("a", "b"), "a b")
Arguments.of(List.of("a", "b"), "a b"),
Arguments.of(List.of("term1 term2"), "\"term1 term2\"")
);
}

Expand Down
Loading

0 comments on commit 4d437c1

Please sign in to comment.