-
-
Notifications
You must be signed in to change notification settings - Fork 2.6k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
53cd50c
commit 4d437c1
Showing
5 changed files
with
139 additions
and
138 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
169 changes: 74 additions & 95 deletions
169
src/main/java/org/jabref/logic/search/query/SearchToLuceneVisitor.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,152 +1,131 @@ | ||
package org.jabref.logic.search.query; | ||
|
||
import java.util.EnumSet; | ||
import java.util.List; | ||
import java.util.Locale; | ||
|
||
import org.jabref.model.search.LinkedFilesConstants; | ||
import org.jabref.model.search.SearchFlags; | ||
import org.jabref.search.SearchBaseVisitor; | ||
import org.jabref.search.SearchParser; | ||
|
||
import org.apache.lucene.index.Term; | ||
import org.apache.lucene.search.BooleanClause; | ||
import org.apache.lucene.search.BooleanQuery; | ||
import org.apache.lucene.search.MatchNoDocsQuery; | ||
import org.apache.lucene.search.Query; | ||
import org.apache.lucene.search.RegexpQuery; | ||
import org.apache.lucene.search.TermQuery; | ||
import org.apache.lucene.util.QueryBuilder; | ||
import org.apache.lucene.queryparser.classic.QueryParser; | ||
|
||
/** | ||
* Tests are located in {@link org.jabref.logic.search.query.SearchQueryLuceneConversionTest}. | ||
*/ | ||
public class SearchToLuceneVisitor extends SearchBaseVisitor<Query> { | ||
public class SearchToLuceneVisitor extends SearchBaseVisitor<String> { | ||
private final EnumSet<SearchFlags> searchFlags; | ||
|
||
private static final List<String> SEARCH_FIELDS = LinkedFilesConstants.PDF_FIELDS; | ||
|
||
private final QueryBuilder queryBuilder; | ||
|
||
public SearchToLuceneVisitor() { | ||
this.queryBuilder = new QueryBuilder(LinkedFilesConstants.LINKED_FILES_ANALYZER); | ||
public SearchToLuceneVisitor(EnumSet<SearchFlags> searchFlags) { | ||
this.searchFlags = searchFlags; | ||
} | ||
|
||
@Override | ||
public Query visitStart(SearchParser.StartContext ctx) { | ||
public String visitStart(SearchParser.StartContext ctx) { | ||
return visit(ctx.andExpression()); | ||
} | ||
|
||
@Override | ||
public Query visitImplicitAndExpression(SearchParser.ImplicitAndExpressionContext ctx) { | ||
List<Query> children = ctx.expression().stream().map(this::visit).toList(); | ||
if (children.size() == 1) { | ||
return children.getFirst(); | ||
} | ||
BooleanQuery.Builder builder = new BooleanQuery.Builder(); | ||
for (Query child : children) { | ||
builder.add(child, BooleanClause.Occur.MUST); | ||
} | ||
return builder.build(); | ||
public String visitImplicitAndExpression(SearchParser.ImplicitAndExpressionContext ctx) { | ||
List<String> children = ctx.expression().stream().map(this::visit).toList(); | ||
return children.size() == 1 ? children.getFirst() : String.join(" ", children); | ||
} | ||
|
||
@Override | ||
public Query visitParenExpression(SearchParser.ParenExpressionContext ctx) { | ||
return visit(ctx.andExpression()); | ||
public String visitParenExpression(SearchParser.ParenExpressionContext ctx) { | ||
String expr = visit(ctx.andExpression()); | ||
return expr.isEmpty() ? "" : "(" + expr + ")"; | ||
} | ||
|
||
@Override | ||
public Query visitNegatedExpression(SearchParser.NegatedExpressionContext ctx) { | ||
Query innerQuery = visit(ctx.expression()); | ||
if (innerQuery instanceof MatchNoDocsQuery) { | ||
return innerQuery; | ||
} | ||
BooleanQuery.Builder builder = new BooleanQuery.Builder(); | ||
builder.add(innerQuery, BooleanClause.Occur.MUST_NOT); | ||
return builder.build(); | ||
public String visitNegatedExpression(SearchParser.NegatedExpressionContext ctx) { | ||
return "NOT (" + visit(ctx.expression()) + ")"; | ||
} | ||
|
||
@Override | ||
public Query visitBinaryExpression(SearchParser.BinaryExpressionContext ctx) { | ||
Query left = visit(ctx.left); | ||
Query right = visit(ctx.right); | ||
public String visitBinaryExpression(SearchParser.BinaryExpressionContext ctx) { | ||
String left = visit(ctx.left); | ||
String right = visit(ctx.right); | ||
|
||
if (left instanceof MatchNoDocsQuery) { | ||
if (left.isEmpty() && right.isEmpty()) { | ||
return ""; | ||
} | ||
if (left.isEmpty()) { | ||
return right; | ||
} | ||
if (right instanceof MatchNoDocsQuery) { | ||
if (right.isEmpty()) { | ||
return left; | ||
} | ||
|
||
BooleanQuery.Builder builder = new BooleanQuery.Builder(); | ||
String operator = ctx.bin_op.getType() == SearchParser.AND ? " AND " : " OR "; | ||
return left + operator + right; | ||
} | ||
|
||
@Override | ||
public String visitComparison(SearchParser.ComparisonContext ctx) { | ||
String term = SearchQueryConversion.unescapeSearchValue(ctx.searchValue()); | ||
boolean isQuoted = ctx.searchValue().getStart().getType() == SearchParser.STRING_LITERAL; | ||
|
||
// unfielded expression | ||
if (ctx.FIELD() == null) { | ||
if (searchFlags.contains(SearchFlags.REGULAR_EXPRESSION)) { | ||
return "/" + term + "/"; | ||
} | ||
return isQuoted ? "\"" + escapeQuotes(term) + "\"" : QueryParser.escape(term); | ||
} | ||
|
||
if (ctx.bin_op.getType() == SearchParser.AND) { | ||
builder.add(left, BooleanClause.Occur.MUST); | ||
builder.add(right, BooleanClause.Occur.MUST); | ||
} else if (ctx.bin_op.getType() == SearchParser.OR) { | ||
builder.add(left, BooleanClause.Occur.SHOULD); | ||
builder.add(right, BooleanClause.Occur.SHOULD); | ||
String field = ctx.FIELD().getText().toLowerCase(Locale.ROOT); | ||
if (!isValidField(field)) { | ||
return ""; | ||
} | ||
|
||
return builder.build(); | ||
field = "any".equals(field) || "anyfield".equals(field) ? "" : field + ":"; | ||
int operator = ctx.operator().getStart().getType(); | ||
return buildFieldExpression(field, term, operator, isQuoted); | ||
} | ||
|
||
@Override | ||
public Query visitComparisonExpression(SearchParser.ComparisonExpressionContext ctx) { | ||
return visit(ctx.comparison()); | ||
private boolean isValidField(String field) { | ||
return "any".equals(field) || "anyfield".equals(field) || LinkedFilesConstants.PDF_FIELDS.contains(field); | ||
} | ||
|
||
@Override | ||
public Query visitComparison(SearchParser.ComparisonContext ctx) { | ||
String field = ctx.FIELD() == null ? null : ctx.FIELD().getText(); | ||
String term = SearchQueryConversion.unescapeSearchValue(ctx.searchValue()); | ||
private String buildFieldExpression(String field, String term, int operator, boolean isQuoted) { | ||
boolean isRegexOp = isRegexOperator(operator); | ||
boolean isNegationOp = isNegationOperator(operator); | ||
|
||
// unfielded expression | ||
if (field == null || "anyfield".equals(field) || "any".equals(field)) { | ||
return createMultiFieldQuery(term, ctx.operator()); | ||
} else if (SEARCH_FIELDS.contains(field)) { | ||
return createFieldQuery(field, term, ctx.operator()); | ||
if (isRegexOp) { | ||
String expression = field + "/" + term + "/"; | ||
return isNegationOp ? "NOT " + expression : expression; | ||
} else { | ||
return new MatchNoDocsQuery(); | ||
term = isQuoted ? "\"" + escapeQuotes(term) + "\"" : QueryParser.escape(term); | ||
String expression = field + term; | ||
return isNegationOp ? "NOT " + expression : expression; | ||
} | ||
} | ||
|
||
private Query createMultiFieldQuery(String value, SearchParser.OperatorContext operator) { | ||
BooleanQuery.Builder builder = new BooleanQuery.Builder(); | ||
for (String field : SEARCH_FIELDS) { | ||
builder.add(createFieldQuery(field, value, operator), BooleanClause.Occur.SHOULD); | ||
} | ||
return builder.build(); | ||
private static String escapeQuotes(String term) { | ||
return term.replace("\"", "\\\""); | ||
} | ||
|
||
private Query createFieldQuery(String field, String value, SearchParser.OperatorContext operator) { | ||
if (operator == null) { | ||
return createTermOrPhraseQuery(field, value); | ||
} | ||
|
||
return switch (operator.getStart().getType()) { | ||
case SearchParser.REQUAL, | ||
SearchParser.CREEQUAL -> | ||
new RegexpQuery(new Term(field, value)); | ||
private static boolean isNegationOperator(int operator) { | ||
return switch (operator) { | ||
case SearchParser.NEQUAL, | ||
SearchParser.NCEQUAL, | ||
SearchParser.NEEQUAL, | ||
SearchParser.NCEEQUAL -> | ||
createNegatedQuery(createTermOrPhraseQuery(field, value)); | ||
case SearchParser.NREQUAL, | ||
SearchParser.NCREEQUAL -> | ||
createNegatedQuery(new RegexpQuery(new Term(field, value))); | ||
default -> | ||
createTermOrPhraseQuery(field, value); | ||
SearchParser.NCEEQUAL, | ||
SearchParser.NREQUAL, | ||
SearchParser.NCREEQUAL -> true; | ||
default -> false; | ||
}; | ||
} | ||
|
||
private Query createNegatedQuery(Query query) { | ||
BooleanQuery.Builder negatedQuery = new BooleanQuery.Builder(); | ||
negatedQuery.add(query, BooleanClause.Occur.MUST_NOT); | ||
return negatedQuery.build(); | ||
} | ||
|
||
private Query createTermOrPhraseQuery(String field, String value) { | ||
if (value.contains("*") || value.contains("?")) { | ||
return new TermQuery(new Term(field, value)); | ||
} | ||
return queryBuilder.createPhraseQuery(field, value); | ||
private static boolean isRegexOperator(int operator) { | ||
return switch (operator) { | ||
case SearchParser.REQUAL, | ||
SearchParser.CREEQUAL, | ||
SearchParser.NREQUAL, | ||
SearchParser.NCREEQUAL -> true; | ||
default -> false; | ||
}; | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.