diff --git a/pom.xml b/pom.xml index 749f2cc4..0c120e03 100644 --- a/pom.xml +++ b/pom.xml @@ -223,6 +223,23 @@ + + org.apache.maven.plugins + maven-javadoc-plugin + ${version.maven-javadoc-plugin} + + -Xdoclint:none + -Xdoclint:none + + + + attach-javadocs + + jar + + + + org.apache.maven.plugins maven-source-plugin diff --git a/src/main/java/com/github/dbmdz/solrocr/lucene/OcrFieldHighlighter.java b/src/main/java/com/github/dbmdz/solrocr/lucene/OcrFieldHighlighter.java index 9cb6edbc..bb3bf8f1 100644 --- a/src/main/java/com/github/dbmdz/solrocr/lucene/OcrFieldHighlighter.java +++ b/src/main/java/com/github/dbmdz/solrocr/lucene/OcrFieldHighlighter.java @@ -38,21 +38,33 @@ import org.apache.lucene.search.uhighlight.FieldOffsetStrategy; import org.apache.lucene.search.uhighlight.OffsetsEnum; import org.apache.lucene.search.uhighlight.Passage; +import org.apache.lucene.search.uhighlight.PassageFormatter; import org.apache.lucene.search.uhighlight.PassageScorer; +import org.apache.lucene.search.uhighlight.UnifiedHighlighter; import org.apache.lucene.util.BytesRef; /** A customization of {@link FieldHighlighter} to support OCR fields */ -public class OcrFieldHighlighter extends FieldHighlighterAdapter { +public class OcrFieldHighlighter { private final ConcurrentHashMap numMatches; + private final String field; + private final FieldOffsetStrategy fieldOffsetStrategy; + private final PassageScorer passageScorer; + private final int maxPassages; + private final int maxNoHighlightPassages; + public OcrFieldHighlighter( String field, FieldOffsetStrategy fieldOffsetStrategy, PassageScorer passageScorer, int maxPassages, int maxNoHighlightPassages) { - super(field, fieldOffsetStrategy, passageScorer, maxPassages, maxNoHighlightPassages); this.numMatches = new ConcurrentHashMap<>(); + this.field = field; + this.fieldOffsetStrategy = fieldOffsetStrategy; + this.passageScorer = passageScorer; + this.maxPassages = maxPassages; + this.maxNoHighlightPassages = 
maxNoHighlightPassages; } /** @@ -111,7 +123,6 @@ public OcrSnippet[] highlightFieldForDoc( } } - @Override protected Passage[] highlightOffsetsEnums(OffsetsEnum off) { throw new UnsupportedOperationException(); } @@ -265,11 +276,14 @@ private Passage maybeAddPassage( } /** We don't provide summaries if there is no highlighting, i.e. no matches in the OCR text */ - @Override protected Passage[] getSummaryPassagesNoHighlight(int maxPassages) { return new Passage[] {}; } + public UnifiedHighlighter.OffsetSource getOffsetSource() { + return this.fieldOffsetStrategy.getOffsetSource(); + } + public int getNumMatches(int docId) { return numMatches.getOrDefault(docId, -1); } diff --git a/src/main/resources/com/github/dbmdz/solrocr/lucene/FieldHighlighterAdapter.class b/src/main/resources/com/github/dbmdz/solrocr/lucene/FieldHighlighterAdapter.class deleted file mode 100644 index b106ab05..00000000 Binary files a/src/main/resources/com/github/dbmdz/solrocr/lucene/FieldHighlighterAdapter.class and /dev/null differ diff --git a/src/main/resources/com/github/dbmdz/solrocr/lucene/FieldHighlighterAdapter.jasm b/src/main/resources/com/github/dbmdz/solrocr/lucene/FieldHighlighterAdapter.jasm deleted file mode 100644 index bc1c3773..00000000 --- a/src/main/resources/com/github/dbmdz/solrocr/lucene/FieldHighlighterAdapter.jasm +++ /dev/null @@ -1,105 +0,0 @@ -/** This is a small adapter class that allows inheriting from - * FieldHighlighter from Lucene versions older than 9.11 and - * newer. There was a breaking change in Lucene 9.11 that added - * an 8th parameter to the constructor, breaking backwards - * compatibility. - * - * This cannot be worked around at the Java source level, due to - * strict requirements surrounding the `super()` call in a - * subclass constructor. - * - * However, using JVM bytecode, we can easily work around that - * and implement a class that can dynamically select the superclass - * constructor based on the Lucene version. 
- */ -public class com/github/dbmdz/solrocr/lucene/FieldHighlighterAdapter -extends org/apache/lucene/search/uhighlight/FieldHighlighter { - - /** This constructor is a simple adapter that forwards the - * parameters to the correct superclass constructor based on - * the Lucene version. - * - * The bytecode corresponds to the following (illegal) Java code: - * - * ```java - * public FieldHighlighterAdapter( - * String fieldName, - * FieldOffsetStrategy fieldOffsetStrategy, - * PassageScorer passageScorer, - * int maxPassages, - * int maxNoHighlightPassages - * ) { - * if (LuceneVersionInfo.versionIsBefore(9, 11)) { - * super(fieldName, fieldOffsetStrategy, null, passageScorer, maxPassages, maxNoHighlightPassages, null); - * } else { - * super(fieldName, fieldOffsetStrategy, null, passageScorer, maxPassages, maxNoHighlightPassages, null, null); - * } - * } - * ``` - * - * @param fieldName The name of the field to highlight - * @param fieldOffsetStrategy The strategy to use for field - * offsets - * @param passageScorer The scorer to use for passages - * @param maxPassages The maximum number of passages to return - * @param maxNoHighlightPassages The maximum number of passages - * to return if no highlighting is possible - */ - public ( - java/lang/String, - org/apache/lucene/search/uhighlight/FieldOffsetStrategy, - org/apache/lucene/search/uhighlight/PassageScorer, - int, - int - ) void { - - // Load common constructor parameters from method params onto the stack - aload 0 // `this`, i.e. 
object reference - aload 1 // String fieldName - aload 2 // FieldOffsetStrategy - aconst_null // BreakIterator - aload 3 // PassageScorer - iload 4 // int maxPassages - iload 5 // int maxNoHighlightPassages - aconst_null // PassageFormatter - - // Check if Lucene version is lower than 9.11 - bipush 9 // major version - bipush 11 // minor version - invokestatic com/github/dbmdz/solrocr/util/LuceneVersionInfo.versionIsBefore(int, int) boolean - - // go to new constructor if return value was false - ifeq NEW_CONSTRUCTOR - - // Version check indicated a version <9.11, so we call the old - // constructor signature with 7 parameters - invokespecial org/apache/lucene/search/uhighlight/FieldHighlighter.( - java/lang/String, - org/apache/lucene/search/uhighlight/FieldOffsetStrategy, - java/text/BreakIterator, - org/apache/lucene/search/uhighlight/PassageScorer, - int, - int, - org/apache/lucene/search/uhighlight/PassageFormatter - ) void - goto BEACH - -NEW_CONSTRUCTOR: - // Versions >= 9.11 need a Comparator as the 8th parameter for the - // new constructor signature - aconst_null // Comparator - invokespecial org/apache/lucene/search/uhighlight/FieldHighlighter.( - java/lang/String, - org/apache/lucene/search/uhighlight/FieldOffsetStrategy, - java/text/BreakIterator, - org/apache/lucene/search/uhighlight/PassageScorer, - int, - int, - org/apache/lucene/search/uhighlight/PassageFormatter, - java/util/Comparator - ) void - -BEACH: - return - } -} diff --git a/src/main/resources/com/github/dbmdz/solrocr/lucene/README.md b/src/main/resources/com/github/dbmdz/solrocr/lucene/README.md deleted file mode 100644 index 85422af2..00000000 --- a/src/main/resources/com/github/dbmdz/solrocr/lucene/README.md +++ /dev/null @@ -1,27 +0,0 @@ -`FieldHighlighterAdapter.class` is a hand-crafted Java class that dynamically selects a -`FieldHighlighter` constructor based on the Solr/Lucene version. 
- -This is necessary because the `FieldHighlighter` class has changed its constructor signature -between Solr 9.6 and 9.7, introducing an 8th parameter. - -Since dynamically selecting a superclass constructor to call isn't possible at the Java source -level, we have to drop down to the bytecode level to achieve this. - -The class file is defined in the `FieldHighlighterAdapter.jasm` file, which is a [jasm][1] assembly -file. - -To compile the class file, [download `jasm`][2] and run it on the `.jasm` file from the project root: - -```bash -$ ./jasm-0.7.0/bin/jasm src/main/resources/com/github/dbmdz/solrocr/lucene/FieldHighlighterAdapter.jasm -``` - -I tried for multiple hours to get this to build automatically as part of the Maven build, but had -to give up; Maven is just too painful for this sort of thing. - -As to why we don't use the same bytecode patching technique as we did for the Solr 7 -> 8 API breakage, -the reason is that this is a breakage within a single major version; if we created a separate JAR for -each and every one of those, we'd end up with a huge number of JARs over the years, which is not ideal. - -[1]: https://github.com/roscopeco/jasm -[2]: https://github.com/roscopeco/jasm/releases \ No newline at end of file