diff --git a/integration-tests/run.sh b/integration-tests/run.sh index edd17b90..949f0ace 100755 --- a/integration-tests/run.sh +++ b/integration-tests/run.sh @@ -4,8 +4,7 @@ SOLR_HOST="${SOLR_HOST:-localhost}" SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" SOLR7_VERSIONS="7.7 7.6 7.5" SOLR8_VERSIONS="8.11 8.10 8.9 8.8 8.7 8.6 8.5 8.4 8.3 8.2 8.1 8.0" -SOLR9_VERSIONS="9.6 9.5 9.4 9.3 9.2 9.1 9.0" -SOLR78_PLUGIN_PATH=/tmp/solrocr-solr78 +SOLR9_VERSIONS="9.7 9.6 9.5 9.4 9.3 9.2 9.1 9.0" wait_for_solr() { status="404" diff --git a/pom.xml b/pom.xml index fb2f89c0..749f2cc4 100644 --- a/pom.xml +++ b/pom.xml @@ -56,8 +56,8 @@ 5.10.2 5.10.0 2.0.12 - 9.6.0 - 9.10.0 + 9.7.0 + 9.11.1 2.9.1 3.2.0 @@ -223,23 +223,6 @@ - - org.apache.maven.plugins - maven-javadoc-plugin - ${version.maven-javadoc-plugin} - - -Xdoclint:none - -Xdoclint:none - - - - attach-javadocs - - jar - - - - org.apache.maven.plugins maven-source-plugin diff --git a/src/main/java/com/github/dbmdz/solrocr/lucene/OcrFieldHighlighter.java b/src/main/java/com/github/dbmdz/solrocr/lucene/OcrFieldHighlighter.java index ff5086fc..9cb6edbc 100644 --- a/src/main/java/com/github/dbmdz/solrocr/lucene/OcrFieldHighlighter.java +++ b/src/main/java/com/github/dbmdz/solrocr/lucene/OcrFieldHighlighter.java @@ -42,7 +42,7 @@ import org.apache.lucene.util.BytesRef; /** A customization of {@link FieldHighlighter} to support OCR fields */ -public class OcrFieldHighlighter extends FieldHighlighter { +public class OcrFieldHighlighter extends FieldHighlighterAdapter { private final ConcurrentHashMap numMatches; public OcrFieldHighlighter( @@ -51,8 +51,7 @@ public OcrFieldHighlighter( PassageScorer passageScorer, int maxPassages, int maxNoHighlightPassages) { - super( - field, fieldOffsetStrategy, null, passageScorer, maxPassages, maxNoHighlightPassages, null); + super(field, fieldOffsetStrategy, passageScorer, maxPassages, maxNoHighlightPassages); this.numMatches = new ConcurrentHashMap<>(); } diff --git a/src/main/java/com/github/dbmdz/solrocr/util/LuceneVersionInfo.java b/src/main/java/com/github/dbmdz/solrocr/util/LuceneVersionInfo.java new file mode 100644 index 00000000..3176cb74 --- /dev/null +++ b/src/main/java/com/github/dbmdz/solrocr/util/LuceneVersionInfo.java @@ -0,0 +1,10 @@ +package com.github.dbmdz.solrocr.util; + +import org.apache.lucene.util.Version; + +public class LuceneVersionInfo { + public static boolean versionIsBefore(int major, int minor) { + return Version.LATEST.major < major + || (Version.LATEST.major == major && Version.LATEST.minor < minor); + } +} diff --git a/src/main/java/solrocr/OcrHighlighter.java b/src/main/java/solrocr/OcrHighlighter.java index 6e2dfa41..b67809f4 100644 --- a/src/main/java/solrocr/OcrHighlighter.java +++ b/src/main/java/solrocr/OcrHighlighter.java @@ -42,6 +42,7 @@ import com.github.dbmdz.solrocr.reader.SourceReader; import com.github.dbmdz.solrocr.reader.StringSourceReader; import com.github.dbmdz.solrocr.solr.OcrHighlightParams; +import com.github.dbmdz.solrocr.util.LuceneVersionInfo; import com.github.dbmdz.solrocr.util.TimeAllowedLimit; import com.google.common.collect.ImmutableSet; import java.io.IOException; @@ -85,7 +86,6 @@ import org.apache.lucene.search.uhighlight.UnifiedHighlighter; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.InPlaceMergeSorter; -import org.apache.lucene.util.Version; import org.apache.lucene.util.automaton.CharacterRunAutomaton; import org.apache.solr.common.params.HighlightParams; import org.apache.solr.common.params.SolrParams; @@ -110,14 +110,6 @@ public class OcrHighlighter extends UnifiedHighlighter { private static final int DEFAULT_SNIPPET_LIMIT = 100; public static final String PARTIAL_OCR_HIGHLIGHTS = "partialOcrHighlights"; - private static final boolean VERSION_IS_PRE81 = - Version.LATEST.major < 8 || (Version.LATEST.major == 8 && Version.LATEST.minor < 1); - private static final boolean VERSION_IS_PRE82 = - VERSION_IS_PRE81 || (Version.LATEST.major == 8 && Version.LATEST.minor < 2); - private static final boolean VERSION_IS_PRE84 = - VERSION_IS_PRE82 || (Version.LATEST.major == 8 && Version.LATEST.minor < 4); - private static final boolean VERSION_IS_PRE89 = - VERSION_IS_PRE82 || (Version.LATEST.major == 8 && Version.LATEST.minor < 9); private static final Constructor hlComponentsConstructorLegacy; private static final Method offsetSourceGetterLegacy; private static final Method extractAutomataLegacyMethod; @@ -169,7 +161,7 @@ public class OcrHighlighter extends UnifiedHighlighter { } }; - if (VERSION_IS_PRE81) { + if (LuceneVersionInfo.versionIsBefore(8, 1)) { @SuppressWarnings("rawtypes") Class multiTermHl = Class.forName("org.apache.lucene.search.uhighlight.MultiTermHighlighting"); @@ -180,7 +172,7 @@ public class OcrHighlighter extends UnifiedHighlighter { throw new RuntimeException( "Could not make `extractAutomata` accessible, are you running a SecurityManager?"); } - } else if (VERSION_IS_PRE84) { + } else if (LuceneVersionInfo.versionIsBefore(8, 4)) { @SuppressWarnings("rawtypes") Class multiTermHl = Class.forName("org.apache.lucene.search.uhighlight.MultiTermHighlighting"); @@ -194,7 +186,7 @@ public class OcrHighlighter extends UnifiedHighlighter { } else { extractAutomataLegacyMethod = null; } - if (VERSION_IS_PRE82) { + if (LuceneVersionInfo.versionIsBefore(8, 2)) { //noinspection JavaReflectionMemberAccess hlComponentsConstructorLegacy = UHComponents.class.getDeclaredConstructor( @@ -212,7 +204,7 @@ public class OcrHighlighter extends UnifiedHighlighter { BytesRef[].class, PhraseHelper.class, CharacterRunAutomaton[].class); - } else if (VERSION_IS_PRE84) { + } else if (LuceneVersionInfo.versionIsBefore(8, 4)) { //noinspection JavaReflectionMemberAccess hlComponentsConstructorLegacy = UHComponents.class.getDeclaredConstructor( @@ -649,7 +641,7 @@ private OcrFieldHighlighter getOcrFieldHighlighter( String field, Query query, Set allTerms, int maxPassages) { // This method and some associated types changed in v8.2 and v8.4, so we have to delegate to an // adapter method for these versions - if (VERSION_IS_PRE84) { + if (LuceneVersionInfo.versionIsBefore(8, 4)) { return getOcrFieldHighlighterLegacy(field, query, allTerms, maxPassages); } @@ -690,7 +682,7 @@ private OcrFieldHighlighter getOcrFieldHighlighterLegacy( // older versions OffsetSource offsetSource; UHComponents components; - if (VERSION_IS_PRE82) { + if (LuceneVersionInfo.versionIsBefore(8, 2)) { offsetSource = this.getOffsetSourcePre82(field, terms, phraseHelper, automata); components = this.getUHComponentsPre82( @@ -728,7 +720,7 @@ private CharacterRunAutomaton[] extractAutomataLegacy( Query query, Predicate fieldMatcher, boolean lookInSpan) { Function> nopWriteFn = q -> null; try { - if (VERSION_IS_PRE81) { + if (LuceneVersionInfo.versionIsBefore(8, 1)) { return (CharacterRunAutomaton[]) extractAutomataLegacyMethod.invoke(null, query, fieldMatcher, lookInSpan, nopWriteFn); } else { @@ -898,7 +890,7 @@ static IndexReader wrap(IndexReader reader) throws IOException { .map(LeafReaderContext::reader) .map(TermVectorReusingLeafReader::new) .toArray(LeafReader[]::new); - if (VERSION_IS_PRE89) { + if (LuceneVersionInfo.versionIsBefore(8, 9)) { return new LegacyBaseCompositeReader(leafReaders) { @Override protected void doClose() throws IOException { diff --git a/src/main/resources/com/github/dbmdz/solrocr/lucene/FieldHighlighterAdapter.class b/src/main/resources/com/github/dbmdz/solrocr/lucene/FieldHighlighterAdapter.class new file mode 100644 index 00000000..8feee478 Binary files /dev/null and b/src/main/resources/com/github/dbmdz/solrocr/lucene/FieldHighlighterAdapter.class differ diff --git a/src/main/resources/com/github/dbmdz/solrocr/lucene/FieldHighlighterAdapter.jasm b/src/main/resources/com/github/dbmdz/solrocr/lucene/FieldHighlighterAdapter.jasm new file mode 100644 index 00000000..08a72949 --- /dev/null +++ b/src/main/resources/com/github/dbmdz/solrocr/lucene/FieldHighlighterAdapter.jasm @@ -0,0 +1,105 @@ +/** This is a small adapter class that allows inheriting from + * FieldHighlighter from Lucene versions older than 9.11 and + * newer. There was a breaking change in Lucene 9.11 that added + * an 8th parameter to the constructor, breaking backwards + * compatibility. + * + * This cannot be worked around at the Java source level, due to + * strict requirements surrounding the `super()` call in a + * subclass constructor. + * + * However, using JVM bytecode, we can easily work around that + * and implement a class that can dynamically select the superclass + * constructor based on the Lucene version. + */ +public class com/github/dbmdz/solrocr/lucene/FieldHighlighterAdapter +extends org/apache/lucene/search/uhighlight/FieldHighlighter { + + /** This constructor is a simple adapter that forwards the + * parameters to the correct superclass constructor based on + * the Lucene version. + * + * The bytecode corresponds to the following (illegal) Java code: + * + * ```java + * public FieldHighlighterAdapter( + * String fieldName, + * FieldOffsetStrategy fieldOffsetStrategy, + * PassageScorer passageScorer, + * int maxPassages, + * int maxNoHighlightPassages + * ) { + * if (LuceneVersionInfo.versionIsBefore(9, 11)) { + * super(fieldName, fieldOffsetStrategy, null, passageScorer, maxPassages, maxNoHighlightPassages, null); + * } else { + * super(fieldName, fieldOffsetStrategy, null, passageScorer, maxPassages, maxNoHighlightPassages, null, null); + * } + * } + * ``` + * + * @param fieldName The name of the field to highlight + * @param fieldOffsetStrategy The strategy to use for field + * offsets + * @param passageScorer The scorer to use for passages + * @param maxPassages The maximum number of passages to return + * @param maxNoHighlightPassages The maximum number of passages + * to return if no highlighting is possible + */ + public ( + java/lang/String, + org/apache/lucene/search/uhighlight/FieldOffsetStrategy, + org/apache/lucene/search/uhighlight/PassageScorer, + int, + int + ) void { + + // Load common constructor parameters from method params onto the stack + aload 0 // `this`, i.e. object reference + aload 1 // String fieldName + aload 2 // FieldOffsetStrategy + aconst_null // BreakIterator + aload 3 // PassageScorer + iload 4 // int maxPassages + iload 5 // int maxNoHighlightPassages + aconst_null // PassageFormatter + + // Check if Lucene version is lower than 9.11 + bipush 9 // major version + bipush 11 // minor version + invokestatic com/github/dbmdz/solrocr/util/LuceneVersionInfo.versionIsBefore(int, int) boolean + + // go to new constructor if return value was false + ifeq NEW_CONSTRUCTOR + + // Version check indicated a version <9.11, so we call the old + // constructor signature with 7 parameters + invokespecial org/apache/lucene/search/uhighlight/FieldHighlighter.( + java/lang/String, + org/apache/lucene/search/uhighlight/FieldOffsetStrategy, + java/text/BreakIterator, + org/apache/lucene/search/uhighlight/PassageScorer, + int, + int, + org/apache/lucene/search/uhighlight/PassageFormatter + ) void + goto BEACH + +NEW_CONSTRUCTOR: + // Versions >= 9.7 need a Comparator as the 8th parameter for the + // new constructor signature + aconst_null // Comparator + invokespecial org/apache/lucene/search/uhighlight/FieldHighlighter.( + java/lang/String, + org/apache/lucene/search/uhighlight/FieldOffsetStrategy, + java/text/BreakIterator, + org/apache/lucene/search/uhighlight/PassageScorer, + int, + int, + org/apache/lucene/search/uhighlight/PassageFormatter, + java/util/Comparator + ) void + +BEACH: + return + } +} diff --git a/src/main/resources/com/github/dbmdz/solrocr/lucene/README.md b/src/main/resources/com/github/dbmdz/solrocr/lucene/README.md new file mode 100644 index 00000000..85422af2 --- /dev/null +++ b/src/main/resources/com/github/dbmdz/solrocr/lucene/README.md @@ -0,0 +1,27 @@ +`FieldHighlighterAdapter.class` is a hand-crafted Java class that dynamically selects a +`FieldHighlighter` constructor based on the Solr/Lucene version. + +This is neccessary because the `FieldHighlighter` class has changed its constructor signature +between Solr 9.6 and 9.7, introduction an 8th parameter. + +Since dynamically selecting a superclass constructor to call isn't posssible at the Java source +level, we have to drop down to the bytecode level to achieve this. + +The classs file is defined in the `FieldHighlighterAdapter.jasm` file, which is a [jasm](1) assembly +file. + +To compile the class file, [download `jasm`](2) and run it on the `.jasm` file from the project root: + +```bash +$ ./jasm-0.7.0/bin/jasm src/main/resources/com/github/dbmdz/solrocr/lucene/FieldHighlighterAdapter.jasm +``` + +I tried for multiplpe hours to get this to build automatically as part of the Maven build, but had +to give up, Maven just is too painful for this sort of thing. + +As to why we don't use the same bytecode patching technique as we did for the Solr 7 -> 8 API breakage, +the reason is that this is a breakage within a single major version, if we created a separate JAR for +each and everyone of those, we'd end up with a huge number of JARs over the years, which is not ideal. + +[1]: https://github.com/roscopeco/jasm +[2]: https://github.com/roscopeco/jasm/releases \ No newline at end of file