From d6c29d7766a1aa4860f8b65000e19ad407c73937 Mon Sep 17 00:00:00 2001
From: Johannes Baiter <johannes.baiter@bsb-muenchen.de>
Date: Tue, 31 May 2022 16:41:12 +0200
Subject: [PATCH] Update docs, use material theme

- Updated CI config to publish documentation to gh-pages
- Updated namespaces in examples to reflect recent change
- Updated changelog to always have a link to the GitHub release page
- Added references to the new package management repository
- mkdocs-material allows us to highlight individual lines in code
  blocks, which should help with the readability of the configuration
  snippets.
---
 .github/workflows/ci.yml | 47 +++++++++++++++++++
 docs/alternatives.md     |  2 +-
 docs/changes.md          | 69 ++++++++++++++++++++++++++++
 docs/example.md          | 23 +++++-----
 docs/formats.md          |  2 +-
 docs/index.md            | 12 ++---
 docs/indexing.md         | 12 ++---
 docs/installation.md     | 97 ++++++++++++++++++++++++++++------------
 docs/performance.md      | 11 +++--
 mkdocs.yml               | 42 ++++++++++-------
 10 files changed, 246 insertions(+), 71 deletions(-)
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index a2c07dcc..01f692b5 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -39,3 +39,50 @@ jobs:
         run: chmod -R a+rw ./target
       - name: Run integration tests
         run: ./integration-tests/run.sh
+
+  publish_mkdocs_latest:  # tag pushes: deploy docs as <tag> and alias `latest`
+    if: github.event_name == 'push' && startsWith(github.ref, 'refs/tags/')
+    runs-on: ubuntu-latest
+    needs: build
+    env:  # committer identity picked up by git for the gh-pages commit
+      GIT_COMMITTER_NAME: mkdocs-mike
+      GIT_COMMITTER_EMAIL: mkdocs-mike@nowhere.tld
+    steps:
+      - uses: actions/checkout@v3
+      - name: Set up Python 3.10
+        uses: actions/setup-python@v3
+        with:
+          python-version: '3.10'  # quoted: a bare 3.10 is the YAML float 3.1
+          cache: 'pip'
+      - name: Install dependencies
+        run: |
+          python -m pip install --upgrade pip
+          pip install mkdocs singledispatch mkdocs-material mike
+      - name: Deploy documentation with mike
+        run: mike deploy --update-aliases ${{ github.ref_name }} latest
+      - name: Push gh-pages branch
+        run: git push origin gh-pages:gh-pages
+
+
+  publish_mkdocs_wip:  # pushes to main: deploy docs as version `wip`
+    if: github.event_name == 'push' && github.ref == 'refs/heads/main'
+    runs-on: ubuntu-latest
+    needs: build
+    env:  # committer identity picked up by git for the gh-pages commit
+      GIT_COMMITTER_NAME: mkdocs-mike
+      GIT_COMMITTER_EMAIL: mkdocs-mike@nowhere.tld
+    steps:
+      - uses: actions/checkout@v3
+      - name: Set up Python 3.10
+        uses: actions/setup-python@v3
+        with:
+          python-version: '3.10'  # quoted: a bare 3.10 is the YAML float 3.1
+          cache: 'pip'
+      - name: Install dependencies
+        run: |
+          python -m pip install --upgrade pip
+          pip install mkdocs singledispatch mkdocs-material mike
+      - name: Deploy documentation with mike
+        run: mike deploy wip
+      - name: Push gh-pages branch
+        run: git push origin gh-pages:gh-pages
diff --git a/docs/alternatives.md b/docs/alternatives.md
index c44ebb4f..280f7f9b 100644
--- a/docs/alternatives.md
+++ b/docs/alternatives.md
@@ -43,7 +43,7 @@ chain. This component must to be placed **after the tokenizer**:
 
 A full field definition for an OCR field with alternative expansion could look like this:
 
-```xml
+```xml hl_lines="3 4 5 6 7 9"
 <fieldType name="text_ocr" class="solr.TextField">
   <analyzer type="index">
     <charFilter class="solrocr.ExternalUtf8ContentFilterFactory"/>
diff --git a/docs/changes.md b/docs/changes.md
index 4aba2500..44e3cec0 100644
--- a/docs/changes.md
+++ b/docs/changes.md
@@ -1,16 +1,67 @@
+## 0.8.0 (2022-05-??)
+The major improvement in this version is compatibility with Solr 9.
+
+Due to a number of API changes in Solr and Lucene, we now have to ship two separate releases,
+one for Solr 7 and 8 and one for Solr 9, so please take extra care when downloading to pick
+the correct release. In the Package Repository, the Solr 7/8 release will always have a version
+with the suffix `-solr78`.
+
+We also **changed the package namespaces** for all user-facing components so they are easier
+to identify and write. What this means is that you will need to change the `class="..."`
+attributes in your `solrconfig.xml` and `schema.xml` to match the new package namespaces.
+Whenever you previously had `de.digitalcollections.solrocr.<other stuff>.ClassName`, you
+now have to simply write `solrocr.ClassName`.
+
+**New Features:**
+
+- For users running Solr in the Solrcloud mode, the plugin can now be installed via Solr's
+  [Package Manager](https://solr.apache.org/guide/solr/latest/configuration-guide/package-manager.html):
+  ```
+  $ bin/solr package add-repo dbmdz.github.io https://dbmdz.github.io/solr
+  $ bin/solr package install ocrhighlighting  # For Solr 9
+  $ bin/solr package install ocrhighlighting:0.8.0-solr78  # For Solr 7 and 8
+  ```
+  Note that Solr 7/8 users need to manually specify the version.
+
+**API changes:**
+
+- Changed deployment process to use two separate packages, one for Solr 9 and later and one for Solr 7/8, with a `-solr78.jar` suffix
+- Changed namespace of all user-facing components to simply `solrocr` and moved all
+  user-facing component classes to it:
+    * `de.digitalcollections.solrocr.lucene.filters.OcrCharFilterFactory`<br>
+      → `solrocr.OcrCharFilterFactory`
+    * `de.digitalcollections.solrocr.lucene.filters.ExternalUtf8ContentFilterFactory`<br>
+      → `solrocr.ExternalUtf8ContentFilterFactory`
+    * `de.digitalcollections.solrocr.lucene.OcrAlternativesFilterFactory`<br>
+      → `solrocr.OcrAlternativesFilterFactory`
+    * `de.digitalcollections.solrocr.lucene.OcrHighlightComponent`<br>
+      → `solrocr.OcrHighlightComponent`
+
+**Bugfixes**
+
+- Fix handling of quoted property values in hOCR title tags. We deviate a bit from the spec
+  to be more compatible with existing real-world data: Values like `x_source` can now either
+  be quoted in single- or double-quotes, or not at all; the parser will handle every case.
+
 ## 0.7.2 (2022-03-22)
+[GitHub Release](https://github.com/dbmdz/solr-ocrhighlighting/releases/tag/0.7.2)
+
 And yet another bugfix release.
 
 **Bugfixes:**
+
 - Fixed using single-quotes in MiniOCR input, previously these files were not recognized as valid MiniOCR files
   ([#247](https://github.com/dbmdz/solr-ocrhighlighting/pull/247), thanks @mspalti for the fix!)
 - Fixed `OutOfBoundsException` when using alternatives with very long tokens (
   [#230](https://github.com/dbmdz/solr-ocrhighlighting/pull/230), thanks @fd17 for the report and review)
 
 ## 0.7.1 (2021-09-24)
+[GitHub Release](https://github.com/dbmdz/solr-ocrhighlighting/releases/tag/0.7.1)
+
 Another bugfix release, upgrading is recommended.
 
 **Bugfixes:**
+
 - Fix text display and "number of snippets" slider in demo setup
 - Fix instances where we were using Java SDK methods that relied on a default locale, which led to
   hard-to-debug issues in some locales
@@ -20,6 +71,8 @@ Another bugfix release, upgrading is recommended.
 - Fix issue with namespaced ALTO documents
 
 ## 0.7.0 (2021-07-12)
+[GitHub Release](https://github.com/dbmdz/solr-ocrhighlighting/releases/tag/0.7.0)
+
 This is a bugfix release, especially users with ALTO files are encouraged to upgrade. Other than
 bugfixes, this is the first release to support Solr 8.9.
 
@@ -30,6 +83,8 @@ bugfixes, this is the first release to support Solr 8.9.
 - Fix issue when an hOCR file had empty OCR boxes
 
 ## 0.6.0 (2021-05-11)
+[GitHub Release](https://github.com/dbmdz/solr-ocrhighlighting/releases/tag/0.6.0)
+
 This is a major new release with significant improvements in stability, accuracy and most importantly performance.
 Updating is **highly** recommended, especially for ALTO users, who can expect a speed-up in indexing of up to
 **6000% (i.e. 60x as fast)**. We also recommend updating your JVM to at least Java 11 (LTS), since Java 9 introduced
@@ -77,6 +132,7 @@ significantly.
 
 
 **API changes:**
+
 - **No more need for an explicit `hl.fl` parameter for highlighting non-OCR fields.** By default,
   if highlighting is enabled and  no `hl.fl` parameter is passed by the user, Solr falls back to
   highlighting every stored field  in the document. Previously this did not work with the plugin and
@@ -93,6 +149,7 @@ significantly.
     See the above section unter *New Features* for an explanation of this flag.
 
 **Bugfixes:**
+
 - **Improved tolerance for incomplete bounding boxes.** Previously the occurrence of an incomplete
   bounding box in a snippet (i.e. with one or more missing coordinates) would crash the whole query.
   We now simply insert a `0` default value in these cases.
@@ -103,9 +160,12 @@ significantly.
   the OCR parsers would try to either load a file from the empty string or parse OCR markup from it.
 
 ## 0.5.0 (2020-10-07)
+[GitHub Release](https://github.com/dbmdz/solr-ocrhighlighting/releases/tag/0.5.0)
+
 No breaking changes this time around, but a few essential bugfixes, more stability and a new feature.
 
 **API changes:**
+
 - **Snippets are now sorted by their descending score/relevancy.** Previously the order was non-deterministic, which
   broke the use case for dynamically fetching more snippets.
 - **Add a new boolean `hl.ocr.alignSpans` parameter to align text and image spans.** This new option (disabled by
@@ -113,6 +173,7 @@ No breaking changes this time around, but a few essential bugfixes, more stabili
   to correspond to actual OCR word boundaries.
 
 **Bugfixes:**
+
 - **Fix regular highlighting in distributed setup.** Regular, non-OCR highlighting was broken in previous versions due
   to a bad check in the shard response collection phase if users only requested regular highlighting, but not for OCR
   fields
@@ -125,9 +186,12 @@ No breaking changes this time around, but a few essential bugfixes, more stabili
 
 
 ## 0.4.1 (2020-06-02)
+[GitHub Release](https://github.com/dbmdz/solr-ocrhighlighting/releases/tag/0.4.1)
+
 This is a patch release with a fix for excessive memory usage during indexing.
 
 ## 0.4.0 (2020-05-11)
+[GitHub Release](https://github.com/dbmdz/solr-ocrhighlighting/releases/tag/0.4.0)
 
 This is a major release with a focus on compatibility and performance.
 
@@ -136,6 +200,7 @@ This is a major release with a focus on compatibility and performance.
  the future.
 
 **Breaking API changes:**
+
 - **Add new `pages` key to snippet response with page dimensions**. This can be helpful if you need to calculate
   the snippet coordinates relative to the page image dimensions.
 - **Replace `page` key on regions and highlights with `pageIdx`**. That is, instead of a string with the
@@ -146,11 +211,13 @@ This is a major release with a focus on compatibility and performance.
   disjunct parts of the page or even multiple pages.
 
 **Format changes:**
+
 - hocr: Add support for retrieving page identifier from `x_source` an `ppageno` properties
 - hocr: Strip out title tag during indexing and highlighting
 - ALTO: The plugin now supports ALTO files with coordinates expressed as floating point numbers (thanks to @mspalti!)
 
 **Performance:**
+
 - Add concurrent preloading for highlighting target files. This can result in a nice performance boost, since by the
   time the plugin gets to actually highlighting the files, their contents are already in the OS' page cache. See
   the [Performance Tuning section in the docs](https://dbmdz.github.io/solr-ocrhighlighting/performance/) for more
@@ -159,12 +226,14 @@ This is a major release with a focus on compatibility and performance.
   compared to previous versions.
 
 **Miscellaneous:**
+
 - Log warnings during source pointer parsing
 - Filter out empty files during indexing
 - Add new documentation section on performance tuning
 - Empty regions or regions with only whitespace are no longer included in the output
 
 ## 0.3.1 (2019-07-26)
+[GitHub Release](https://github.com/dbmdz/solr-ocrhighlighting/releases/tag/0.3.1)
 
 This is patch release that fixes compatibility with Solr/Lucene 8.2.
 
diff --git a/docs/example.md b/docs/example.md
index b3bb00cd..ca20a6d0 100644
--- a/docs/example.md
+++ b/docs/example.md
@@ -44,17 +44,17 @@ To run the example setup yourself, you will need:
 ## Solr Configuration Walkthrough
 
 [`solrconfig.xml`](https://github.com/dbmdz/solr-ocrhighlighting/blob/master/example/solr/cores/ocr/conf/solrconfig.xml)
-```xml
+```xml hl_lines="7 10 11 21"
 <config>
-  <luceneMatchVersion>7.6</luceneMatchVersion>
-  <directoryFactory name="DirectoryFactory" class="${solr.directoryFactory:solr.StandardDirectoryFactory}"/>
-  <schemaFactory class="ClassicIndexSchemaFactory"/>
+  <luceneMatchVersion>9.0</luceneMatchVersion>
 
-  <!-- Load the plugin JAR from the contrib directory -->
+  <!-- Load the plugin JAR from the contrib directory.
+       NOTE: Not needed when running with Solrcloud and Package Manager.
+  -->
   <lib dir="../../../contrib/ocrsearch/lib" regex=".*\.jar" />
 
   <!-- Define a search component that takes care of OCR highlighting -->
-  <searchComponent class="de.digitalcollections.solrocr.solr.OcrHighlightComponent"
+  <searchComponent class="solrocr.OcrHighlightComponent"
                    name="ocrHighlight" />
 
   <!-- Add the OCR Highlighting component to the request handler -->
@@ -73,11 +73,14 @@ To run the example setup yourself, you will need:
 ```
 
 [`schema.xml`](https://github.com/dbmdz/solr-ocrhighlighting/blob/master/example/solr/cores/ocr/conf/schema.xml)
-```xml
-<fieldtype name="text_ocr" class="solr.TextField" storeOffsetsWithPositions="true" termVectors="true">
+```xml hl_lines="4 5 6 7"
+<fieldtype
+    name="text_ocr" class="solr.TextField" storeOffsetsWithPositions="true">
   <analyzer type="index">
-    <charFilter class="de.digitalcollections.solrocr.lucene.filters.ExternalUtf8ContentFilterFactory" />
-    <charFilter class="de.digitalcollections.solrocr.lucene.filters.OcrCharFilterFactory" />
+    <charFilter
+      class="solrocr.ExternalUtf8ContentFilterFactory" />
+    <charFilter
+      class="solrocr.OcrCharFilterFactory" />
     <tokenizer class="solr.StandardTokenizerFactory"/>
     <filter class="solr.LowerCaseFilterFactory"/>
     <filter class="solr.StopFilterFactory"/>
diff --git a/docs/formats.md b/docs/formats.md
index 3929b7f2..22310956 100644
--- a/docs/formats.md
+++ b/docs/formats.md
@@ -56,7 +56,7 @@ You should use this format when:
 
 - you want to store the OCR in the index (to keep the index size as low)
 - reusing the existing OCR files is not possible or practical (to keep occupied disk space low)
-- you want the best possible performance, highlighting MiniOCR is ~25% faster than ALTO and ~50% faster than hOCR
+- you want the best possible performance, highlighting MiniOCR is ~25% faster than ALTO and ~50% faster than hOCR (in an artificial benchmark that is purely CPU-bound)
 
 A basic example looks like this:
 
diff --git a/docs/index.md b/docs/index.md
index 8d568adf..0eb58a60 100644
--- a/docs/index.md
+++ b/docs/index.md
@@ -1,4 +1,4 @@
-# Solr OCR Highlighting
+# Solr OCR Highlighting Plugin
 
 ![Highlighted OCR snippet](img/snippet.png)
 
@@ -21,8 +21,8 @@ and its position on the page at query time:
     { "ulx": 196, "uly": 1703, "lrx": 1232, "lry": 1968, "pageIdx": 0 }
   ],
   "highlights":[
-    [{ "text": "Mason and Jeremiah", "ulx": 675, "uly": 110, "lrx": 1036, "lry": 145,
-       "parentRegionIdx": 0},
+    [{ "text": "Mason and Jeremiah", "ulx": 675, "uly": 110, "lrx": 1036,
+       "lry": 145, "parentRegionIdx": 0},
      { "text": "Dixon,", "ulx": 1, "uly": 167, "lrx": 119, "lry": 204,
        "parentRegionIdx": 0 }]
   ]
@@ -41,7 +41,7 @@ does not interfere with Solr's standard highlighting component, i.e. it works
 transparently with non-OCR fields and just lets the default implementation handle
 those.
 
-The plugin **works with all Solr versions >= 7.x**.
+The plugin **works with all Solr versions >= 7.x up to 9.0**.
 
 ## Features
 - Index various [OCR formats](formats.md) directly without any pre-processing
@@ -49,7 +49,7 @@ The plugin **works with all Solr versions >= 7.x**.
     * [ALTO](formats.md#alto)
     * [MiniOCR](formats.md#miniocr)
 - Retrieve all the information needed to render a highlighted snippet view
-  directly from Solr, without post-processing
+  directly from Solr, without post-processing on the client-side
 - Keep your index size manageable by optionally re-using OCR documents on disk for
   highlighting
 
@@ -62,5 +62,5 @@ If you want to see the **plugin in action**, you can play around with the
 [example setup](example.md) hosted at [https://ocrhl.jbaiter.de](https://ocrhl.jbaiter.de)
 
 Should you want to **run the example on your own computer** and play around with the
-settings, the [Docker-based setup is available on GitHub](https://github.com/dbmdz/solr-ocrhighlighting/tree/master/example)
+settings, the [Docker-based setup is available on GitHub](https://github.com/dbmdz/solr-ocrhighlighting/tree/main/example)
 and instructions for using it are in the [Example Setup chapter](example.md)
diff --git a/docs/indexing.md b/docs/indexing.md
index 15178000..edfa4a5b 100644
--- a/docs/indexing.md
+++ b/docs/indexing.md
@@ -1,10 +1,10 @@
 # Indexing OCR documents
 
-**If you want to store the OCR in the index itself** you can skip this section: Just put the OCR
-content in the field and submit it to Solr for indexing. We recommend using the space-efficient
-[MiniOCR format](./formats.md#miniocr) if you decide to go this way.
+!!! note "If you want to store the OCR in the index itself you can all but _skip this section_"
+    Just put the OCR content in the field and submit it to Solr for indexing. We recommend using the space-efficient
+    [MiniOCR format](./formats.md#miniocr) if you decide to go this way.
 
-Indexing OCR documents without storing the actual content in the index is also relatively simple:
+Indexing OCR documents without storing the actual content in the index is relatively simple:
 When building the index document, instead of putting  the actual OCR content into the field, you use
 a **source pointer**. This pointer will tell the plugin from which location to load the OCR content
 during indexing and highlighting.
@@ -26,6 +26,8 @@ the (again, potentially very large) contents themselves in the index.
     account. To signal to the plugin that a given source path is encoded in ASCII, include the `{ascii}`
     string after the path, e.g. `/mnt/data/ocrdoc.xml{ascii}[31337:41337]`.
 
+    For even more advice on performance tuning, refer to the [corresponding documentation section](./performance.md).
+
 The structure of the source pointers depends on how your actual OCR files on disk map to documents in the Solr
 index.
 
@@ -111,7 +113,7 @@ The format of the regions is inspired by [Python's slicing syntax](https://docs.
 - `start:end` → Everything between the byte offsets `start` (inclusive) and `end` (exclusive)
 - `:end` → Everything from the start of the file to byte offset `end` (exclusive)
 
-!!! caution "Region Requirements""
+!!! caution "Region Requirements"
     - The concatenated content of your regions must be a half-way valid XML structure. While we
       tolerate *unclosed tags or unmatched closing tags* (they often can't be avoided), other
       errors such as partial tags (i.e. a missing `<` or `>`) will lead  to an error during indexing.
diff --git a/docs/installation.md b/docs/installation.md
index f80e7271..402e128e 100644
--- a/docs/installation.md
+++ b/docs/installation.md
@@ -1,55 +1,83 @@
 ## Requirements
 - Some familiarity with configuring Solr
-- Solr >= 7.5
+- Solr ≥ 7.5
 - OCR documents need to be in [hOCR](formats.md#hocr), [ALTO](formats.md#alto)
   or [MiniOCR](formats.md#miniocr) formats, with at least page-, and word-level
   segmentation
 
-## Obtaining the plugin JAR
+## Manually installing the plugin JAR
+To use the latest release version, refer to the [GitHub Releases list](https://github.com/dbmdz/solr-ocrhighlighting/releases). From there, download the JAR file that matches your Solr version (the build for Solr 7/8 carries a `-solr78` suffix).
+To make the plugin available to Solr, create a new directory `$SOLR_HOME/contrib/ocrhighlighting/lib` and place the JAR you just downloaded there.
 
-To use the latest release version, refer to the [GitHub Releases list](https://github.com/dbmdz/solr-ocrhighlighting/releases). From there, download the JAR file for the latest version.
-To make the plugin available to Solr, create a new directory `$SOLR_HOME/contrib/ocrsearch/lib` and place the JAR you just downloaded there.
+## For Solrcloud users: Installation as a Solr Package
+Since version 8.4, Solrcloud ships with a package management subsystem that can be used
+to conveniently install plugins from the command-line. To install the OCR highlighting
+plugin in this way, follow these steps on one of the nodes in
+your Solrcloud cluster. All paths are relative to the Solr installation directory:
 
+- **Add repository** to the local package registry:<br>
+  `$ ./bin/solr package add-repo dbmdz.github.io https://dbmdz.github.io/solr`
+- **Install package** in the latest version:<br>
+  `$ ./bin/solr package install ocrhighlighting` if you're on Solr 9, otherwise:<br>
+  `$ ./bin/solr package install ocrhighlighting:0.8.0-solr78`
+
+!!! caution "Be sure to use the `ocrhighlighting:` prefix when specifying classes in your configuration."
+    When using the Package Manager, classes from plugins have to be prefixed (separated by a colon) by
+    their plugin's identifier; for this plugin, the identifier is `ocrhighlighting`. So whenever
+    you see an attribute like `class="solrocr.SomeClass"`, you have to write
+    `class="ocrhighlighting:solrocr.SomeClass"` in your config instead.
 
 # Core Configuration
 
 To enable the use of the plugin for your Solr core, you will have to edit
 both the `solrconfig.xml` and the `schema.xml` file in your core's `conf` directory.
 
+Additionally, if you have installed the plugin via Solr's Package Management, you will
+have to *deploy* the plugin to your collection/core using Solr's CLI:
+
+```bash
+$ bin/solr package deploy ocrhighlighting -collections <your-collection>
+```
+
 ## SolrConfig
 
-In your core's `solrconfig.xml, you need to:
+In your core's `solrconfig.xml`, you need to:
 
-1. Instruct the core to load the OCR highlighting plugin, so it can find the classes
-   needed to perform OCR indexing and highlighting.
+1. Enable the plugin for your collection/core by telling it where to load the plugin
+   classes from (**Skip when using Solrcloud with Package Manager**)
 2. Define a search component that will perform the OCR highlighting at query time
 3. Add the search component to your request handlers that will trigger the highlighting.
 
 
-```xml
+```xml hl_lines="10 16 17 18 33"
 <config>
   <!-- ...other configuration options... -->
 
-  <!--
+  <!-- Only needed when not using the Package Management:
     Tell Solr to load all JAR files from the directory installed the plugin to. 
-    This assumes a directory structure where the cores are in `$SOLR_HOME/server/solr/$CORE`
-    and the plugin JAR was installed in `$SOLR_HOME/contrib/ocrsearch/lib`. Adjust the
-    path if you setup differs.
+    This assumes a directory structure where the cores are in
+    `$SOLR_HOME/server/solr/$CORE` and the plugin JAR was installed in
+    `$SOLR_HOME/contrib/ocrhighlighting/lib`. Adjust the path if your setup differs.
   -->
-  <lib dir="../../../contrib/ocrsearch/lib" regex=".*\.jar" />
+  <lib dir="../../../contrib/ocrhighlighting/lib" regex=".*\.jar" />
 
-  <!-- Add a new named search component that takes care of highlighting OCR field values. -->
-  <searchComponent class="de.digitalcollections.solrocr.solr.OcrHighlightComponent"
-                   name="ocrHighlight" />
+  <!-- Add a new named search component that takes care of highlighting OCR field
+       values.
+       NOTE: Add the `ocrhighlighting:` prefix if using Package Management.
+  -->
+  <searchComponent
+      class="solrocr.OcrHighlightComponent"
+      name="ocrHighlight" />
 
    <!-- ...other search components... -->
 
   <!--
-    Instruct the request handlers you want to enable OCR highlighting for to include the
-    search component you defined above. This example uses the standard /select handler.
+    Instruct the request handlers you want to enable OCR highlighting for to
+    include the search component you defined above. This example uses the
+    standard /select handler.
 
-    CAUTION: Make sure that the OCR highlight component is listed **before** the standard
-    highlighting component, but **after** the query component.
+    CAUTION: Make sure that the OCR highlight component is listed **before** the
+    standard highlighting component, but **after** the query component.
   -->
   <requestHandler name="/select" class="solr.SearchHandler">
       <arr name="components">
@@ -64,6 +92,7 @@ In your core's `solrconfig.xml, you need to:
 If you run into problems, a look into these sections of the Solr user's guide might be helpful:
 
 - [Resource and Plugin Loading](https://lucene.apache.org/solr/guide/8_1/resource-and-plugin-loading.html)
+- [Package Manager](https://solr.apache.org/guide/8_11/package-manager.html)
 - [RequestHandlers and SearchComponents in SolrConfig](https://lucene.apache.org/solr/guide/8_1/requesthandlers-and-searchcomponents-in-solrconfig.html)
 
 
@@ -74,31 +103,42 @@ In the core's `schema.xml`, you need to:
 1. Define a new field type that will hold your indexed OCR text
 2. Define which fields are going to hold the indexed OCR text.
 
-The **field type** for OCR text is usually identical to your regular text field, with the
+The **field type** for OCR text is very similar to your regular text field, with the
 difference that there are one or two extra *character filters* at the beginning of your
 *index analysis chain*:
+
   - `ExternalUtf8ContentFilterFactory` will (optionally) allow you to index and highlight OCR from
     external  sources on the file system. More on this in the [Indexing chapter](./indexing.md).
+
   - `OcrCharFilterFactory` will retrieve the raw OCR data and extract the plain text that is
     going to pass through the rest of the analysis chain. It will auto-detect the used OCR
     formats, which means that **you can use different OCR formats alongside each other**.
     After this filter, Solr will treat the field just like a regular text field for purposes
     of analysis.
 
-```xml
+Additionally, you need to enable the `storeOffsetsWithPositions` option. The plugin uses these
+offsets to locate the matching terms in the OCR documents.
+
+```xml hl_lines="6 11 12 14 15 29"
 <schema>
   <types>
-    <fieldtype name="text_ocr" class="solr.TextField" storeOffsetsWithPositions="true" 
-               termVectors="true">
+    <fieldtype
+        name="text_ocr"
+        class="solr.TextField"
+        storeOffsetsWithPositions="true">
+      <!-- NOTE: When using the Package Manager, add the `ocrhighlighting:`
+                 prefix to the `class` attributes -->
       <analyzer type="index">
         <!-- For loading external files as field values during indexing -->
-        <charFilter class="de.digitalcollections.solrocr.lucene.filters.ExternalUtf8ContentFilterFactory" />
+        <charFilter
+          class="solrocr.ExternalUtf8ContentFilterFactory" />
         <!-- For converting OCR to plaintext -->
-        <charFilter class="de.digitalcollections.solrocr.lucene.filters.OcrCharFilterFactory" />
+        <charFilter
+          class="solrocr.OcrCharFilterFactory" />
         <!-- ...rest of your index analysis chain... -->
       </analyzer>
       <analyzer type="query">
-        <!-- your query analysis chain, should not include the character filters -->
+        <!-- query analysis chain, should not include the character filters -->
       </analyzer>
     </fieldtype>
   </types>
@@ -107,7 +147,8 @@ difference that there are one or two extra *character filters* at the beginning
     <!-- ...your other fields ... -->
 
     <!-- A field that uses the OCR field type. Has to be `stored`. -->
-    <field name="ocr_text" type="text_ocr" multiValued="false" indexed="true" stored="true" />
+    <field name="ocr_text" type="text_ocr" multiValued="false" indexed="true"
+           stored="true" />
   </fields>
 </schema>
 ```
diff --git a/docs/performance.md b/docs/performance.md
index 0c1aa386..dc58503d 100644
--- a/docs/performance.md
+++ b/docs/performance.md
@@ -30,8 +30,8 @@ Important factors include:
 - *Number of possible parallel reads* (see below): Does the storage layer support more than one active reader?
 
 Generally speaking, local storage is better than remote storage (like NFS or CIFS), due to the network latency, and
-flash-based storage is better than disk-based storage, due to the lower random read latency. A RAID setup is
-preferred over a JBOD setup, due to the potential for parallel reads.
+flash-based storage is better than disk-based storage, due to the lower random read latency and the possibility to
+do parallel reads. A RAID1/10 setup is preferred over a RAID0/JBOD setup, due to the increased potential for parallel reads.
 
 ## Plugin configuration
 The plugin offers the possibility to perform a **concurrent read-ahead of highlighting target files**. This will perform
@@ -56,8 +56,11 @@ of reads from either the `qtp...` or `solr-ocrhlighight` threads on  the second
 
 Example configuration tuned for remote NFS storage mounted with `rsize=65536`:
 ```xml
-<searchComponent class="de.digitalcollections.solrocr.solr.OcrHighlightComponent"
-                 name="ocrHighlight" enablePreload="true" preloadReadSize="65536" preloadConcurrency="8"/>
+<searchComponent
+  class="solrocr.OcrHighlightComponent"
+  name="ocrHighlight" enablePreload="true" preloadReadSize="65536"
+  preloadConcurrency="8"
+/>
 ```
 
 
diff --git a/mkdocs.yml b/mkdocs.yml
index ba90b805..00707f49 100644
--- a/mkdocs.yml
+++ b/mkdocs.yml
@@ -1,19 +1,29 @@
 site_name: Solr OCR Highlighting Plugin
-theme: readthedocs
+# theme: readthedocs  (previous theme)
+theme: material  # installed in CI via the mkdocs-material package
 nav:
-    - Introduction: index.md
-    - Installation: installation.md
-    - Indexing: indexing.md
-    - Indexing Alternative Terms: alternatives.md
-    - Querying: query.md
-    - Example Setup: example.md
-    - Performance Tuning: performance.md
-    - Supported Formats: formats.md
-    - Change Log: changes.md
+  - Introduction: index.md
+  - Installation: installation.md
+  - Indexing: indexing.md
+  - Indexing Alternative Terms: alternatives.md
+  - Querying: query.md
+  - Example Setup: example.md
+  - Performance Tuning: performance.md
+  - Supported Formats: formats.md
+  - Change Log: changes.md
 markdown_extensions:
-    - def_list
-    - attr_list
-    - fenced_code
-    - admonition
-    - smarty
-repo_url: https://github.com/dbmdz/solr-ocrhighlighting/
\ No newline at end of file
+  - pymdownx.highlight:  # presumably backs the hl_lines="..." fences — confirm
+      anchor_linenums: true
+  - pymdownx.inlinehilite
+  - pymdownx.snippets
+  - pymdownx.superfences
+  - admonition  # `!!! note` / `!!! caution` blocks used in the docs
+  - pymdownx.details
+  - pymdownx.superfences  # NOTE(review): duplicate of the entry above
+  - def_list
+repo_url: https://github.com/dbmdz/solr-ocrhighlighting/
+plugins:
+  - mike  # the CI workflow publishes the docs with `mike deploy`
+extra:
+  version:
+    provider: mike
\ No newline at end of file