diff --git a/docs/changes.md b/docs/changes.md index c2de0061..db1832b9 100644 --- a/docs/changes.md +++ b/docs/changes.md @@ -14,6 +14,10 @@ was that in the presence of I/O errors like disappearing mounts, truncated files, etc, the JVM could simply crash (due to the kernel sending a `SIGBUS` signal when encountering an I/O error). +**Fixed:** +- When using source pointers with multiple files, the plugin no longer leaks file descriptors. We + previously didn't close the currently opened file when opening the next one. + ## 0.8.5 (2024-04-25) [GitHub Release](https://github.com/dbmdz/solr-ocrhighlighting/releases/tag/0.8.5) diff --git a/src/main/java/com/github/dbmdz/solrocr/reader/MultiFileReader.java b/src/main/java/com/github/dbmdz/solrocr/reader/MultiFileReader.java index a41a2677..1193b0f0 100644 --- a/src/main/java/com/github/dbmdz/solrocr/reader/MultiFileReader.java +++ b/src/main/java/com/github/dbmdz/solrocr/reader/MultiFileReader.java @@ -6,7 +6,9 @@ import java.io.InputStreamReader; import java.io.Reader; import java.nio.charset.StandardCharsets; +import java.nio.file.Files; import java.nio.file.Path; +import java.nio.file.StandardOpenOption; import java.util.LinkedList; import java.util.List; import java.util.Locale; @@ -42,13 +44,15 @@ public int read(char[] cbuf, int off, int len) throws IOException { while (numRead < len && currentReader != null) { int read = this.currentReader.read(cbuf, off, len); if (read < len) { + this.currentReader.close(); if (this.remainingSources.isEmpty()) { // No more readers, return what was read so far this.currentReader = null; } else { this.currentReader = new InputStreamReader( - new FileInputStream(remainingSources.remove().toFile()), StandardCharsets.UTF_8); + Files.newInputStream(remainingSources.remove(), StandardOpenOption.READ), + StandardCharsets.UTF_8); } } if (read < 0) { diff --git a/src/main/java/solrocr/ExternalUtf8ContentFilterFactory.java b/src/main/java/solrocr/ExternalUtf8ContentFilterFactory.java index 3a414a1e..019887b7 100644 --- a/src/main/java/solrocr/ExternalUtf8ContentFilterFactory.java +++ b/src/main/java/solrocr/ExternalUtf8ContentFilterFactory.java @@ -7,7 +7,6 @@ import com.google.common.collect.ImmutableList; import java.io.BufferedReader; import java.io.File; -import java.io.FileInputStream; import java.io.IOException; import java.io.InputStreamReader; import java.io.Reader; @@ -15,6 +14,7 @@ import java.nio.ByteBuffer; import java.nio.channels.FileChannel; import java.nio.charset.StandardCharsets; +import java.nio.file.Files; import java.nio.file.StandardOpenOption; import java.util.List; import java.util.Locale; @@ -81,7 +81,8 @@ public Reader create(Reader input) { } else { r = new InputStreamReader( - new FileInputStream(pointer.sources.get(0).path.toFile()), StandardCharsets.UTF_8); + Files.newInputStream(pointer.sources.get(0).path, StandardOpenOption.READ), + StandardCharsets.UTF_8); } List charRegions = diff --git a/src/test/java/com/github/dbmdz/solrocr/reader/MultiFileSurceReaderTest.java b/src/test/java/com/github/dbmdz/solrocr/reader/MultiFileSourceReaderTest.java similarity index 98% rename from src/test/java/com/github/dbmdz/solrocr/reader/MultiFileSurceReaderTest.java rename to src/test/java/com/github/dbmdz/solrocr/reader/MultiFileSourceReaderTest.java index 05ca3116..84872bab 100644 --- a/src/test/java/com/github/dbmdz/solrocr/reader/MultiFileSurceReaderTest.java +++ b/src/test/java/com/github/dbmdz/solrocr/reader/MultiFileSourceReaderTest.java @@ -17,12 +17,12 @@ import org.junit.jupiter.params.ParameterizedTest; import org.junit.jupiter.params.provider.ValueSource; -class MultiFileSurceReaderTest { +class MultiFileSourceReaderTest { private final List filePaths; private final SourcePointer pointer; private final int maxCacheEntries = 10; - MultiFileSurceReaderTest() throws IOException { + MultiFileSourceReaderTest() throws IOException { Path root = Paths.get("src/test/resources/data/alto_multi"); filePaths = new ArrayList<>(); try (DirectoryStream stream = Files.newDirectoryStream(root, "1860-11-30*.xml")) {