Skip to content

Commit

Permalink
MultiFileReader: Close old file when opening a new one
Browse files Browse the repository at this point in the history
  • Loading branch information
jbaiter committed May 29, 2024
1 parent def4f26 commit 5bc9098
Show file tree
Hide file tree
Showing 4 changed files with 14 additions and 5 deletions.
4 changes: 4 additions & 0 deletions docs/changes.md
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,10 @@
was that in the presence of I/O errors like disappearing mounts, truncated files, etc., the JVM could
simply crash (due to the kernel sending a `SIGBUS` signal when encountering an I/O error).

**Fixed:**
- When using source pointers with multiple files, the plugin no longer leaks file descriptors.
Previously, the currently open file was not closed before the next one was opened.


## 0.8.5 (2024-04-25)
[GitHub Release](https://github.com/dbmdz/solr-ocrhighlighting/releases/tag/0.8.5)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,9 @@
import java.io.InputStreamReader;
import java.io.Reader;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.StandardOpenOption;
import java.util.LinkedList;
import java.util.List;
import java.util.Locale;
Expand Down Expand Up @@ -42,13 +44,15 @@ public int read(char[] cbuf, int off, int len) throws IOException {
while (numRead < len && currentReader != null) {
int read = this.currentReader.read(cbuf, off, len);
if (read < len) {
this.currentReader.close();
if (this.remainingSources.isEmpty()) {
// No more readers, return what was read so far
this.currentReader = null;
} else {
this.currentReader =
new InputStreamReader(
new FileInputStream(remainingSources.remove().toFile()), StandardCharsets.UTF_8);
Files.newInputStream(remainingSources.remove(), StandardOpenOption.READ),
StandardCharsets.UTF_8);
}
}
if (read < 0) {
Expand Down
5 changes: 3 additions & 2 deletions src/main/java/solrocr/ExternalUtf8ContentFilterFactory.java
Original file line number Diff line number Diff line change
Expand Up @@ -7,14 +7,14 @@
import com.google.common.collect.ImmutableList;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.Reader;
import java.io.StringReader;
import java.nio.ByteBuffer;
import java.nio.channels.FileChannel;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.StandardOpenOption;
import java.util.List;
import java.util.Locale;
Expand Down Expand Up @@ -81,7 +81,8 @@ public Reader create(Reader input) {
} else {
r =
new InputStreamReader(
new FileInputStream(pointer.sources.get(0).path.toFile()), StandardCharsets.UTF_8);
Files.newInputStream(pointer.sources.get(0).path, StandardOpenOption.READ),
StandardCharsets.UTF_8);
}

List<SourcePointer.Region> charRegions =
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,12 +17,12 @@
import org.junit.jupiter.params.ParameterizedTest;
import org.junit.jupiter.params.provider.ValueSource;

class MultiFileSurceReaderTest {
class MultiFileSourceReaderTest {
private final List<Path> filePaths;
private final SourcePointer pointer;
private final int maxCacheEntries = 10;

MultiFileSurceReaderTest() throws IOException {
MultiFileSourceReaderTest() throws IOException {
Path root = Paths.get("src/test/resources/data/alto_multi");
filePaths = new ArrayList<>();
try (DirectoryStream<Path> stream = Files.newDirectoryStream(root, "1860-11-30*.xml")) {
Expand Down

0 comments on commit 5bc9098

Please sign in to comment.