From 002b5b043f2e11dfa5c4c4b09e1d13f790be00c3 Mon Sep 17 00:00:00 2001 From: Katharina Schmid Date: Mon, 17 Jun 2024 16:11:00 +0200 Subject: [PATCH] Make sure all bytes have been read --- .../github/dbmdz/solrocr/reader/BaseSourceReader.java | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/src/main/java/com/github/dbmdz/solrocr/reader/BaseSourceReader.java b/src/main/java/com/github/dbmdz/solrocr/reader/BaseSourceReader.java index 23419ded..58d0de83 100644 --- a/src/main/java/com/github/dbmdz/solrocr/reader/BaseSourceReader.java +++ b/src/main/java/com/github/dbmdz/solrocr/reader/BaseSourceReader.java @@ -142,7 +142,10 @@ public String readUtf8String(int start, int byteLen) throws IOException { byteLen = this.length() - start; } byte[] data = new byte[byteLen]; - this.readBytes(data, 0, start, byteLen); + int numRead = 0; + while (numRead < byteLen) { + numRead += this.readBytes(data, numRead, start + numRead, byteLen - numRead); + } int dataStart = adjustOffset(0, data, AdjustDirection.RIGHT); int dataEnd = adjustOffset(data.length - 1, data, AdjustDirection.LEFT); return new String(data, dataStart, dataEnd - dataStart + 1, StandardCharsets.UTF_8); @@ -213,7 +216,10 @@ public Section getAsciiSection(int offset) throws IOException { } int startOffset = sectionIndex * sectionSize; int readLen = Math.min(sectionSize, this.length() - startOffset); - this.readBytes(copyBuf, 0, startOffset, readLen); + int numRead = 0; + while (numRead < readLen) { + numRead += this.readBytes(copyBuf, numRead, startOffset + numRead, readLen - numRead); + } // Construct a String without going through a decoder to save on CPU. // Given that the method has been deprecated since Java 1.1 and was never removed, I don't think // this is very risky 😅