Skip to content

Commit

Permalink
Ufal/Preview issues (#764)
Browse files Browse the repository at this point in the history
* Ensure the content preview does not exceed the maximum length of the database column, and read the input stream as UTF-8.

* Do not store HTML content in the database because it could be longer than the limit of the database column.
  • Loading branch information
milanmajchrak authored Sep 20, 2024
1 parent 5e1c099 commit 1e2b8ef
Showing 1 changed file with 50 additions and 11 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.UnsupportedEncodingException;
import java.nio.charset.StandardCharsets;
import java.nio.file.FileSystem;
import java.nio.file.FileSystems;
import java.nio.file.Files;
Expand Down Expand Up @@ -82,6 +83,10 @@
public class MetadataBitstreamRestRepository extends DSpaceRestRepository<MetadataBitstreamWrapperRest, Integer> {
private static Logger log = org.apache.logging.log4j.LogManager.getLogger(MetadataBitstreamRestRepository.class);

// This constant is used to limit the length of the preview content stored in the database to prevent
// the database from being overloaded with large amounts of data.
private static final int MAX_PREVIEW_COUNT_LENGTH = 2000;

@Autowired
HandleService handleService;

Expand Down Expand Up @@ -170,8 +175,12 @@ public Page<MetadataBitstreamWrapperRest> findByHandle(@Parameter(value = "handl
// Generate new content if we didn't find any
if (prContents.isEmpty()) {
fileInfos = getFilePreviewContent(context, bitstream, fileInfos);
for (FileInfo fi : fileInfos) {
createPreviewContent(context, bitstream, fi);
// Do not store HTML content in the database because it could be longer than the limit
// of the database column
if (!StringUtils.equals("text/html", bitstream.getFormat(context).getMIMEType())) {
for (FileInfo fi : fileInfos) {
createPreviewContent(context, bitstream, fi);
}
}
} else {
for (PreviewContent pc : prContents) {
Expand Down Expand Up @@ -311,8 +320,11 @@ private List<FileInfo> processInputStreamToFilePreview(Context context, Bitstrea
List<FileInfo> fileInfos, InputStream inputStream)
throws IOException, SQLException, ParserConfigurationException, SAXException, ArchiveException {
String bitstreamMimeType = bitstream.getFormat(context).getMIMEType();
if (bitstreamMimeType.equals("text/plain") || bitstreamMimeType.equals("text/html")) {
String data = getFileContent(inputStream);
if (bitstreamMimeType.equals("text/plain")) {
String data = getFileContent(inputStream, true);
fileInfos.add(new FileInfo(data, false));
} else if (bitstreamMimeType.equals("text/html")) {
String data = getFileContent(inputStream, false);
fileInfos.add(new FileInfo(data, false));
} else {
String data = "";
Expand Down Expand Up @@ -462,17 +474,44 @@ public String extractFile(InputStream inputStream, String fileType) {
* @return content of the inputStream as a String
* @throws IOException
*/
public String getFileContent(InputStream inputStream) throws IOException {
public String getFileContent(InputStream inputStream, boolean cutResult) throws IOException {
StringBuilder content = new StringBuilder();
BufferedReader reader = new BufferedReader(new InputStreamReader(inputStream));

String line;
while ((line = reader.readLine()) != null) {
content.append(line).append("\n");
// Generate the preview content in the UTF-8 encoding
BufferedReader reader = new BufferedReader(new InputStreamReader(inputStream, StandardCharsets.UTF_8));
try {
String line;
while ((line = reader.readLine()) != null) {
content.append(line).append("\n");
}
} catch (UnsupportedEncodingException e) {
log.error("UnsupportedEncodingException during creating the preview content because: ", e);
} catch (IOException e) {
log.error("IOException during creating the preview content because: ", e);
}

reader.close();
return content.toString();
return cutResult ? ensureMaxLength(content.toString()) : content.toString();
}

/**
 * Trims the input string to ensure it does not exceed the maximum length for the database column.
 * <p>
 * A string longer than {@code MAX_PREVIEW_COUNT_LENGTH} characters is cut and suffixed with
 * {@code " . . ."}, so the result is never longer than {@code MAX_PREVIEW_COUNT_LENGTH} characters.
 * The cut never falls between the two halves of a UTF-16 surrogate pair.
 *
 * @param input The original string to be trimmed; may be {@code null}.
 * @return {@code null} for a {@code null} input, the input itself if it is within the limit, otherwise
 *         the truncated string ending with {@code " . . ."}.
 */
private static String ensureMaxLength(String input) {
    // Nothing to do for a missing value or for a string that already fits
    if (input == null || input.length() <= MAX_PREVIEW_COUNT_LENGTH) {
        return input;
    }

    final String ellipsis = " . . .";
    // Reserve room for the ellipsis so that the final string still fits into the limit
    int previewLength = Math.max(0, MAX_PREVIEW_COUNT_LENGTH - ellipsis.length());

    // Do not keep the first half of a surrogate pair without its second half: a lone high surrogate
    // is not a valid character and cannot be encoded properly.
    if (previewLength > 0 && Character.isHighSurrogate(input.charAt(previewLength - 1))) {
        previewLength--;
    }

    return input.substring(0, previewLength) + ellipsis;
}

/**
Expand Down

0 comments on commit 1e2b8ef

Please sign in to comment.