Skip to content

Commit

Permalink
[RELEASE] iText pdfOCR 4.0.0
Browse files Browse the repository at this point in the history
  • Loading branch information
iText-CI committed Oct 11, 2024
2 parents 78c4b90 + 55a45d5 commit eb25fb0
Show file tree
Hide file tree
Showing 70 changed files with 1,008 additions and 1,013 deletions.
40 changes: 40 additions & 0 deletions SECURITY.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
# iText Security Policy

## Reporting a Vulnerability

We are committed to maintaining the security of our software. If you discover a security vulnerability, we encourage you to report it to us as soon as possible.

To report a vulnerability, please visit our [Vulnerability Reporting Page](https://itextpdf.com/report-vulnerability), or email [[email protected]]([email protected]). If you do not receive a response within 2 business days, please follow up, as we may not have received your message.

We follow the procedure of Coordinated Vulnerability Disclosure (CVD) and, to protect the ecosystem, we request that those reporting do the same. Please visit the above page for more information, and follow the steps below to ensure that your report is handled promptly and appropriately:

1. **Do not disclose the vulnerability publicly** until we have had a chance to address it.
2. **Provide a detailed description** of the vulnerability, including steps to reproduce it, if possible.
3. **Include any relevant information** such as the version of pdfOCR you are using, your operating system, and any other pertinent details.

## Security Updates and Patches

When a vulnerability is reported, we will:

1. **Investigate and verify** the vulnerability.
2. **Develop and test** a fix for the vulnerability.
3. **Release a patch** as soon as possible.


## Known Vulnerabilities

The iText Knowledge Base has a page for known [Common Vulnerabilities and Exposures](https://kb.itextpdf.com/itext/cves) (CVEs). Please check it to ensure your vulnerability has not already been disclosed or addressed.

## Supported product lines

See the [Compatibility Matrix](https://kb.itextpdf.com/itext/compatibility-matrix) for the supported product lines.

## Security Best Practices

To help ensure the security of your applications using pdfOCR, we recommend the following best practices:

1. **Keep pdfOCR up to date** by regularly checking for and applying updates.
2. **Review and follow** our security guidelines for secure usage.
3. **Monitor your applications** for any unusual activity and investigate any anomalies promptly.

Thank you for helping us keep iText secure!
2 changes: 1 addition & 1 deletion pdfocr-api/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
<parent>
<groupId>com.itextpdf</groupId>
<artifactId>pdfocr-root</artifactId>
<version>3.0.2</version>
<version>4.0.0</version>
</parent>

<artifactId>pdfocr-api</artifactId>
Expand Down
7 changes: 7 additions & 0 deletions pdfocr-api/src/main/java/com/itextpdf/pdfocr/IOcrEngine.java
Original file line number Diff line number Diff line change
Expand Up @@ -85,4 +85,11 @@ public interface IOcrEngine {
* @param ocrProcessContext ocr processing context
*/
void createTxtFile(List<File> inputImages, File txtFile, OcrProcessContext ocrProcessContext);

/**
* Checks whether tagging is supported by the OCR engine.
*
* @return {@code true} if tagging is supported by the engine, {@code false} otherwise
*/
boolean isTaggingSupported();
}
43 changes: 17 additions & 26 deletions pdfocr-api/src/main/java/com/itextpdf/pdfocr/OcrPdfCreator.java
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ This file is part of the iText (R) project.
import com.itextpdf.kernel.geom.Point;
import com.itextpdf.kernel.geom.Rectangle;
import com.itextpdf.kernel.pdf.DocumentProperties;
import com.itextpdf.kernel.pdf.PdfAConformanceLevel;
import com.itextpdf.kernel.pdf.PdfAConformance;
import com.itextpdf.kernel.pdf.PdfDocument;
import com.itextpdf.kernel.pdf.PdfDocumentInfo;
import com.itextpdf.kernel.pdf.PdfName;
Expand Down Expand Up @@ -67,8 +67,8 @@ This file is part of the iText (R) project.
import com.itextpdf.pdfocr.logs.PdfOcrLogMessageConstant;
import com.itextpdf.pdfocr.statistics.PdfOcrOutputType;
import com.itextpdf.pdfocr.statistics.PdfOcrOutputTypeStatisticsEvent;
import com.itextpdf.pdfocr.structuretree.LogicalStructureTreeItem;
import com.itextpdf.pdfocr.structuretree.ArtifactItem;
import com.itextpdf.pdfocr.structuretree.LogicalStructureTreeItem;

import java.io.File;
import java.io.IOException;
Expand Down Expand Up @@ -129,6 +129,9 @@ public OcrPdfCreator(final IOcrEngine ocrEngine) {
*/
public OcrPdfCreator(final IOcrEngine ocrEngine,
final OcrPdfCreatorProperties ocrPdfCreatorProperties) {
if (ocrPdfCreatorProperties.isTagged() && !ocrEngine.isTaggingSupported()) {
throw new PdfOcrException(PdfOcrExceptionMessageConstant.TAGGING_IS_NOT_SUPPORTED);
}
setOcrEngine(ocrEngine);
setOcrPdfCreatorProperties(ocrPdfCreatorProperties);
}
Expand Down Expand Up @@ -471,11 +474,7 @@ private void addToCanvas(final PdfDocument pdfDocument,
// Logical tree, a list of top items, children can be retrieved out of them
List<LogicalStructureTreeItem> logicalTree = new ArrayList<>();
// A map of leaf LogicalStructureTreeItem's to TextInfo's attached to these leaves
Map<LogicalStructureTreeItem, List<TextInfo>> leavesTextInfos = new HashMap<>();
final boolean taggedSupported = getLogicalTree(pageText, logicalTree, leavesTextInfos);
if (!taggedSupported) {
throw new PdfOcrException(PdfOcrExceptionMessageConstant.TAGGING_IS_NOT_SUPPORTED);
}
Map<LogicalStructureTreeItem, List<TextInfo>> leavesTextInfos = getLogicalTree(pageText, logicalTree);
pdfDocument.setTagged();

// Create a map of TextInfo to tag pointers meanwhile creating the required tags.
Expand Down Expand Up @@ -504,7 +503,7 @@ private PdfDocument createPdfDocument(final PdfWriter pdfWriter,
boolean createPdfA3u = pdfOutputIntent != null;
if (createPdfA3u) {
pdfDocument = new PdfADocument(pdfWriter,
PdfAConformanceLevel.PDF_A_3U, pdfOutputIntent,
PdfAConformance.PDF_A_3U, pdfOutputIntent,
documentProperties);
} else {
pdfDocument = new PdfDocument(pdfWriter,
Expand Down Expand Up @@ -623,7 +622,7 @@ private void addImageToCanvas(final ImageData imageData,
ocrPdfCreatorProperties.getPageSize(), imageSize);
final Rectangle rect =
new Rectangle(
(float)coordinates.x, (float)coordinates.y,
(float)coordinates.getX(), (float)coordinates.getY(),
imageSize.getWidth(), imageSize.getHeight());
pdfCanvas.addImageFittedIntoRectangle(imageData, rect, false);
}
Expand All @@ -634,19 +633,12 @@ private void addImageToCanvas(final ImageData imageData,
}
}

/**
* @return {@code true} if tagging supported by the engine.
* @deprecated In next major version we need to add boolean taggingSupported() method into IOcrEngine
* and throw exception in OcrPdfCreator constructor if taggingSupported() returns false but
* OcrPdfCreatorProperties.getTagged returns true.
*/
@Deprecated
private static boolean getLogicalTree(List<TextInfo> textInfos,
List<LogicalStructureTreeItem> logicalStructureTreeItems,
Map<LogicalStructureTreeItem, List<TextInfo>> leavesTextInfos) {
boolean taggedSupported = false;
private static Map<LogicalStructureTreeItem, List<TextInfo>> getLogicalTree(
List<TextInfo> textInfos, List<LogicalStructureTreeItem> logicalStructureTreeItems) {

Map<LogicalStructureTreeItem, List<TextInfo>> leavesTextInfos = new HashMap<>();
if (textInfos == null) {
return taggedSupported;
return leavesTextInfos;
}

for (TextInfo textInfo : textInfos) {
Expand All @@ -656,7 +648,6 @@ private static boolean getLogicalTree(List<TextInfo> textInfos,
continue;
} else if (structTreeItem != null) {
topParent = getTopParent(structTreeItem);
taggedSupported = true;
} else {
structTreeItem = new LogicalStructureTreeItem();
textInfo.setLogicalStructureTreeItem(structTreeItem);
Expand All @@ -675,7 +666,7 @@ private static boolean getLogicalTree(List<TextInfo> textInfos,
}
}

return taggedSupported;
return leavesTextInfos;
}

private static LogicalStructureTreeItem getTopParent(LogicalStructureTreeItem structInfo) {
Expand Down Expand Up @@ -792,8 +783,8 @@ private void addTextToCanvas(
}

canvas.showTextAligned(paragraph,
xOffset + (float) imageCoordinates.x,
yOffset + (float) imageCoordinates.y,
xOffset + (float) imageCoordinates.getX(),
yOffset + (float) imageCoordinates.getY(),
TextAlignment.LEFT);

if (ocrPdfCreatorProperties.isTagged()) {
Expand Down Expand Up @@ -933,7 +924,7 @@ public PdfCanvas showText(GlyphLine text) {
// unicode of the not found glyph
String message = PdfOcrLogMessageConstant
.COULD_NOT_FIND_CORRESPONDING_GLYPH_TO_UNICODE_CHARACTER;
for (int i = glyphLine.start; i < glyphLine.end; i++) {
for (int i = glyphLine.getStart(); i < glyphLine.getEnd(); i++) {
if (isNotDefGlyph(currentFont, glyphLine.get(i))) {
notDefGlyphsExists = true;
message = MessageFormatUtil.format(PdfOcrLogMessageConstant
Expand Down
Loading

0 comments on commit eb25fb0

Please sign in to comment.