Skip to content

Commit

Permalink
- provide a more elegant and efficient way to load fonts in each page
Browse files Browse the repository at this point in the history
- bump version & head comment date
  • Loading branch information
hwding committed Sep 4, 2017
1 parent 1615901 commit 08ac529
Show file tree
Hide file tree
Showing 4 changed files with 30 additions and 44 deletions.
4 changes: 2 additions & 2 deletions pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,8 @@
<modelVersion>4.0.0</modelVersion>
<groupId>com.amastigote</groupId>
<artifactId>unstamper</artifactId>
<version>0.1.1</version>
<description>text stamp remover for PDF files</description>
<version>0.1.2</version>
<description>Text stamp remover for PDF files.</description>
<name>pdf-unstamper</name>
<url>https://github.com/hwding/pdf-unstamper</url>
<build>
Expand Down
4 changes: 2 additions & 2 deletions script/install
Original file line number Diff line number Diff line change
@@ -1,13 +1,13 @@
#!/bin/bash

#AUTH hwding
#DATE AUG/25/2017
#DATE SEP/04/2017
#DESC install unstamp as a command

user_bin=`echo ~`"/bin/"
jar_name="pdf-unstamper.jar"
exe_name="unstamp"
_version="0.1.1"
_version="0.1.2"
jar_durl="https://github.com/hwding/pdf-unstamper/releases/download/$_version/$jar_name"

function chk_f() {
Expand Down
62 changes: 24 additions & 38 deletions src/com/amastigote/unstamper/core/Processor.java
Original file line number Diff line number Diff line change
@@ -1,14 +1,13 @@
/*
AUTH | hwding
DATE | Aug 27 2017
DATE | Sep 04 2017
DESC | text stamp remover for PDF files
MAIL | [email protected]
GITH | github.com/hwding
*/
package com.amastigote.unstamper.core;

import com.amastigote.unstamper.log.GeneralLogger;
import org.apache.pdfbox.cos.COSName;
import org.apache.pdfbox.cos.COSString;
import org.apache.pdfbox.pdfparser.PDFStreamParser;
import org.apache.pdfbox.pdfwriter.ContentStreamWriter;
Expand All @@ -19,11 +18,9 @@
import java.io.File;
import java.io.IOException;
import java.io.OutputStream;
import java.util.Collections;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import java.util.stream.Collectors;

public class Processor {
public static void process(File file, String[] strings) {
Expand All @@ -35,45 +32,34 @@ public static void process(File file, String[] strings) {
PDDocument pdDocument = PDDocument.load(file);
pdDocument.getPages().forEach(pdPage -> {
try {
/* START: loading font resources for further parsing */
/* START: loading font resources from current page */
PDFStreamParser pdfStreamParser = new PDFStreamParser(pdPage);
pdfStreamParser.parse();

List<Object> objects =
Collections.synchronizedList(pdfStreamParser.getTokens());
List<Object> objects = pdfStreamParser.getTokens();
Set<PDFont> pdFonts = new HashSet<>();

List<Object> cosNames =
objects.parallelStream()
.filter(e -> e instanceof COSName)
.collect(Collectors.toList());

Set<PDFont> pdFonts =
Collections.synchronizedSet(new HashSet<>());

cosNames.parallelStream()
.forEach(e -> {
/* Ignore Any Exception During Parallel Processing */
try {
PDFont pdFont = pdPage.getResources().getFont(((COSName) e));
if (pdFont != null)
pdFonts.add(pdFont);
} catch (Exception ignored) {
}
});
pdPage.getResources().getFontNames().forEach(e -> {
/* Ignore any exception raised while loading a font; a font that fails to load is simply skipped */
try {
PDFont pdFont = pdPage.getResources().getFont(e);
if (pdFont != null)
pdFonts.add(pdFont);
} catch (Exception ignored) {
}
});
/* END */
objects
.parallelStream()
.forEach(e -> {
if (e instanceof COSString) {
/* Ignore Any Exception During Parallel Processing */
try {
if (TextStampRecognizer.recognize(strings, ((COSString) e).getBytes(), pdFonts))
((COSString) e).setValue(new byte[0]);
} catch (Exception ignored) {
}
}
}
);

objects.parallelStream().forEach(e -> {
if (e instanceof COSString) {
/* Ignore Any Exception During Parallel Processing */
try {
if (TextStampRecognizer.recognize(strings, ((COSString) e).getBytes(), pdFonts))
((COSString) e).setValue(new byte[0]);
} catch (Exception ignored) {
}
}
});

PDStream newContents = new PDStream(pdDocument);
OutputStream out = newContents.createOutputStream();
Expand Down
4 changes: 2 additions & 2 deletions src/com/amastigote/unstamper/log/GeneralLogger.java
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
/*
AUTH | hwding
DATE | Aug 27 2017
DATE | Sep 04 2017
DESC | text stamp remover for PDF files
MAIL | [email protected]
GITH | github.com/hwding
Expand All @@ -10,7 +10,7 @@
public class GeneralLogger {
public static class Help {
private static final String usage =
"\nPDF-UnStamper ver. 0.1.1 by hwding@GitHub\n" +
"\nPDF-UnStamper ver. 0.1.2 by hwding@GitHub\n" +
"\nUsage: " +
"\n [OPTION] -i [INPUT PDF] -k [KEYWORDS...] (-o [OUTPUT PDF])" +
"\n [OPTION] -I [INPUT DIR] -k [KEYWORDS...] (-O [OUTPUT DIR])\n" +
Expand Down

0 comments on commit 08ac529

Please sign in to comment.