Skip to content

Commit

Permalink
1. add strict mode
Browse files Browse the repository at this point in the history
2. bump version to 0.1.3
  • Loading branch information
hwding committed Sep 9, 2017
1 parent 83865bf commit 3c30f64
Show file tree
Hide file tree
Showing 9 changed files with 61 additions and 34 deletions.
11 changes: 7 additions & 4 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -25,13 +25,16 @@ Remove text stamps of **any font**, **any encoding** and **any language** with p

## Usage
```
Usage:
Usage:
[OPTION] -i [INPUT PDF] -k [KEYWORDS...] (-o [OUTPUT PDF])
[OPTION] -I [INPUT DIR] -k [KEYWORDS...] (-O [OUTPUT DIR])
Options:
-d, --directly directly modify the input file(s), which makes option o/O unnecessary
-d, --directly directly modify the input file(s), option o/O is
unnecessary when this option is on
-r, --recursive process files in the given dir recursively
-s, --strict use strict mode, a text area is considered as water mark
only if its content strictly equals one of the keywords
```

## Get it now
Expand All @@ -48,10 +51,10 @@ Make sure you have `wget` installed.
#### Run
```shell
# For single file processing
➜ unstamp -i "C Recipes.pdf" -o "C Recipes.unstamped.pdf" -k www.allitebooks.com
➜ unstamp -i "C Recipes.pdf" -o "C Recipes.unstamped.pdf" -k www.allitebooks.com -s
➜ unstamp -i RoR.pdf -o RoR.unstamped.pdf -k 图灵社区会员
# Or
➜ unstamp -i "C Recipes.pdf" -d -k www.allitebooks.com
➜ unstamp -i "C Recipes.pdf" -d -k www.allitebooks.com -s
➜ unstamp -i RoR.pdf -d -k 图灵社区会员

# For massive files processing
Expand Down
2 changes: 1 addition & 1 deletion pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
<modelVersion>4.0.0</modelVersion>
<groupId>com.amastigote</groupId>
<artifactId>unstamper</artifactId>
<version>0.1.2</version>
<version>0.1.3</version>
<description>Text stamp remover for PDF files.</description>
<name>pdf-unstamper</name>
<url>https://github.com/hwding/pdf-unstamper</url>
Expand Down
2 changes: 1 addition & 1 deletion script/install
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ user_bin=`echo ~`"/bin/"
locl_bin="/usr/local/bin/"
jar_name="pdf-unstamper.jar"
exe_name="unstamp"
_version="0.1.2"
_version="0.1.3"
jar_durl="https://github.com/hwding/pdf-unstamper/releases/download/$_version/$jar_name"
wrapper="#!/bin/bash\njava -jar ${user_bin}${jar_name} \"\$@\"\n"

Expand Down
5 changes: 3 additions & 2 deletions src/com/amastigote/unstamper/Main.java
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
/*
AUTH | hwding
DATE | Sep 05 2017
DATE | Sep 10 2017
DESC | text stamp remover for PDF files
MAIL | [email protected]
GITH | github.com/hwding
Expand Down Expand Up @@ -37,7 +37,8 @@ public static void main(@NotNull String[] args) {
System.exit(0);
} else {
TaskRunner.init(
commandLine.getOptionValues('k'));
commandLine.getOptionValues('k'),
commandLine.hasOption('s'));

if (commandLine.hasOption('i') && (commandLine.hasOption('o') || commandLine.hasOption('d'))) {
if (commandLine.hasOption('d'))
Expand Down
7 changes: 4 additions & 3 deletions src/com/amastigote/unstamper/core/Processor.java
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
/*
AUTH | hwding
DATE | Sep 05 2017
DATE | Sep 10 2017
DESC | text stamp remover for PDF files
MAIL | [email protected]
GITH | github.com/hwding
Expand All @@ -27,7 +27,8 @@
public class Processor {
public static void process(
@NotNull File file,
@NotNull String[] strings) {
@NotNull String[] strings,
@NotNull boolean useStrict) {
AtomicBoolean processAllOk = new AtomicBoolean(true);
GeneralLogger.Processor.procInProgress(file.getName());

Expand Down Expand Up @@ -61,7 +62,7 @@ public static void process(
if (e instanceof COSString) {
/* Ignore Any Exception During Parallel Processing */
try {
if (TextStampRecognizer.recognize(strings, ((COSString) e).getBytes(), pdFonts))
if (TextStampRecognizer.recognize(strings, ((COSString) e).getBytes(), pdFonts, useStrict))
((COSString) e).setValue(new byte[0]);
} catch (Exception ignored) {
}
Expand Down
41 changes: 27 additions & 14 deletions src/com/amastigote/unstamper/core/TextStampRecognizer.java
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
/*
AUTH | hwding
DATE | Sep 05 2017
DATE | Sep 10 2017
DESC | text stamp remover for PDF files
MAIL | [email protected]
GITH | github.com/hwding
Expand All @@ -18,15 +18,16 @@ class TextStampRecognizer {
private static boolean recognizeWithFont(
@NotNull String[] keywords,
@NotNull byte[] inputText,
@NotNull Set<PDFont> pdFonts) {
String bs = generateByteString(inputText);
@NotNull Set<PDFont> pdFonts,
@NotNull boolean useStrict) {
String encodedInput = generateByteString(inputText);
for (PDFont f : pdFonts) {
if (f == null) continue;
for (String k : keywords) {
try {
byte[] encodedKeywords = f.encode(k);
if (bs.contains(generateByteString(encodedKeywords)))
return true;
byte[] encodedKeywordBytes = f.encode(k);
final String encodedKeyword = generateByteString(encodedKeywordBytes);
if (checkDuplicate(encodedInput, encodedKeyword, useStrict)) return true;
} catch (IOException | IllegalArgumentException ignored) {
}
}
Expand All @@ -36,26 +37,38 @@ private static boolean recognizeWithFont(

private static boolean recognizePlain(
@NotNull String[] keywords,
@NotNull byte[] inputText
@NotNull byte[] inputText,
@NotNull boolean useStrict
) {
for (String k : keywords) {
if (new String(inputText).contains(k)) return true;
for (String k : keywords)
if (checkDuplicate(new String(inputText), k, useStrict)) return true;
return false;
}

/*
 * Compares one candidate text against one keyword.
 * With useStrict on, the text matches only when it equals the keyword
 * exactly; with useStrict off, it matches as soon as the keyword occurs
 * anywhere inside the text.
 */
private static boolean checkDuplicate(
        @NotNull String input,
        @NotNull String keyword,
        @NotNull boolean useStrict) {
    return useStrict
            ? input.equals(keyword)
            : input.contains(keyword);
}

static boolean recognize(@NotNull String[] keywords,
@NotNull byte[] inputText,
@NotNull Set<PDFont> pdFonts) {
return recognizePlain(keywords, inputText) ||
recognizeWithFont(keywords, inputText, pdFonts);
@NotNull Set<PDFont> pdFonts,
@NotNull boolean useStrict) {
return recognizePlain(keywords, inputText, useStrict) ||
recognizeWithFont(keywords, inputText, pdFonts, useStrict);
}

private static String generateByteString(@NotNull byte[] bytes) {
StringBuilder stringBuilder = new StringBuilder();
for (byte b : bytes) {
for (byte b : bytes)
stringBuilder.append(Byte.toString(b));
}
return stringBuilder.toString();
}
}
13 changes: 8 additions & 5 deletions src/com/amastigote/unstamper/log/GeneralLogger.java
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
/*
AUTH | hwding
DATE | Sep 05 2017
DATE | Sep 10 2017
DESC | text stamp remover for PDF files
MAIL | [email protected]
GITH | github.com/hwding
Expand All @@ -12,13 +12,16 @@
public class GeneralLogger {
public static class Help {
private static final String usage =
"\nPDF-UnStamper ver. 0.1.2 by hwding@GitHub\n" +
"\nPDF-UnStamper ver. 0.1.3 by hwding@GitHub\n" +
"\nUsage:" +
"\n [OPTION] -i [INPUT PDF] -k [KEYWORDS...] (-o [OUTPUT PDF])" +
"\n [OPTION] -I [INPUT DIR] -k [KEYWORDS...] (-O [OUTPUT DIR])\n" +
"\nOptions:" +
"\n -d, --directly directly modify the input file(s), which makes option o/O unnecessary" +
"\n -r, --recursive process files in the given dir recursively\n";
"\n -d, --directly directly modify the input file(s), option o/O is\n" +
" unnecessary when this option is on" +
"\n -r, --recursive process files in the given dir recursively" +
"\n -s, --strict use strict mode, a text area is considered as water mark\n" +
" only if its content strictly equals one of the keywords\n";

public static void print() {
System.out.println(usage);
Expand Down Expand Up @@ -61,7 +64,7 @@ public static void procInProgress(@NotNull String fn) {
}

public static void procFinished() {
System.out.println(" GOOD");
System.out.println(" done");
}
}
}
4 changes: 3 additions & 1 deletion src/com/amastigote/unstamper/util/OptionManager.java
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
/*
AUTH | hwding
DATE | Aug 25 2017
DATE | Sep 10 2017
DESC | text stamp remover for PDF files
MAIL | [email protected]
GITH | github.com/hwding
Expand All @@ -18,6 +18,7 @@ public class OptionManager {
private final static Option optionK = new Option("k", true, null);
private final static Option optionD = new Option("d", "directly", false, null);
private final static Option optionR = new Option("r", "recursive", false, null);
private final static Option optionS = new Option("s", "strict", false, null);

public static Options buildOptions() {
Options options = new Options();
Expand All @@ -34,6 +35,7 @@ public static Options buildOptions() {
options.addOption(optionD);
options.addOption(optionR);
options.addOption(optionK);
options.addOption(optionS);
return options;
}
}
10 changes: 7 additions & 3 deletions src/com/amastigote/unstamper/util/TaskRunner.java
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
/*
AUTH | hwding
DATE | Sep 05 2017
DATE | Sep 10 2017
DESC | text stamp remover for PDF files
MAIL | [email protected]
GITH | github.com/hwding
Expand All @@ -19,9 +19,13 @@

public class TaskRunner {
private static String[] keywords;
private static boolean useStrict;

public static void init(@NotNull String[] keywords) {
public static void init(
@NotNull String[] keywords,
@NotNull boolean useStrict) {
TaskRunner.keywords = keywords;
TaskRunner.useStrict = useStrict;
}

public static void procSingleFile(
Expand All @@ -42,7 +46,7 @@ public static void procSingleFile(
}

private static void submitToProcessor(@NotNull File file) {
Processor.process(file, keywords);
Processor.process(file, keywords, useStrict);
}

public static void procSingleFileDirectly(@NotNull String ifn) {
Expand Down

0 comments on commit 3c30f64

Please sign in to comment.