From 3c30f64838e4f61e580ee830f9ded1ed12640c9c Mon Sep 17 00:00:00 2001 From: hwding Date: Sun, 10 Sep 2017 01:45:23 +0800 Subject: [PATCH] 1. add strict mode 2. bump version to 0.1.3 --- README.md | 11 +++-- pom.xml | 2 +- script/install | 2 +- src/com/amastigote/unstamper/Main.java | 5 ++- .../amastigote/unstamper/core/Processor.java | 7 ++-- .../unstamper/core/TextStampRecognizer.java | 41 ++++++++++++------- .../unstamper/log/GeneralLogger.java | 13 +++--- .../unstamper/util/OptionManager.java | 4 +- .../amastigote/unstamper/util/TaskRunner.java | 10 +++-- 9 files changed, 61 insertions(+), 34 deletions(-) diff --git a/README.md b/README.md index a23c47b..1e698ba 100644 --- a/README.md +++ b/README.md @@ -25,13 +25,16 @@ Remove text stamps of **any font**, **any encoding** and **any language** with p ## Usage ``` -Usage: +Usage: [OPTION] -i [INPUT PDF] -k [KEYWORDS...] (-o [OUTPUT PDF]) [OPTION] -I [INPUT DIR] -k [KEYWORDS...] (-O [OUTPUT DIR]) Options: - -d, --directly directly modify the input file(s), which makes option o/O unnecessary + -d, --directly directly modify the input file(s), option o/O is + unnecessary when this option is on -r, --recursive process files in the given dir recursively + -s, --strict use strict mode, a text area is considered as water mark + only if its content strictly equals one of the keywords ``` ## Get it now @@ -48,10 +51,10 @@ Make sure you have `wget` installed. #### Run ```shell # For single file processing -➜ unstamp -i "C Recipes.pdf" -o "C Recipes.unstamped.pdf" -k www.allitebooks.com +➜ unstamp -i "C Recipes.pdf" -o "C Recipes.unstamped.pdf" -k www.allitebooks.com -s ➜ unstamp -i RoR.pdf -o RoR.unstamped.pdf -k 图灵社区会员 # Or -➜ unstamp -i "C Recipes.pdf" -d -k www.allitebooks.com +➜ unstamp -i "C Recipes.pdf" -d -k www.allitebooks.com -s ➜ unstamp -i RoR.pdf -d -k 图灵社区会员 # For massive files processing diff --git a/pom.xml b/pom.xml index 41196d1..98b3aa5 100644 --- a/pom.xml +++ b/pom.xml @@ -6,7 +6,7 @@ 4.0.0 com.amastigote unstamper - 0.1.2 + 0.1.3 Text stamp remover for PDF files. pdf-unstamper https://github.com/hwding/pdf-unstamper diff --git a/script/install b/script/install index 7cc0e59..043b5a9 100755 --- a/script/install +++ b/script/install @@ -8,7 +8,7 @@ user_bin=`echo ~`"/bin/" locl_bin="/usr/local/bin/" jar_name="pdf-unstamper.jar" exe_name="unstamp" -_version="0.1.2" +_version="0.1.3" jar_durl="https://github.com/hwding/pdf-unstamper/releases/download/$_version/$jar_name" wrapper="#!/bin/bash\njava -jar ${user_bin}${jar_name} \"\$@\"\n" diff --git a/src/com/amastigote/unstamper/Main.java b/src/com/amastigote/unstamper/Main.java index c957265..0d7cef8 100644 --- a/src/com/amastigote/unstamper/Main.java +++ b/src/com/amastigote/unstamper/Main.java @@ -1,6 +1,6 @@ /* AUTH | hwding - DATE | Sep 05 2017 + DATE | Sep 10 2017 DESC | text stamp remover for PDF files MAIL | m@amastigote.com GITH | github.com/hwding @@ -37,7 +37,8 @@ public static void main(@NotNull String[] args) { System.exit(0); } else { TaskRunner.init( - commandLine.getOptionValues('k')); + commandLine.getOptionValues('k'), + commandLine.hasOption('s')); if (commandLine.hasOption('i') && (commandLine.hasOption('o') || commandLine.hasOption('d'))) { if (commandLine.hasOption('d')) diff --git a/src/com/amastigote/unstamper/core/Processor.java b/src/com/amastigote/unstamper/core/Processor.java index 37b6ec7..401dc19 100644 --- a/src/com/amastigote/unstamper/core/Processor.java +++ b/src/com/amastigote/unstamper/core/Processor.java @@ -1,6 +1,6 @@ /* AUTH | hwding - DATE | Sep 05 2017 + DATE | Sep 10 2017 DESC | text stamp remover for PDF files MAIL | m@amastigote.com GITH | github.com/hwding @@ -27,7 +27,8 @@ public class Processor { public static void process( @NotNull File file, - @NotNull String[] strings) { + @NotNull String[] strings, + @NotNull boolean useStrict) { AtomicBoolean processAllOk = new AtomicBoolean(true); GeneralLogger.Processor.procInProgress(file.getName()); @@ -61,7 +62,7 @@ public static void process( if (e instanceof COSString) { /* Ignore Any Exception During Parallel Processing */ try { - if (TextStampRecognizer.recognize(strings, ((COSString) e).getBytes(), pdFonts)) + if (TextStampRecognizer.recognize(strings, ((COSString) e).getBytes(), pdFonts, useStrict)) ((COSString) e).setValue(new byte[0]); } catch (Exception ignored) { } diff --git a/src/com/amastigote/unstamper/core/TextStampRecognizer.java b/src/com/amastigote/unstamper/core/TextStampRecognizer.java index b0bbad3..a7dd2ad 100644 --- a/src/com/amastigote/unstamper/core/TextStampRecognizer.java +++ b/src/com/amastigote/unstamper/core/TextStampRecognizer.java @@ -1,6 +1,6 @@ /* AUTH | hwding - DATE | Sep 05 2017 + DATE | Sep 10 2017 DESC | text stamp remover for PDF files MAIL | m@amastigote.com GITH | github.com/hwding @@ -18,15 +18,16 @@ class TextStampRecognizer { private static boolean recognizeWithFont( @NotNull String[] keywords, @NotNull byte[] inputText, - @NotNull Set pdFonts) { - String bs = generateByteString(inputText); + @NotNull Set pdFonts, + @NotNull boolean useStrict) { + String encodedInput = generateByteString(inputText); for (PDFont f : pdFonts) { if (f == null) continue; for (String k : keywords) { try { - byte[] encodedKeywords = f.encode(k); - if (bs.contains(generateByteString(encodedKeywords))) - return true; + byte[] encodedKeywordBytes = f.encode(k); + final String encodedKeyword = generateByteString(encodedKeywordBytes); + if (checkDuplicate(encodedInput, encodedKeyword, useStrict)) return true; } catch (IOException | IllegalArgumentException ignored) { } } @@ -36,26 +37,38 @@ private static boolean recognizeWithFont( private static boolean recognizePlain( @NotNull String[] keywords, - @NotNull byte[] inputText + @NotNull byte[] inputText, + @NotNull boolean useStrict ) { - for (String k : keywords) { - if (new String(inputText).contains(k)) return true; + for (String k : keywords) + if (checkDuplicate(new String(inputText), k, useStrict)) return true; + return false; + } + + private static boolean checkDuplicate( + @NotNull String input, + @NotNull String keyword, + @NotNull boolean useStrict) { + if (useStrict) { + if (input.equals(keyword)) return true; + } else { + if (input.contains(keyword)) return true; } return false; } static boolean recognize(@NotNull String[] keywords, @NotNull byte[] inputText, - @NotNull Set pdFonts) { - return recognizePlain(keywords, inputText) || - recognizeWithFont(keywords, inputText, pdFonts); + @NotNull Set pdFonts, + @NotNull boolean useStrict) { + return recognizePlain(keywords, inputText, useStrict) || + recognizeWithFont(keywords, inputText, pdFonts, useStrict); } private static String generateByteString(@NotNull byte[] bytes) { StringBuilder stringBuilder = new StringBuilder(); - for (byte b : bytes) { + for (byte b : bytes) stringBuilder.append(Byte.toString(b)); - } return stringBuilder.toString(); } } diff --git a/src/com/amastigote/unstamper/log/GeneralLogger.java b/src/com/amastigote/unstamper/log/GeneralLogger.java index 7c15a19..f228fe3 100644 --- a/src/com/amastigote/unstamper/log/GeneralLogger.java +++ b/src/com/amastigote/unstamper/log/GeneralLogger.java @@ -1,6 +1,6 @@ /* AUTH | hwding - DATE | Sep 05 2017 + DATE | Sep 10 2017 DESC | text stamp remover for PDF files MAIL | m@amastigote.com GITH | github.com/hwding @@ -12,13 +12,16 @@ public class GeneralLogger { public static class Help { private static final String usage = - "\nPDF-UnStamper ver. 0.1.2 by hwding@GitHub\n" + + "\nPDF-UnStamper ver. 0.1.3 by hwding@GitHub\n" + "\nUsage:" + "\n [OPTION] -i [INPUT PDF] -k [KEYWORDS...] (-o [OUTPUT PDF])" + "\n [OPTION] -I [INPUT DIR] -k [KEYWORDS...] (-O [OUTPUT DIR])\n" + "\nOptions:" + - "\n -d, --directly directly modify the input file(s), which makes option o/O unnecessary" + - "\n -r, --recursive process files in the given dir recursively\n"; + "\n -d, --directly directly modify the input file(s), option o/O is\n" + + " unnecessary when this option is on" + + "\n -r, --recursive process files in the given dir recursively" + + "\n -s, --strict use strict mode, a text area is considered as water mark\n" + + " only if its content strictly equals one of the keywords\n"; public static void print() { System.out.println(usage); @@ -61,7 +64,7 @@ public static void procInProgress(@NotNull String fn) { } public static void procFinished() { - System.out.println(" GOOD"); + System.out.println(" done"); } } } diff --git a/src/com/amastigote/unstamper/util/OptionManager.java b/src/com/amastigote/unstamper/util/OptionManager.java index f188c5f..e47208e 100644 --- a/src/com/amastigote/unstamper/util/OptionManager.java +++ b/src/com/amastigote/unstamper/util/OptionManager.java @@ -1,6 +1,6 @@ /* AUTH | hwding - DATE | Aug 25 2017 + DATE | Sep 10 2017 DESC | text stamp remover for PDF files MAIL | m@amastigote.com GITH | github.com/hwding @@ -18,6 +18,7 @@ public class OptionManager { private final static Option optionK = new Option("k", true, null); private final static Option optionD = new Option("d", "directly", false, null); private final static Option optionR = new Option("r", "recursive", false, null); + private final static Option optionS = new Option("s", "strict", false, null); public static Options buildOptions() { Options options = new Options(); @@ -34,6 +35,7 @@ public static Options buildOptions() { options.addOption(optionD); options.addOption(optionR); options.addOption(optionK); + options.addOption(optionS); return options; } } diff --git a/src/com/amastigote/unstamper/util/TaskRunner.java b/src/com/amastigote/unstamper/util/TaskRunner.java index 8c83171..f744b28 100644 --- a/src/com/amastigote/unstamper/util/TaskRunner.java +++ b/src/com/amastigote/unstamper/util/TaskRunner.java @@ -1,6 +1,6 @@ /* AUTH | hwding - DATE | Sep 05 2017 + DATE | Sep 10 2017 DESC | text stamp remover for PDF files MAIL | m@amastigote.com GITH | github.com/hwding @@ -19,9 +19,13 @@ public class TaskRunner { private static String[] keywords; + private static boolean useStrict; - public static void init(@NotNull String[] keywords) { + public static void init( + @NotNull String[] keywords, + @NotNull boolean useStrict) { TaskRunner.keywords = keywords; + TaskRunner.useStrict = useStrict; } public static void procSingleFile( @@ -42,7 +46,7 @@ public static void procSingleFile( } private static void submitToProcessor(@NotNull File file) { - Processor.process(file, keywords); + Processor.process(file, keywords, useStrict); } public static void procSingleFileDirectly(@NotNull String ifn) {