pdf-diff
+Choose two PDF files with the same dimensions to compare.
pdf-diff captures each page as an image and compares the images. +
+ +
+ +
diff --git a/.gitignore b/.gitignore index b223398..3e44013 100644 --- a/.gitignore +++ b/.gitignore @@ -15,4 +15,6 @@ # vendor/ *.pdf -generated/ \ No newline at end of file +generated/ +data/ +pdf-diff \ No newline at end of file diff --git a/README.md b/README.md index 0a9740c..645dd31 100644 --- a/README.md +++ b/README.md @@ -42,6 +42,15 @@ go build Once ran, the images are created in the folder `generated`. +#### Available flags + +The following table lists the flags that `pdf-diff` accepts. + +Name | Description +-- | -- +`color` | hex value of the background color used to highlight the differences between the PDFs +`server` | enables the local web server, which provides a web UI + ### Contact If you wish to use this for your project, go ahead. If you have any issues or improvements, feel free to open a new [ISSUE](https://github.com/serhack/pdf-diff/issues). Lastly, if you have a good algorithm to implement or just to discuss about any other tools for editor, you can [email me](hi@serhack.me). diff --git a/main.go b/main.go index e31975b..2250fe1 100644 --- a/main.go +++ b/main.go @@ -21,6 +21,7 @@ type Pixel struct { } var rmaster, gmaster, bmaster float64 +var hash1 string func rgbaToPixel(r uint32, g uint32, b uint32, a uint32) Pixel { return Pixel{uint8(r >> 8), uint8(g >> 8), uint8(b >> 8), uint8(a >> 8), false} @@ -34,19 +35,30 @@ func CreatePNG(PDFPath string) { folderName := ComputeSha256(PDFPath) // Checks if a folder with the name sha256(file) already exists - if _, err := os.Stat(folderName); err == nil { + if _, err := os.Stat("data/" + folderName); err == nil { return } // If not, probably we never met this pdf. 
Create the folder - err := os.Mkdir(folderName, os.ModePerm) + err := os.Mkdir("data/" +folderName, os.ModePerm) if err != nil { panic(err) } + file, err := os.Create("data/" + folderName + "/.tmp") + if err != nil { + panic(err) + } + defer file.Close() + // Create the images - cmd, _ := exec.Command("pdftoppm", "-png", PDFPath, folderName+"/png_gen").Output() + cmd, _ := exec.Command("pdftoppm", "-png", PDFPath, "data/" + folderName+"/png_gen").Output() fmt.Println(cmd) + + err = os.Remove("data/" + folderName + ".tmp") + if err != nil { + panic(err) + } } func RetrievePixel(fileName string) ([][]Pixel, int, int) { @@ -136,7 +148,7 @@ func CompareSingleImage(path1 string, path2 string, i int) { } // Create the file under "generated" folder - f, err := os.Create("generated/image-" + strconv.Itoa(i) + ".png") + f, err := os.Create("data/generated/" + hash1 + "/image-" + strconv.Itoa(i) + ".png") if err != nil { panic(err) } @@ -181,15 +193,32 @@ func Compare(PDF1 string, PDF2 string) { // Compares the two files shaPDF1 := ComputeSha256(PDF1) + hash1 = shaPDF1 shaPDF2 := ComputeSha256(PDF2) - if _, err := os.Stat("generated"); errors.Is(err, os.ErrNotExist) { - err := os.Mkdir("generated", os.ModePerm) + if _, err := os.Stat("data"); errors.Is(err, os.ErrNotExist) { + err := os.Mkdir("data", os.ModePerm) if err != nil { panic(err) } } + if _, err := os.Stat("data/generated"); errors.Is(err, os.ErrNotExist) { + err := os.Mkdir("data/generated", os.ModePerm) + if err != nil { + panic(err) + } + } + + if _, err := os.Stat("data/generated/" + shaPDF1); errors.Is(err, os.ErrNotExist) { + err := os.Mkdir("data/generated/" + shaPDF1, os.ModePerm) + if err != nil { + panic(err) + } + } else { + return + } + i := 1 k := 1 for { @@ -200,8 +229,8 @@ func Compare(PDF1 string, PDF2 string) { o := fmt.Sprintf("%d", k) s := fmt.Sprintf("%0"+o+"d", i) - s_pdf1 := shaPDF1 + "/png_gen-" + s + ".png" - s_pdf2 := shaPDF2 + "/png_gen-" + s + ".png" + s_pdf1 := "data/" +shaPDF1 + 
"/png_gen-" + s + ".png" + s_pdf2 := "data/" + shaPDF2 + "/png_gen-" + s + ".png" if _, err := os.Stat(s_pdf1); errors.Is(err, os.ErrNotExist) { k++ @@ -232,21 +261,27 @@ func main() { // flags color := flag.String("color", "ff2010", "hex value for the background color for highlighting") + enableServer := flag.Bool("server", false, "flag to enable local server for pdf-diff") + flag.Parse() - arguments := flag.Args() + if !*enableServer { + arguments := flag.Args() - if len(arguments) < 2 { - fmt.Println("pdf-diff: highlights the differences between two pdf files.") - fmt.Println("Usage: pdf-diff pdf-file-1 pdf-file-2 [-color] hex-color") - fmt.Println() - flag.PrintDefaults() - os.Exit(1) - } + if len(arguments) < 2 { + fmt.Println("pdf-diff: highlights the differences between two pdf files.") + fmt.Println("Usage: pdf-diff pdf-file-1 pdf-file-2 [-color] hex-color") + fmt.Println() + flag.PrintDefaults() + os.Exit(1) + } - hexToRGB(*color) - CreatePNG(arguments[0]) - CreatePNG(arguments[1]) - Compare(arguments[0], arguments[1]) + hexToRGB(*color) + CreatePNG(arguments[0]) + CreatePNG(arguments[1]) + Compare(arguments[0], arguments[1]) + } else { + StartServer() + } } diff --git a/server.go b/server.go new file mode 100644 index 0000000..508c7cf --- /dev/null +++ b/server.go @@ -0,0 +1,217 @@ +package main + +import ( + "errors" + "fmt" + "html/template" + "io" + "net/http" + "os" + "path/filepath" + "strings" +) + +type DiffImage struct { + Number int // page number + Filename string // file1 +} + +type ResultPage struct { + Hash1 string + Hash2 string + Differences []DiffImage +} + +func indexController(w http.ResponseWriter, r *http.Request) { + if r.Method != "GET" { + fmt.Println("A new request has been made on / but the method " + r.Method + " was not supported.") + return + } + + // TODO (idea): list pdf on upload folders + + // Display the compare page + t, _ := template.ParseFiles("templates/index.html") + t.Execute(w, nil) + +} + +func compareController(w 
http.ResponseWriter, r *http.Request) { + // Set a limit of 32 MB per request + r.Body = http.MaxBytesReader(w, r.Body, 32<<20) + + if r.Method == "GET" { + // Redirect to index page + http.Redirect(w, r, "/", http.StatusMovedPermanently) + } else if r.Method == "POST" { + parseErr := r.ParseMultipartForm(32 << 20) + if parseErr != nil { + http.Error(w, "failed to parse multipart message", http.StatusBadRequest) + return + } + + if len(r.MultipartForm.File) != 2 { + http.Error(w, "two file pdfs per comparision", http.StatusBadRequest) + return + } + + // Grab the two PDF(s) from the form + pdfFile1 := r.MultipartForm.File["pdf-1"] + pdfFile2 := r.MultipartForm.File["pdf-2"] + + // Check if the two files are PDF + + file1, err := pdfFile1[0].Open() + if err != nil { + panic(err) + } + defer file1.Close() + + buff := make([]byte, 512) + if _, err = file1.Read(buff); err != nil { + panic(err) + } + + var pdf1hash string + + if http.DetectContentType(buff) == "application/pdf" { + out, err := os.Create("data/uploads/" + filepath.Clean(pdfFile1[0].Filename)) + if err != nil { + panic(err) + } + _, err = file1.Seek(0, io.SeekStart) + if err != nil { + panic(err) + } + io.Copy(out, file1) + pdf1hash = ComputeSha256("data/uploads/" + filepath.Clean(pdfFile1[0].Filename)) + } + + file2, err := pdfFile2[0].Open() + if err != nil { + panic(err) + } + defer file2.Close() + if _, err = file2.Read(buff); err != nil { + panic(err) + } + + var pdf2hash string + + if http.DetectContentType(buff) == "application/pdf" { + // Write them in upload folder + out, err := os.Create("data/uploads/" + filepath.Clean(pdfFile2[0].Filename)) + if err != nil { + panic(err) + } + _, err = file2.Seek(0, io.SeekStart) + if err != nil { + panic(err) + } + io.Copy(out, file2) + pdf2hash = ComputeSha256("data/uploads/" + filepath.Clean(pdfFile2[0].Filename)) + } + + fmt.Println("Starting a new job....") + + // Start the job + + hexToRGB("ff2010") + go CreatePNG("data/uploads/" + 
filepath.Clean(pdfFile1[0].Filename)) + go CreatePNG("data/uploads/" + filepath.Clean(pdfFile2[0].Filename)) + go Compare("data/uploads/"+filepath.Clean(pdfFile1[0].Filename), "data/uploads/"+filepath.Clean(pdfFile2[0].Filename)) + + // Redirect to result page + http.Redirect(w, r, "/compare/"+pdf1hash+"-"+pdf2hash, http.StatusMovedPermanently) + + } else { + fmt.Println("A new request has been made on /compare but the method " + r.Method + " was not supported.") + return + } + +} + +func retrieveFilesController(w http.ResponseWriter, r *http.Request) { + slug := r.URL.Path[len("/compare/"):] + if len(slug) == 0 { + http.Redirect(w, r, "/", http.StatusMovedPermanently) + return + } + hashes := strings.Split(slug, "-") + fmt.Printf("%s ", hashes[0]) + fmt.Printf("%s \n", hashes[1]) + + // Checks if the folder were already created + + if _, err := os.Stat("data/generated/" + hashes[0]); errors.Is(err, os.ErrNotExist) { + http.Error(w, "The two pdfs ("+hashes[0]+", "+hashes[1]+") were not compared.", http.StatusNotFound) + return + } + + if _, err := os.Stat("data/" + hashes[0] + "/.tmp"); errors.Is(err, os.ErrExist) { + http.Error(w, "The images are being created. It should take a few seconds.", http.StatusOK) + return + } + + if _, err := os.Stat("data/generated/" + hashes[0] + "/.tmp"); errors.Is(err, os.ErrExist) { + http.Error(w, "pdf-diff takes a while to generate all the images.", http.StatusOK) + return + } + + // Checks the result generated + + // List all the images given a filename (e.g. 
filename-1.png) + + f, err := os.Open("data/generated/" + hashes[0]) + if err != nil { + fmt.Println(err) + return + } + files, err := f.Readdir(0) + if err != nil { + fmt.Println(err) + return + } + i := 0 + var differences []DiffImage + + for _, v := range files { + single := DiffImage{ + Number: i, + Filename: v.Name(), + } + differences = append(differences, single) + i++ + } + + structure := ResultPage{ + Hash1: hashes[0], + Hash2: hashes[1], + Differences: differences, + } + + t := template.Must(template.ParseFiles("templates/result.html")) + if err != nil { + panic(err) + } + + err = t.Execute(w, structure) + if err != nil { + panic(err) + } +} + +func StartServer() { + + http.HandleFunc("/", indexController) + http.HandleFunc("/compare", compareController) + http.HandleFunc("/compare/", retrieveFilesController) + + http.Handle("/results/", http.StripPrefix("/results/", http.FileServer(http.Dir("./data")))) + + err := http.ListenAndServe(":8080", nil) + if err != nil { + panic(err) + } + +} diff --git a/templates/index.html b/templates/index.html new file mode 100644 index 0000000..ac187b7 --- /dev/null +++ b/templates/index.html @@ -0,0 +1,59 @@ + + +
+ +Choose two PDF files with the same dimensions to compare.
pdf-diff captures each page as an image and compares the images. +
The following is the result of comparing the
{{.Hash1}} and
{{.Hash2}} files. + Return to index + {{ range .Differences }} +
Page {{.Number}}:
+ +