Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

pdf-diff server edition #7

Draft
wants to merge 3 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -15,4 +15,6 @@
# vendor/

*.pdf
generated/
generated/
data/
pdf-diff
9 changes: 9 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,15 @@ go build

Once it has run, the images are created in the folder `generated`.

#### Available flags

A list of common flags that you can use via `pdf-diff` is provided in the following table.

Name | Description
-- | --
`color` | hex value (default `ff2010`) of the background color used to highlight the differences between the two PDFs
`server` | starts a local web UI (listening on port 8080) instead of running a one-off comparison from the command line

### Contact

If you wish to use this for your project, go ahead. If you run into any issues or have improvements to suggest, feel free to open a new [ISSUE](https://github.com/serhack/pdf-diff/issues). Lastly, if you have a good algorithm to implement, or just want to discuss other tools for editors, you can [email me]([email protected]).
Expand Down
75 changes: 55 additions & 20 deletions main.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ type Pixel struct {
}

var rmaster, gmaster, bmaster float64
var hash1 string

func rgbaToPixel(r uint32, g uint32, b uint32, a uint32) Pixel {
return Pixel{uint8(r >> 8), uint8(g >> 8), uint8(b >> 8), uint8(a >> 8), false}
Expand All @@ -34,19 +35,30 @@ func CreatePNG(PDFPath string) {
folderName := ComputeSha256(PDFPath)

// Checks if a folder with the name sha256(file) already exists
if _, err := os.Stat(folderName); err == nil {
if _, err := os.Stat("data/" + folderName); err == nil {
return
}

// If not, probably we never met this pdf. Create the folder
err := os.Mkdir(folderName, os.ModePerm)
err := os.Mkdir("data/" +folderName, os.ModePerm)
if err != nil {
panic(err)
}

file, err := os.Create("data/" + folderName + "/.tmp")
if err != nil {
panic(err)
}
defer file.Close()

// Create the images
cmd, _ := exec.Command("pdftoppm", "-png", PDFPath, folderName+"/png_gen").Output()
cmd, _ := exec.Command("pdftoppm", "-png", PDFPath, "data/" + folderName+"/png_gen").Output()
fmt.Println(cmd)

err = os.Remove("data/" + folderName + ".tmp")
if err != nil {
panic(err)
}
}

func RetrievePixel(fileName string) ([][]Pixel, int, int) {
Expand Down Expand Up @@ -136,7 +148,7 @@ func CompareSingleImage(path1 string, path2 string, i int) {
}

// Create the file under "generated" folder
f, err := os.Create("generated/image-" + strconv.Itoa(i) + ".png")
f, err := os.Create("data/generated/" + hash1 + "/image-" + strconv.Itoa(i) + ".png")
if err != nil {
panic(err)
}
Expand Down Expand Up @@ -181,15 +193,32 @@ func Compare(PDF1 string, PDF2 string) {
// Compares the two files

shaPDF1 := ComputeSha256(PDF1)
hash1 = shaPDF1
shaPDF2 := ComputeSha256(PDF2)

if _, err := os.Stat("generated"); errors.Is(err, os.ErrNotExist) {
err := os.Mkdir("generated", os.ModePerm)
if _, err := os.Stat("data"); errors.Is(err, os.ErrNotExist) {
err := os.Mkdir("data", os.ModePerm)
if err != nil {
panic(err)
}
}

if _, err := os.Stat("data/generated"); errors.Is(err, os.ErrNotExist) {
err := os.Mkdir("data/generated", os.ModePerm)
if err != nil {
panic(err)
}
}

if _, err := os.Stat("data/generated/" + shaPDF1); errors.Is(err, os.ErrNotExist) {
err := os.Mkdir("data/generated/" + shaPDF1, os.ModePerm)
if err != nil {
panic(err)
}
} else {
return
}

i := 1
k := 1
for {
Expand All @@ -200,8 +229,8 @@ func Compare(PDF1 string, PDF2 string) {
o := fmt.Sprintf("%d", k)
s := fmt.Sprintf("%0"+o+"d", i)

s_pdf1 := shaPDF1 + "/png_gen-" + s + ".png"
s_pdf2 := shaPDF2 + "/png_gen-" + s + ".png"
s_pdf1 := "data/" +shaPDF1 + "/png_gen-" + s + ".png"
s_pdf2 := "data/" + shaPDF2 + "/png_gen-" + s + ".png"

if _, err := os.Stat(s_pdf1); errors.Is(err, os.ErrNotExist) {
k++
Expand Down Expand Up @@ -232,21 +261,27 @@ func main() {
// flags

color := flag.String("color", "ff2010", "hex value for the background color for highlighting")
enableServer := flag.Bool("server", false, "flag to enable local server for pdf-diff")

flag.Parse()

arguments := flag.Args()
if !*enableServer {
arguments := flag.Args()

if len(arguments) < 2 {
fmt.Println("pdf-diff: highlights the differences between two pdf files.")
fmt.Println("Usage: pdf-diff pdf-file-1 pdf-file-2 [-color] hex-color")
fmt.Println()
flag.PrintDefaults()
os.Exit(1)
}
if len(arguments) < 2 {
fmt.Println("pdf-diff: highlights the differences between two pdf files.")
fmt.Println("Usage: pdf-diff pdf-file-1 pdf-file-2 [-color] hex-color")
fmt.Println()
flag.PrintDefaults()
os.Exit(1)
}

hexToRGB(*color)
CreatePNG(arguments[0])
CreatePNG(arguments[1])
Compare(arguments[0], arguments[1])
hexToRGB(*color)
CreatePNG(arguments[0])
CreatePNG(arguments[1])
Compare(arguments[0], arguments[1])
} else {
StartServer()
}

}
217 changes: 217 additions & 0 deletions server.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,217 @@
package main

import (
"errors"
"fmt"
"html/template"
"io"
"net/http"
"os"
"path/filepath"
"strings"
)

type (
	// DiffImage describes one generated difference image shown on the
	// result page.
	DiffImage struct {
		// Number is the position of the image in the listing built by
		// retrieveFilesController.
		Number int
		// Filename is the name of the image file inside the
		// data/generated/<hash1> folder.
		Filename string
	}

	// ResultPage is the data handed to templates/result.html.
	ResultPage struct {
		// Hash1 and Hash2 are the two hashes taken from the
		// /compare/<hash1>-<hash2> URL.
		Hash1 string
		Hash2 string
		// Differences lists the generated images found for Hash1.
		Differences []DiffImage
	}
)

// indexController serves the upload/compare form (templates/index.html).
//
// Only GET is supported; any other method is logged and answered with
// 405 Method Not Allowed. If the template cannot be loaded the client gets a
// 500 instead of the handler dereferencing a nil template.
func indexController(w http.ResponseWriter, r *http.Request) {
	if r.Method != http.MethodGet {
		fmt.Println("A new request has been made on / but the method " + r.Method + " was not supported.")
		w.Header().Set("Allow", http.MethodGet)
		http.Error(w, "method not allowed", http.StatusMethodNotAllowed)
		return
	}

	// TODO (idea): list pdf on upload folders

	// Display the compare page
	t, err := template.ParseFiles("templates/index.html")
	if err != nil {
		fmt.Println(err)
		http.Error(w, "failed to load the index page", http.StatusInternalServerError)
		return
	}

	// Part of the response may already have been written at this point, so
	// the error can only be logged.
	if err := t.Execute(w, nil); err != nil {
		fmt.Println(err)
	}
}

// errInvalidUpload marks an upload that was rejected because of what the
// client sent (not a PDF, unusable file name) rather than a server failure.
var errInvalidUpload = errors.New("invalid upload")

// compareController handles /compare.
//
// GET redirects to the index page. POST expects a multipart form with exactly
// two files in the fields "pdf-1" and "pdf-2"; both must be PDFs. The files
// are stored under data/uploads, a background comparison job is started and
// the client is redirected to /compare/<hash1>-<hash2>. Any other method is
// logged and answered with 405.
//
// Client mistakes are answered with 400 and storage failures with 500; the
// handler never panics on bad input.
func compareController(w http.ResponseWriter, r *http.Request) {
	const (
		maxRequestSize = 32 << 20 // 32 MB per request
		uploadDir      = "data/uploads"
	)

	switch r.Method {
	case http.MethodGet:
		// Redirect to index page
		http.Redirect(w, r, "/", http.StatusMovedPermanently)
		return
	case http.MethodPost:
		// handled below
	default:
		fmt.Println("A new request has been made on /compare but the method " + r.Method + " was not supported.")
		w.Header().Set("Allow", "GET, POST")
		http.Error(w, "method not allowed", http.StatusMethodNotAllowed)
		return
	}

	r.Body = http.MaxBytesReader(w, r.Body, maxRequestSize)
	if err := r.ParseMultipartForm(maxRequestSize); err != nil {
		http.Error(w, "failed to parse multipart message", http.StatusBadRequest)
		return
	}

	if len(r.MultipartForm.File) != 2 {
		http.Error(w, "two file pdfs per comparision", http.StatusBadRequest)
		return
	}

	// NOTE(review): two uploads sharing the same file name overwrite each
	// other in uploadDir; consider storing uploads under a unique name.
	var stored, hashes [2]string
	for i, field := range [2]string{"pdf-1", "pdf-2"} {
		// Two files may have been sent under other field names, so the
		// lookup has to be checked before indexing.
		headers := r.MultipartForm.File[field]
		if len(headers) == 0 {
			http.Error(w, "two file pdfs per comparision", http.StatusBadRequest)
			return
		}

		src, err := headers[0].Open()
		if err != nil {
			fmt.Println(err)
			http.Error(w, "failed to read the uploaded pdf", http.StatusInternalServerError)
			return
		}
		dst, err := storeUploadedPDF(src, uploadDir, headers[0].Filename)
		src.Close()
		if errors.Is(err, errInvalidUpload) {
			http.Error(w, err.Error(), http.StatusBadRequest)
			return
		}
		if err != nil {
			fmt.Println(err)
			http.Error(w, "failed to store the uploaded pdf", http.StatusInternalServerError)
			return
		}

		stored[i] = dst
		hashes[i] = ComputeSha256(dst)
	}

	fmt.Println("Starting a new job....")

	// Start the job
	hexToRGB("ff2010")
	go func() {
		// A panic in a goroutine that is not serving a request would
		// terminate the whole server, so it is contained and logged here.
		defer func() {
			if rec := recover(); rec != nil {
				fmt.Println("comparison job failed:", rec)
			}
		}()
		// Compare reads the PNGs produced by CreatePNG, so the three steps
		// must run one after the other rather than concurrently.
		CreatePNG(stored[0])
		CreatePNG(stored[1])
		Compare(stored[0], stored[1])
	}()

	// Redirect to result page. 303 makes the browser follow up with a GET
	// and, unlike 301, is not cached as the permanent answer to this POST.
	http.Redirect(w, r, "/compare/"+hashes[0]+"-"+hashes[1], http.StatusSeeOther)
}

// storeUploadedPDF checks that src holds a PDF and copies it to dir, creating
// dir if needed. Only the base name of name is used, so an upload cannot be
// written outside dir. It returns the path of the stored file, or an error
// wrapping errInvalidUpload when the upload itself is unacceptable.
func storeUploadedPDF(src io.ReadSeeker, dir, name string) (string, error) {
	// http.DetectContentType looks at no more than the first 512 bytes.
	head := make([]byte, 512)
	n, err := io.ReadFull(src, head)
	if err != nil && err != io.EOF && err != io.ErrUnexpectedEOF {
		return "", err
	}
	if http.DetectContentType(head[:n]) != "application/pdf" {
		return "", fmt.Errorf("%w: %q is not a pdf", errInvalidUpload, name)
	}

	base := filepath.Base(filepath.Clean(name))
	if base == "." || base == ".." || base == string(filepath.Separator) {
		return "", fmt.Errorf("%w: unusable file name %q", errInvalidUpload, name)
	}

	// Rewind so the sniffed bytes are part of the copy.
	if _, err := src.Seek(0, io.SeekStart); err != nil {
		return "", err
	}
	if err := os.MkdirAll(dir, os.ModePerm); err != nil {
		return "", err
	}

	dst := filepath.Join(dir, base)
	out, err := os.Create(dst)
	if err != nil {
		return "", err
	}
	if _, err := io.Copy(out, src); err != nil {
		out.Close()
		return "", err
	}
	if err := out.Close(); err != nil {
		return "", err
	}
	return dst, nil
}

// retrieveFilesController serves the result page at /compare/<hash1>-<hash2>.
//
// An empty slug redirects to the index page. A slug that does not contain two
// usable hashes is answered with 400, and a pair that was never compared with
// 404. While a ".tmp" marker exists in data/<hash1> or data/generated/<hash1>
// a "still working" message is returned. Otherwise the PNG files found in
// data/generated/<hash1> are rendered through templates/result.html.
func retrieveFilesController(w http.ResponseWriter, r *http.Request) {
	slug := strings.TrimPrefix(r.URL.Path, "/compare/")
	if len(slug) == 0 {
		http.Redirect(w, r, "/", http.StatusMovedPermanently)
		return
	}

	// Both parts end up in filesystem paths, so they must be present and
	// must not be able to point outside the data folder.
	hashes := strings.Split(slug, "-")
	if len(hashes) < 2 || !isSafePathSegment(hashes[0]) || !isSafePathSegment(hashes[1]) {
		http.Error(w, "expected a result path of the form /compare/<hash1>-<hash2>", http.StatusBadRequest)
		return
	}
	fmt.Printf("%s %s \n", hashes[0], hashes[1])

	// Checks if the folder were already created
	resultDir := "data/generated/" + hashes[0]
	if _, err := os.Stat(resultDir); errors.Is(err, os.ErrNotExist) {
		http.Error(w, "The two pdfs ("+hashes[0]+", "+hashes[1]+") were not compared.", http.StatusNotFound)
		return
	}

	// os.Stat returns a nil error when the file exists (it never reports
	// os.ErrExist), so a nil error is what signals a job still in progress.
	if _, err := os.Stat("data/" + hashes[0] + "/.tmp"); err == nil {
		http.Error(w, "The images are being created. It should take a few seconds.", http.StatusOK)
		return
	}
	if _, err := os.Stat(resultDir + "/.tmp"); err == nil {
		http.Error(w, "pdf-diff takes a while to generate all the images.", http.StatusOK)
		return
	}

	// List all the generated images
	dir, err := os.Open(resultDir)
	if err != nil {
		fmt.Println(err)
		http.Error(w, "failed to read the generated images", http.StatusInternalServerError)
		return
	}
	defer dir.Close()

	entries, err := dir.Readdir(0)
	if err != nil {
		fmt.Println(err)
		http.Error(w, "failed to read the generated images", http.StatusInternalServerError)
		return
	}

	differences := make([]DiffImage, 0, len(entries))
	for _, entry := range entries {
		// Only the generated PNG files belong on the result page.
		if entry.IsDir() || !strings.HasSuffix(entry.Name(), ".png") {
			continue
		}
		differences = append(differences, DiffImage{
			Number:   len(differences),
			Filename: entry.Name(),
		})
	}

	structure := ResultPage{
		Hash1:       hashes[0],
		Hash2:       hashes[1],
		Differences: differences,
	}

	t, err := template.ParseFiles("templates/result.html")
	if err != nil {
		fmt.Println(err)
		http.Error(w, "failed to load the result page", http.StatusInternalServerError)
		return
	}

	// Part of the response may already have been written at this point, so
	// the error can only be logged.
	if err := t.Execute(w, structure); err != nil {
		fmt.Println(err)
	}
}

// isSafePathSegment reports whether s can be used as a single path element:
// it is not empty, not "." or "..", and contains no path separator.
func isSafePathSegment(s string) bool {
	return s != "" && s != "." && s != ".." && !strings.ContainsAny(s, `/\`)
}

// StartServer wires the pdf-diff web UI into the default HTTP mux and serves
// it on port 8080. It only returns by panicking, which happens when
// http.ListenAndServe reports an error.
func StartServer() {
	// Page handlers.
	routes := []struct {
		pattern string
		handler func(http.ResponseWriter, *http.Request)
	}{
		{"/", indexController},
		{"/compare", compareController},
		{"/compare/", retrieveFilesController},
	}
	for _, route := range routes {
		http.HandleFunc(route.pattern, route.handler)
	}

	// Everything below ./data is exposed under /results/.
	dataFiles := http.FileServer(http.Dir("./data"))
	http.Handle("/results/", http.StripPrefix("/results/", dataFiles))

	if err := http.ListenAndServe(":8080", nil); err != nil {
		panic(err)
	}
}
Loading