From 5a9aa6b0b17d95a088e0627a5ded3a68e88dd9ba Mon Sep 17 00:00:00 2001 From: Alexander Pikeev Date: Mon, 23 Dec 2024 14:00:52 +0300 Subject: [PATCH] feat: add new split routes (#31) * feat: add new split routes * fix: linter issues fixes --------- Co-authored-by: Alexander Pikeev --- Makefile | 2 +- README.md | 2 +- build/tests/Dockerfile | 2 +- fields.go | 6 ++ html_test.go | 4 ++ libreoffice_test.go | 26 ++----- markdown_test.go | 41 ++++------- pdfengines_test.go | 8 +++ split_intervals.go | 42 +++++++++++ split_pages.go | 46 ++++++++++++ split_test.go | 160 +++++++++++++++++++++++++++++++++++++++++ test/testfunc.go | 141 ++++++++++++++++++++++++++++++++++++ url_test.go | 12 ++++ 13 files changed, 441 insertions(+), 51 deletions(-) create mode 100644 split_intervals.go create mode 100644 split_pages.go create mode 100644 split_test.go diff --git a/Makefile b/Makefile index 3cc6f8b..5e0b976 100644 --- a/Makefile +++ b/Makefile @@ -1,5 +1,5 @@ GOLANG_VERSION=1.23.2 -GOTENBERG_VERSION=8.14.1 +GOTENBERG_VERSION=edge GOLANGCI_LINT_VERSION=1.61.0 REPO=starwalkn/gotenberg-go-client/v8 diff --git a/README.md b/README.md index bacf3fc..4a76122 100644 --- a/README.md +++ b/README.md @@ -7,7 +7,7 @@ |Gotenberg version | Client version | |:----------------:|:----------------------------------------------------------------------------------------------------------:| -|`8.x` **(actual)**| `8.7.4` **(actual)**
| +|`8.x` **(actual)**| `8.8.0` **(actual)**
| |`7.x` | `<= 8.5.0` | |`6.x` | thecodingmachine/gotenberg-go-client | diff --git a/build/tests/Dockerfile b/build/tests/Dockerfile index 305d97c..e21a129 100644 --- a/build/tests/Dockerfile +++ b/build/tests/Dockerfile @@ -3,7 +3,7 @@ ARG GOTENBERG_VERSION FROM golang:${GOLANG_VERSION:-1.23.2}-alpine AS golang -FROM gotenberg/gotenberg:${GOTENBERG_VERSION:-8.14.1} +FROM gotenberg/gotenberg:${GOTENBERG_VERSION:-8.15.0} USER root diff --git a/fields.go b/fields.go index 6f8388f..d62aafe 100644 --- a/fields.go +++ b/fields.go @@ -90,3 +90,9 @@ const ( fieldMergePdfA formField = "pdfa" fieldMergePdfUA formField = "pdfua" ) + +const ( + fieldSplitMode = "splitMode" + fieldSplitSpan = "splitSpan" + fieldSplitUnify = "splitUnify" +) diff --git a/html_test.go b/html_test.go index 0f44637..776441c 100644 --- a/html_test.go +++ b/html_test.go @@ -96,6 +96,10 @@ func TestHTMLScreenshot(t *testing.T) { err = c.StoreScreenshot(context.Background(), req, dest) require.NoError(t, err) assert.FileExists(t, dest) + + isValidJPEG, err := test.IsValidJPEG(dest) + require.NoError(t, err) + assert.True(t, isValidJPEG) } func TestHTMLPdfA(t *testing.T) { diff --git a/libreoffice_test.go b/libreoffice_test.go index 121c4b0..de8fa22 100644 --- a/libreoffice_test.go +++ b/libreoffice_test.go @@ -1,7 +1,6 @@ package gotenberg import ( - "archive/zip" "context" "fmt" "net/http" @@ -114,25 +113,10 @@ func TestLibreOfficeMultipleWithoutMerge(t *testing.T) { require.NoError(t, err) assert.FileExists(t, dest) - zipReader, err := zip.OpenReader(dest) - require.NoError(t, err) - - expectedFiles := map[string]bool{ - "document1.docx.pdf": false, - "document2.docx.pdf": false, - } - - for _, file := range zipReader.File { - if _, ok := expectedFiles[file.Name]; ok { - expectedFiles[file.Name] = true - } - } - - for fileName, found := range expectedFiles { - assert.True(t, found, "File %s not found in zip", fileName) - } - err = zipReader.Close() + count, isPDFs, err := test.IsPDFsInArchive(t, 
dest) require.NoError(t, err) + assert.Equal(t, 2, count) + assert.True(t, isPDFs) } func TestLibreOfficeMultipleWithMerge(t *testing.T) { @@ -156,6 +140,10 @@ func TestLibreOfficeMultipleWithMerge(t *testing.T) { isPDF, err := test.IsPDF(dest) require.NoError(t, err) assert.True(t, isPDF) + + count, err := test.GetPDFPageCount(dest) + require.NoError(t, err) + assert.Equal(t, 4, count) } func TestLibreOfficePdfA(t *testing.T) { diff --git a/markdown_test.go b/markdown_test.go index fdec769..209ab2c 100644 --- a/markdown_test.go +++ b/markdown_test.go @@ -36,35 +36,6 @@ func TestMarkdown(t *testing.T) { }) require.NoError(t, err) - dirPath := t.TempDir() - dest := fmt.Sprintf("%s/foo.pdf", dirPath) - err = c.Store(context.Background(), req, dest) - require.NoError(t, err) - assert.FileExists(t, dest) -} - -func TestMarkdownComplete(t *testing.T) { - c, err := NewClient("http://localhost:3000", http.DefaultClient) - require.NoError(t, err) - - index, err := document.FromPath("index.html", test.MarkdownTestFilePath(t, "index.html")) - require.NoError(t, err) - markdown1, err := document.FromPath("paragraph1.md", test.MarkdownTestFilePath(t, "paragraph1.md")) - require.NoError(t, err) - markdown2, err := document.FromPath("paragraph2.md", test.MarkdownTestFilePath(t, "paragraph2.md")) - require.NoError(t, err) - markdown3, err := document.FromPath("paragraph3.md", test.MarkdownTestFilePath(t, "paragraph3.md")) - require.NoError(t, err) - req := NewMarkdownRequest(index, markdown1, markdown2, markdown3) - req.Trace("testMarkdownComplete") - req.UseBasicAuth("foo", "bar") - - err = req.ExtraHTTPHeaders(map[string]string{ - "X-Header": "Value", - "X-Scoped-Header": `value;scope=https?:\\/\\/([a-zA-Z0-9-]+\\.)*domain\\.com\\/.*`, - }) - require.NoError(t, err) - header, err := document.FromPath("header.html", test.MarkdownTestFilePath(t, "header.html")) require.NoError(t, err) req.Header(header) @@ -87,6 +58,14 @@ func TestMarkdownComplete(t *testing.T) { err = 
c.Store(context.Background(), req, dest) require.NoError(t, err) assert.FileExists(t, dest) + + isPDF, err := test.IsPDF(dest) + require.NoError(t, err) + assert.True(t, isPDF) + + count, err := test.GetPDFPageCount(dest) + require.NoError(t, err) + assert.Equal(t, 2, count) } func TestMarkdownPageRanges(t *testing.T) { @@ -146,4 +125,8 @@ func TestMarkdownScreenshot(t *testing.T) { err = c.StoreScreenshot(context.Background(), req, dest) require.NoError(t, err) assert.FileExists(t, dest) + + isValidJPEG, err := test.IsValidJPEG(dest) + require.NoError(t, err) + assert.True(t, isValidJPEG) } diff --git a/pdfengines_test.go b/pdfengines_test.go index f8ea957..1fd32de 100644 --- a/pdfengines_test.go +++ b/pdfengines_test.go @@ -30,4 +30,12 @@ func TestMerge(t *testing.T) { err = c.Store(context.Background(), req, dest) require.NoError(t, err) assert.FileExists(t, dest) + + isPDF, err := test.IsPDF(dest) + require.NoError(t, err) + assert.True(t, isPDF) + + count, err := test.GetPDFPageCount(dest) + require.NoError(t, err) + assert.Equal(t, 6, count) } diff --git a/split_intervals.go b/split_intervals.go new file mode 100644 index 0000000..bebb22f --- /dev/null +++ b/split_intervals.go @@ -0,0 +1,42 @@ +package gotenberg + +import ( + "strconv" + + "github.com/starwalkn/gotenberg-go-client/v8/document" +) + +type SplitIntervalsRequest struct { + pdfs []document.Document + + *baseRequest +} + +func NewSplitIntervalsRequest(pdfs ...document.Document) *SplitIntervalsRequest { + br := newBaseRequest() + br.fields[fieldSplitMode] = "intervals" + + return &SplitIntervalsRequest{ + pdfs: pdfs, + baseRequest: br, + } +} + +func (req *SplitIntervalsRequest) endpoint() string { + return "/forms/pdfengines/split" +} + +func (req *SplitIntervalsRequest) formDocuments() map[string]document.Document { + files := make(map[string]document.Document) + + for _, pdf := range req.pdfs { + files[pdf.Filename()] = pdf + } + + return files +} + +// SplitSpan sets the interval for split. 
+func (req *SplitIntervalsRequest) SplitSpan(span int) { + req.fields[fieldSplitSpan] = strconv.Itoa(span) +} diff --git a/split_pages.go b/split_pages.go new file mode 100644 index 0000000..4849d70 --- /dev/null +++ b/split_pages.go @@ -0,0 +1,46 @@ +package gotenberg + +import ( + "strconv" + + "github.com/starwalkn/gotenberg-go-client/v8/document" +) + +type SplitPagesRequest struct { + pdfs []document.Document + + *baseRequest +} + +func NewSplitPagesRequest(pdfs ...document.Document) *SplitPagesRequest { + br := newBaseRequest() + br.fields[fieldSplitMode] = "pages" + + return &SplitPagesRequest{ + pdfs: pdfs, + baseRequest: br, + } +} + +func (req *SplitPagesRequest) endpoint() string { + return "/forms/pdfengines/split" +} + +func (req *SplitPagesRequest) formDocuments() map[string]document.Document { + files := make(map[string]document.Document) + + for _, pdf := range req.pdfs { + files[pdf.Filename()] = pdf + } + + return files +} + +// SplitSpan sets the interval for split. +func (req *SplitPagesRequest) SplitSpan(span string) { + req.fields[fieldSplitSpan] = span +} + +func (req *SplitPagesRequest) SplitUnify(val bool) { + req.fields[fieldSplitUnify] = strconv.FormatBool(val) +} diff --git a/split_test.go b/split_test.go new file mode 100644 index 0000000..4eb7dfb --- /dev/null +++ b/split_test.go @@ -0,0 +1,160 @@ +package gotenberg + +import ( + "context" + "fmt" + "net/http" + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + + "github.com/starwalkn/gotenberg-go-client/v8/document" + "github.com/starwalkn/gotenberg-go-client/v8/test" +) + +func TestSplitIntervals(t *testing.T) { + c, err := NewClient("http://localhost:3000", http.DefaultClient) + require.NoError(t, err) + + doc, err := document.FromPath("gotenberg1.pdf", test.PDFTestFilePath(t, "gotenberg.pdf")) + require.NoError(t, err) + + r := NewSplitIntervalsRequest(doc) + r.Trace("testSplitIntervals") + r.UseBasicAuth("foo", "bar") + + var ( + span = 1 
+ expectedCount = 3 + ) + + r.SplitSpan(span) + r.OutputFilename("splitted.zip") + + dirPath := t.TempDir() + dest := fmt.Sprintf("%s/splitted.zip", dirPath) + err = c.Store(context.Background(), r, dest) + require.NoError(t, err) + assert.FileExists(t, dest) + + count, isPDFs, err := test.IsPDFsInArchive(t, dest) + require.NoError(t, err) + + require.Equal(t, expectedCount, count) + require.True(t, isPDFs) +} + +func TestSplitIntervalsOnePage(t *testing.T) { + c, err := NewClient("http://localhost:3000", http.DefaultClient) + require.NoError(t, err) + + doc, err := document.FromPath("gotenberg1.pdf", test.PDFTestFilePath(t, "gotenberg.pdf")) + require.NoError(t, err) + + r := NewSplitIntervalsRequest(doc) + r.Trace("testSplitIntervals") + r.UseBasicAuth("foo", "bar") + + r.SplitSpan(3) + r.OutputFilename("splitted.pdf") + + dirPath := t.TempDir() + dest := fmt.Sprintf("%s/splitted.pdf", dirPath) + err = c.Store(context.Background(), r, dest) + require.NoError(t, err) + assert.FileExists(t, dest) + + isPDF, err := test.IsPDF(dest) + require.NoError(t, err) + require.True(t, isPDF) +} + +func TestSplitPages(t *testing.T) { + c, err := NewClient("http://localhost:3000", http.DefaultClient) + require.NoError(t, err) + + doc, err := document.FromPath("gotenberg1.pdf", test.PDFTestFilePath(t, "gotenberg.pdf")) + require.NoError(t, err) + + r := NewSplitPagesRequest(doc) + r.Trace("testSplitPages") + r.UseBasicAuth("foo", "bar") + + var ( + span = "1-2" + expectedCount = 2 + ) + + r.SplitSpan(span) + r.SplitUnify(false) + r.OutputFilename("splitted.zip") + + dirPath := t.TempDir() + dest := fmt.Sprintf("%s/splitted.zip", dirPath) + err = c.Store(context.Background(), r, dest) + require.NoError(t, err) + assert.FileExists(t, dest) + + count, isPDFs, err := test.IsPDFsInArchive(t, dest) + require.NoError(t, err) + + require.Equal(t, expectedCount, count) + require.True(t, isPDFs) +} + +func TestSplitPagesOnePage(t *testing.T) { + c, err := 
NewClient("http://localhost:3000", http.DefaultClient) + require.NoError(t, err) + + doc, err := document.FromPath("gotenberg1.pdf", test.PDFTestFilePath(t, "gotenberg.pdf")) + require.NoError(t, err) + + r := NewSplitPagesRequest(doc) + r.Trace("testSplitPagesOnePage") + r.UseBasicAuth("foo", "bar") + + r.SplitSpan("1-1") + r.SplitUnify(false) + r.OutputFilename("splitted.pdf") + + dirPath := t.TempDir() + dest := fmt.Sprintf("%s/splitted.pdf", dirPath) + err = c.Store(context.Background(), r, dest) + require.NoError(t, err) + assert.FileExists(t, dest) + + isPDF, err := test.IsPDF(dest) + require.NoError(t, err) + require.True(t, isPDF) +} + +func TestSplitPagesUnify(t *testing.T) { + c, err := NewClient("http://localhost:3000", http.DefaultClient) + require.NoError(t, err) + + doc, err := document.FromPath("gotenberg1.pdf", test.PDFTestFilePath(t, "gotenberg.pdf")) + require.NoError(t, err) + + r := NewSplitPagesRequest(doc) + r.Trace("testSplitPagesOnePage") + r.UseBasicAuth("foo", "bar") + + r.SplitSpan("1-2") + r.SplitUnify(true) + r.OutputFilename("splitted.pdf") + + dirPath := t.TempDir() + dest := fmt.Sprintf("%s/splitted.pdf", dirPath) + err = c.Store(context.Background(), r, dest) + require.NoError(t, err) + assert.FileExists(t, dest) + + isPDF, err := test.IsPDF(dest) + require.NoError(t, err) + require.True(t, isPDF) + + pageCount, err := test.GetPDFPageCount(dest) + require.NoError(t, err) + require.Equal(t, 2, pageCount) +} diff --git a/test/testfunc.go b/test/testfunc.go index 69c5756..d0bfb96 100644 --- a/test/testfunc.go +++ b/test/testfunc.go @@ -2,12 +2,16 @@ package test import ( + "archive/zip" "bufio" "bytes" + "errors" "fmt" + "io" "os" "path" "path/filepath" + "regexp" "runtime" "strings" "testing" @@ -110,3 +114,140 @@ func IsPDFUA(filePath string) (bool, error) { return false, nil } + +// IsPDFsInArchive checks if the files inside the archive are PDF files +// and additionally returns the number of files in the archive. 
+//
+// Directory entries are skipped and do not contribute to the returned count.
+// Each remaining entry is extracted below a directory obtained from
+// t.TempDir() and checked with IsPDF; the first entry that is not a PDF makes
+// the function return (0, false, nil).
+func IsPDFsInArchive(t *testing.T, path string) (int, bool, error) {
+	reader, err := zip.OpenReader(path)
+	if err != nil {
+		return 0, false, err
+	}
+	defer reader.Close()
+
+	// One scratch directory is enough for the whole archive.
+	tempDir := t.TempDir()
+
+	var count int
+
+	for _, file := range reader.File {
+		if file.FileInfo().IsDir() {
+			continue
+		}
+
+		if err = extractFile(file, tempDir); err != nil {
+			return 0, false, err
+		}
+
+		var isPDF bool
+
+		// Build the path with filepath.Join so the entry is looked up
+		// inside tempDir, exactly where extractFile wrote it.
+		isPDF, err = IsPDF(filepath.Join(tempDir, file.Name))
+		if err != nil {
+			return 0, false, err
+		}
+
+		if !isPDF {
+			return 0, false, nil
+		}
+
+		count++
+	}
+
+	return count, true, nil
+}
+
+// extractFile extracts a file from a zip archive and saves it below tempDir.
+// The file keeps the (possibly nested) name it had inside the archive; missing
+// parent directories are created. Entries whose name would resolve outside
+// tempDir are rejected with an error.
+func extractFile(zipFile *zip.File, tempDir string) error {
+	target := filepath.Join(tempDir, zipFile.Name)
+
+	// Refuse names such as "../x" that would escape the scratch directory.
+	rel, err := filepath.Rel(tempDir, target)
+	if err != nil || rel == ".." || strings.HasPrefix(rel, ".."+string(os.PathSeparator)) {
+		return fmt.Errorf("illegal file path in archive: %q", zipFile.Name)
+	}
+
+	if err = os.MkdirAll(filepath.Dir(target), 0o750); err != nil {
+		return err
+	}
+
+	src, err := zipFile.Open()
+	if err != nil {
+		return err
+	}
+	defer src.Close()
+
+	dest, err := os.Create(target)
+	if err != nil {
+		return err
+	}
+
+	if _, err = io.Copy(dest, src); err != nil { //nolint:gosec // it is only for tests
+		dest.Close()
+
+		return err
+	}
+
+	// Close explicitly so a failed flush is reported instead of being dropped.
+	return dest.Close()
+}
+
+// GetPDFPageCount returns PDF file page count.
+//
+// The file is read in full and the first "/Count <n>" entry found in its raw
+// bytes is returned, so arbitrarily long binary "lines" do not break the
+// lookup. An error is returned when the file cannot be read, when no such
+// entry exists, or when the entry is zero.
+// NOTE(review): the first /Count entry is assumed to belong to the root page
+// tree node, and the entry must not be hidden inside a compressed object
+// stream - confirm for the PDFs produced by the tested routes.
+func GetPDFPageCount(filePath string) (int, error) {
+	content, err := os.ReadFile(filePath)
+	if err != nil {
+		return 0, err
+	}
+
+	countRegex := regexp.MustCompile(`/Count\s+(\d+)`)
+
+	matches := countRegex.FindSubmatch(content)
+	if matches == nil {
+		return 0, errors.New("could not find page count")
+	}
+
+	var pageCount int
+
+	if _, err = fmt.Sscanf(string(matches[1]), "%d", &pageCount); err != nil {
+		return 0, err
+	}
+
+	if pageCount == 0 {
+		return 0, errors.New("could not find page count")
+	}
+
+	return pageCount, nil
+}
+
+// IsValidJPEG reports whether the file at path starts with the JPEG
+// start-of-image marker (0xFF 0xD8) and ends with the end-of-image marker
+// (0xFF 0xD9). Files smaller than four bytes are reported as invalid; a file
+// too short to hold the two-byte start marker yields a read error instead.
+func IsValidJPEG(path string) (bool, error) {
+	file, err := os.Open(path)
+	if err != nil {
+		return false, fmt.Errorf("could not open file: %w", err)
+	}
+	defer file.Close()
+
+	startSignature := []byte{0xFF, 0xD8}
+	buffer := make([]byte, 2)
+	if _, err = io.ReadFull(file, buffer); err != nil {
+		return false, fmt.Errorf("could not read file: %w", err)
+	}
+	if !bytes.Equal(buffer, startSignature) {
+		return false, nil
+	}
+
+	fileInfo, err := file.Stat()
+	if err != nil {
+		return false, fmt.Errorf("could not get file info: %w", err)
+	}
+	fileSize := fileInfo.Size()
+	if fileSize < 4 {
+		return false, nil
+	}
+
+	// The end marker occupies the last two bytes of the file.
+	if _, err = file.Seek(-2, io.SeekEnd); err != nil {
+		return false, fmt.Errorf("could not seek file: %w", err)
+	}
+	endSignature := []byte{0xFF, 0xD9}
+	if _, err = io.ReadFull(file, buffer); err != nil {
+		return false, fmt.Errorf("could not read file: %w", err)
+	}
+	if !bytes.Equal(buffer, endSignature) {
+		return false, nil
+	}
+
+	return true, nil
+}
diff --git a/url_test.go b/url_test.go index ce7ae9b..1edac03 100644 --- a/url_test.go +++ b/url_test.go @@ -26,6 +26,10 @@ func TestURL(t *testing.T) { err = c.Store(context.Background(), req, dest) require.NoError(t, err) assert.FileExists(t, dest) + + isPDF, err := test.IsPDF(dest) + require.NoError(t, err) + assert.True(t, isPDF) } func 
TestURLComplete(t *testing.T) { @@ -50,6 +54,10 @@ func TestURLComplete(t *testing.T) { err = c.Store(context.Background(), req, dest) require.NoError(t, err) assert.FileExists(t, dest) + + isPDF, err := test.IsPDF(dest) + require.NoError(t, err) + assert.True(t, isPDF) } func TestURLPageRanges(t *testing.T) { @@ -78,4 +86,8 @@ func TestURLScreenshot(t *testing.T) { err = c.StoreScreenshot(context.Background(), req, dest) require.NoError(t, err) assert.FileExists(t, dest) + + isValidJPEG, err := test.IsValidJPEG(dest) + require.NoError(t, err) + assert.True(t, isValidJPEG) }