Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: allow direct pdf archives #942

Draft
wants to merge 23 commits into
base: master
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions docs/swagger/docs.go
Original file line number Diff line number Diff line change
Expand Up @@ -378,6 +378,12 @@ const docTemplate = `{
"model.BookmarkDTO": {
"type": "object",
"properties": {
"archivePath": {
"type": "string"
},
"archiver": {
"type": "string"
},
"author": {
"type": "string"
},
Expand Down
6 changes: 6 additions & 0 deletions docs/swagger/swagger.json
Original file line number Diff line number Diff line change
Expand Up @@ -367,6 +367,12 @@
"model.BookmarkDTO": {
"type": "object",
"properties": {
"archivePath": {
"type": "string"
},
"archiver": {
"type": "string"
},
"author": {
"type": "string"
},
Expand Down
4 changes: 4 additions & 0 deletions docs/swagger/swagger.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,10 @@ definitions:
type: object
model.BookmarkDTO:
properties:
archivePath:
type: string
archiver:
type: string
author:
type: string
create_archive:
Expand Down
2 changes: 2 additions & 0 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ require (
github.com/go-sql-driver/mysql v1.8.1
github.com/gofrs/uuid/v5 v5.2.0
github.com/golang-jwt/jwt/v5 v5.2.1
github.com/huandu/go-sqlbuilder v1.30.1
github.com/jmoiron/sqlx v1.4.0
github.com/julienschmidt/httprouter v1.3.0
github.com/lib/pq v1.10.9
Expand Down Expand Up @@ -86,6 +87,7 @@ require (
github.com/golang/protobuf v1.5.4 // indirect
github.com/google/uuid v1.6.0 // indirect
github.com/hashicorp/golang-lru/v2 v2.0.7 // indirect
github.com/huandu/xstrings v1.4.0 // indirect
github.com/inconshreveable/mousetrap v1.1.0 // indirect
github.com/josharian/intern v1.0.0 // indirect
github.com/json-iterator/go v1.1.12 // indirect
Expand Down
6 changes: 6 additions & 0 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -129,6 +129,12 @@ github.com/grpc-ecosystem/grpc-gateway/v2 v2.16.0 h1:YBftPWNWd4WwGqtY2yeZL2ef8rH
github.com/grpc-ecosystem/grpc-gateway/v2 v2.16.0/go.mod h1:YN5jB8ie0yfIUg6VvR9Kz84aCaG7AsGZnLjhHbUqwPg=
github.com/hashicorp/golang-lru/v2 v2.0.7 h1:a+bsQ5rvGLjzHuww6tVxozPZFVghXaHOwFs4luLUK2k=
github.com/hashicorp/golang-lru/v2 v2.0.7/go.mod h1:QeFd9opnmA6QUJc5vARoKUSoFhyfM2/ZepoAG6RGpeM=
github.com/huandu/go-assert v1.1.6 h1:oaAfYxq9KNDi9qswn/6aE0EydfxSa+tWZC1KabNitYs=
github.com/huandu/go-assert v1.1.6/go.mod h1:JuIfbmYG9ykwvuxoJ3V8TB5QP+3+ajIA54Y44TmkMxs=
github.com/huandu/go-sqlbuilder v1.30.1 h1:rsneJuMBZcGpxK6YQcVtKclhFT0wbM2gmOqlTXaQc2w=
github.com/huandu/go-sqlbuilder v1.30.1/go.mod h1:mS0GAtrtW+XL6nM2/gXHRJax2RwSW1TraavWDFAc1JA=
github.com/huandu/xstrings v1.4.0 h1:D17IlohoQq4UcpqD7fDk80P7l+lwAmlFaBHgOipl2FU=
github.com/huandu/xstrings v1.4.0/go.mod h1:y5/lhBue+AyNmUVz9RLU9xbLR0o4KIIExikq4ovT0aE=
github.com/inconshreveable/mousetrap v1.1.0 h1:wN+x4NVGpMsO7ErUn/mUI3vEoE6Jt13X2s0bqwp9tc8=
github.com/inconshreveable/mousetrap v1.1.0/go.mod h1:vpF70FUmC8bwa3OWnCshd2FqLfsEA9PFc4w1p2J65bw=
github.com/jmoiron/sqlx v1.4.0 h1:1PLqN7S1UYp5t4SrVVnt4nUVNemrDAtxlulVe+Qgm3o=
Expand Down
57 changes: 57 additions & 0 deletions internal/archiver/pdf.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
package archiver

import (
"fmt"
"strings"

"github.com/go-shiori/shiori/internal/dependencies"
"github.com/go-shiori/shiori/internal/model"
)

// PDFArchiver is the archiver for requests whose content type is
// "application/pdf" (see Matches).
type PDFArchiver struct {
// deps holds the shared application dependencies; the methods of this type
// use it to reach the storage domain.
deps *dependencies.Dependencies
}

// Matches reports whether the request's ContentType contains
// "application/pdf".
//
// The check is a substring test rather than an equality test, so a content
// type value that carries additional text around the media type still matches.
func (a *PDFArchiver) Matches(archiverReq *model.ArchiverRequest) bool {
return strings.Contains(archiverReq.ContentType, "application/pdf")

Check warning on line 16 in internal/archiver/pdf.go

View check run for this annotation

Codecov / codecov/patch

internal/archiver/pdf.go#L15-L16

Added lines #L15 - L16 were not covered by tests
}

// Archive writes the request's raw content to the bookmark's archive path
// through the storage domain and flags the bookmark as archived.
//
// On success it sets ArchivePath, HasArchive and Archiver (model.ArchiverPDF)
// on the bookmark and returns a pointer to it. If the write fails it returns
// nil and an error, and the bookmark is left untouched.
func (a *PDFArchiver) Archive(archiverReq *model.ArchiverRequest) (*model.BookmarkDTO, error) {
// bookmark points at the Bookmark field inside the request, so the
// assignments below also modify archiverReq.Bookmark.
bookmark := &archiverReq.Bookmark

// NOTE(review): this error is formatted with %v, so the underlying error is
// not wrapped (errors.Is / errors.As will not see it); GetArchiveFile in this
// file wraps with %w instead — confirm whether that difference is intended.
if err := a.deps.Domains.Storage.WriteData(model.GetArchivePath(bookmark), archiverReq.Content); err != nil {
return nil, fmt.Errorf("error saving pdf archive: %v", err)
}

Check warning on line 24 in internal/archiver/pdf.go

View check run for this annotation

Codecov / codecov/patch

internal/archiver/pdf.go#L19-L24

Added lines #L19 - L24 were not covered by tests

// The archive path is computed a second time here, from the same bookmark
// that was used for the write above.
bookmark.ArchivePath = model.GetArchivePath(bookmark)
bookmark.HasArchive = true
bookmark.Archiver = model.ArchiverPDF

return bookmark, nil

Check warning on line 30 in internal/archiver/pdf.go

View check run for this annotation

Codecov / codecov/patch

internal/archiver/pdf.go#L26-L30

Added lines #L26 - L30 were not covered by tests
}

// GetArchiveFile opens the stored PDF archive of the given bookmark and returns
// it as a model.ArchiveFile with content type "application/pdf" and the size
// reported by the file's Stat.
//
// resourcePath is not used by this implementation.
//
// It returns an error when the archive does not exist in storage, when the
// file cannot be opened, or when its file info cannot be read.
func (a *PDFArchiver) GetArchiveFile(bookmark model.BookmarkDTO, resourcePath string) (*model.ArchiveFile, error) {
archivePath := model.GetArchivePath(&bookmark)

if !a.deps.Domains.Storage.FileExists(archivePath) {
return nil, fmt.Errorf("archive for bookmark %d doesn't exist", bookmark.ID)
}

Check warning on line 38 in internal/archiver/pdf.go

View check run for this annotation

Codecov / codecov/patch

internal/archiver/pdf.go#L33-L38

Added lines #L33 - L38 were not covered by tests

archiveFile, err := a.deps.Domains.Storage.FS().Open(archivePath)
if err != nil {
return nil, fmt.Errorf("error opening pdf archive: %w", err)
}

Check warning on line 43 in internal/archiver/pdf.go

View check run for this annotation

Codecov / codecov/patch

internal/archiver/pdf.go#L40-L43

Added lines #L40 - L43 were not covered by tests

// NOTE(review): when Stat fails the function returns without closing
// archiveFile — confirm whether the handle should be closed on this path.
info, err := archiveFile.Stat()
if err != nil {
return nil, fmt.Errorf("error getting pdf archive info: %w", err)
}

Check warning on line 48 in internal/archiver/pdf.go

View check run for this annotation

Codecov / codecov/patch

internal/archiver/pdf.go#L45-L48

Added lines #L45 - L48 were not covered by tests

// The open file itself is handed to the ArchiveFile; it is not closed here.
// NOTE(review): presumably the consumer of the ArchiveFile closes it, and the
// empty third argument means "no content encoding" — confirm against
// model.NewArchiveFile.
return model.NewArchiveFile(archiveFile, "application/pdf", "", info.Size()), nil

Check warning on line 50 in internal/archiver/pdf.go

View check run for this annotation

Codecov / codecov/patch

internal/archiver/pdf.go#L50

Added line #L50 was not covered by tests
}

// NewPDFArchiver builds a PDFArchiver that works against the given
// application dependencies.
func NewPDFArchiver(deps *dependencies.Dependencies) *PDFArchiver {
	archiver := new(PDFArchiver)
	archiver.deps = deps
	return archiver
}
78 changes: 78 additions & 0 deletions internal/archiver/warc.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
package archiver

import (
"bytes"
"fmt"
"path/filepath"
"strings"

"github.com/go-shiori/shiori/internal/core"
"github.com/go-shiori/shiori/internal/dependencies"
"github.com/go-shiori/shiori/internal/model"
"github.com/go-shiori/warc"
)

// LEGACY WARNING
// This file contains legacy code that will be removed once we move to Obelisk as
// the general archiver.

// WARCArchiver is the legacy archiver: it archives through
// core.ProcessBookmark and serves resources out of WARC files.
type WARCArchiver struct {
// deps holds the shared application dependencies; the methods of this type
// read the storage data directory from its config and use its storage domain.
deps *dependencies.Dependencies
}

// Matches reports whether this archiver accepts the given request.
//
// The request is not inspected: the WARC archiver currently serves as the
// catch-all, so the answer is always true.
func (a *WARCArchiver) Matches(archiverReq *model.ArchiverRequest) bool {
	// TODO: kept as a catch-all for now; this archiver is going to be removed soon.
	const acceptsEverything = true

	return acceptsEverything
}

// Archive runs the legacy core.ProcessBookmark pipeline over the request's
// content and returns a pointer to the bookmark it produces.
//
// The request content is handed over as an in-memory reader, together with the
// configured storage data directory, the request's bookmark and its content
// type.
//
// Only an error that core.ProcessBookmark flags as fatal is returned. A
// non-fatal error is dropped and the result is returned with a nil error.
func (a *WARCArchiver) Archive(archiverReq *model.ArchiverRequest) (*model.BookmarkDTO, error) {
processRequest := core.ProcessRequest{
DataDir: a.deps.Config.Storage.DataDir,
Bookmark: archiverReq.Bookmark,
Content: bytes.NewReader(archiverReq.Content),
ContentType: archiverReq.ContentType,
}

result, isFatalErr, err := core.ProcessBookmark(a.deps, processRequest)

// NOTE(review): the error is formatted with %v, so it is not wrapped for
// errors.Is / errors.As — confirm whether that is intended.
if err != nil && isFatalErr {
return nil, fmt.Errorf("failed to process: %v", err)
}

Check warning on line 40 in internal/archiver/warc.go

View check run for this annotation

Codecov / codecov/patch

internal/archiver/warc.go#L39-L40

Added lines #L39 - L40 were not covered by tests

return &result, nil
}

// GetArchiveFile reads the resource at resourcePath from the bookmark's WARC
// archive and returns it as a model.ArchiveFile, using the content type that
// the WARC file reports for that resource.
//
// It returns an error when the archive does not exist in storage, when the
// WARC file cannot be opened, when the resource is not present in the archive,
// or when reading the resource fails.
func (a *WARCArchiver) GetArchiveFile(bookmark model.BookmarkDTO, resourcePath string) (*model.ArchiveFile, error) {
archivePath := model.GetArchivePath(&bookmark)

if !a.deps.Domains.Storage.FileExists(archivePath) {
return nil, fmt.Errorf("archive for bookmark %d doesn't exist", bookmark.ID)
}

Check warning on line 50 in internal/archiver/warc.go

View check run for this annotation

Codecov / codecov/patch

internal/archiver/warc.go#L49-L50

Added lines #L49 - L50 were not covered by tests

// The existence check above goes through the storage domain, while the WARC
// file is opened by path: the configured data directory joined with the
// archive path.
warcFile, err := warc.Open(filepath.Join(a.deps.Config.Storage.DataDir, archivePath))
if err != nil {
return nil, fmt.Errorf("error opening warc file: %w", err)
}

Check warning on line 55 in internal/archiver/warc.go

View check run for this annotation

Codecov / codecov/patch

internal/archiver/warc.go#L54-L55

Added lines #L54 - L55 were not covered by tests

defer warcFile.Close()

if !warcFile.HasResource(resourcePath) {
return nil, fmt.Errorf("resource %s doesn't exist in archive", resourcePath)
}

Check warning on line 61 in internal/archiver/warc.go

View check run for this annotation

Codecov / codecov/patch

internal/archiver/warc.go#L60-L61

Added lines #L60 - L61 were not covered by tests

// The whole resource is read into memory here; the WARC file itself is closed
// by the deferred Close when this function returns.
content, contentType, err := warcFile.Read(resourcePath)
if err != nil {
return nil, fmt.Errorf("error reading resource %s: %w", resourcePath, err)
}

Check warning on line 66 in internal/archiver/warc.go

View check run for this annotation

Codecov / codecov/patch

internal/archiver/warc.go#L65-L66

Added lines #L65 - L66 were not covered by tests

// Note: Using this method to send the reader instead of `bytes.NewReader` because that
// crashes the moment we try to retrieve it for some reason. Since this is a legacy archiver
// I don't want to spend more time on this. (@fmartingr)
// NOTE(review): the "gzip" argument is presumably the content encoding of the
// stored resource — confirm against model.NewArchiveFile.
return model.NewArchiveFile(strings.NewReader(string(content)), contentType, "gzip", int64(len(content))), nil
}

// NewWARCArchiver builds a WARCArchiver that works against the given
// application dependencies.
func NewWARCArchiver(deps *dependencies.Dependencies) *WARCArchiver {
	archiver := new(WARCArchiver)
	archiver.deps = deps
	return archiver
}
38 changes: 11 additions & 27 deletions internal/cmd/add.go
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@
}

func addHandler(cmd *cobra.Command, args []string) {
cfg, deps := initShiori(cmd.Context(), cmd)
_, deps := initShiori(cmd.Context(), cmd)

Check warning on line 32 in internal/cmd/add.go

View check run for this annotation

Codecov / codecov/patch

internal/cmd/add.go#L32

Added line #L32 was not covered by tests

// Read flag and arguments
url := args[0]
Expand All @@ -38,7 +38,6 @@
tags, _ := cmd.Flags().GetStringSlice("tags")
offline, _ := cmd.Flags().GetBool("offline")
noArchival, _ := cmd.Flags().GetBool("no-archival")
logArchival, _ := cmd.Flags().GetBool("log-archival")

// Normalize input
title = validateTitle(title, "")
Expand Down Expand Up @@ -84,37 +83,22 @@
if !offline {
cInfo.Println("Downloading article...")

var isFatalErr bool
content, contentType, err := core.DownloadBookmark(book.URL)
result, err := deps.Domains.Archiver.GenerateBookmarkArchive(book)

Check warning on line 86 in internal/cmd/add.go

View check run for this annotation

Codecov / codecov/patch

internal/cmd/add.go#L86

Added line #L86 was not covered by tests
if err != nil {
cError.Printf("Failed to download: %v\n", err)
cError.Printf("Failed to download article: %v\n", err)
os.Exit(1)
}

Check warning on line 90 in internal/cmd/add.go

View check run for this annotation

Codecov / codecov/patch

internal/cmd/add.go#L88-L90

Added lines #L88 - L90 were not covered by tests

if title != "" {
result.Title = title

Check warning on line 93 in internal/cmd/add.go

View check run for this annotation

Codecov / codecov/patch

internal/cmd/add.go#L92-L93

Added lines #L92 - L93 were not covered by tests
}

if err == nil && content != nil {
request := core.ProcessRequest{
DataDir: cfg.Storage.DataDir,
Bookmark: book,
Content: content,
ContentType: contentType,
LogArchival: logArchival,
KeepTitle: title != "",
KeepExcerpt: excerpt != "",
}

book, isFatalErr, err = core.ProcessBookmark(deps, request)
content.Close()

if err != nil {
cError.Printf("Failed: %v\n", err)
}

if isFatalErr {
os.Exit(1)
}
if excerpt != "" {
result.Excerpt = excerpt

Check warning on line 97 in internal/cmd/add.go

View check run for this annotation

Codecov / codecov/patch

internal/cmd/add.go#L96-L97

Added lines #L96 - L97 were not covered by tests
}

// Save bookmark to database
_, err = deps.Database.SaveBookmarks(cmd.Context(), false, book)
_, err = deps.Database.SaveBookmarks(cmd.Context(), false, *result)

Check warning on line 101 in internal/cmd/add.go

View check run for this annotation

Codecov / codecov/patch

internal/cmd/add.go#L101

Added line #L101 was not covered by tests
if err != nil {
cError.Printf("Failed to save bookmark with content: %v\n", err)
os.Exit(1)
Expand Down
8 changes: 7 additions & 1 deletion internal/core/core.go
Original file line number Diff line number Diff line change
@@ -1,3 +1,9 @@
package core

const userAgent = "Shiori/2.0.0 (+https://github.com/go-shiori/shiori)"
import (
"fmt"

"github.com/go-shiori/shiori/internal/model"
)

// userAgent is the User-Agent string identifying Shiori, with
// model.BuildVersion as its version component followed by the project URL.
// It is a var rather than a const because fmt.Sprintf is evaluated at package
// initialization.
var userAgent = fmt.Sprintf("Shiori/%s (+https://github.com/go-shiori/shiori)", model.BuildVersion)
20 changes: 6 additions & 14 deletions internal/core/ebook.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@
"os"
fp "path/filepath"
"strconv"
"strings"

epub "github.com/go-shiori/go-epub"
"github.com/go-shiori/shiori/internal/dependencies"
Expand All @@ -15,35 +14,28 @@
// GenerateEbook receives a `ProcessRequest` and generates an ebook file in the destination path specified.
// The destination path `dstPath` should include file name with ".epub" extension
// The bookmark model will be used to update the UI based on whether this function is successful or not.
func GenerateEbook(deps *dependencies.Dependencies, req ProcessRequest, dstPath string) (book model.BookmarkDTO, err error) {
func GenerateEbook(deps *dependencies.Dependencies, req model.EbookProcessRequest) (book model.BookmarkDTO, err error) {
book = req.Bookmark
dstPath := model.GetEbookPath(&book)

// Make sure bookmark ID is defined
if book.ID == 0 {
return book, errors.New("bookmark ID is not valid")
}

if deps.Domains.Storage.FileExists(dstPath) && req.SkipExisting {
return book, nil
}

Check warning on line 28 in internal/core/ebook.go

View check run for this annotation

Codecov / codecov/patch

internal/core/ebook.go#L27-L28

Added lines #L27 - L28 were not covered by tests

// Get the current state of the bookmark: check archive and thumbnail
strID := strconv.Itoa(book.ID)

bookmarkThumbnailPath := model.GetThumbnailPath(&book)
bookmarkArchivePath := model.GetArchivePath(&book)

if deps.Domains.Storage.FileExists(bookmarkThumbnailPath) {
book.ImageURL = fp.Join("/", "bookmark", strID, "thumb")
}

if deps.Domains.Storage.FileExists(bookmarkArchivePath) {
book.HasArchive = true
}

// This function create ebook from reader mode of bookmark so
// we can't create ebook from PDF so we return error here if bookmark is a pdf
contentType := req.ContentType
if strings.Contains(contentType, "application/pdf") {
return book, errors.New("can't create ebook for pdf")
}

// Create temporary epub file
tmpFile, err := os.CreateTemp("", "ebook")
if err != nil {
Expand Down
Loading
Loading