From 089695764dfe4bedeeb642e484515457a251d6ce Mon Sep 17 00:00:00 2001 From: "Felipe M." Date: Tue, 25 Jun 2024 21:50:30 +0200 Subject: [PATCH 01/18] feat: new pdf archiver --- internal/archiver/pdf.go | 32 +++++++++++++++++++++++++++ internal/archiver/warc.go | 42 ++++++++++++++++++++++++++++++++++++ internal/domains/archiver.go | 29 ++++++++++++------------- internal/model/archiver.go | 8 +++++++ 4 files changed, 96 insertions(+), 15 deletions(-) create mode 100644 internal/archiver/pdf.go create mode 100644 internal/archiver/warc.go create mode 100644 internal/model/archiver.go diff --git a/internal/archiver/pdf.go b/internal/archiver/pdf.go new file mode 100644 index 000000000..e07394879 --- /dev/null +++ b/internal/archiver/pdf.go @@ -0,0 +1,32 @@ +package archiver + +import ( + "fmt" + "io" + "os" + + "github.com/go-shiori/shiori/internal/dependencies" + "github.com/go-shiori/shiori/internal/model" +) + +type PDFArchiver struct { + deps *dependencies.Dependencies +} + +func (a *PDFArchiver) Matches(contentType string) bool { + return contentType == "application/pdf" +} + +func (a *PDFArchiver) Archive(content io.ReadCloser, contentType string, bookmark model.BookmarkDTO) (*model.BookmarkDTO, error) { + if err := a.deps.Domains.Storage.WriteFile(model.GetArchivePath(&bookmark), content.(*os.File)); err != nil { + return nil, fmt.Errorf("error saving pdf archive: %v", err) + } + + return nil, nil +} + +func NewPDFArchiver(deps *dependencies.Dependencies) *PDFArchiver { + return &PDFArchiver{ + deps: deps, + } +} diff --git a/internal/archiver/warc.go b/internal/archiver/warc.go new file mode 100644 index 000000000..2d21a2f24 --- /dev/null +++ b/internal/archiver/warc.go @@ -0,0 +1,42 @@ +package archiver + +import ( + "fmt" + "io" + + "github.com/go-shiori/shiori/internal/core" + "github.com/go-shiori/shiori/internal/dependencies" + "github.com/go-shiori/shiori/internal/model" +) + +type WARCArchiver struct { + deps *dependencies.Dependencies +} + +func (a *WARCArchiver) Matches(contentType string) bool { + return true +} + +func (a *WARCArchiver) Archive(content io.ReadCloser, contentType string, bookmark model.BookmarkDTO) (*model.BookmarkDTO, error) { + processRequest := core.ProcessRequest{ + DataDir: a.deps.Config.Storage.DataDir, + Bookmark: bookmark, + Content: content, + ContentType: contentType, + } + + result, isFatalErr, err := core.ProcessBookmark(a.deps, processRequest) + content.Close() + + if err != nil && isFatalErr { + return nil, fmt.Errorf("failed to process: %v", err) + } + + return &result, nil +} + +func NewWARCArchiver(deps *dependencies.Dependencies) *WARCArchiver { + return &WARCArchiver{ + deps: deps, + } +} diff --git a/internal/domains/archiver.go b/internal/domains/archiver.go index 358523b54..f7747224d 100644 --- a/internal/domains/archiver.go +++ b/internal/domains/archiver.go @@ -4,6 +4,7 @@ import ( "fmt" "path/filepath" + "github.com/go-shiori/shiori/internal/archiver" "github.com/go-shiori/shiori/internal/core" "github.com/go-shiori/shiori/internal/dependencies" "github.com/go-shiori/shiori/internal/model" @@ -11,7 +12,8 @@ import ( ) type ArchiverDomain struct { - deps *dependencies.Dependencies + deps *dependencies.Dependencies + archivers []model.Archiver } func (d *ArchiverDomain) DownloadBookmarkArchive(book model.BookmarkDTO) (*model.BookmarkDTO, error) { @@ -20,21 +22,13 @@ func (d *ArchiverDomain) DownloadBookmarkArchive(book model.BookmarkDTO) (*model return nil, fmt.Errorf("error downloading url: %s", err) } - processRequest := core.ProcessRequest{ - DataDir: d.deps.Config.Storage.DataDir, - Bookmark: book, - Content: content, - ContentType: contentType, + for _, archiver := range d.archivers { + if archiver.Matches(contentType) { + return archiver.Archive(content, contentType, book) + } } - result, isFatalErr, err := core.ProcessBookmark(d.deps, processRequest) - content.Close() - - if err != nil && isFatalErr { - return nil, fmt.Errorf("failed to process: %v", err) - } - - return &result, nil + return nil, fmt.Errorf("no archiver found for content type: %s", contentType) } func (d *ArchiverDomain) GetBookmarkArchive(book *model.BookmarkDTO) (*warc.Archive, error) { @@ -49,7 +43,12 @@ func (d *ArchiverDomain) GetBookmarkArchive(book *model.BookmarkDTO) (*warc.Arch } func NewArchiverDomain(deps *dependencies.Dependencies) *ArchiverDomain { + archivers := []model.Archiver{ + archiver.NewPDFArchiver(deps), + archiver.NewWARCArchiver(deps), + } return &ArchiverDomain{ - deps: deps, + deps: deps, + archivers: archivers, } } diff --git a/internal/model/archiver.go b/internal/model/archiver.go new file mode 100644 index 000000000..2aa0eca08 --- /dev/null +++ b/internal/model/archiver.go @@ -0,0 +1,8 @@ +package model + +import "io" + +type Archiver interface { + Archive(content io.ReadCloser, contentType string, bookmark BookmarkDTO) (*BookmarkDTO, error) + Matches(contentType string) bool +} From 1fe353e16289beb1cfe6be67cf9193149bb30495 Mon Sep 17 00:00:00 2001 From: "Felipe M." Date: Tue, 25 Jun 2024 23:18:48 +0200 Subject: [PATCH 02/18] archiver, archiver type --- internal/archiver/pdf.go | 12 ++-- internal/cmd/add.go | 38 +++-------- internal/core/processing.go | 2 + .../sqlite/0004_bookmark_archiver.up.sql | 24 +++++++ internal/database/sqlite.go | 52 ++++++++++++-- internal/domains/archiver.go | 5 ++ internal/domains/storage.go | 23 +++++++ internal/http/routes/api/v1/bookmarks.go | 42 +++--------- internal/model/archiver.go | 5 ++ internal/model/bookmark.go | 2 + internal/model/domains.go | 3 + internal/webserver/handler-api-ext.go | 67 +++++++++---------- internal/webserver/handler-api.go | 28 +++----- 13 files changed, 179 insertions(+), 124 deletions(-) create mode 100644 internal/database/migrations/sqlite/0004_bookmark_archiver.up.sql diff --git a/internal/archiver/pdf.go b/internal/archiver/pdf.go index e07394879..592b74aaa 100644 --- a/internal/archiver/pdf.go +++ b/internal/archiver/pdf.go @@ -3,7 +3,7 @@ package archiver import ( "fmt" "io" - "os" + "strings" "github.com/go-shiori/shiori/internal/dependencies" "github.com/go-shiori/shiori/internal/model" @@ -14,15 +14,19 @@ type PDFArchiver struct { } func (a *PDFArchiver) Matches(contentType string) bool { - return contentType == "application/pdf" + return strings.Contains(contentType, "application/pdf") } func (a *PDFArchiver) Archive(content io.ReadCloser, contentType string, bookmark model.BookmarkDTO) (*model.BookmarkDTO, error) { - if err := a.deps.Domains.Storage.WriteFile(model.GetArchivePath(&bookmark), content.(*os.File)); err != nil { + if err := a.deps.Domains.Storage.WriteReader(model.GetArchivePath(&bookmark), content); err != nil { return nil, fmt.Errorf("error saving pdf archive: %v", err) } - return nil, nil + bookmark.ArchivePath = model.GetArchivePath(&bookmark) + bookmark.HasArchive = true + bookmark.Archiver = model.ArchiverPDF + + return &bookmark, nil } func NewPDFArchiver(deps *dependencies.Dependencies) *PDFArchiver { diff --git a/internal/cmd/add.go b/internal/cmd/add.go index f099bcdcf..6c4f5b401 100644 --- a/internal/cmd/add.go +++ b/internal/cmd/add.go @@ -29,7 +29,7 @@ func addCmd() *cobra.Command { } func addHandler(cmd *cobra.Command, args []string) { - cfg, deps := initShiori(cmd.Context(), cmd) + _, deps := initShiori(cmd.Context(), cmd) // Read flag and arguments url := args[0] @@ -38,7 +38,6 @@ func addHandler(cmd *cobra.Command, args []string) { tags, _ := cmd.Flags().GetStringSlice("tags") offline, _ := cmd.Flags().GetBool("offline") noArchival, _ := cmd.Flags().GetBool("no-archival") - logArchival, _ := cmd.Flags().GetBool("log-archival") // Normalize input title = validateTitle(title, "") @@ -84,37 +83,22 @@ func addHandler(cmd *cobra.Command, args []string) { if !offline { cInfo.Println("Downloading article...") - var isFatalErr bool - content, contentType, err := core.DownloadBookmark(book.URL) + result, err := deps.Domains.Archiver.DownloadBookmarkArchive(book) if err != nil { - cError.Printf("Failed to download: %v\n", err) + cError.Printf("Failed to download article: %v\n", err) + os.Exit(1) + } + + if title != "" { + result.Title = title } - if err == nil && content != nil { - request := core.ProcessRequest{ - DataDir: cfg.Storage.DataDir, - Bookmark: book, - Content: content, - ContentType: contentType, - LogArchival: logArchival, - KeepTitle: title != "", - KeepExcerpt: excerpt != "", - } - - book, isFatalErr, err = core.ProcessBookmark(deps, request) - content.Close() - - if err != nil { - cError.Printf("Failed: %v\n", err) - } - - if isFatalErr { - os.Exit(1) - } + if excerpt != "" { + result.Excerpt = excerpt } // Save bookmark to database - _, err = deps.Database.SaveBookmarks(cmd.Context(), false, book) + _, err = deps.Database.SaveBookmarks(cmd.Context(), false, *result) if err != nil { cError.Printf("Failed to save bookmark with content: %v\n", err) os.Exit(1) diff --git a/internal/core/processing.go b/internal/core/processing.go index 32a4837ae..fdb156303 100644 --- a/internal/core/processing.go +++ b/internal/core/processing.go @@ -186,6 +186,8 @@ func ProcessBookmark(deps *dependencies.Dependencies, req ProcessRequest) (book } book.HasArchive = true + book.ArchivePath = dstPath + book.Archiver = model.ArchiverWARC } return book, false, nil diff --git a/internal/database/migrations/sqlite/0004_bookmark_archiver.up.sql b/internal/database/migrations/sqlite/0004_bookmark_archiver.up.sql new file mode 100644 index 000000000..bf7297733 --- /dev/null +++ b/internal/database/migrations/sqlite/0004_bookmark_archiver.up.sql @@ -0,0 +1,24 @@ +-- Create a temporary table +CREATE TABLE IF NOT EXISTS bookmark_temp( + id INTEGER PRIMARY KEY AUTOINCREMENT, + url TEXT NOT NULL, + title TEXT NOT NULL, + excerpt TEXT NOT NULL DEFAULT "", + author TEXT NOT NULL DEFAULT "", + public INTEGER NOT NULL DEFAULT 0, + modified TEXT NOT NULL DEFAULT CURRENT_TIMESTAMP, + has_content BOOLEAN DEFAULT FALSE NOT NULL, + archiver TEXT NOT NULL DEFAULT "", + archive_path TEXT NOT NULL DEFAULT "", + CONSTRAINT bookmark_url_UNIQUE UNIQUE(url) +); + +-- Copy data from the original table to the temporary table +INSERT INTO bookmark_temp (id, url, title, excerpt, author, public, modified, has_content, archiver, archive_path) +SELECT id, url, title, excerpt, author, public, modified, has_content, "warc", "" FROM bookmark; + +-- Drop the original table +DROP TABLE bookmark; + +-- Rename the temporary table to the original table name +ALTER TABLE bookmark_temp RENAME TO bookmark; diff --git a/internal/database/sqlite.go b/internal/database/sqlite.go index 7cbc279b6..e328c4fd8 100644 --- a/internal/database/sqlite.go +++ b/internal/database/sqlite.go @@ -60,6 +60,43 @@ var sqliteMigrations = []migration{ }), newFileMigration("0.3.0", "0.4.0", "sqlite/0002_denormalize_content"), newFileMigration("0.4.0", "0.5.0", "sqlite/0003_uniq_id"), + newFileMigration("0.5.0", "0.6.0", "sqlite/0004_bookmark_archiver"), + // newFuncMigration("0.6.0", "0.7.0", func(db *sql.DB) error { + // // Ensure that the field `archive_path` has the path to the archive if the file exists + + // tx, err := db.Begin() + // if err != nil { + // return fmt.Errorf("failed to start transaction: %w", err) + // } + + // // Get all bookmarks + // rows, err := tx.Query(`SELECT id, url FROM bookmark`) + // if err != nil { + // return fmt.Errorf("failed to get bookmarks: %w", err) + // } + + // // Prepare statement + // stmt, err := tx.Prepare(`UPDATE bookmark SET archive_path = ? WHERE id = ?`) + // if err != nil { + // return fmt.Errorf("failed to prepare statement: %w", err) + // } + + // // Close rows and statement + // defer rows.Close() + // defer stmt.Close() + + // // Iterate over bookmarks + // for rows.Next() { + // var id int + // var url string + + // if err := rows.Scan(&id, &url); err != nil { + // return fmt.Errorf("failed to scan row: %w", err) + // } + // } + + // return nil + // }), } // SQLiteDatabase is implementation of Database interface @@ -127,15 +164,16 @@ func (db *SQLiteDatabase) SaveBookmarks(ctx context.Context, create bool, bookma // Prepare statement stmtInsertBook, err := tx.PreparexContext(ctx, `INSERT INTO bookmark - (url, title, excerpt, author, public, modified, has_content) - VALUES(?, ?, ?, ?, ?, ?, ?) RETURNING id`) + (url, title, excerpt, author, public, modified, has_content, archiver, archive_path) + VALUES(?, ?, ?, ?, ?, ?, ?, ?, ?) RETURNING id`) if err != nil { return errors.WithStack(err) } stmtUpdateBook, err := tx.PreparexContext(ctx, `UPDATE bookmark SET url = ?, title = ?, excerpt = ?, author = ?, - public = ?, modified = ?, has_content = ? + public = ?, modified = ?, has_content = ?, + archiver = ?, archive_path = ? WHERE id = ?`) if err != nil { return errors.WithStack(err) @@ -203,10 +241,10 @@ func (db *SQLiteDatabase) SaveBookmarks(ctx context.Context, create bool, bookma var err error if create { err = stmtInsertBook.QueryRowContext(ctx, - book.URL, book.Title, book.Excerpt, book.Author, book.Public, book.Modified, hasContent).Scan(&book.ID) + book.URL, book.Title, book.Excerpt, book.Author, book.Public, book.Modified, hasContent, book.Archiver, book.ArchivePath).Scan(&book.ID) } else { _, err = stmtUpdateBook.ExecContext(ctx, - book.URL, book.Title, book.Excerpt, book.Author, book.Public, book.Modified, hasContent, book.ID) + book.URL, book.Title, book.Excerpt, book.Author, book.Public, book.Modified, hasContent, book.Archiver, book.ArchivePath, book.ID) } if err != nil { return errors.WithStack(err) @@ -299,7 +337,9 @@ func (db *SQLiteDatabase) GetBookmarks(ctx context.Context, opts GetBookmarksOpt b.author, b.public, b.modified, - b.has_content + b.has_content, + b.archiver, + b.archive_path FROM bookmark b WHERE 1` diff --git a/internal/domains/archiver.go b/internal/domains/archiver.go index f7747224d..b47c6b78e 100644 --- a/internal/domains/archiver.go +++ b/internal/domains/archiver.go @@ -2,6 +2,7 @@ package domains import ( "fmt" + "io" "path/filepath" "github.com/go-shiori/shiori/internal/archiver" @@ -22,6 +23,10 @@ func (d *ArchiverDomain) DownloadBookmarkArchive(book model.BookmarkDTO) (*model return nil, fmt.Errorf("error downloading url: %s", err) } + return d.ProcessBookmarkArchive(content, contentType, book) +} + +func (d *ArchiverDomain) ProcessBookmarkArchive(content io.ReadCloser, contentType string, book model.BookmarkDTO) (*model.BookmarkDTO, error) { for _, archiver := range d.archivers { if archiver.Matches(contentType) { return archiver.Archive(content, contentType, book) diff --git a/internal/domains/storage.go b/internal/domains/storage.go index 9cb49dd40..c4c0c71d0 100644 --- a/internal/domains/storage.go +++ b/internal/domains/storage.go @@ -97,3 +97,26 @@ func (d *StorageDomain) WriteFile(dst string, tmpFile *os.File) error { return nil } + +// WriteReader writes a reader to storage. +func (d *StorageDomain) WriteReader(dst string, reader io.Reader) error { + if dst != "" && !d.DirExists(dst) { + err := d.fs.MkdirAll(filepath.Dir(dst), model.DataDirPerm) + if err != nil { + return fmt.Errorf("failed to create destination dir: %v", err) + } + } + + dstFile, err := d.fs.Create(dst) + if err != nil { + return fmt.Errorf("failed to create destination file: %v", err) + } + defer dstFile.Close() + + _, err = io.Copy(dstFile, reader) + if err != nil { + return fmt.Errorf("failed to copy file to the destination") + } + + return nil +} diff --git a/internal/http/routes/api/v1/bookmarks.go b/internal/http/routes/api/v1/bookmarks.go index a95ec538b..6b12fa486 100644 --- a/internal/http/routes/api/v1/bookmarks.go +++ b/internal/http/routes/api/v1/bookmarks.go @@ -3,13 +3,10 @@ package api_v1 import ( "fmt" "net/http" - "os" - fp "path/filepath" "strconv" "sync" "github.com/gin-gonic/gin" - "github.com/go-shiori/shiori/internal/core" "github.com/go-shiori/shiori/internal/database" "github.com/go-shiori/shiori/internal/dependencies" "github.com/go-shiori/shiori/internal/http/context" @@ -186,35 +183,7 @@ func (r *BookmarksAPIRoutes) updateCache(c *gin.Context) { <-semaphore }() - // Download data from internet - content, contentType, err := core.DownloadBookmark(book.URL) - if err != nil { - chProblem <- book.ID - return - } - - request := core.ProcessRequest{ - DataDir: r.deps.Config.Storage.DataDir, - Bookmark: book, - Content: content, - ContentType: contentType, - KeepTitle: keep_metadata, - KeepExcerpt: keep_metadata, - } - - if payload.SkipExist && book.CreateEbook { - strID := strconv.Itoa(book.ID) - ebookPath := fp.Join(request.DataDir, "ebook", strID+".epub") - _, err = os.Stat(ebookPath) - if err == nil { - request.Bookmark.CreateEbook = false - request.Bookmark.HasEbook = true - } - } - - book, _, err = core.ProcessBookmark(r.deps, request) - content.Close() - + result, err := r.deps.Domains.Archiver.DownloadBookmarkArchive(book) if err != nil { r.logger.WithFields(logrus.Fields{ "bookmark_id": book.ID, @@ -225,6 +194,15 @@ func (r *BookmarksAPIRoutes) updateCache(c *gin.Context) { return } + // If user want to keep metadata, restore it + if keep_metadata { + result.Title = book.Title + result.Excerpt = book.Excerpt + } + + // Create ebook if needed + // TODO + // Update list of bookmarks mx.Lock() bookmarks[i] = book diff --git a/internal/model/archiver.go b/internal/model/archiver.go index 2aa0eca08..2e33abfa4 100644 --- a/internal/model/archiver.go +++ b/internal/model/archiver.go @@ -2,6 +2,11 @@ package model import "io" +const ( + ArchiverPDF = "pdf" + ArchiverWARC = "warc" +) + type Archiver interface { Archive(content io.ReadCloser, contentType string, bookmark BookmarkDTO) (*BookmarkDTO, error) Matches(contentType string) bool diff --git a/internal/model/bookmark.go b/internal/model/bookmark.go index a85ec2a01..61911501d 100644 --- a/internal/model/bookmark.go +++ b/internal/model/bookmark.go @@ -20,6 +20,8 @@ type BookmarkDTO struct { ImageURL string `db:"image_url" json:"imageURL"` HasContent bool `db:"has_content" json:"hasContent"` Tags []Tag `json:"tags"` + Archiver string `db:"archiver" json:"archiver"` + ArchivePath string `db:"archive_path" json:"archivePath"` HasArchive bool `json:"hasArchive"` HasEbook bool `json:"hasEbook"` CreateArchive bool `json:"create_archive"` // TODO: migrate outside the DTO diff --git a/internal/model/domains.go b/internal/model/domains.go index ce4e0532b..9ab25a1df 100644 --- a/internal/model/domains.go +++ b/internal/model/domains.go @@ -2,6 +2,7 @@ package model import ( "context" + "io" "io/fs" "os" "time" @@ -25,6 +26,7 @@ type AccountsDomain interface { type ArchiverDomain interface { DownloadBookmarkArchive(book BookmarkDTO) (*BookmarkDTO, error) + ProcessBookmarkArchive(content io.ReadCloser, contentType string, book BookmarkDTO) (*BookmarkDTO, error) GetBookmarkArchive(book *BookmarkDTO) (*warc.Archive, error) } @@ -35,4 +37,5 @@ type StorageDomain interface { DirExists(path string) bool WriteData(dst string, data []byte) error WriteFile(dst string, src *os.File) error + WriteReader(dst string, src io.Reader) error } diff --git a/internal/webserver/handler-api-ext.go b/internal/webserver/handler-api-ext.go index cf9270490..fb809c4b3 100644 --- a/internal/webserver/handler-api-ext.go +++ b/internal/webserver/handler-api-ext.go @@ -1,7 +1,6 @@ package webserver import ( - "bytes" "encoding/json" "fmt" "io" @@ -10,6 +9,7 @@ import ( "os" fp "path/filepath" "strconv" + "strings" "github.com/go-shiori/shiori/internal/core" "github.com/go-shiori/shiori/internal/model" @@ -59,19 +59,6 @@ func (h *Handler) ApiInsertViaExtension(w http.ResponseWriter, r *http.Request, request.Title = request.URL } - // Since we are using extension, the extension might send the HTML content - // so no need to download it again here. However, if it's empty, it might be not HTML file - // so we download it here. - var contentType string - var contentBuffer io.Reader - - if request.HTML == "" { - contentBuffer, contentType, _ = core.DownloadBookmark(request.URL) - } else { - contentType = "text/html; charset=UTF-8" - contentBuffer = bytes.NewBufferString(request.HTML) - } - // Save the bookmark with whatever we already have downloaded // since we need the ID in order to download the archive // Only when old bookmark is not exists. @@ -82,38 +69,44 @@ func (h *Handler) ApiInsertViaExtension(w http.ResponseWriter, r *http.Request, return } book = books[0] + } else { + books, err := h.DB.SaveBookmarks(ctx, false, book) + if err != nil { + log.Printf("error saving bookmark before downloading content: %s", err) + return + } + book = books[0] } // At this point the web page already downloaded. // Time to process it. - if contentBuffer != nil { - book.CreateArchive = true - request := core.ProcessRequest{ - DataDir: h.DataDir, - Bookmark: book, - Content: contentBuffer, - ContentType: contentType, - } - - var isFatalErr bool - book, isFatalErr, err = core.ProcessBookmark(h.dependencies, request) - - if tmp, ok := contentBuffer.(io.ReadCloser); ok { - tmp.Close() - } + var result *model.BookmarkDTO + var errArchiver error + if request.HTML != "" { + contentType := "text/html; charset=UTF-8" + result, errArchiver = h.dependencies.Domains.Archiver.ProcessBookmarkArchive(io.NopCloser(strings.NewReader(request.HTML)), contentType, book) + } else { + result, errArchiver = h.dependencies.Domains.Archiver.DownloadBookmarkArchive(book) + } + if errArchiver != nil { + log.Printf("error downloading bookmark cache: %s", errArchiver) + w.WriteHeader(http.StatusInternalServerError) + return + } - // If we can't process or update the saved bookmark, just log it and continue on with the - // request. - if err != nil && isFatalErr { - log.Printf("failed to process bookmark: %v", err) - } else if _, err := h.DB.SaveBookmarks(ctx, false, book); err != nil { - log.Printf("error saving bookmark after downloading content: %s", err) - } + // Save the bookmark with whatever we already have downloaded + // since we need the ID in order to download the archive + books, err := h.DB.SaveBookmarks(ctx, request.ID == 0, *result) + if err != nil { + log.Printf("error saving bookmark from extension downloading content: %s", err) + w.WriteHeader(http.StatusInternalServerError) + return } + book = books[0] // Return the new bookmark w.Header().Set("Content-Type", "application/json") - err = json.NewEncoder(w).Encode(&book) + err = json.NewEncoder(w).Encode(&result) checkError(err) } diff --git a/internal/webserver/handler-api.go b/internal/webserver/handler-api.go index 3772459bd..75a63d7b4 100644 --- a/internal/webserver/handler-api.go +++ b/internal/webserver/handler-api.go @@ -21,29 +21,21 @@ import ( "golang.org/x/crypto/bcrypt" ) -func downloadBookmarkContent(deps *dependencies.Dependencies, book *model.BookmarkDTO, dataDir string, request *http.Request, keepTitle, keepExcerpt bool) (*model.BookmarkDTO, error) { - content, contentType, err := core.DownloadBookmark(book.URL) +func downloadBookmarkContent(deps *dependencies.Dependencies, book *model.BookmarkDTO, keepTitle, keepExcerpt bool) (*model.BookmarkDTO, error) { + result, err := deps.Domains.Archiver.DownloadBookmarkArchive(*book) if err != nil { - return nil, fmt.Errorf("error downloading url: %s", err) + return nil, fmt.Errorf("error archiving url: %s", err) } - processRequest := core.ProcessRequest{ - DataDir: dataDir, - Bookmark: *book, - Content: content, - ContentType: contentType, - KeepTitle: keepTitle, - KeepExcerpt: keepExcerpt, + if keepTitle { + result.Title = book.Title } - result, isFatalErr, err := core.ProcessBookmark(deps, processRequest) - content.Close() - - if err != nil && isFatalErr { - return nil, fmt.Errorf("failed to process: %v", err) + if keepExcerpt { + result.Excerpt = book.Excerpt } - return &result, err + return result, err } // ApiLogout is handler for POST /api/logout @@ -240,7 +232,7 @@ func (h *Handler) ApiInsertBookmark(w http.ResponseWriter, r *http.Request, ps h if payload.Async { go func() { - bookmark, err := downloadBookmarkContent(h.dependencies, book, h.DataDir, r, userHasDefinedTitle, book.Excerpt != "") + bookmark, err := downloadBookmarkContent(h.dependencies, book, userHasDefinedTitle, book.Excerpt != "") if err != nil { log.Printf("error downloading boorkmark: %s", err) return @@ -252,7 +244,7 @@ func (h *Handler) ApiInsertBookmark(w http.ResponseWriter, r *http.Request, ps h } else { // Workaround. Download content after saving the bookmark so we have the proper database // id already set in the object regardless of the database engine. - book, err = downloadBookmarkContent(h.dependencies, book, h.DataDir, r, userHasDefinedTitle, book.Excerpt != "") + book, err = downloadBookmarkContent(h.dependencies, book, userHasDefinedTitle, book.Excerpt != "") if err != nil { log.Printf("error downloading boorkmark: %s", err) } else if _, err := h.DB.SaveBookmarks(ctx, false, *book); err != nil { From 8f7837d57f7c7a2c19c05e26ef562ded1867d0d9 Mon Sep 17 00:00:00 2001 From: "Felipe M." Date: Tue, 25 Jun 2024 23:36:56 +0200 Subject: [PATCH 03/18] mysql pg migrations --- .../postgres/0002_bookmark_archiver.up.sql | 2 + internal/database/mysql.go | 43 ++++++++++++++++--- internal/database/pg.go | 22 +++++++--- 3 files changed, 54 insertions(+), 13 deletions(-) create mode 100644 internal/database/migrations/postgres/0002_bookmark_archiver.up.sql diff --git a/internal/database/migrations/postgres/0002_bookmark_archiver.up.sql b/internal/database/migrations/postgres/0002_bookmark_archiver.up.sql new file mode 100644 index 000000000..b0ac92825 --- /dev/null +++ b/internal/database/migrations/postgres/0002_bookmark_archiver.up.sql @@ -0,0 +1,2 @@ +ALTER TABLE bookmark ADD COLUMN archiver TEXT NOT NULL DEFAULT ''; +ALTER TABLE bookmark ADD COLUMN archive_path TEXT NOT NULL DEFAULT ''; diff --git a/internal/database/mysql.go b/internal/database/mysql.go index ee4e3f82a..4414b3b9d 100644 --- a/internal/database/mysql.go +++ b/internal/database/mysql.go @@ -59,6 +59,31 @@ var mysqlMigrations = []migration{ } } + return nil + }), + // Adds archiver and archive_path columns to bookmark table + newFuncMigration("0.7.0", "0.8.0", func(db *sql.DB) error { + tx, err := db.Begin() + if err != nil { + return fmt.Errorf("failed to start transaction: %w", err) + } + + defer tx.Rollback() + + _, err = tx.Exec(`ALTER TABLE bookmark ADD COLUMN archiver TEXT NOT NULL DEFAULT ''`) + if err != nil { + return fmt.Errorf("failed to add archiver column to bookmark_tag table: %w", err) + } + + _, err = tx.Exec(`ALTER TABLE bookmark ADD COLUMN archive_path TEXT NOT NULL DEFAULT ''`) + if err != nil { + return fmt.Errorf("failed to add archiver column to bookmark_tag table: %w", err) + } + + if err := tx.Commit(); err != nil { + return fmt.Errorf("failed to commit transaction: %w", err) + } + return nil }), } @@ -131,8 +156,8 @@ func (db *MySQLDatabase) SaveBookmarks(ctx context.Context, create bool, bookmar if err := db.withTx(ctx, func(tx *sqlx.Tx) error { // Prepare statement stmtInsertBook, err := tx.Preparex(`INSERT INTO bookmark - (url, title, excerpt, author, public, content, html, modified) - VALUES(?, ?, ?, ?, ?, ?, ?, ?)`) + (url, title, excerpt, author, public, content, html, modified, archiver, archive_path) + VALUES(?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`) if err != nil { return errors.WithStack(err) } @@ -145,7 +170,9 @@ func (db *MySQLDatabase) SaveBookmarks(ctx context.Context, create bool, bookmar public = ?, content = ?, html = ?, - modified = ? + modified = ?, + archiver = ?, + archive_path = ? WHERE id = ?`) if err != nil { return errors.WithStack(err) @@ -199,7 +226,8 @@ func (db *MySQLDatabase) SaveBookmarks(ctx context.Context, create bool, bookmar var res sql.Result res, err = stmtInsertBook.ExecContext(ctx, book.URL, book.Title, book.Excerpt, book.Author, - book.Public, book.Content, book.HTML, book.Modified) + book.Public, book.Content, book.HTML, book.Modified, + book.Archiver, book.ArchivePath) if err != nil { return errors.WithStack(err) } @@ -211,7 +239,8 @@ func (db *MySQLDatabase) SaveBookmarks(ctx context.Context, create bool, bookmar } else { _, err = stmtUpdateBook.ExecContext(ctx, book.URL, book.Title, book.Excerpt, book.Author, - book.Public, book.Content, book.HTML, book.Modified, book.ID) + book.Public, book.Content, book.HTML, book.Modified, + book.Archiver, book.ArchivePath, book.ID) } if err != nil { return errors.WithStack(err) @@ -286,7 +315,9 @@ func (db *MySQLDatabase) GetBookmarks(ctx context.Context, opts GetBookmarksOpti `author`, `public`, `modified`, - `content <> "" has_content`} + `content <> "" has_content`, + `archiver`, + `archive_path`} if opts.WithContent { columns = append(columns, `content`, `html`) diff --git a/internal/database/pg.go b/internal/database/pg.go index 132cd361d..0a7561bf0 100644 --- a/internal/database/pg.go +++ b/internal/database/pg.go @@ -57,6 +57,7 @@ var postgresMigrations = []migration{ return nil }), + newFileMigration("0.3.0", "0.4.0", "postgres/0002_bookmark_archiver"), } // PGDatabase is implementation of Database interface @@ -128,8 +129,8 @@ func (db *PGDatabase) SaveBookmarks(ctx context.Context, create bool, bookmarks if err := db.withTx(ctx, func(tx *sqlx.Tx) error { // Prepare statement stmtInsertBook, err := tx.Preparex(`INSERT INTO bookmark - (url, title, excerpt, author, public, content, html, modified) - VALUES($1, $2, $3, $4, $5, $6, $7, $8) + (url, title, excerpt, author, public, content, html, modified, archiver, archive_path) + VALUES($1, $2, $3, $4, $5, $6, $7, $8, $9, $10) RETURNING id`) if err != nil { return errors.WithStack(err) @@ -143,8 +144,10 @@ func (db *PGDatabase) SaveBookmarks(ctx context.Context, create bool, bookmarks public = $5, content = $6, html = $7, - modified = $8 - WHERE id = $9`) + modified = $8, + archiver = $9, + archive_path = $10 + WHERE id = $11`) if err != nil { return errors.WithStack(err) } @@ -196,11 +199,14 @@ func (db *PGDatabase) SaveBookmarks(ctx context.Context, create bool, bookmarks if create { err = stmtInsertBook.QueryRowContext(ctx, book.URL, book.Title, book.Excerpt, book.Author, - book.Public, book.Content, book.HTML, book.Modified).Scan(&book.ID) + book.Public, book.Content, book.HTML, book.Modified, + book.Archiver, book.ArchivePath).Scan(&book.ID) } else { _, err = stmtUpdateBook.ExecContext(ctx, book.URL, book.Title, book.Excerpt, book.Author, - book.Public, book.Content, book.HTML, book.Modified, book.ID) + book.Public, book.Content, book.HTML, book.Modified, + book.Archiver, book.ArchivePath, + book.ID) } if err != nil { return errors.WithStack(err) @@ -271,7 +277,9 @@ func (db *PGDatabase) GetBookmarks(ctx context.Context, opts GetBookmarksOptions `author`, `public`, `modified`, - `content <> '' has_content`} + `content <> '' has_content, + archiver, + archive_path`} if opts.WithContent { columns = append(columns, `content`, `html`) From 3f27d7c4cb072bea3ef96991cba19b7b88f5dad2 Mon Sep 17 00:00:00 2001 From: "Felipe M." Date: Tue, 25 Jun 2024 23:42:36 +0200 Subject: [PATCH 04/18] iterate archivers if error --- internal/domains/archiver.go | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/internal/domains/archiver.go b/internal/domains/archiver.go index b47c6b78e..ea383ff61 100644 --- a/internal/domains/archiver.go +++ b/internal/domains/archiver.go @@ -29,7 +29,12 @@ func (d *ArchiverDomain) DownloadBookmarkArchive(book model.BookmarkDTO) (*model func (d *ArchiverDomain) ProcessBookmarkArchive(content io.ReadCloser, contentType string, book model.BookmarkDTO) (*model.BookmarkDTO, error) { for _, archiver := range d.archivers { if archiver.Matches(contentType) { - return archiver.Archive(content, contentType, book) + _, err := archiver.Archive(content, contentType, book) + if err != nil { + d.deps.Log.Errorf("Error archiving bookmark with archviver: %s", err) + continue + } + return &book, nil } } From da412113d9e7e19a1661a512da29962db55f33bb Mon Sep 17 00:00:00 2001 From: "Felipe M." Date: Wed, 26 Jun 2024 00:00:51 +0200 Subject: [PATCH 05/18] fixed getbookmark --- internal/database/database_test.go | 9 +++++++-- internal/database/mysql.go | 3 ++- internal/database/pg.go | 3 ++- internal/database/sqlite.go | 2 +- 4 files changed, 12 insertions(+), 5 deletions(-) diff --git a/internal/database/database_test.go b/internal/database/database_test.go index cb00b5402..0949814de 100644 --- a/internal/database/database_test.go +++ b/internal/database/database_test.go @@ -223,8 +223,10 @@ func testGetBookmark(t *testing.T, db DB) { ctx := context.TODO() book := model.BookmarkDTO{ - URL: "https://github.com/go-shiori/shiori", - Title: "shiori", + URL: "https://github.com/go-shiori/shiori", + Title: "shiori", + Archiver: model.ArchiverPDF, + ArchivePath: "test", } result, err := db.SaveBookmarks(ctx, true, book) @@ -235,6 +237,9 @@ func testGetBookmark(t *testing.T, db DB) { assert.NoError(t, err, "Get bookmark should not fail") assert.Equal(t, result[0].ID, savedBookmark.ID, "Retrieved bookmark should be the same") assert.Equal(t, book.URL, savedBookmark.URL, "Retrieved bookmark should be the same") + assert.Equal(t, book.Title, savedBookmark.Title, "Retrieved bookmark should be the same") + assert.Equal(t, book.Archiver, savedBookmark.Archiver, "Retrieved bookmark should be the same") + assert.Equal(t, book.ArchivePath, savedBookmark.ArchivePath, "Retrieved bookmark should be the same") } func testGetBookmarkNotExistent(t *testing.T, db DB) { diff --git a/internal/database/mysql.go b/internal/database/mysql.go index 4414b3b9d..fdf357e72 100644 --- a/internal/database/mysql.go +++ b/internal/database/mysql.go @@ -595,7 +595,8 @@ func (db *MySQLDatabase) GetBookmark(ctx context.Context, id int, url string) (m args := []interface{}{id} query := `SELECT id, url, title, excerpt, author, public, - content, html, modified, content <> '' has_content + content, html, modified, content <> '' has_content, + archiver, archive_path FROM bookmark WHERE id = ?` if url != "" { diff --git a/internal/database/pg.go b/internal/database/pg.go index 0a7561bf0..1a3c015d9 100644 --- a/internal/database/pg.go +++ b/internal/database/pg.go @@ -578,7 +578,8 @@ func (db *PGDatabase) GetBookmark(ctx context.Context, id int, url string) (mode args := []interface{}{id} query := `SELECT id, url, title, excerpt, author, public, - content, html, modified, content <> '' has_content + content, html, modified, content <> '' has_content, + archiver, archive_path FROM bookmark WHERE id = $1` if url != "" { diff --git a/internal/database/sqlite.go b/internal/database/sqlite.go index e328c4fd8..874db2f16 100644 --- a/internal/database/sqlite.go +++ b/internal/database/sqlite.go @@ -710,7 +710,7 @@ func (db *SQLiteDatabase) GetBookmark(ctx context.Context, id int, url string) ( args := []interface{}{id} query := `SELECT b.id, b.url, b.title, b.excerpt, b.author, b.public, b.modified, - bc.content, bc.html, b.has_content + bc.content, bc.html, b.has_content, b.archiver, b.archive_path FROM bookmark b LEFT JOIN bookmark_content bc ON bc.docid = b.id WHERE b.id = ?` From 9d34badeb2db5350905093a48631fae603cc5592 Mon Sep 17 00:00:00 2001 From: "Felipe M." Date: Wed, 26 Jun 2024 00:05:47 +0200 Subject: [PATCH 06/18] basic archive page --- internal/http/routes/bookmark.go | 71 +++++++++++++++++++++++--------- 1 file changed, 51 insertions(+), 20 deletions(-) diff --git a/internal/http/routes/bookmark.go b/internal/http/routes/bookmark.go index ae2cc0905..bf0394ee3 100644 --- a/internal/http/routes/bookmark.go +++ b/internal/http/routes/bookmark.go @@ -3,6 +3,7 @@ package routes import ( "fmt" "html/template" + "io" "net/http" "strconv" "strings" @@ -123,32 +124,62 @@ func (r *BookmarkRoutes) bookmarkArchiveFileHandler(c *gin.Context) { resourcePath, _ := c.Params.Get("filepath") resourcePath = strings.TrimPrefix(resourcePath, "/") - archive, err := r.deps.Domains.Archiver.GetBookmarkArchive(bookmark) - if err != nil { - r.logger.WithError(err).Error("error opening archive") - response.SendInternalServerError(c) - return - } - defer archive.Close() - - if !archive.HasResource(resourcePath) { - response.NotFound(c) - return - } - - content, resourceContentType, err := archive.Read(resourcePath) - if err != nil { - r.logger.WithError(err).Error("error reading archive file") - response.SendInternalServerError(c) - return + var content []byte + var resourceContentType string + + r.logger.Error(bookmark.Archiver) + + switch bookmark.Archiver { + case model.ArchiverPDF: + if !r.deps.Domains.Storage.FileExists(bookmark.ArchivePath) { + response.NotFound(c) + return + } + + archive, err := r.deps.Domains.Storage.FS().Open(bookmark.ArchivePath) + if err != nil { + r.logger.WithError(err).Error("error opening pdf archive") + response.SendInternalServerError(c) + return + } + defer archive.Close() + + content, err = io.ReadAll(archive) + if err != nil { + r.logger.WithError(err).Error("error reading archive file") + response.SendInternalServerError(c) + return + } + + resourceContentType = "application/pdf" + case model.ArchiverWARC: + archive, err := r.deps.Domains.Archiver.GetBookmarkArchive(bookmark) + if err != nil { + r.logger.WithError(err).Error("error opening warc archive") + response.SendInternalServerError(c) + return + } + defer archive.Close() + + if !archive.HasResource(resourcePath) { + response.NotFound(c) + return + } + + content, resourceContentType, err = archive.Read(resourcePath) + if err != nil { + r.logger.WithError(err).Error("error reading archive file") + response.SendInternalServerError(c) + return + } + + c.Header("Content-Encoding", "gzip") } // Generate weak ETAG shioriUUID := uuid.NewV5(uuid.NamespaceURL, model.ShioriURLNamespace) c.Header("Etag", fmt.Sprintf("W/%s", uuid.NewV5(shioriUUID, fmt.Sprintf("%x-%x-%x", bookmark.ID, resourcePath, len(content))))) c.Header("Cache-Control", "max-age=31536000") - - c.Header("Content-Encoding", "gzip") c.Data(http.StatusOK, resourceContentType, content) } From c8dd55dac604580640b9b44d922703de74e2748c Mon Sep 17 00:00:00 2001 From: "Felipe M." Date: Wed, 26 Jun 2024 19:03:50 +0200 Subject: [PATCH 07/18] archive page --- internal/archiver/pdf.go | 20 ++++++++++ internal/archiver/warc.go | 36 +++++++++++++++++ internal/domains/archiver.go | 33 ++++++++++------ internal/http/routes/bookmark.go | 68 +++++++------------------------- internal/model/archiver.go | 54 ++++++++++++++++++++++++- internal/model/domains.go | 4 +- 6 files changed, 147 insertions(+), 68 deletions(-) diff --git a/internal/archiver/pdf.go b/internal/archiver/pdf.go index 592b74aaa..b720199c6 100644 --- a/internal/archiver/pdf.go +++ b/internal/archiver/pdf.go @@ -29,6 +29,26 @@ func (a *PDFArchiver) Archive(content io.ReadCloser, contentType string, bookmar return &bookmark, nil } +func (a *PDFArchiver) GetArchiveFile(bookmark model.BookmarkDTO, resourcePath string) (*model.ArchiveFile, error) { + archivePath := model.GetArchivePath(&bookmark) + + if !a.deps.Domains.Storage.FileExists(archivePath) { + return nil, fmt.Errorf("archive for bookmark %d doesn't exist", bookmark.ID) + } + + archiveFile, err := a.deps.Domains.Storage.FS().Open(archivePath) + if err != nil { + return nil, fmt.Errorf("error opening pdf archive: %w", err) + } + + info, err := archiveFile.Stat() + if err != nil { + return nil, fmt.Errorf("error getting pdf archive info: %w", err) + } + + return model.NewArchiveFile(archiveFile, "application/pdf", "", info.Size()), nil +} + func NewPDFArchiver(deps *dependencies.Dependencies) *PDFArchiver { return &PDFArchiver{ deps: deps, diff --git a/internal/archiver/warc.go b/internal/archiver/warc.go index 2d21a2f24..bdfaa3483 100644 --- a/internal/archiver/warc.go +++ b/internal/archiver/warc.go @@ -3,12 +3,19 @@ package archiver import ( "fmt" "io" + "path/filepath" + "strings" "github.com/go-shiori/shiori/internal/core" "github.com/go-shiori/shiori/internal/dependencies" "github.com/go-shiori/shiori/internal/model" + "github.com/go-shiori/warc" ) +// LEGACY WARNING +// This file contains legacy code that will be removed once we move on to Obelisk as +// general archiver. + type WARCArchiver struct { deps *dependencies.Dependencies } @@ -35,6 +42,35 @@ func (a *WARCArchiver) Archive(content io.ReadCloser, contentType string, bookma return &result, nil } +func (a *WARCArchiver) GetArchiveFile(bookmark model.BookmarkDTO, resourcePath string) (*model.ArchiveFile, error) { + archivePath := model.GetArchivePath(&bookmark) + + if !a.deps.Domains.Storage.FileExists(archivePath) { + return nil, fmt.Errorf("archive for bookmark %d doesn't exist", bookmark.ID) + } + + warcFile, err := warc.Open(filepath.Join(a.deps.Config.Storage.DataDir, archivePath)) + if err != nil { + return nil, fmt.Errorf("error opening warc file: %w", err) + } + + defer warcFile.Close() + + if !warcFile.HasResource(resourcePath) { + return nil, fmt.Errorf("resource %s doesn't exist in archive", resourcePath) + } + + content, contentType, err := warcFile.Read(resourcePath) + if err != nil { + return nil, fmt.Errorf("error reading resource %s: %w", resourcePath, err) + } + + // Note: Using this method to send the reader instead of `bytes.NewReader` because that + // crashes the moment we try to retrieve it for some reason. Since this is a legacy archiver + // I don't want to spend more time on this. (@fmartingr) + return model.NewArchiveFile(strings.NewReader(string(content)), contentType, "gzip", int64(len(content))), nil +} + func NewWARCArchiver(deps *dependencies.Dependencies) *WARCArchiver { return &WARCArchiver{ deps: deps, diff --git a/internal/domains/archiver.go b/internal/domains/archiver.go index ea383ff61..965ad55df 100644 --- a/internal/domains/archiver.go +++ b/internal/domains/archiver.go @@ -3,18 +3,16 @@ package domains import ( "fmt" "io" - "path/filepath" "github.com/go-shiori/shiori/internal/archiver" "github.com/go-shiori/shiori/internal/core" "github.com/go-shiori/shiori/internal/dependencies" "github.com/go-shiori/shiori/internal/model" - "github.com/go-shiori/warc" ) type ArchiverDomain struct { deps *dependencies.Dependencies - archivers []model.Archiver + archivers map[string]model.Archiver } func (d *ArchiverDomain) DownloadBookmarkArchive(book model.BookmarkDTO) (*model.BookmarkDTO, error) { @@ -41,21 +39,32 @@ func (d *ArchiverDomain) ProcessBookmarkArchive(content io.ReadCloser, contentTy return nil, fmt.Errorf("no archiver found for content type: %s", contentType) } -func (d *ArchiverDomain) GetBookmarkArchive(book *model.BookmarkDTO) (*warc.Archive, error) { - archivePath := model.GetArchivePath(book) +func (d *ArchiverDomain) GetBookmarkArchiveFile(book *model.BookmarkDTO, resourcePath string) (*model.ArchiveFile, error) { + archiver, err := d.GetArchiver(book.Archiver) + if err != nil { + return nil, err + } - if !d.deps.Domains.Storage.FileExists(archivePath) { - return nil, fmt.Errorf("archive for bookmark %d doesn't exist", book.ID) + archiveFile, err := archiver.GetArchiveFile(*book, resourcePath) + if err != nil { + return nil, fmt.Errorf("error getting archive file: %w", err) } - // FIXME: This only works in local filesystem - return warc.Open(filepath.Join(d.deps.Config.Storage.DataDir, archivePath)) + return archiveFile, nil +} + +func (d *ArchiverDomain) GetArchiver(name string) (model.Archiver, error) { + archiver, ok := d.archivers[name] + if !ok { + return nil, fmt.Errorf("archiver %s not found", name) + } + return archiver, nil } func NewArchiverDomain(deps *dependencies.Dependencies) *ArchiverDomain { - archivers := []model.Archiver{ - archiver.NewPDFArchiver(deps), - archiver.NewWARCArchiver(deps), + archivers := map[string]model.Archiver{ + model.ArchiverPDF: archiver.NewPDFArchiver(deps), + model.ArchiverWARC: archiver.NewWARCArchiver(deps), } return &ArchiverDomain{ deps: deps, diff --git a/internal/http/routes/bookmark.go b/internal/http/routes/bookmark.go index bf0394ee3..b34abde53 100644 --- a/internal/http/routes/bookmark.go +++ b/internal/http/routes/bookmark.go @@ -3,7 +3,6 @@ package routes import ( "fmt" "html/template" - "io" "net/http" "strconv" "strings" @@ -124,63 +123,26 @@ func (r *BookmarkRoutes) bookmarkArchiveFileHandler(c *gin.Context) { resourcePath, _ := c.Params.Get("filepath") resourcePath = strings.TrimPrefix(resourcePath, "/") - var content []byte - var resourceContentType string - - r.logger.Error(bookmark.Archiver) - - switch bookmark.Archiver { - case model.ArchiverPDF: - if !r.deps.Domains.Storage.FileExists(bookmark.ArchivePath) { - response.NotFound(c) - return - } - - archive, err := r.deps.Domains.Storage.FS().Open(bookmark.ArchivePath) - if err != nil { - r.logger.WithError(err).Error("error opening pdf archive") - response.SendInternalServerError(c) - return - } - defer archive.Close() - - content, err = io.ReadAll(archive) - if err != nil { - r.logger.WithError(err).Error("error reading archive file") - response.SendInternalServerError(c) - return - } - - resourceContentType = "application/pdf" - case model.ArchiverWARC: - archive, err := r.deps.Domains.Archiver.GetBookmarkArchive(bookmark) - if err != nil { - r.logger.WithError(err).Error("error opening warc archive") - response.SendInternalServerError(c) - return - } - defer archive.Close() - - if !archive.HasResource(resourcePath) { - response.NotFound(c) - return - } - - content, resourceContentType, err = archive.Read(resourcePath) - if err != nil { - r.logger.WithError(err).Error("error reading archive file") - response.SendInternalServerError(c) - return - } - - c.Header("Content-Encoding", "gzip") + archiveFile, err := r.deps.Domains.Archiver.GetBookmarkArchiveFile(bookmark, resourcePath) + if err != nil { + r.logger.WithError(err).Error("error getting archive file") + response.SendInternalServerError(c) + return } + r.logger.Warn(archiveFile) + // Generate weak ETAG shioriUUID := uuid.NewV5(uuid.NamespaceURL, model.ShioriURLNamespace) - c.Header("Etag", fmt.Sprintf("W/%s", uuid.NewV5(shioriUUID, fmt.Sprintf("%x-%x-%x", bookmark.ID, resourcePath, len(content))))) + c.Header("Etag", fmt.Sprintf("W/%s", uuid.NewV5(shioriUUID, fmt.Sprintf("%x-%x-%x", bookmark.ID, resourcePath, archiveFile.Size())))) c.Header("Cache-Control", "max-age=31536000") - c.Data(http.StatusOK, resourceContentType, content) + + c.DataFromReader( + http.StatusOK, + archiveFile.Size(), + archiveFile.ContentType(), + archiveFile.Reader(), + archiveFile.AsHTTPHeaders()) } func (r *BookmarkRoutes) bookmarkThumbnailHandler(c *gin.Context) { diff --git a/internal/model/archiver.go b/internal/model/archiver.go index 2e33abfa4..73a1288b6 100644 --- a/internal/model/archiver.go +++ b/internal/model/archiver.go @@ -1,13 +1,65 @@ package model -import "io" +import ( + "io" + "strconv" +) const ( ArchiverPDF = "pdf" ArchiverWARC = "warc" ) +type ArchiveFile struct { + reader io.Reader + contentType string + size int64 // bytes + encoding string +} + +func (a *ArchiveFile) Reader() io.Reader { + return a.reader +} + +func (a *ArchiveFile) ContentType() string { + return a.contentType +} + +func (a *ArchiveFile) Size() int64 { + return a.size +} + +func (a *ArchiveFile) Encoding() string { + return a.encoding +} + +func (a *ArchiveFile) AsHTTPHeaders() map[string]string { + headers := map[string]string{ + "Content-Type": a.contentType, + } + + if a.size > 0 { + headers["Content-Length"] = strconv.FormatInt(a.size, 10) + } + + if a.encoding != "" { + headers["Content-Encoding"] = a.encoding + } + + return headers +} + +func NewArchiveFile(reader io.Reader, contentType, encoding string, size int64) *ArchiveFile { + return &ArchiveFile{ + reader: reader, + contentType: contentType, + encoding: encoding, + size: size, + } +} + type Archiver interface { Archive(content io.ReadCloser, contentType string, bookmark BookmarkDTO) (*BookmarkDTO, error) Matches(contentType string) bool + GetArchiveFile(bookmark BookmarkDTO, resourcePath string) (*ArchiveFile, error) } diff --git a/internal/model/domains.go b/internal/model/domains.go index 9ab25a1df..445c46522 100644 --- a/internal/model/domains.go +++ b/internal/model/domains.go @@ -7,7 +7,6 @@ import ( "os" "time" - "github.com/go-shiori/warc" "github.com/spf13/afero" ) @@ -27,10 +26,11 @@ type AccountsDomain interface { type ArchiverDomain interface { DownloadBookmarkArchive(book BookmarkDTO) (*BookmarkDTO, error) ProcessBookmarkArchive(content io.ReadCloser, contentType string, book BookmarkDTO) (*BookmarkDTO, error) - GetBookmarkArchive(book *BookmarkDTO) (*warc.Archive, error) + GetBookmarkArchiveFile(book *BookmarkDTO, archivePath string) (*ArchiveFile, error) } type StorageDomain interface { + // Open(name string) (os.File, error) Stat(name string) (fs.FileInfo, error) FS() afero.Fs FileExists(path string) bool From 978be82cf7e03244ae850d88361c38712ea378bf Mon Sep 17 00:00:00 2001 From: "Felipe M." Date: Wed, 26 Jun 2024 19:58:10 +0200 Subject: [PATCH 08/18] bookmark logic --- internal/cmd/add.go | 2 +- internal/core/core.go | 8 +++- internal/core/ebook.go | 23 +++++++---- internal/core/ebook_test.go | 51 +++++++----------------- internal/core/processing.go | 7 ++-- internal/domains/archiver.go | 11 ++++- internal/http/routes/api/v1/bookmarks.go | 18 ++++++++- internal/http/routes/bookmark_test.go | 2 +- internal/model/archiver.go | 12 ++++++ internal/model/domains.go | 3 +- internal/webserver/handler-api-ext.go | 2 +- internal/webserver/handler-api.go | 2 +- 12 files changed, 84 insertions(+), 57 deletions(-) diff --git a/internal/cmd/add.go b/internal/cmd/add.go index 6c4f5b401..20b6b6305 100644 --- a/internal/cmd/add.go +++ b/internal/cmd/add.go @@ -83,7 +83,7 @@ func addHandler(cmd *cobra.Command, args []string) { if !offline { cInfo.Println("Downloading article...") - result, err := deps.Domains.Archiver.DownloadBookmarkArchive(book) + result, err := deps.Domains.Archiver.GenerateBookmarkArchive(book) if err != nil { cError.Printf("Failed to download article: %v\n", err) os.Exit(1) diff --git a/internal/core/core.go b/internal/core/core.go index a75f3ec49..5bf94999e 100644 --- a/internal/core/core.go +++ b/internal/core/core.go @@ -1,3 +1,9 @@ package core -const userAgent = "Shiori/2.0.0 (+https://github.com/go-shiori/shiori)" +import ( + "fmt" + + "github.com/go-shiori/shiori/internal/model" +) + +var userAgent = fmt.Sprintf("Shiori/%s (+https://github.com/go-shiori/shiori)", model.BuildVersion) diff --git a/internal/core/ebook.go b/internal/core/ebook.go index b03e3974e..5ecbcb916 100644 --- a/internal/core/ebook.go +++ b/internal/core/ebook.go @@ -1,6 +1,7 @@ package core import ( + "fmt" "os" fp "path/filepath" "strconv" @@ -15,32 +16,38 @@ import ( // GenerateEbook receives a `ProcessRequest` and generates an ebook file in the destination path specified. // The destination path `dstPath` should include file name with ".epub" extension // The bookmark model will be used to update the UI based on whether this function is successful or not. -func GenerateEbook(deps *dependencies.Dependencies, req ProcessRequest, dstPath string) (book model.BookmarkDTO, err error) { +func GenerateEbook(deps *dependencies.Dependencies, req model.EbookProcessRequest) (book model.BookmarkDTO, err error) { book = req.Bookmark + dstPath := model.GetEbookPath(&book) // Make sure bookmark ID is defined if book.ID == 0 { return book, errors.New("bookmark ID is not valid") } - // Get current state of bookmark cheak archive and thumb - strID := strconv.Itoa(book.ID) + if deps.Domains.Storage.FileExists(dstPath) && req.SkipExisting { + return book, nil + } + strID := strconv.Itoa(book.ID) bookmarkThumbnailPath := model.GetThumbnailPath(&book) - bookmarkArchivePath := model.GetArchivePath(&book) if deps.Domains.Storage.FileExists(bookmarkThumbnailPath) { book.ImageURL = fp.Join("/", "bookmark", strID, "thumb") } - if deps.Domains.Storage.FileExists(bookmarkArchivePath) { - book.HasArchive = true + if book.ArchivePath == "" { + return book, errors.New("bookmark doesn't have archive") + } + + archiveFile, err := deps.Domains.Archiver.GetBookmarkArchiveFile(&book, "") + if err != nil { + return book, fmt.Errorf("error getting archive file: %w", err) } // This function create ebook from reader mode of bookmark so // we can't create ebook from PDF so we return error here if bookmark is a pdf - contentType := req.ContentType - if strings.Contains(contentType, "application/pdf") { + if strings.Contains(archiveFile.ContentType(), "application/pdf") { return book, errors.New("can't create ebook for pdf") } diff --git a/internal/core/ebook_test.go b/internal/core/ebook_test.go index 67b06d218..989407dd1 100644 --- a/internal/core/ebook_test.go +++ b/internal/core/ebook_test.go @@ -21,28 +21,24 @@ func TestGenerateEbook(t *testing.T) { t.Run("Successful ebook generate", func(t *testing.T) { t.Run("valid bookmarkId that return HasEbook true", func(t *testing.T) { - dstFile := "/ebook/1.epub" tmpDir := t.TempDir() deps.Domains.Storage = domains.NewStorageDomain(deps, afero.NewBasePathFs(afero.NewOsFs(), tmpDir)) - mockRequest := core.ProcessRequest{ + mockRequest := model.EbookProcessRequest{ Bookmark: model.BookmarkDTO{ ID: 1, Title: "Example Bookmark", HTML: "Example HTML", HasEbook: false, }, - DataDir: tmpDir, - ContentType: "text/html", } - bookmark, err := core.GenerateEbook(deps, mockRequest, dstFile) + bookmark, err := core.GenerateEbook(deps, mockRequest) assert.True(t, bookmark.HasEbook) assert.NoError(t, err) }) t.Run("ebook generate with valid BookmarkID EbookExist ImagePathExist ReturnWithHasEbookTrue", func(t *testing.T) { - dstFile := "/ebook/2.epub" tmpDir := t.TempDir() deps.Domains.Storage = domains.NewStorageDomain(deps, afero.NewBasePathFs(afero.NewOsFs(), tmpDir)) @@ -51,10 +47,8 @@ func TestGenerateEbook(t *testing.T) { ID: 2, HasEbook: false, } - mockRequest := core.ProcessRequest{ - Bookmark: bookmark, - DataDir: tmpDir, - ContentType: "text/html", + mockRequest := model.EbookProcessRequest{ + Bookmark: bookmark, } // Create the thumbnail file imagePath := model.GetThumbnailPath(&bookmark) @@ -66,14 +60,13 @@ func TestGenerateEbook(t *testing.T) { } defer file.Close() - bookmark, err = core.GenerateEbook(deps, mockRequest, dstFile) + bookmark, err = core.GenerateEbook(deps, mockRequest) expectedImagePath := string(fp.Separator) + fp.Join("bookmark", "2", "thumb") assert.NoError(t, err) assert.True(t, bookmark.HasEbook) assert.Equalf(t, expectedImagePath, bookmark.ImageURL, "Expected imageURL %s, but got %s", expectedImagePath, bookmark.ImageURL) }) t.Run("generate ebook valid BookmarkID EbookExist ReturnHasArchiveTrue", func(t *testing.T) { - dstFile := "/ebook/3.epub" tmpDir := t.TempDir() deps.Domains.Storage = domains.NewStorageDomain(deps, afero.NewBasePathFs(afero.NewOsFs(), tmpDir)) @@ -82,10 +75,8 @@ func TestGenerateEbook(t *testing.T) { ID: 3, HasEbook: false, } - mockRequest := core.ProcessRequest{ - Bookmark: bookmark, - DataDir: tmpDir, - ContentType: "text/html", + mockRequest := model.EbookProcessRequest{ + Bookmark: bookmark, } // Create the archive file archivePath := model.GetArchivePath(&bookmark) @@ -97,25 +88,21 @@ func TestGenerateEbook(t *testing.T) { } defer file.Close() - bookmark, err = core.GenerateEbook(deps, mockRequest, fp.Join(dstFile, "1")) + bookmark, err = core.GenerateEbook(deps, mockRequest) assert.True(t, bookmark.HasArchive) assert.NoError(t, err) }) }) t.Run("specific ebook generate case", func(t *testing.T) { t.Run("invalid bookmarkId that return Error", func(t *testing.T) { - dstFile := "/ebook/0.epub" - tmpDir := t.TempDir() - mockRequest := core.ProcessRequest{ + mockRequest := model.EbookProcessRequest{ Bookmark: model.BookmarkDTO{ ID: 0, HasEbook: false, }, - DataDir: tmpDir, - ContentType: "text/html", } - bookmark, err := core.GenerateEbook(deps, mockRequest, dstFile) + bookmark, err := core.GenerateEbook(deps, mockRequest) assert.Equal(t, model.BookmarkDTO{ ID: 0, @@ -124,7 +111,6 @@ func TestGenerateEbook(t *testing.T) { assert.EqualError(t, err, "bookmark ID is not valid") }) t.Run("ebook exist return HasEbook true", func(t *testing.T) { - dstFile := "/ebook/1.epub" tmpDir := t.TempDir() deps.Domains.Storage = domains.NewStorageDomain(deps, afero.NewBasePathFs(afero.NewOsFs(), tmpDir)) @@ -133,10 +119,8 @@ func TestGenerateEbook(t *testing.T) { ID: 1, HasEbook: false, } - mockRequest := core.ProcessRequest{ - Bookmark: bookmark, - DataDir: tmpDir, - ContentType: "text/html", + mockRequest := model.EbookProcessRequest{ + Bookmark: bookmark, } // Create the ebook file ebookPath := model.GetEbookPath(&bookmark) @@ -148,25 +132,20 @@ func TestGenerateEbook(t *testing.T) { } defer file.Close() - bookmark, err = core.GenerateEbook(deps, mockRequest, dstFile) + bookmark, err = core.GenerateEbook(deps, mockRequest) assert.True(t, bookmark.HasEbook) assert.NoError(t, err) }) t.Run("generate ebook valid BookmarkID RetuenError for PDF file", func(t *testing.T) { - dstFile := "/ebook/1.epub" - tmpDir := t.TempDir() - - mockRequest := core.ProcessRequest{ + mockRequest := model.EbookProcessRequest{ Bookmark: model.BookmarkDTO{ ID: 1, HasEbook: false, }, - DataDir: tmpDir, - ContentType: "application/pdf", } - bookmark, err := core.GenerateEbook(deps, mockRequest, dstFile) + bookmark, err := core.GenerateEbook(deps, mockRequest) assert.False(t, bookmark.HasEbook) assert.Error(t, err) diff --git a/internal/core/processing.go b/internal/core/processing.go index fdb156303..f5a32aef2 100644 --- a/internal/core/processing.go +++ b/internal/core/processing.go @@ -143,13 +143,12 @@ func ProcessBookmark(deps *dependencies.Dependencies, req ProcessRequest) (book // If needed, create ebook as well if book.CreateEbook { - ebookPath := model.GetEbookPath(&book) - req.Bookmark = book - if strings.Contains(contentType, "application/pdf") { return book, false, errors.Wrap(err, "can't create ebook from pdf") } else { - _, err = GenerateEbook(deps, req, ebookPath) + _, err = GenerateEbook(deps, model.EbookProcessRequest{ + Bookmark: book, + }) if err != nil { return book, true, errors.Wrap(err, "failed to create ebook") } diff --git a/internal/domains/archiver.go b/internal/domains/archiver.go index 965ad55df..15bb88845 100644 --- a/internal/domains/archiver.go +++ b/internal/domains/archiver.go @@ -15,7 +15,7 @@ type ArchiverDomain struct { archivers map[string]model.Archiver } -func (d *ArchiverDomain) DownloadBookmarkArchive(book model.BookmarkDTO) (*model.BookmarkDTO, error) { +func (d *ArchiverDomain) GenerateBookmarkArchive(book model.BookmarkDTO) (*model.BookmarkDTO, error) { content, contentType, err := core.DownloadBookmark(book.URL) if err != nil { return nil, fmt.Errorf("error downloading url: %s", err) @@ -24,6 +24,15 @@ func (d *ArchiverDomain) DownloadBookmarkArchive(book model.BookmarkDTO) (*model return d.ProcessBookmarkArchive(content, contentType, book) } +func (d *ArchiverDomain) GenerateBookmarkEbook(request model.EbookProcessRequest) error { + _, err := core.GenerateEbook(d.deps, request) + if err != nil { + return fmt.Errorf("error generating ebook: %s", err) + } + + return nil +} + func (d *ArchiverDomain) ProcessBookmarkArchive(content io.ReadCloser, contentType string, book model.BookmarkDTO) (*model.BookmarkDTO, error) { for _, archiver := range d.archivers { if archiver.Matches(contentType) { diff --git a/internal/http/routes/api/v1/bookmarks.go b/internal/http/routes/api/v1/bookmarks.go index 6b12fa486..4bf97ff50 100644 --- a/internal/http/routes/api/v1/bookmarks.go +++ b/internal/http/routes/api/v1/bookmarks.go @@ -183,7 +183,7 @@ func (r *BookmarksAPIRoutes) updateCache(c *gin.Context) { <-semaphore }() - result, err := r.deps.Domains.Archiver.DownloadBookmarkArchive(book) + result, err := r.deps.Domains.Archiver.GenerateBookmarkArchive(book) if err != nil { r.logger.WithFields(logrus.Fields{ "bookmark_id": book.ID, @@ -201,7 +201,21 @@ func (r *BookmarksAPIRoutes) updateCache(c *gin.Context) { } // Create ebook if needed - // TODO + if payload.CreateEbook { + err = r.deps.Domains.Archiver.GenerateBookmarkEbook(model.EbookProcessRequest{ + Bookmark: *result, + SkipExisting: payload.SkipExist, + }) + if err != nil { + r.logger.WithFields(logrus.Fields{ + "bookmark_id": book.ID, + "url": book.URL, + "error": err, + }).Error("error generating ebook") + chProblem <- book.ID + return + } + } // Update list of bookmarks mx.Lock() diff --git a/internal/http/routes/bookmark_test.go b/internal/http/routes/bookmark_test.go index 400dd95b2..3b72cbea7 100644 --- a/internal/http/routes/bookmark_test.go +++ b/internal/http/routes/bookmark_test.go @@ -155,7 +155,7 @@ func TestBookmarkFileHandlers(t *testing.T) { bookmarks, err := deps.Database.SaveBookmarks(context.TODO(), true, *bookmark) require.NoError(t, err) - bookmark, err = deps.Domains.Archiver.DownloadBookmarkArchive(bookmarks[0]) + bookmark, err = deps.Domains.Archiver.GenerateBookmarkArchive(bookmarks[0]) require.NoError(t, err) bookmarks, err = deps.Database.SaveBookmarks(context.TODO(), false, *bookmark) diff --git a/internal/model/archiver.go b/internal/model/archiver.go index 73a1288b6..2b1e6b8ac 100644 --- a/internal/model/archiver.go +++ b/internal/model/archiver.go @@ -58,6 +58,18 @@ func NewArchiveFile(reader io.Reader, contentType, encoding string, size int64) } } +type ArchiveProcessRequest struct { + Bookmark BookmarkDTO + Content io.Reader + ContentType string + SkipExisting bool +} + +type EbookProcessRequest struct { + Bookmark BookmarkDTO + SkipExisting bool +} + type Archiver interface { Archive(content io.ReadCloser, contentType string, bookmark BookmarkDTO) (*BookmarkDTO, error) Matches(contentType string) bool diff --git a/internal/model/domains.go b/internal/model/domains.go index 445c46522..a819db51d 100644 --- a/internal/model/domains.go +++ b/internal/model/domains.go @@ -24,7 +24,8 @@ type AccountsDomain interface { } type ArchiverDomain interface { - DownloadBookmarkArchive(book BookmarkDTO) (*BookmarkDTO, error) + GenerateBookmarkArchive(book BookmarkDTO) (*BookmarkDTO, error) + GenerateBookmarkEbook(book EbookProcessRequest) error ProcessBookmarkArchive(content io.ReadCloser, contentType string, book BookmarkDTO) (*BookmarkDTO, error) GetBookmarkArchiveFile(book *BookmarkDTO, archivePath string) (*ArchiveFile, error) } diff --git a/internal/webserver/handler-api-ext.go b/internal/webserver/handler-api-ext.go index fb809c4b3..cde127815 100644 --- a/internal/webserver/handler-api-ext.go +++ b/internal/webserver/handler-api-ext.go @@ -86,7 +86,7 @@ func (h *Handler) ApiInsertViaExtension(w http.ResponseWriter, r *http.Request, contentType := "text/html; charset=UTF-8" result, errArchiver = h.dependencies.Domains.Archiver.ProcessBookmarkArchive(io.NopCloser(strings.NewReader(request.HTML)), contentType, book) } else { - result, errArchiver = h.dependencies.Domains.Archiver.DownloadBookmarkArchive(book) + result, errArchiver = h.dependencies.Domains.Archiver.GenerateBookmarkArchive(book) } if errArchiver != nil { log.Printf("error downloading bookmark cache: %s", errArchiver) diff --git a/internal/webserver/handler-api.go b/internal/webserver/handler-api.go index 75a63d7b4..5b4b54218 100644 --- a/internal/webserver/handler-api.go +++ b/internal/webserver/handler-api.go @@ -22,7 +22,7 @@ import ( ) func downloadBookmarkContent(deps *dependencies.Dependencies, book *model.BookmarkDTO, keepTitle, keepExcerpt bool) (*model.BookmarkDTO, error) { - result, err := deps.Domains.Archiver.DownloadBookmarkArchive(*book) + result, err := deps.Domains.Archiver.GenerateBookmarkArchive(*book) if err != nil { return nil, fmt.Errorf("error archiving url: %s", err) } From 84559914d6f45eea6e8f52218435b1290bdc6f83 Mon Sep 17 00:00:00 2001 From: "Felipe M." Date: Wed, 26 Jun 2024 20:19:54 +0200 Subject: [PATCH 09/18] swagger --- docs/swagger/docs.go | 6 ++++++ docs/swagger/swagger.json | 6 ++++++ docs/swagger/swagger.yaml | 4 ++++ 3 files changed, 16 insertions(+) diff --git a/docs/swagger/docs.go b/docs/swagger/docs.go index 177e3280d..aef4143de 100644 --- a/docs/swagger/docs.go +++ b/docs/swagger/docs.go @@ -378,6 +378,12 @@ const docTemplate = `{ "model.BookmarkDTO": { "type": "object", "properties": { + "archivePath": { + "type": "string" + }, + "archiver": { + "type": "string" + }, "author": { "type": "string" }, diff --git a/docs/swagger/swagger.json b/docs/swagger/swagger.json index e5d54e957..f7024fd04 100644 --- a/docs/swagger/swagger.json +++ b/docs/swagger/swagger.json @@ -367,6 +367,12 @@ "model.BookmarkDTO": { "type": "object", "properties": { + "archivePath": { + "type": "string" + }, + "archiver": { + "type": "string" + }, "author": { "type": "string" }, diff --git a/docs/swagger/swagger.yaml b/docs/swagger/swagger.yaml index a40a179c3..f23769052 100644 --- a/docs/swagger/swagger.yaml +++ b/docs/swagger/swagger.yaml @@ -82,6 +82,10 @@ definitions: type: object model.BookmarkDTO: properties: + archivePath: + type: string + archiver: + type: string author: type: string create_archive: From e08334b27a3f185c5a240677699ddb68c1372619 Mon Sep 17 00:00:00 2001 From: "Felipe M." Date: Wed, 26 Jun 2024 20:20:01 +0200 Subject: [PATCH 10/18] ebook logic --- internal/core/ebook.go | 19 ++----------------- internal/core/ebook_test.go | 17 +---------------- 2 files changed, 3 insertions(+), 33 deletions(-) diff --git a/internal/core/ebook.go b/internal/core/ebook.go index 5ecbcb916..18f124ac5 100644 --- a/internal/core/ebook.go +++ b/internal/core/ebook.go @@ -1,11 +1,9 @@ package core import ( - "fmt" "os" fp "path/filepath" "strconv" - "strings" epub "github.com/go-shiori/go-epub" "github.com/go-shiori/shiori/internal/dependencies" @@ -29,28 +27,15 @@ func GenerateEbook(deps *dependencies.Dependencies, req model.EbookProcessReques return book, nil } + // Get current state of bookmark cheak archive and thumb strID := strconv.Itoa(book.ID) + bookmarkThumbnailPath := model.GetThumbnailPath(&book) if deps.Domains.Storage.FileExists(bookmarkThumbnailPath) { book.ImageURL = fp.Join("/", "bookmark", strID, "thumb") } - if book.ArchivePath == "" { - return book, errors.New("bookmark doesn't have archive") - } - - archiveFile, err := deps.Domains.Archiver.GetBookmarkArchiveFile(&book, "") - if err != nil { - return book, fmt.Errorf("error getting archive file: %w", err) - } - - // This function create ebook from reader mode of bookmark so - // we can't create ebook from PDF so we return error here if bookmark is a pdf - if strings.Contains(archiveFile.ContentType(), "application/pdf") { - return book, errors.New("can't create ebook for pdf") - } - // Create temporary epub file tmpFile, err := os.CreateTemp("", "ebook") if err != nil { diff --git a/internal/core/ebook_test.go b/internal/core/ebook_test.go index 989407dd1..128b0fcc8 100644 --- a/internal/core/ebook_test.go +++ b/internal/core/ebook_test.go @@ -66,7 +66,7 @@ func TestGenerateEbook(t *testing.T) { assert.True(t, bookmark.HasEbook) assert.Equalf(t, expectedImagePath, bookmark.ImageURL, "Expected imageURL %s, but got %s", expectedImagePath, bookmark.ImageURL) }) - t.Run("generate ebook valid BookmarkID EbookExist ReturnHasArchiveTrue", func(t *testing.T) { + t.Run("generate ebook valid BookmarkID EbookExist", func(t *testing.T) { tmpDir := t.TempDir() deps.Domains.Storage = domains.NewStorageDomain(deps, afero.NewBasePathFs(afero.NewOsFs(), tmpDir)) @@ -89,7 +89,6 @@ func TestGenerateEbook(t *testing.T) { defer file.Close() bookmark, err = core.GenerateEbook(deps, mockRequest) - assert.True(t, bookmark.HasArchive) assert.NoError(t, err) }) }) @@ -137,19 +136,5 @@ func TestGenerateEbook(t *testing.T) { assert.True(t, bookmark.HasEbook) assert.NoError(t, err) }) - t.Run("generate ebook valid BookmarkID RetuenError for PDF file", func(t *testing.T) { - mockRequest := model.EbookProcessRequest{ - Bookmark: model.BookmarkDTO{ - ID: 1, - HasEbook: false, - }, - } - - bookmark, err := core.GenerateEbook(deps, mockRequest) - - assert.False(t, bookmark.HasEbook) - assert.Error(t, err) - assert.EqualError(t, err, "can't create ebook for pdf") - }) }) } From add4ffe578df33ea058a59f79867e12307ab22d4 Mon Sep 17 00:00:00 2001 From: "Felipe M." Date: Wed, 26 Jun 2024 20:26:20 +0200 Subject: [PATCH 11/18] fixed valid bookmark for tests --- internal/testutil/shiori.go | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/internal/testutil/shiori.go b/internal/testutil/shiori.go index 43ceb24d9..15cd3d8c2 100644 --- a/internal/testutil/shiori.go +++ b/internal/testutil/shiori.go @@ -50,7 +50,8 @@ func GetTestConfigurationAndDependencies(t *testing.T, ctx context.Context, logg func GetValidBookmark() *model.BookmarkDTO { uuidV4, _ := uuid.NewV4() return &model.BookmarkDTO{ - URL: "https://github.com/go-shiori/shiori#" + uuidV4.String(), - Title: "Shiori repository", + URL: "https://github.com/go-shiori/shiori#" + uuidV4.String(), + Title: "Shiori repository", + Archiver: model.ArchiverWARC, } } From 8c1ed876fe29cf79f18d08f1ba48ca81af752554 Mon Sep 17 00:00:00 2001 From: "Felipe M." Date: Wed, 26 Jun 2024 21:35:06 +0200 Subject: [PATCH 12/18] merge changes --- ...r.up.sql => 0003_bookmark_archiver.up.sql} | 0 .../sqlite/0004_bookmark_archiver.up.sql | 24 ------------------- .../sqlite/0005_bookmark_archiver.up.sql | 2 ++ internal/database/mysql.go | 4 ++-- internal/database/pg.go | 4 ++-- internal/database/sqlite.go | 4 ++-- internal/domains/archiver.go | 17 ++++++++++--- internal/webserver/handler-api.go | 11 +++------ 8 files changed, 25 insertions(+), 41 deletions(-) rename internal/database/migrations/postgres/{0002_bookmark_archiver.up.sql => 0003_bookmark_archiver.up.sql} (100%) delete mode 100644 internal/database/migrations/sqlite/0004_bookmark_archiver.up.sql create mode 100644 internal/database/migrations/sqlite/0005_bookmark_archiver.up.sql diff --git a/internal/database/migrations/postgres/0002_bookmark_archiver.up.sql b/internal/database/migrations/postgres/0003_bookmark_archiver.up.sql similarity index 100% rename from internal/database/migrations/postgres/0002_bookmark_archiver.up.sql rename to internal/database/migrations/postgres/0003_bookmark_archiver.up.sql diff --git a/internal/database/migrations/sqlite/0004_bookmark_archiver.up.sql b/internal/database/migrations/sqlite/0004_bookmark_archiver.up.sql deleted file mode 100644 index bf7297733..000000000 --- a/internal/database/migrations/sqlite/0004_bookmark_archiver.up.sql +++ /dev/null @@ -1,24 +0,0 @@ --- Create a temporary table -CREATE TABLE IF NOT EXISTS bookmark_temp( - id INTEGER PRIMARY KEY AUTOINCREMENT, - url TEXT NOT NULL, - title TEXT NOT NULL, - excerpt TEXT NOT NULL DEFAULT "", - author TEXT NOT NULL DEFAULT "", - public INTEGER NOT NULL DEFAULT 0, - modified TEXT NOT NULL DEFAULT CURRENT_TIMESTAMP, - has_content BOOLEAN DEFAULT FALSE NOT NULL, - archiver TEXT NOT NULL DEFAULT "", - archive_path TEXT NOT NULL DEFAULT "", - CONSTRAINT bookmark_url_UNIQUE UNIQUE(url) -); - --- Copy data from the original table to the temporary table -INSERT INTO bookmark_temp (id, url, title, excerpt, author, public, modified, has_content, archiver, archive_path) -SELECT id, url, title, excerpt, author, public, modified, has_content, "warc", "" FROM bookmark; - --- Drop the original table -DROP TABLE bookmark; - --- Rename the temporary table to the original table name -ALTER TABLE bookmark_temp RENAME TO bookmark; diff --git a/internal/database/migrations/sqlite/0005_bookmark_archiver.up.sql b/internal/database/migrations/sqlite/0005_bookmark_archiver.up.sql new file mode 100644 index 000000000..b0ac92825 --- /dev/null +++ b/internal/database/migrations/sqlite/0005_bookmark_archiver.up.sql @@ -0,0 +1,2 @@ +ALTER TABLE bookmark ADD COLUMN archiver TEXT NOT NULL DEFAULT ''; +ALTER TABLE bookmark ADD COLUMN archive_path TEXT NOT NULL DEFAULT ''; diff --git a/internal/database/mysql.go b/internal/database/mysql.go index a183b5707..97eb836d3 100644 --- a/internal/database/mysql.go +++ b/internal/database/mysql.go @@ -176,7 +176,7 @@ func (db *MySQLDatabase) SaveBookmarks(ctx context.Context, create bool, bookmar public = ?, content = ?, html = ?, - modified_at = ? + modified_at = ?, archiver = ?, archive_path = ? WHERE id = ?`) @@ -603,7 +603,7 @@ func (db *MySQLDatabase) GetBookmark(ctx context.Context, id int, url string) (m args := []interface{}{id} query := `SELECT id, url, title, excerpt, author, public, - content, html, modified_at, created_at, content <> '' has_content + content, html, modified_at, created_at, content <> '' has_content, archiver, archive_path FROM bookmark WHERE id = ?` diff --git a/internal/database/pg.go b/internal/database/pg.go index 2586d676c..87cfbb09a 100644 --- a/internal/database/pg.go +++ b/internal/database/pg.go @@ -145,7 +145,7 @@ func (db *PGDatabase) SaveBookmarks(ctx context.Context, create bool, bookmarks public = $5, content = $6, html = $7, - modified_at = $8 + modified_at = $8, archiver = $9, archive_path = $10 WHERE id = $11`) @@ -581,7 +581,7 @@ func (db *PGDatabase) GetBookmark(ctx context.Context, id int, url string) (mode args := []interface{}{id} query := `SELECT id, url, title, excerpt, author, public, - content, html, modified_at, created_at, content <> '' has_content + content, html, modified_at, created_at, content <> '' has_content, archiver, archive_path FROM bookmark WHERE id = $1` diff --git a/internal/database/sqlite.go b/internal/database/sqlite.go index a3c425192..4ac01d14a 100644 --- a/internal/database/sqlite.go +++ b/internal/database/sqlite.go @@ -137,7 +137,7 @@ func (db *SQLiteDatabase) SaveBookmarks(ctx context.Context, create bool, bookma stmtUpdateBook, err := tx.PreparexContext(ctx, `UPDATE bookmark SET url = ?, title = ?, excerpt = ?, author = ?, - public = ?, modified_at = ?, has_content = ? + public = ?, modified_at = ?, has_content = ?, archiver = ?, archive_path = ? WHERE id = ?`) if err != nil { @@ -304,7 +304,7 @@ func (db *SQLiteDatabase) GetBookmarks(ctx context.Context, opts GetBookmarksOpt b.public, b.created_at, b.modified_at, - b.has_content + b.has_content, b.archiver, b.archive_path FROM bookmark b diff --git a/internal/domains/archiver.go b/internal/domains/archiver.go index 15bb88845..39797c3a1 100644 --- a/internal/domains/archiver.go +++ b/internal/domains/archiver.go @@ -1,6 +1,7 @@ package domains import ( + "context" "fmt" "io" @@ -21,7 +22,17 @@ func (d *ArchiverDomain) GenerateBookmarkArchive(book model.BookmarkDTO) (*model return nil, fmt.Errorf("error downloading url: %s", err) } - return d.ProcessBookmarkArchive(content, contentType, book) + processedBookmark, err := d.ProcessBookmarkArchive(content, contentType, book) + if err != nil { + return nil, fmt.Errorf("error processing bookmark archive: %w", err) + } + + saved, err := d.deps.Database.SaveBookmarks(context.Background(), false, *processedBookmark) + if err != nil { + return nil, fmt.Errorf("error saving bookmark: %w", err) + } + + return &saved[0], nil } func (d *ArchiverDomain) GenerateBookmarkEbook(request model.EbookProcessRequest) error { @@ -36,12 +47,12 @@ func (d *ArchiverDomain) GenerateBookmarkEbook(request model.EbookProcessRequest func (d *ArchiverDomain) ProcessBookmarkArchive(content io.ReadCloser, contentType string, book model.BookmarkDTO) (*model.BookmarkDTO, error) { for _, archiver := range d.archivers { if archiver.Matches(contentType) { - _, err := archiver.Archive(content, contentType, book) + book, err := archiver.Archive(content, contentType, book) if err != nil { d.deps.Log.Errorf("Error archiving bookmark with archviver: %s", err) continue } - return &book, nil + return book, nil } } diff --git a/internal/webserver/handler-api.go b/internal/webserver/handler-api.go index 3722b62d3..33d239c47 100644 --- a/internal/webserver/handler-api.go +++ b/internal/webserver/handler-api.go @@ -1,7 +1,6 @@ package webserver import ( - "context" "encoding/json" "fmt" "log" @@ -232,14 +231,12 @@ func (h *Handler) ApiInsertBookmark(w http.ResponseWriter, r *http.Request, ps h if payload.Async { go func() { - bookmark, err := downloadBookmarkContent(h.dependencies, book, userHasDefinedTitle, book.Excerpt != "") + book, err = downloadBookmarkContent(h.dependencies, book, userHasDefinedTitle, book.Excerpt != "") if err != nil { log.Printf("error downloading boorkmark: %s", err) return } - if _, err := h.DB.SaveBookmarks(context.Background(), false, *bookmark); err != nil { - log.Printf("failed to save bookmark: %s", err) - } + }() } else { // Workaround. Download content after saving the bookmark so we have the proper database @@ -247,14 +244,12 @@ func (h *Handler) ApiInsertBookmark(w http.ResponseWriter, r *http.Request, ps h book, err = downloadBookmarkContent(h.dependencies, book, userHasDefinedTitle, book.Excerpt != "") if err != nil { log.Printf("error downloading boorkmark: %s", err) - } else if _, err := h.DB.SaveBookmarks(ctx, false, *book); err != nil { - log.Printf("failed to save bookmark: %s", err) } } // Return the new bookmark w.Header().Set("Content-Type", "application/json") - err = json.NewEncoder(w).Encode(results[0]) + err = json.NewEncoder(w).Encode(book) checkError(err) } From ed36552c82158d58f0ed2306626c7a91770c38af Mon Sep 17 00:00:00 2001 From: "Felipe M." Date: Sat, 12 Oct 2024 17:39:08 +0200 Subject: [PATCH 13/18] missing mysql migration --- .../database/migrations/mysql/0011_bookmark_archiver.up.sql | 3 +++ internal/database/mysql.go | 1 + 2 files changed, 4 insertions(+) create mode 100644 internal/database/migrations/mysql/0011_bookmark_archiver.up.sql diff --git a/internal/database/migrations/mysql/0011_bookmark_archiver.up.sql b/internal/database/migrations/mysql/0011_bookmark_archiver.up.sql new file mode 100644 index 000000000..3c23d72a8 --- /dev/null +++ b/internal/database/migrations/mysql/0011_bookmark_archiver.up.sql @@ -0,0 +1,3 @@ +ALTER TABLE bookmark + ADD COLUMN archiver TEXT NOT NULL DEFAULT '', + ADD COLUMN archive_path TEXT NOT NULL DEFAULT ''; diff --git a/internal/database/mysql.go b/internal/database/mysql.go index 97eb836d3..c486e198a 100644 --- a/internal/database/mysql.go +++ b/internal/database/mysql.go @@ -92,6 +92,7 @@ var mysqlMigrations = []migration{ return nil }), + newFileMigration("0.9.0", "0.9.1", "mysql/0011_bookmark_archiver"), } // MySQLDatabase is implementation of Database interface From a494f9bed31d0c662e21654cca6dcd6c96b1c197 Mon Sep 17 00:00:00 2001 From: "Felipe M." Date: Sat, 12 Oct 2024 17:39:27 +0200 Subject: [PATCH 14/18] new data structure to pass data along --- internal/archiver/pdf.go | 15 +++++++------- internal/archiver/warc.go | 14 ++++++------- internal/domains/archiver.go | 18 +++++++++++----- internal/model/archiver.go | 30 +++++++++++++++++++-------- internal/model/domains.go | 2 +- internal/webserver/handler-api-ext.go | 6 ++---- 6 files changed, 52 insertions(+), 33 deletions(-) diff --git a/internal/archiver/pdf.go b/internal/archiver/pdf.go index b720199c6..78cebc61c 100644 --- a/internal/archiver/pdf.go +++ b/internal/archiver/pdf.go @@ -2,7 +2,6 @@ package archiver import ( "fmt" - "io" "strings" "github.com/go-shiori/shiori/internal/dependencies" @@ -13,20 +12,22 @@ type PDFArchiver struct { deps *dependencies.Dependencies } -func (a *PDFArchiver) Matches(contentType string) bool { - return strings.Contains(contentType, "application/pdf") +func (a *PDFArchiver) Matches(archiverReq *model.ArchiverRequest) bool { + return strings.Contains(archiverReq.ContentType, "application/pdf") } -func (a *PDFArchiver) Archive(content io.ReadCloser, contentType string, bookmark model.BookmarkDTO) (*model.BookmarkDTO, error) { - if err := a.deps.Domains.Storage.WriteReader(model.GetArchivePath(&bookmark), content); err != nil { +func (a *PDFArchiver) Archive(archiverReq *model.ArchiverRequest) (*model.BookmarkDTO, error) { + bookmark := &archiverReq.Bookmark + + if err := a.deps.Domains.Storage.WriteData(model.GetArchivePath(bookmark), archiverReq.Content); err != nil { return nil, fmt.Errorf("error saving pdf archive: %v", err) } - bookmark.ArchivePath = model.GetArchivePath(&bookmark) + bookmark.ArchivePath = model.GetArchivePath(bookmark) bookmark.HasArchive = true bookmark.Archiver = model.ArchiverPDF - return &bookmark, nil + return bookmark, nil } func (a *PDFArchiver) GetArchiveFile(bookmark model.BookmarkDTO, resourcePath string) (*model.ArchiveFile, error) { diff --git a/internal/archiver/warc.go b/internal/archiver/warc.go index bdfaa3483..431995e54 100644 --- a/internal/archiver/warc.go +++ b/internal/archiver/warc.go @@ -1,8 +1,8 @@ package archiver import ( + "bytes" "fmt" - "io" "path/filepath" "strings" @@ -20,20 +20,20 @@ type WARCArchiver struct { deps *dependencies.Dependencies } -func (a *WARCArchiver) Matches(contentType string) bool { +func (a *WARCArchiver) Matches(archiverReq *model.ArchiverRequest) bool { + // TODO: set to true for now as catch-all but we will remove this archiver soon return true } -func (a *WARCArchiver) Archive(content io.ReadCloser, contentType string, bookmark model.BookmarkDTO) (*model.BookmarkDTO, error) { +func (a *WARCArchiver) Archive(archiverReq *model.ArchiverRequest) (*model.BookmarkDTO, error) { processRequest := core.ProcessRequest{ DataDir: a.deps.Config.Storage.DataDir, - Bookmark: bookmark, - Content: content, - ContentType: contentType, + Bookmark: archiverReq.Bookmark, + Content: bytes.NewReader(archiverReq.Content), + ContentType: archiverReq.ContentType, } result, isFatalErr, err := core.ProcessBookmark(a.deps, processRequest) - content.Close() if err != nil && isFatalErr { return nil, fmt.Errorf("failed to process: %v", err) diff --git a/internal/domains/archiver.go b/internal/domains/archiver.go index 39797c3a1..69206f4f3 100644 --- a/internal/domains/archiver.go +++ b/internal/domains/archiver.go @@ -22,7 +22,15 @@ func (d *ArchiverDomain) GenerateBookmarkArchive(book model.BookmarkDTO) (*model return nil, fmt.Errorf("error downloading url: %s", err) } - processedBookmark, err := d.ProcessBookmarkArchive(content, contentType, book) + contentBytes, err := io.ReadAll(content) + if err != nil { + return nil, fmt.Errorf("error reading content: %s", err) + } + content.Close() + + archiverReq := model.NewArchiverRequest(book, contentType, contentBytes) + + processedBookmark, err := d.ProcessBookmarkArchive(archiverReq) if err != nil { return nil, fmt.Errorf("error processing bookmark archive: %w", err) } @@ -44,10 +52,10 @@ func (d *ArchiverDomain) GenerateBookmarkEbook(request model.EbookProcessRequest return nil } -func (d *ArchiverDomain) ProcessBookmarkArchive(content io.ReadCloser, contentType string, book model.BookmarkDTO) (*model.BookmarkDTO, error) { +func (d *ArchiverDomain) ProcessBookmarkArchive(archiverRequest *model.ArchiverRequest) (*model.BookmarkDTO, error) { for _, archiver := range d.archivers { - if archiver.Matches(contentType) { - book, err := archiver.Archive(content, contentType, book) + if archiver.Matches(archiverRequest) { + book, err := archiver.Archive(archiverRequest) if err != nil { d.deps.Log.Errorf("Error archiving bookmark with archviver: %s", err) continue @@ -56,7 +64,7 @@ func (d *ArchiverDomain) ProcessBookmarkArchive(content io.ReadCloser, contentTy } } - return nil, fmt.Errorf("no archiver found for content type: %s", contentType) + return nil, fmt.Errorf("no archiver found for request: %s", archiverRequest.String()) } func (d *ArchiverDomain) GetBookmarkArchiveFile(book *model.BookmarkDTO, resourcePath string) (*model.ArchiveFile, error) { diff --git a/internal/model/archiver.go b/internal/model/archiver.go index 2b1e6b8ac..0f068b101 100644 --- a/internal/model/archiver.go +++ b/internal/model/archiver.go @@ -1,6 +1,7 @@ package model import ( + "fmt" "io" "strconv" ) @@ -10,6 +11,24 @@ const ( ArchiverWARC = "warc" ) +type ArchiverRequest struct { + Bookmark BookmarkDTO + Content []byte + ContentType string +} + +func (a *ArchiverRequest) String() string { + return fmt.Sprintf("ArchiverRequest{ContentType: %s}", a.ContentType) +} + +func NewArchiverRequest(bookmark BookmarkDTO, contentType string, content []byte) *ArchiverRequest { + return &ArchiverRequest{ + Bookmark: bookmark, + Content: content, + ContentType: contentType, + } +} + type ArchiveFile struct { reader io.Reader contentType string @@ -58,20 +77,13 @@ func NewArchiveFile(reader io.Reader, contentType, encoding string, size int64) } } -type ArchiveProcessRequest struct { - Bookmark BookmarkDTO - Content io.Reader - ContentType string - SkipExisting bool -} - type EbookProcessRequest struct { Bookmark BookmarkDTO SkipExisting bool } type Archiver interface { - Archive(content io.ReadCloser, contentType string, bookmark BookmarkDTO) (*BookmarkDTO, error) - Matches(contentType string) bool + Archive(*ArchiverRequest) (*BookmarkDTO, error) + Matches(*ArchiverRequest) bool GetArchiveFile(bookmark BookmarkDTO, resourcePath string) (*ArchiveFile, error) } diff --git a/internal/model/domains.go b/internal/model/domains.go index a819db51d..aae287a9e 100644 --- a/internal/model/domains.go +++ b/internal/model/domains.go @@ -26,7 +26,7 @@ type AccountsDomain interface { type ArchiverDomain interface { GenerateBookmarkArchive(book BookmarkDTO) (*BookmarkDTO, error) GenerateBookmarkEbook(book EbookProcessRequest) error - ProcessBookmarkArchive(content io.ReadCloser, contentType string, book BookmarkDTO) (*BookmarkDTO, error) + ProcessBookmarkArchive(*ArchiverRequest) (*BookmarkDTO, error) GetBookmarkArchiveFile(book *BookmarkDTO, archivePath string) (*ArchiveFile, error) } diff --git a/internal/webserver/handler-api-ext.go b/internal/webserver/handler-api-ext.go index cde127815..f062ab95c 100644 --- a/internal/webserver/handler-api-ext.go +++ b/internal/webserver/handler-api-ext.go @@ -3,13 +3,11 @@ package webserver import ( "encoding/json" "fmt" - "io" "log" "net/http" "os" fp "path/filepath" "strconv" - "strings" "github.com/go-shiori/shiori/internal/core" "github.com/go-shiori/shiori/internal/model" @@ -83,8 +81,8 @@ func (h *Handler) ApiInsertViaExtension(w http.ResponseWriter, r *http.Request, var result *model.BookmarkDTO var errArchiver error if request.HTML != "" { - contentType := "text/html; charset=UTF-8" - result, errArchiver = h.dependencies.Domains.Archiver.ProcessBookmarkArchive(io.NopCloser(strings.NewReader(request.HTML)), contentType, book) + archiverReq := model.NewArchiverRequest(book, "text/html; charset=UTF-8", []byte(request.HTML)) + result, errArchiver = h.dependencies.Domains.Archiver.ProcessBookmarkArchive(archiverReq) } else { result, errArchiver = h.dependencies.Domains.Archiver.GenerateBookmarkArchive(book) } From f26ca1ca4704a79293a5296f528f6349ff0d460a Mon Sep 17 00:00:00 2001 From: "Felipe M." Date: Sat, 12 Oct 2024 17:39:38 +0200 Subject: [PATCH 15/18] simplified migration --- .../migrations/postgres/0003_bookmark_archiver.up.sql | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/internal/database/migrations/postgres/0003_bookmark_archiver.up.sql b/internal/database/migrations/postgres/0003_bookmark_archiver.up.sql index b0ac92825..3c23d72a8 100644 --- a/internal/database/migrations/postgres/0003_bookmark_archiver.up.sql +++ b/internal/database/migrations/postgres/0003_bookmark_archiver.up.sql @@ -1,2 +1,3 @@ -ALTER TABLE bookmark ADD COLUMN archiver TEXT NOT NULL DEFAULT ''; -ALTER TABLE bookmark ADD COLUMN archive_path TEXT NOT NULL DEFAULT ''; +ALTER TABLE bookmark + ADD COLUMN archiver TEXT NOT NULL DEFAULT '', + ADD COLUMN archive_path TEXT NOT NULL DEFAULT ''; From 9f5c8abda78af32b4fc687b0b2b74630e02aedea Mon Sep 17 00:00:00 2001 From: "Felipe M." Date: Sat, 12 Oct 2024 17:41:52 +0200 Subject: [PATCH 16/18] typo --- internal/database/mysql_test.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/internal/database/mysql_test.go b/internal/database/mysql_test.go index 5ee4e3587..d363e06e5 100644 --- a/internal/database/mysql_test.go +++ b/internal/database/mysql_test.go @@ -56,6 +56,6 @@ func mysqlTestDatabaseFactory(_ *testing.T, ctx context.Context) (DB, error) { return db, err } -func TestMysqlsDatabase(t *testing.T) { +func TestMySQLDatabase(t *testing.T) { testDatabase(t, mysqlTestDatabaseFactory) } From 71cf73e4aaabd9fa427308f17085fee169867baa Mon Sep 17 00:00:00 2001 From: "Felipe M." Date: Sat, 12 Oct 2024 18:16:33 +0200 Subject: [PATCH 17/18] fixed mysql migration --- internal/database/mysql.go | 27 +-------------------------- internal/database/mysql_test.go | 3 ++- 2 files changed, 3 insertions(+), 27 deletions(-) diff --git a/internal/database/mysql.go b/internal/database/mysql.go index c486e198a..d61f2cdc7 100644 --- a/internal/database/mysql.go +++ b/internal/database/mysql.go @@ -67,32 +67,7 @@ var mysqlMigrations = []migration{ newFileMigration("0.8.2", "0.8.3", "mysql/0008_set_modified_at_equal_created_at"), newFileMigration("0.8.3", "0.8.4", "mysql/0009_index_for_created_at"), newFileMigration("0.8.4", "0.8.5", "mysql/0010_index_for_modified_at"), - // Adds archiver and archive_path columns to bookmark table - newFuncMigration("0.8.4", "0.9.0", func(db *sql.DB) error { - tx, err := db.Begin() - if err != nil { - return fmt.Errorf("failed to start transaction: %w", err) - } - - defer tx.Rollback() - - _, err = tx.Exec(`ALTER TABLE bookmark ADD COLUMN archiver TEXT NOT NULL DEFAULT ''`) - if err != nil { - return fmt.Errorf("failed to add archiver column to bookmark_tag table: %w", err) - } - - _, err = tx.Exec(`ALTER TABLE bookmark ADD COLUMN archive_path TEXT NOT NULL DEFAULT ''`) - if err != nil { - return fmt.Errorf("failed to add archiver column to bookmark_tag table: %w", err) - } - - if err := tx.Commit(); err != nil { - return fmt.Errorf("failed to commit transaction: %w", err) - } - - return nil - }), - newFileMigration("0.9.0", "0.9.1", "mysql/0011_bookmark_archiver"), + newFileMigration("0.8.5", "0.9.0", "mysql/0011_bookmark_archiver"), } // MySQLDatabase is implementation of Database interface diff --git a/internal/database/mysql_test.go b/internal/database/mysql_test.go index d363e06e5..ded0a7951 100644 --- a/internal/database/mysql_test.go +++ b/internal/database/mysql_test.go @@ -39,6 +39,7 @@ func mysqlTestDatabaseFactory(_ *testing.T, ctx context.Context) (DB, error) { } _, err = tx.ExecContext(ctx, "CREATE DATABASE "+dbname) + return err }) if err != nil { @@ -53,7 +54,7 @@ func mysqlTestDatabaseFactory(_ *testing.T, ctx context.Context) (DB, error) { return nil, err } - return db, err + return db, nil } func TestMySQLDatabase(t *testing.T) { From 041526f8cb70194dc249a113330fb8d069271834 Mon Sep 17 00:00:00 2001 From: "Felipe M." Date: Sun, 13 Oct 2024 12:24:01 +0200 Subject: [PATCH 18/18] migrate archiver/archive_path columns --- go.mod | 2 + go.sum | 6 + internal/database/migrations.go | 6 +- .../migrations/0001_migrate_archiver.go | 141 ++++++++++++++++++ internal/database/mysql.go | 11 +- internal/database/pg.go | 11 +- internal/database/sqlite.go | 8 + internal/database/sqlite_noncgo.go | 3 +- internal/database/sqlite_openbsd.go | 3 +- internal/domains/bookmarks_test.go | 16 +- 10 files changed, 184 insertions(+), 23 deletions(-) create mode 100644 internal/database/migrations/0001_migrate_archiver.go diff --git a/go.mod b/go.mod index 6127ef5c8..8c4840a5a 100644 --- a/go.mod +++ b/go.mod @@ -18,6 +18,7 @@ require ( github.com/go-sql-driver/mysql v1.8.1 github.com/gofrs/uuid/v5 v5.2.0 github.com/golang-jwt/jwt/v5 v5.2.1 + github.com/huandu/go-sqlbuilder v1.30.1 github.com/jmoiron/sqlx v1.4.0 github.com/julienschmidt/httprouter v1.3.0 github.com/lib/pq v1.10.9 @@ -86,6 +87,7 @@ require ( github.com/golang/protobuf v1.5.4 // indirect github.com/google/uuid v1.6.0 // indirect github.com/hashicorp/golang-lru/v2 v2.0.7 // indirect + github.com/huandu/xstrings v1.4.0 // indirect github.com/inconshreveable/mousetrap v1.1.0 // indirect github.com/josharian/intern v1.0.0 // indirect github.com/json-iterator/go v1.1.12 // indirect diff --git a/go.sum b/go.sum index 94caf4f6b..c63f76162 100644 --- a/go.sum +++ b/go.sum @@ -129,6 +129,12 @@ github.com/grpc-ecosystem/grpc-gateway/v2 v2.16.0 h1:YBftPWNWd4WwGqtY2yeZL2ef8rH github.com/grpc-ecosystem/grpc-gateway/v2 v2.16.0/go.mod h1:YN5jB8ie0yfIUg6VvR9Kz84aCaG7AsGZnLjhHbUqwPg= github.com/hashicorp/golang-lru/v2 v2.0.7 h1:a+bsQ5rvGLjzHuww6tVxozPZFVghXaHOwFs4luLUK2k= github.com/hashicorp/golang-lru/v2 v2.0.7/go.mod h1:QeFd9opnmA6QUJc5vARoKUSoFhyfM2/ZepoAG6RGpeM= +github.com/huandu/go-assert v1.1.6 h1:oaAfYxq9KNDi9qswn/6aE0EydfxSa+tWZC1KabNitYs= +github.com/huandu/go-assert v1.1.6/go.mod h1:JuIfbmYG9ykwvuxoJ3V8TB5QP+3+ajIA54Y44TmkMxs= +github.com/huandu/go-sqlbuilder v1.30.1 h1:rsneJuMBZcGpxK6YQcVtKclhFT0wbM2gmOqlTXaQc2w= +github.com/huandu/go-sqlbuilder v1.30.1/go.mod h1:mS0GAtrtW+XL6nM2/gXHRJax2RwSW1TraavWDFAc1JA= +github.com/huandu/xstrings v1.4.0 h1:D17IlohoQq4UcpqD7fDk80P7l+lwAmlFaBHgOipl2FU= +github.com/huandu/xstrings v1.4.0/go.mod h1:y5/lhBue+AyNmUVz9RLU9xbLR0o4KIIExikq4ovT0aE= github.com/inconshreveable/mousetrap v1.1.0 h1:wN+x4NVGpMsO7ErUn/mUI3vEoE6Jt13X2s0bqwp9tc8= github.com/inconshreveable/mousetrap v1.1.0/go.mod h1:vpF70FUmC8bwa3OWnCshd2FqLfsEA9PFc4w1p2J65bw= github.com/jmoiron/sqlx v1.4.0 h1:1PLqN7S1UYp5t4SrVVnt4nUVNemrDAtxlulVe+Qgm3o= diff --git a/internal/database/migrations.go b/internal/database/migrations.go index dc4208491..c509d2d84 100644 --- a/internal/database/migrations.go +++ b/internal/database/migrations.go @@ -13,10 +13,12 @@ import ( //go:embed migrations/* var migrationFiles embed.FS +type migrationFunc func(db *sql.DB) error + type migration struct { fromVersion semver.Version toVersion semver.Version - migrationFunc func(db *sql.DB) error + migrationFunc migrationFunc } // txFunc is a function that runs in a transaction. @@ -42,7 +44,7 @@ func runInTransaction(db *sql.DB, fn txFn) error { } // newFuncMigration creates a new migration from a function. -func newFuncMigration(fromVersion, toVersion string, migrationFunc func(db *sql.DB) error) migration { +func newFuncMigration(fromVersion, toVersion string, migrationFunc migrationFunc) migration { return migration{ fromVersion: semver.MustParse(fromVersion), toVersion: semver.MustParse(toVersion), diff --git a/internal/database/migrations/0001_migrate_archiver.go b/internal/database/migrations/0001_migrate_archiver.go new file mode 100644 index 000000000..40b500d49 --- /dev/null +++ b/internal/database/migrations/0001_migrate_archiver.go @@ -0,0 +1,141 @@ +package migrations + +import ( + "context" + "database/sql" + "fmt" + "os" + "path/filepath" + "slices" + + "github.com/huandu/go-sqlbuilder" + "github.com/jmoiron/sqlx" + gap "github.com/muesli/go-app-paths" +) + +// getPortableModeEnabled_171 checks if portable mode is enabled by naively checking the +// os.Args for the --portable flag. This is a workaround to use in this migration with the +// current state of the code as of 1.7.1. +func getPortableModeEnabled_171() bool { + return slices.Contains(os.Args, "--portable") +} + +// getStorageDirectory_170 returns the directory where shiori data is stored +// for the 1.7.1 version of shiori. +// This function is just a copy of the original as of 1.7.1. +func getStorageDirectory_171(portableMode bool) (string, error) { + // If in portable mode, uses directory of executable + if portableMode { + exePath, err := os.Executable() + if err != nil { + return "", err + } + + exeDir := filepath.Dir(exePath) + return filepath.Join(exeDir, "shiori-data"), nil + } + + // Try to use platform specific app path + userScope := gap.NewScope(gap.User, "shiori") + dataDir, err := userScope.DataPath("") + if err == nil { + return dataDir, nil + } + + return "", fmt.Errorf("couldn't determine the data directory") +} + +// getDataDir_171 returns the directory where shiori data is stored using the logic flow +// of the 1.7.1 version of shiori. +func getDataDir_171() (string, error) { + dataDir := os.Getenv("SHIORI_DIR") + if dataDir == "" { + var err error + dataDir, err = getStorageDirectory_171(getPortableModeEnabled_171()) + if err != nil { + return "", fmt.Errorf("failed to get data directory: %w", err) + } + } + return dataDir, nil +} + +// MigrateArchiver adds new columns for the archiver and archiver_path +// This migration manually checks that the existing bookmarks have a file in the default archive path: +// SHIORI_DIR/archives/ID +// If the file exists, it will update the archiver=warc (the only one at this point) and archiver_path=path +// This migration is driver agnostic. +func MigrateArchiverMigration(sqlDB *sql.DB, driver string) error { + var flavor sqlbuilder.Flavor + switch driver { + case "mysql": + flavor = sqlbuilder.MySQL + case "postgres": + flavor = sqlbuilder.PostgreSQL + case "sqlite": + flavor = sqlbuilder.SQLite + default: + return fmt.Errorf("unsupported driver: %s", driver) + } + + ctx := context.Background() + sqlX := sqlx.NewDb(sqlDB, driver) + + tx, err := sqlX.Begin() + if err != nil { + return fmt.Errorf("failed to start transaction: %w", err) + } + defer tx.Rollback() + + perPage := 50 + page := 1 + + for { + var bookmarkIDs []int + sb := sqlbuilder.NewSelectBuilder() + sb.SetFlavor(flavor) + sb.Select("id") + sb.From("bookmark") + sb.OrderBy("id ASC") + sb.Where(sb.Equal("archiver", "")) + sb.Limit(perPage) + sb.Offset((page - 1) * perPage) + + sqlQuery, args := sb.Build() + if err := sqlX.Select(&bookmarkIDs, sqlQuery, args...); err != nil { + return fmt.Errorf("failed to get bookmarks: %w", err) + } + + if len(bookmarkIDs) == 0 { + break + } + + dataDir, err := getDataDir_171() + if err != nil { + return fmt.Errorf("failed to get data directory: %w", err) + } + + for _, bookID := range bookmarkIDs { + archivePath := filepath.Join(dataDir, "archive", fmt.Sprintf("%d", bookID)) + + // If the file exists, we assume it's a WARC file and update the row + if _, err := os.Stat(archivePath); err == nil { + sb := sqlbuilder.NewUpdateBuilder() + sb.Update("bookmark") + sb.Set( + sb.Assign("archiver", "warc"), + sb.Assign("archive_path", archivePath), + ) + sb.Where(sb.Equal("id", bookID)) + + sqlQuery, args := sb.Build() + if _, err := tx.ExecContext(ctx, sqlQuery, args...); err != nil { + return fmt.Errorf("failed to update bookmark %d: %w", bookID, err) + } + } + } + + page++ + } + + return tx.Commit() +} diff --git a/internal/database/mysql.go b/internal/database/mysql.go index d61f2cdc7..29c8331db 100644 --- a/internal/database/mysql.go +++ b/internal/database/mysql.go @@ -7,6 +7,7 @@ import ( "strings" "time" + "github.com/go-shiori/shiori/internal/database/migrations" "github.com/go-shiori/shiori/internal/model" "github.com/jmoiron/sqlx" "github.com/pkg/errors" @@ -68,6 +69,9 @@ var mysqlMigrations = []migration{ newFileMigration("0.8.3", "0.8.4", "mysql/0009_index_for_created_at"), newFileMigration("0.8.4", "0.8.5", "mysql/0010_index_for_modified_at"), newFileMigration("0.8.5", "0.9.0", "mysql/0011_bookmark_archiver"), + newFuncMigration("0.9.0", "0.9.1", func(db *sql.DB) error { + return migrations.MigrateArchiverMigration(db, "mysql") + }), } // MySQLDatabase is implementation of Database interface @@ -76,6 +80,10 @@ type MySQLDatabase struct { dbbase } +func mysqlDatabaseFromDB(db *sqlx.DB) *MySQLDatabase { + return &MySQLDatabase{dbbase: dbbase{db}} +} + // OpenMySQLDatabase creates and opens connection to a MySQL Database. func OpenMySQLDatabase(ctx context.Context, connString string) (mysqlDB *MySQLDatabase, err error) { // Open database and start transaction @@ -87,8 +95,7 @@ func OpenMySQLDatabase(ctx context.Context, connString string) (mysqlDB *MySQLDa db.SetMaxOpenConns(100) db.SetConnMaxLifetime(time.Second) // in case mysql client has longer timeout (driver issue #674) - mysqlDB = &MySQLDatabase{dbbase: dbbase{db}} - return mysqlDB, err + return mysqlDatabaseFromDB(db), err } // DBX returns the underlying sqlx.DB object diff --git a/internal/database/pg.go b/internal/database/pg.go index 87cfbb09a..97e498378 100644 --- a/internal/database/pg.go +++ b/internal/database/pg.go @@ -7,6 +7,7 @@ import ( "strings" "time" + "github.com/go-shiori/shiori/internal/database/migrations" "github.com/go-shiori/shiori/internal/model" "github.com/jmoiron/sqlx" "github.com/pkg/errors" @@ -59,6 +60,9 @@ var postgresMigrations = []migration{ }), newFileMigration("0.3.0", "0.4.0", "postgres/0002_created_time"), newFileMigration("0.4.0", "0.5.0", "postgres/0003_bookmark_archiver"), + newFuncMigration("0.5.0", "0.5.1", func(db *sql.DB) error { + return migrations.MigrateArchiverMigration(db, "postgres") + }), } // PGDatabase is implementation of Database interface @@ -67,6 +71,10 @@ type PGDatabase struct { dbbase } +func postgresDatabaseFromDB(db *sqlx.DB) *PGDatabase { + return &PGDatabase{dbbase: dbbase{db}} +} + // OpenPGDatabase creates and opens connection to a PostgreSQL Database. func OpenPGDatabase(ctx context.Context, connString string) (pgDB *PGDatabase, err error) { // Open database and start transaction @@ -78,8 +86,7 @@ func OpenPGDatabase(ctx context.Context, connString string) (pgDB *PGDatabase, e db.SetMaxOpenConns(100) db.SetConnMaxLifetime(time.Second) - pgDB = &PGDatabase{dbbase: dbbase{db}} - return pgDB, err + return postgresDatabaseFromDB(db), err } // DBX returns the underlying sqlx.DB object diff --git a/internal/database/sqlite.go b/internal/database/sqlite.go index 4ac01d14a..77dcfdef6 100644 --- a/internal/database/sqlite.go +++ b/internal/database/sqlite.go @@ -8,6 +8,7 @@ import ( "strings" "time" + "github.com/go-shiori/shiori/internal/database/migrations" "github.com/go-shiori/shiori/internal/model" "github.com/jmoiron/sqlx" "github.com/pkg/errors" @@ -62,6 +63,13 @@ var sqliteMigrations = []migration{ newFileMigration("0.4.0", "0.5.0", "sqlite/0003_uniq_id"), newFileMigration("0.5.0", "0.6.0", "sqlite/0004_created_time"), newFileMigration("0.6.0", "0.7.0", "sqlite/0005_bookmark_archiver"), + newFuncMigration("0.7.0", "0.8.0", func(db *sql.DB) error { + return migrations.MigrateArchiverMigration(db, "sqlite") + }), +} + +func sqliteDatabaseFromDB(db *sqlx.DB) *SQLiteDatabase { + return &SQLiteDatabase{dbbase: dbbase{db}} } // SQLiteDatabase is implementation of Database interface diff --git a/internal/database/sqlite_noncgo.go b/internal/database/sqlite_noncgo.go index f2ee6b9c1..10b39b239 100644 --- a/internal/database/sqlite_noncgo.go +++ b/internal/database/sqlite_noncgo.go @@ -20,6 +20,5 @@ func OpenSQLiteDatabase(ctx context.Context, databasePath string) (sqliteDB *SQL return nil, errors.WithStack(err) } - sqliteDB = &SQLiteDatabase{dbbase: dbbase{db}} - return sqliteDB, nil + return sqliteDatabaseFromDB(db), nil } diff --git a/internal/database/sqlite_openbsd.go b/internal/database/sqlite_openbsd.go index 64d9c7d00..404017f79 100644 --- a/internal/database/sqlite_openbsd.go +++ b/internal/database/sqlite_openbsd.go @@ -21,6 +21,5 @@ func OpenSQLiteDatabase(ctx context.Context, databasePath string) (sqliteDB *SQL return nil, errors.WithStack(err) } - sqliteDB = &SQLiteDatabase{dbbase: dbbase{db}} - return sqliteDB, nil + return sqliteDatabaseFromDB(db), nil } diff --git a/internal/domains/bookmarks_test.go b/internal/domains/bookmarks_test.go index c02f16960..60dabaf89 100644 --- a/internal/domains/bookmarks_test.go +++ b/internal/domains/bookmarks_test.go @@ -4,9 +4,6 @@ import ( "context" "testing" - "github.com/go-shiori/shiori/internal/config" - "github.com/go-shiori/shiori/internal/database" - "github.com/go-shiori/shiori/internal/dependencies" "github.com/go-shiori/shiori/internal/domains" "github.com/go-shiori/shiori/internal/model" "github.com/go-shiori/shiori/internal/testutil" @@ -17,17 +14,10 @@ import ( func TestBookmarkDomain(t *testing.T) { fs := afero.NewMemMapFs() + ctx := context.Background() + logger := logrus.New() - db, err := database.OpenSQLiteDatabase(context.TODO(), ":memory:") - require.NoError(t, err) - require.NoError(t, db.Migrate(context.TODO())) - - deps := &dependencies.Dependencies{ - Database: db, - Config: config.ParseServerConfiguration(context.TODO(), logrus.New()), - Log: logrus.New(), - Domains: &dependencies.Domains{}, - } + _, deps := testutil.GetTestConfigurationAndDependencies(t, ctx, logger) deps.Domains.Storage = domains.NewStorageDomain(deps, fs) fs.MkdirAll("thumb", 0755)