diff --git a/docs/swagger/docs.go b/docs/swagger/docs.go index 6c0915288..e785ff652 100644 --- a/docs/swagger/docs.go +++ b/docs/swagger/docs.go @@ -378,6 +378,12 @@ const docTemplate = `{ "model.BookmarkDTO": { "type": "object", "properties": { + "archivePath": { + "type": "string" + }, + "archiver": { + "type": "string" + }, "author": { "type": "string" }, diff --git a/docs/swagger/swagger.json b/docs/swagger/swagger.json index 2d2978e26..648be2012 100644 --- a/docs/swagger/swagger.json +++ b/docs/swagger/swagger.json @@ -367,6 +367,12 @@ "model.BookmarkDTO": { "type": "object", "properties": { + "archivePath": { + "type": "string" + }, + "archiver": { + "type": "string" + }, "author": { "type": "string" }, diff --git a/docs/swagger/swagger.yaml b/docs/swagger/swagger.yaml index 6917e65c5..3ecc8331b 100644 --- a/docs/swagger/swagger.yaml +++ b/docs/swagger/swagger.yaml @@ -82,6 +82,10 @@ definitions: type: object model.BookmarkDTO: properties: + archivePath: + type: string + archiver: + type: string author: type: string create_archive: diff --git a/go.mod b/go.mod index 6127ef5c8..8c4840a5a 100644 --- a/go.mod +++ b/go.mod @@ -18,6 +18,7 @@ require ( github.com/go-sql-driver/mysql v1.8.1 github.com/gofrs/uuid/v5 v5.2.0 github.com/golang-jwt/jwt/v5 v5.2.1 + github.com/huandu/go-sqlbuilder v1.30.1 github.com/jmoiron/sqlx v1.4.0 github.com/julienschmidt/httprouter v1.3.0 github.com/lib/pq v1.10.9 @@ -86,6 +87,7 @@ require ( github.com/golang/protobuf v1.5.4 // indirect github.com/google/uuid v1.6.0 // indirect github.com/hashicorp/golang-lru/v2 v2.0.7 // indirect + github.com/huandu/xstrings v1.4.0 // indirect github.com/inconshreveable/mousetrap v1.1.0 // indirect github.com/josharian/intern v1.0.0 // indirect github.com/json-iterator/go v1.1.12 // indirect diff --git a/go.sum b/go.sum index 94caf4f6b..c63f76162 100644 --- a/go.sum +++ b/go.sum @@ -129,6 +129,12 @@ github.com/grpc-ecosystem/grpc-gateway/v2 v2.16.0 h1:YBftPWNWd4WwGqtY2yeZL2ef8rH 
github.com/grpc-ecosystem/grpc-gateway/v2 v2.16.0/go.mod h1:YN5jB8ie0yfIUg6VvR9Kz84aCaG7AsGZnLjhHbUqwPg= github.com/hashicorp/golang-lru/v2 v2.0.7 h1:a+bsQ5rvGLjzHuww6tVxozPZFVghXaHOwFs4luLUK2k= github.com/hashicorp/golang-lru/v2 v2.0.7/go.mod h1:QeFd9opnmA6QUJc5vARoKUSoFhyfM2/ZepoAG6RGpeM= +github.com/huandu/go-assert v1.1.6 h1:oaAfYxq9KNDi9qswn/6aE0EydfxSa+tWZC1KabNitYs= +github.com/huandu/go-assert v1.1.6/go.mod h1:JuIfbmYG9ykwvuxoJ3V8TB5QP+3+ajIA54Y44TmkMxs= +github.com/huandu/go-sqlbuilder v1.30.1 h1:rsneJuMBZcGpxK6YQcVtKclhFT0wbM2gmOqlTXaQc2w= +github.com/huandu/go-sqlbuilder v1.30.1/go.mod h1:mS0GAtrtW+XL6nM2/gXHRJax2RwSW1TraavWDFAc1JA= +github.com/huandu/xstrings v1.4.0 h1:D17IlohoQq4UcpqD7fDk80P7l+lwAmlFaBHgOipl2FU= +github.com/huandu/xstrings v1.4.0/go.mod h1:y5/lhBue+AyNmUVz9RLU9xbLR0o4KIIExikq4ovT0aE= github.com/inconshreveable/mousetrap v1.1.0 h1:wN+x4NVGpMsO7ErUn/mUI3vEoE6Jt13X2s0bqwp9tc8= github.com/inconshreveable/mousetrap v1.1.0/go.mod h1:vpF70FUmC8bwa3OWnCshd2FqLfsEA9PFc4w1p2J65bw= github.com/jmoiron/sqlx v1.4.0 h1:1PLqN7S1UYp5t4SrVVnt4nUVNemrDAtxlulVe+Qgm3o= diff --git a/internal/archiver/pdf.go b/internal/archiver/pdf.go new file mode 100644 index 000000000..78cebc61c --- /dev/null +++ b/internal/archiver/pdf.go @@ -0,0 +1,57 @@ +package archiver + +import ( + "fmt" + "strings" + + "github.com/go-shiori/shiori/internal/dependencies" + "github.com/go-shiori/shiori/internal/model" +) + +type PDFArchiver struct { + deps *dependencies.Dependencies +} + +func (a *PDFArchiver) Matches(archiverReq *model.ArchiverRequest) bool { + return strings.Contains(archiverReq.ContentType, "application/pdf") +} + +func (a *PDFArchiver) Archive(archiverReq *model.ArchiverRequest) (*model.BookmarkDTO, error) { + bookmark := &archiverReq.Bookmark + + if err := a.deps.Domains.Storage.WriteData(model.GetArchivePath(bookmark), archiverReq.Content); err != nil { + return nil, fmt.Errorf("error saving pdf archive: %v", err) + } + + bookmark.ArchivePath = 
model.GetArchivePath(bookmark) + bookmark.HasArchive = true + bookmark.Archiver = model.ArchiverPDF + + return bookmark, nil +} + +func (a *PDFArchiver) GetArchiveFile(bookmark model.BookmarkDTO, resourcePath string) (*model.ArchiveFile, error) { + archivePath := model.GetArchivePath(&bookmark) + + if !a.deps.Domains.Storage.FileExists(archivePath) { + return nil, fmt.Errorf("archive for bookmark %d doesn't exist", bookmark.ID) + } + + archiveFile, err := a.deps.Domains.Storage.FS().Open(archivePath) + if err != nil { + return nil, fmt.Errorf("error opening pdf archive: %w", err) + } + + info, err := archiveFile.Stat() + if err != nil { + return nil, fmt.Errorf("error getting pdf archive info: %w", err) + } + + return model.NewArchiveFile(archiveFile, "application/pdf", "", info.Size()), nil +} + +func NewPDFArchiver(deps *dependencies.Dependencies) *PDFArchiver { + return &PDFArchiver{ + deps: deps, + } +} diff --git a/internal/archiver/warc.go b/internal/archiver/warc.go new file mode 100644 index 000000000..431995e54 --- /dev/null +++ b/internal/archiver/warc.go @@ -0,0 +1,78 @@ +package archiver + +import ( + "bytes" + "fmt" + "path/filepath" + "strings" + + "github.com/go-shiori/shiori/internal/core" + "github.com/go-shiori/shiori/internal/dependencies" + "github.com/go-shiori/shiori/internal/model" + "github.com/go-shiori/warc" +) + +// LEGACY WARNING +// This file contains legacy code that will be removed once we move on to Obelisk as +// general archiver. 
+ +type WARCArchiver struct { + deps *dependencies.Dependencies +} + +func (a *WARCArchiver) Matches(archiverReq *model.ArchiverRequest) bool { + // TODO: set to true for now as catch-all but we will remove this archiver soon + return true +} + +func (a *WARCArchiver) Archive(archiverReq *model.ArchiverRequest) (*model.BookmarkDTO, error) { + processRequest := core.ProcessRequest{ + DataDir: a.deps.Config.Storage.DataDir, + Bookmark: archiverReq.Bookmark, + Content: bytes.NewReader(archiverReq.Content), + ContentType: archiverReq.ContentType, + } + + result, isFatalErr, err := core.ProcessBookmark(a.deps, processRequest) + + if err != nil && isFatalErr { + return nil, fmt.Errorf("failed to process: %v", err) + } + + return &result, nil +} + +func (a *WARCArchiver) GetArchiveFile(bookmark model.BookmarkDTO, resourcePath string) (*model.ArchiveFile, error) { + archivePath := model.GetArchivePath(&bookmark) + + if !a.deps.Domains.Storage.FileExists(archivePath) { + return nil, fmt.Errorf("archive for bookmark %d doesn't exist", bookmark.ID) + } + + warcFile, err := warc.Open(filepath.Join(a.deps.Config.Storage.DataDir, archivePath)) + if err != nil { + return nil, fmt.Errorf("error opening warc file: %w", err) + } + + defer warcFile.Close() + + if !warcFile.HasResource(resourcePath) { + return nil, fmt.Errorf("resource %s doesn't exist in archive", resourcePath) + } + + content, contentType, err := warcFile.Read(resourcePath) + if err != nil { + return nil, fmt.Errorf("error reading resource %s: %w", resourcePath, err) + } + + // Note: Using this method to send the reader instead of `bytes.NewReader` because that + // crashes the moment we try to retrieve it for some reason. Since this is a legacy archiver + // I don't want to spend more time on this. 
(@fmartingr) + return model.NewArchiveFile(strings.NewReader(string(content)), contentType, "gzip", int64(len(content))), nil +} + +func NewWARCArchiver(deps *dependencies.Dependencies) *WARCArchiver { + return &WARCArchiver{ + deps: deps, + } +} diff --git a/internal/cmd/add.go b/internal/cmd/add.go index f099bcdcf..20b6b6305 100644 --- a/internal/cmd/add.go +++ b/internal/cmd/add.go @@ -29,7 +29,7 @@ func addCmd() *cobra.Command { } func addHandler(cmd *cobra.Command, args []string) { - cfg, deps := initShiori(cmd.Context(), cmd) + _, deps := initShiori(cmd.Context(), cmd) // Read flag and arguments url := args[0] @@ -38,7 +38,6 @@ func addHandler(cmd *cobra.Command, args []string) { tags, _ := cmd.Flags().GetStringSlice("tags") offline, _ := cmd.Flags().GetBool("offline") noArchival, _ := cmd.Flags().GetBool("no-archival") - logArchival, _ := cmd.Flags().GetBool("log-archival") // Normalize input title = validateTitle(title, "") @@ -84,37 +83,22 @@ func addHandler(cmd *cobra.Command, args []string) { if !offline { cInfo.Println("Downloading article...") - var isFatalErr bool - content, contentType, err := core.DownloadBookmark(book.URL) + result, err := deps.Domains.Archiver.GenerateBookmarkArchive(book) if err != nil { - cError.Printf("Failed to download: %v\n", err) + cError.Printf("Failed to download article: %v\n", err) + os.Exit(1) + } + + if title != "" { + result.Title = title } - if err == nil && content != nil { - request := core.ProcessRequest{ - DataDir: cfg.Storage.DataDir, - Bookmark: book, - Content: content, - ContentType: contentType, - LogArchival: logArchival, - KeepTitle: title != "", - KeepExcerpt: excerpt != "", - } - - book, isFatalErr, err = core.ProcessBookmark(deps, request) - content.Close() - - if err != nil { - cError.Printf("Failed: %v\n", err) - } - - if isFatalErr { - os.Exit(1) - } + if excerpt != "" { + result.Excerpt = excerpt } // Save bookmark to database - _, err = deps.Database.SaveBookmarks(cmd.Context(), false, book) + _, 
err = deps.Database.SaveBookmarks(cmd.Context(), false, *result) if err != nil { cError.Printf("Failed to save bookmark with content: %v\n", err) os.Exit(1) diff --git a/internal/core/core.go b/internal/core/core.go index a75f3ec49..5bf94999e 100644 --- a/internal/core/core.go +++ b/internal/core/core.go @@ -1,3 +1,9 @@ package core -const userAgent = "Shiori/2.0.0 (+https://github.com/go-shiori/shiori)" +import ( + "fmt" + + "github.com/go-shiori/shiori/internal/model" +) + +var userAgent = fmt.Sprintf("Shiori/%s (+https://github.com/go-shiori/shiori)", model.BuildVersion) diff --git a/internal/core/ebook.go b/internal/core/ebook.go index d587f86c6..1cd6cb75b 100644 --- a/internal/core/ebook.go +++ b/internal/core/ebook.go @@ -4,7 +4,6 @@ import ( "os" fp "path/filepath" "strconv" - "strings" epub "github.com/go-shiori/go-epub" "github.com/go-shiori/shiori/internal/dependencies" @@ -15,35 +14,28 @@ import ( // GenerateEbook receives a `ProcessRequest` and generates an ebook file in the destination path specified. // The destination path `dstPath` should include file name with ".epub" extension // The bookmark model will be used to update the UI based on whether this function is successful or not. 
-func GenerateEbook(deps *dependencies.Dependencies, req ProcessRequest, dstPath string) (book model.BookmarkDTO, err error) { +func GenerateEbook(deps *dependencies.Dependencies, req model.EbookProcessRequest) (book model.BookmarkDTO, err error) { book = req.Bookmark + dstPath := model.GetEbookPath(&book) // Make sure bookmark ID is defined if book.ID == 0 { return book, errors.New("bookmark ID is not valid") } + if deps.Domains.Storage.FileExists(dstPath) && req.SkipExisting { + return book, nil + } + // Get current state of bookmark cheak archive and thumb strID := strconv.Itoa(book.ID) bookmarkThumbnailPath := model.GetThumbnailPath(&book) - bookmarkArchivePath := model.GetArchivePath(&book) if deps.Domains.Storage.FileExists(bookmarkThumbnailPath) { book.ImageURL = fp.Join("/", "bookmark", strID, "thumb") } - if deps.Domains.Storage.FileExists(bookmarkArchivePath) { - book.HasArchive = true - } - - // This function create ebook from reader mode of bookmark so - // we can't create ebook from PDF so we return error here if bookmark is a pdf - contentType := req.ContentType - if strings.Contains(contentType, "application/pdf") { - return book, errors.New("can't create ebook for pdf") - } - // Create temporary epub file tmpFile, err := os.CreateTemp("", "ebook") if err != nil { diff --git a/internal/core/ebook_test.go b/internal/core/ebook_test.go index 67b06d218..128b0fcc8 100644 --- a/internal/core/ebook_test.go +++ b/internal/core/ebook_test.go @@ -21,28 +21,24 @@ func TestGenerateEbook(t *testing.T) { t.Run("Successful ebook generate", func(t *testing.T) { t.Run("valid bookmarkId that return HasEbook true", func(t *testing.T) { - dstFile := "/ebook/1.epub" tmpDir := t.TempDir() deps.Domains.Storage = domains.NewStorageDomain(deps, afero.NewBasePathFs(afero.NewOsFs(), tmpDir)) - mockRequest := core.ProcessRequest{ + mockRequest := model.EbookProcessRequest{ Bookmark: model.BookmarkDTO{ ID: 1, Title: "Example Bookmark", HTML: "Example HTML", HasEbook: false, }, 
- DataDir: tmpDir, - ContentType: "text/html", } - bookmark, err := core.GenerateEbook(deps, mockRequest, dstFile) + bookmark, err := core.GenerateEbook(deps, mockRequest) assert.True(t, bookmark.HasEbook) assert.NoError(t, err) }) t.Run("ebook generate with valid BookmarkID EbookExist ImagePathExist ReturnWithHasEbookTrue", func(t *testing.T) { - dstFile := "/ebook/2.epub" tmpDir := t.TempDir() deps.Domains.Storage = domains.NewStorageDomain(deps, afero.NewBasePathFs(afero.NewOsFs(), tmpDir)) @@ -51,10 +47,8 @@ func TestGenerateEbook(t *testing.T) { ID: 2, HasEbook: false, } - mockRequest := core.ProcessRequest{ - Bookmark: bookmark, - DataDir: tmpDir, - ContentType: "text/html", + mockRequest := model.EbookProcessRequest{ + Bookmark: bookmark, } // Create the thumbnail file imagePath := model.GetThumbnailPath(&bookmark) @@ -66,14 +60,13 @@ func TestGenerateEbook(t *testing.T) { } defer file.Close() - bookmark, err = core.GenerateEbook(deps, mockRequest, dstFile) + bookmark, err = core.GenerateEbook(deps, mockRequest) expectedImagePath := string(fp.Separator) + fp.Join("bookmark", "2", "thumb") assert.NoError(t, err) assert.True(t, bookmark.HasEbook) assert.Equalf(t, expectedImagePath, bookmark.ImageURL, "Expected imageURL %s, but got %s", expectedImagePath, bookmark.ImageURL) }) - t.Run("generate ebook valid BookmarkID EbookExist ReturnHasArchiveTrue", func(t *testing.T) { - dstFile := "/ebook/3.epub" + t.Run("generate ebook valid BookmarkID EbookExist", func(t *testing.T) { tmpDir := t.TempDir() deps.Domains.Storage = domains.NewStorageDomain(deps, afero.NewBasePathFs(afero.NewOsFs(), tmpDir)) @@ -82,10 +75,8 @@ func TestGenerateEbook(t *testing.T) { ID: 3, HasEbook: false, } - mockRequest := core.ProcessRequest{ - Bookmark: bookmark, - DataDir: tmpDir, - ContentType: "text/html", + mockRequest := model.EbookProcessRequest{ + Bookmark: bookmark, } // Create the archive file archivePath := model.GetArchivePath(&bookmark) @@ -97,25 +88,20 @@ func 
TestGenerateEbook(t *testing.T) { } defer file.Close() - bookmark, err = core.GenerateEbook(deps, mockRequest, fp.Join(dstFile, "1")) - assert.True(t, bookmark.HasArchive) + bookmark, err = core.GenerateEbook(deps, mockRequest) assert.NoError(t, err) }) }) t.Run("specific ebook generate case", func(t *testing.T) { t.Run("invalid bookmarkId that return Error", func(t *testing.T) { - dstFile := "/ebook/0.epub" - tmpDir := t.TempDir() - mockRequest := core.ProcessRequest{ + mockRequest := model.EbookProcessRequest{ Bookmark: model.BookmarkDTO{ ID: 0, HasEbook: false, }, - DataDir: tmpDir, - ContentType: "text/html", } - bookmark, err := core.GenerateEbook(deps, mockRequest, dstFile) + bookmark, err := core.GenerateEbook(deps, mockRequest) assert.Equal(t, model.BookmarkDTO{ ID: 0, @@ -124,7 +110,6 @@ func TestGenerateEbook(t *testing.T) { assert.EqualError(t, err, "bookmark ID is not valid") }) t.Run("ebook exist return HasEbook true", func(t *testing.T) { - dstFile := "/ebook/1.epub" tmpDir := t.TempDir() deps.Domains.Storage = domains.NewStorageDomain(deps, afero.NewBasePathFs(afero.NewOsFs(), tmpDir)) @@ -133,10 +118,8 @@ func TestGenerateEbook(t *testing.T) { ID: 1, HasEbook: false, } - mockRequest := core.ProcessRequest{ - Bookmark: bookmark, - DataDir: tmpDir, - ContentType: "text/html", + mockRequest := model.EbookProcessRequest{ + Bookmark: bookmark, } // Create the ebook file ebookPath := model.GetEbookPath(&bookmark) @@ -148,29 +131,10 @@ func TestGenerateEbook(t *testing.T) { } defer file.Close() - bookmark, err = core.GenerateEbook(deps, mockRequest, dstFile) + bookmark, err = core.GenerateEbook(deps, mockRequest) assert.True(t, bookmark.HasEbook) assert.NoError(t, err) }) - t.Run("generate ebook valid BookmarkID RetuenError for PDF file", func(t *testing.T) { - dstFile := "/ebook/1.epub" - tmpDir := t.TempDir() - - mockRequest := core.ProcessRequest{ - Bookmark: model.BookmarkDTO{ - ID: 1, - HasEbook: false, - }, - DataDir: tmpDir, - ContentType: 
"application/pdf", - } - - bookmark, err := core.GenerateEbook(deps, mockRequest, dstFile) - - assert.False(t, bookmark.HasEbook) - assert.Error(t, err) - assert.EqualError(t, err, "can't create ebook for pdf") - }) }) } diff --git a/internal/core/processing.go b/internal/core/processing.go index 149fcc121..297b0f301 100644 --- a/internal/core/processing.go +++ b/internal/core/processing.go @@ -145,13 +145,12 @@ func ProcessBookmark(deps *dependencies.Dependencies, req ProcessRequest) (book // If needed, create ebook as well if book.CreateEbook { - ebookPath := model.GetEbookPath(&book) - req.Bookmark = book - if strings.Contains(contentType, "application/pdf") { return book, false, errors.Wrap(err, "can't create ebook from pdf") } else { - _, err = GenerateEbook(deps, req, ebookPath) + _, err = GenerateEbook(deps, model.EbookProcessRequest{ + Bookmark: book, + }) if err != nil { return book, true, errors.Wrap(err, "failed to create ebook") } @@ -189,6 +188,8 @@ func ProcessBookmark(deps *dependencies.Dependencies, req ProcessRequest) (book book.HasArchive = true book.ModifiedAt = "" + book.ArchivePath = dstPath + book.Archiver = model.ArchiverWARC } return book, false, nil diff --git a/internal/database/database_test.go b/internal/database/database_test.go index e05034739..9395a7d4c 100644 --- a/internal/database/database_test.go +++ b/internal/database/database_test.go @@ -226,8 +226,10 @@ func testGetBookmark(t *testing.T, db DB) { ctx := context.TODO() book := model.BookmarkDTO{ - URL: "https://github.com/go-shiori/shiori", - Title: "shiori", + URL: "https://github.com/go-shiori/shiori", + Title: "shiori", + Archiver: model.ArchiverPDF, + ArchivePath: "test", } result, err := db.SaveBookmarks(ctx, true, book) @@ -238,6 +240,9 @@ func testGetBookmark(t *testing.T, db DB) { assert.NoError(t, err, "Get bookmark should not fail") assert.Equal(t, result[0].ID, savedBookmark.ID, "Retrieved bookmark should be the same") assert.Equal(t, book.URL, savedBookmark.URL, 
"Retrieved bookmark should be the same") + assert.Equal(t, book.Title, savedBookmark.Title, "Retrieved bookmark should be the same") + assert.Equal(t, book.Archiver, savedBookmark.Archiver, "Retrieved bookmark should be the same") + assert.Equal(t, book.ArchivePath, savedBookmark.ArchivePath, "Retrieved bookmark should be the same") } func testGetBookmarkNotExistent(t *testing.T, db DB) { diff --git a/internal/database/migrations.go b/internal/database/migrations.go index dc4208491..c509d2d84 100644 --- a/internal/database/migrations.go +++ b/internal/database/migrations.go @@ -13,10 +13,12 @@ import ( //go:embed migrations/* var migrationFiles embed.FS +type migrationFunc func(db *sql.DB) error + type migration struct { fromVersion semver.Version toVersion semver.Version - migrationFunc func(db *sql.DB) error + migrationFunc migrationFunc } // txFunc is a function that runs in a transaction. @@ -42,7 +44,7 @@ func runInTransaction(db *sql.DB, fn txFn) error { } // newFuncMigration creates a new migration from a function. -func newFuncMigration(fromVersion, toVersion string, migrationFunc func(db *sql.DB) error) migration { +func newFuncMigration(fromVersion, toVersion string, migrationFunc migrationFunc) migration { return migration{ fromVersion: semver.MustParse(fromVersion), toVersion: semver.MustParse(toVersion), diff --git a/internal/database/migrations/0001_migrate_archiver.go b/internal/database/migrations/0001_migrate_archiver.go new file mode 100644 index 000000000..40b500d49 --- /dev/null +++ b/internal/database/migrations/0001_migrate_archiver.go @@ -0,0 +1,141 @@ +package migrations + +import ( + "context" + "database/sql" + "fmt" + "os" + "path/filepath" + "slices" + + "github.com/huandu/go-sqlbuilder" + "github.com/jmoiron/sqlx" + gap "github.com/muesli/go-app-paths" +) + +// getPortableModeEnabled_171 checks if portable mode is enabled by naively checking the +// os.Args for the --portable flag. 
This is a workaround to use in this migration with the +// current state of the code as of 1.7.1. +func getPortableModeEnabled_171() bool { + return slices.Contains(os.Args, "--portable") +} + +// getStorageDirectory_171 returns the directory where shiori data is stored +// for the 1.7.1 version of shiori. +// This function is just a copy of the original as of 1.7.1. +func getStorageDirectory_171(portableMode bool) (string, error) { + // If in portable mode, uses directory of executable + if portableMode { + exePath, err := os.Executable() + if err != nil { + return "", err + } + + exeDir := filepath.Dir(exePath) + return filepath.Join(exeDir, "shiori-data"), nil + } + + // Try to use platform specific app path + userScope := gap.NewScope(gap.User, "shiori") + dataDir, err := userScope.DataPath("") + if err == nil { + return dataDir, nil + } + + return "", fmt.Errorf("couldn't determine the data directory") +} + +// getDataDir_171 returns the directory where shiori data is stored using the logic flow +// of the 1.7.1 version of shiori. +func getDataDir_171() (string, error) { + dataDir := os.Getenv("SHIORI_DIR") + if dataDir == "" { + var err error + dataDir, err = getStorageDirectory_171(getPortableModeEnabled_171()) + if err != nil { + return "", fmt.Errorf("failed to get data directory: %w", err) + } + } + return dataDir, nil +} + +// MigrateArchiverMigration populates the new archiver and archive_path columns. +// This migration manually checks that the existing bookmarks have a file in the default archive path: +// SHIORI_DIR/archive/ID +// If the file exists, it will update the archiver=warc (the only one at this point) and archive_path=path +// This migration is driver agnostic. 
+func MigrateArchiverMigration(sqlDB *sql.DB, driver string) error { + var flavor sqlbuilder.Flavor + switch driver { + case "mysql": + flavor = sqlbuilder.MySQL + case "postgres": + flavor = sqlbuilder.PostgreSQL + case "sqlite": + flavor = sqlbuilder.SQLite + default: + return fmt.Errorf("unsupported driver: %s", driver) + } + + ctx := context.Background() + sqlX := sqlx.NewDb(sqlDB, driver) + + tx, err := sqlX.Begin() + if err != nil { + return fmt.Errorf("failed to start transaction: %w", err) + } + defer tx.Rollback() + + perPage := 50 + page := 1 + + for { + var bookmarkIDs []int + sb := sqlbuilder.NewSelectBuilder() + sb.SetFlavor(flavor) + sb.Select("id") + sb.From("bookmark") + sb.OrderBy("id ASC") + sb.Where(sb.Equal("archiver", "")) + sb.Limit(perPage) + sb.Offset((page - 1) * perPage) + + sqlQuery, args := sb.Build() + if err := sqlX.Select(&bookmarkIDs, sqlQuery, args...); err != nil { + return fmt.Errorf("failed to get bookmarks: %w", err) + } + + if len(bookmarkIDs) == 0 { + break + } + + dataDir, err := getDataDir_171() + if err != nil { + return fmt.Errorf("failed to get data directory: %w", err) + } + + for _, bookID := range bookmarkIDs { + archivePath := filepath.Join(dataDir, "archive", fmt.Sprintf("%d", bookID)) + + // If the file exists, we assume it's a WARC file and update the row + if _, err := os.Stat(archivePath); err == nil { + sb := sqlbuilder.NewUpdateBuilder() + sb.Update("bookmark") + sb.Set( + sb.Assign("archiver", "warc"), + sb.Assign("archive_path", archivePath), + ) + sb.Where(sb.Equal("id", bookID)) + + sqlQuery, args := sb.Build() + if _, err := tx.ExecContext(ctx, sqlQuery, args...); err != nil { + return fmt.Errorf("failed to update bookmark %d: %w", bookID, err) + } + } + } + + page++ + } + + return tx.Commit() +} diff --git a/internal/database/migrations/mysql/0011_bookmark_archiver.up.sql b/internal/database/migrations/mysql/0011_bookmark_archiver.up.sql new file mode 100644 index 000000000..3c23d72a8 --- /dev/null +++ 
b/internal/database/migrations/mysql/0011_bookmark_archiver.up.sql @@ -0,0 +1,3 @@ +ALTER TABLE bookmark + ADD COLUMN archiver TEXT NOT NULL DEFAULT '', + ADD COLUMN archive_path TEXT NOT NULL DEFAULT ''; diff --git a/internal/database/migrations/postgres/0003_bookmark_archiver.up.sql b/internal/database/migrations/postgres/0003_bookmark_archiver.up.sql new file mode 100644 index 000000000..3c23d72a8 --- /dev/null +++ b/internal/database/migrations/postgres/0003_bookmark_archiver.up.sql @@ -0,0 +1,3 @@ +ALTER TABLE bookmark + ADD COLUMN archiver TEXT NOT NULL DEFAULT '', + ADD COLUMN archive_path TEXT NOT NULL DEFAULT ''; diff --git a/internal/database/migrations/sqlite/0005_bookmark_archiver.up.sql b/internal/database/migrations/sqlite/0005_bookmark_archiver.up.sql new file mode 100644 index 000000000..b0ac92825 --- /dev/null +++ b/internal/database/migrations/sqlite/0005_bookmark_archiver.up.sql @@ -0,0 +1,2 @@ +ALTER TABLE bookmark ADD COLUMN archiver TEXT NOT NULL DEFAULT ''; +ALTER TABLE bookmark ADD COLUMN archive_path TEXT NOT NULL DEFAULT ''; diff --git a/internal/database/mysql.go b/internal/database/mysql.go index 27db774d5..29c8331db 100644 --- a/internal/database/mysql.go +++ b/internal/database/mysql.go @@ -7,6 +7,7 @@ import ( "strings" "time" + "github.com/go-shiori/shiori/internal/database/migrations" "github.com/go-shiori/shiori/internal/model" "github.com/jmoiron/sqlx" "github.com/pkg/errors" @@ -67,6 +68,10 @@ var mysqlMigrations = []migration{ newFileMigration("0.8.2", "0.8.3", "mysql/0008_set_modified_at_equal_created_at"), newFileMigration("0.8.3", "0.8.4", "mysql/0009_index_for_created_at"), newFileMigration("0.8.4", "0.8.5", "mysql/0010_index_for_modified_at"), + newFileMigration("0.8.5", "0.9.0", "mysql/0011_bookmark_archiver"), + newFuncMigration("0.9.0", "0.9.1", func(db *sql.DB) error { + return migrations.MigrateArchiverMigration(db, "mysql") + }), } // MySQLDatabase is implementation of Database interface @@ -75,6 +80,10 @@ type 
MySQLDatabase struct { dbbase } +func mysqlDatabaseFromDB(db *sqlx.DB) *MySQLDatabase { + return &MySQLDatabase{dbbase: dbbase{db}} +} + // OpenMySQLDatabase creates and opens connection to a MySQL Database. func OpenMySQLDatabase(ctx context.Context, connString string) (mysqlDB *MySQLDatabase, err error) { // Open database and start transaction @@ -86,8 +95,7 @@ func OpenMySQLDatabase(ctx context.Context, connString string) (mysqlDB *MySQLDa db.SetMaxOpenConns(100) db.SetConnMaxLifetime(time.Second) // in case mysql client has longer timeout (driver issue #674) - mysqlDB = &MySQLDatabase{dbbase: dbbase{db}} - return mysqlDB, err + return mysqlDatabaseFromDB(db), err } // DBX returns the underlying sqlx.DB object @@ -137,8 +145,8 @@ func (db *MySQLDatabase) SaveBookmarks(ctx context.Context, create bool, bookmar if err := db.withTx(ctx, func(tx *sqlx.Tx) error { // Prepare statement stmtInsertBook, err := tx.Preparex(`INSERT INTO bookmark - (url, title, excerpt, author, public, content, html, modified_at, created_at) - VALUES(?, ?, ?, ?, ?, ?, ?, ?, ?)`) + (url, title, excerpt, author, public, content, html, modified_at, created_at, archiver, archive_path) + VALUES(?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`) if err != nil { return errors.WithStack(err) } @@ -151,7 +159,9 @@ func (db *MySQLDatabase) SaveBookmarks(ctx context.Context, create bool, bookmar public = ?, content = ?, html = ?, - modified_at = ? + modified_at = ?, + archiver = ?, + archive_path = ? 
WHERE id = ?`) if err != nil { return errors.WithStack(err) @@ -206,7 +216,8 @@ func (db *MySQLDatabase) SaveBookmarks(ctx context.Context, create bool, bookmar var res sql.Result res, err = stmtInsertBook.ExecContext(ctx, book.URL, book.Title, book.Excerpt, book.Author, - book.Public, book.Content, book.HTML, book.ModifiedAt, book.CreatedAt) + book.Public, book.Content, book.HTML, book.ModifiedAt, book.CreatedAt, + book.Archiver, book.ArchivePath) if err != nil { return errors.WithStack(err) } @@ -218,7 +229,8 @@ func (db *MySQLDatabase) SaveBookmarks(ctx context.Context, create bool, bookmar } else { _, err = stmtUpdateBook.ExecContext(ctx, book.URL, book.Title, book.Excerpt, book.Author, - book.Public, book.Content, book.HTML, book.ModifiedAt, book.ID) + book.Public, book.Content, book.HTML, book.ModifiedAt, + book.Archiver, book.ArchivePath, book.ID) } if err != nil { return errors.WithStack(err) @@ -294,7 +306,9 @@ func (db *MySQLDatabase) GetBookmarks(ctx context.Context, opts GetBookmarksOpti `public`, `created_at`, `modified_at`, - `content <> "" has_content`} + `content <> "" has_content`, + `archiver`, + `archive_path`} if opts.WithContent { columns = append(columns, `content`, `html`) @@ -572,7 +586,8 @@ func (db *MySQLDatabase) GetBookmark(ctx context.Context, id int, url string) (m args := []interface{}{id} query := `SELECT id, url, title, excerpt, author, public, - content, html, modified_at, created_at, content <> '' has_content + content, html, modified_at, created_at, content <> '' has_content, + archiver, archive_path FROM bookmark WHERE id = ?` if url != "" { diff --git a/internal/database/mysql_test.go b/internal/database/mysql_test.go index 5ee4e3587..ded0a7951 100644 --- a/internal/database/mysql_test.go +++ b/internal/database/mysql_test.go @@ -39,6 +39,7 @@ func mysqlTestDatabaseFactory(_ *testing.T, ctx context.Context) (DB, error) { } _, err = tx.ExecContext(ctx, "CREATE DATABASE "+dbname) + return err }) if err != nil { @@ -53,9 +54,9 @@ 
func mysqlTestDatabaseFactory(_ *testing.T, ctx context.Context) (DB, error) { return nil, err } - return db, err + return db, nil } -func TestMysqlsDatabase(t *testing.T) { +func TestMySQLDatabase(t *testing.T) { testDatabase(t, mysqlTestDatabaseFactory) } diff --git a/internal/database/pg.go b/internal/database/pg.go index 66df2dd8d..97e498378 100644 --- a/internal/database/pg.go +++ b/internal/database/pg.go @@ -7,6 +7,7 @@ import ( "strings" "time" + "github.com/go-shiori/shiori/internal/database/migrations" "github.com/go-shiori/shiori/internal/model" "github.com/jmoiron/sqlx" "github.com/pkg/errors" @@ -58,6 +59,10 @@ var postgresMigrations = []migration{ return nil }), newFileMigration("0.3.0", "0.4.0", "postgres/0002_created_time"), + newFileMigration("0.4.0", "0.5.0", "postgres/0003_bookmark_archiver"), + newFuncMigration("0.5.0", "0.5.1", func(db *sql.DB) error { + return migrations.MigrateArchiverMigration(db, "postgres") + }), } // PGDatabase is implementation of Database interface @@ -66,6 +71,10 @@ type PGDatabase struct { dbbase } +func postgresDatabaseFromDB(db *sqlx.DB) *PGDatabase { + return &PGDatabase{dbbase: dbbase{db}} +} + // OpenPGDatabase creates and opens connection to a PostgreSQL Database. 
func OpenPGDatabase(ctx context.Context, connString string) (pgDB *PGDatabase, err error) { // Open database and start transaction @@ -77,8 +86,7 @@ func OpenPGDatabase(ctx context.Context, connString string) (pgDB *PGDatabase, e db.SetMaxOpenConns(100) db.SetConnMaxLifetime(time.Second) - pgDB = &PGDatabase{dbbase: dbbase{db}} - return pgDB, err + return postgresDatabaseFromDB(db), err } // DBX returns the underlying sqlx.DB object @@ -129,8 +137,8 @@ func (db *PGDatabase) SaveBookmarks(ctx context.Context, create bool, bookmarks if err := db.withTx(ctx, func(tx *sqlx.Tx) error { // Prepare statement stmtInsertBook, err := tx.Preparex(`INSERT INTO bookmark - (url, title, excerpt, author, public, content, html, modified_at, created_at) - VALUES($1, $2, $3, $4, $5, $6, $7, $8, $9) + (url, title, excerpt, author, public, content, html, modified_at, created_at, archiver, archive_path) + VALUES($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11) RETURNING id`) if err != nil { return errors.WithStack(err) @@ -144,8 +152,10 @@ func (db *PGDatabase) SaveBookmarks(ctx context.Context, create bool, bookmarks public = $5, content = $6, html = $7, - modified_at = $8 - WHERE id = $9`) + modified_at = $8, + archiver = $9, + archive_path = $10 + WHERE id = $11`) if err != nil { return errors.WithStack(err) } @@ -198,11 +208,14 @@ func (db *PGDatabase) SaveBookmarks(ctx context.Context, create bool, bookmarks book.CreatedAt = modifiedTime err = stmtInsertBook.QueryRowContext(ctx, book.URL, book.Title, book.Excerpt, book.Author, - book.Public, book.Content, book.HTML, book.ModifiedAt, book.CreatedAt).Scan(&book.ID) + book.Public, book.Content, book.HTML, book.ModifiedAt, book.CreatedAt, + book.Archiver, book.ArchivePath).Scan(&book.ID) } else { _, err = stmtUpdateBook.ExecContext(ctx, book.URL, book.Title, book.Excerpt, book.Author, - book.Public, book.Content, book.HTML, book.ModifiedAt, book.ID) + book.Public, book.Content, book.HTML, book.ModifiedAt, + book.Archiver, 
book.ArchivePath, + book.ID) } if err != nil { return errors.WithStack(err) @@ -272,9 +285,11 @@ func (db *PGDatabase) GetBookmarks(ctx context.Context, opts GetBookmarksOptions `excerpt`, `author`, `public`, - `created_at`, `modified_at`, - `content <> '' has_content`} + `created_at`, + `content <> '' has_content, + archiver, + archive_path`} if opts.WithContent { columns = append(columns, `content`, `html`) @@ -573,7 +588,8 @@ func (db *PGDatabase) GetBookmark(ctx context.Context, id int, url string) (mode args := []interface{}{id} query := `SELECT id, url, title, excerpt, author, public, - content, html, modified_at, created_at, content <> '' has_content + content, html, modified_at, created_at, content <> '' has_content, + archiver, archive_path FROM bookmark WHERE id = $1` if url != "" { diff --git a/internal/database/sqlite.go b/internal/database/sqlite.go index 0675db6aa..77dcfdef6 100644 --- a/internal/database/sqlite.go +++ b/internal/database/sqlite.go @@ -8,6 +8,7 @@ import ( "strings" "time" + "github.com/go-shiori/shiori/internal/database/migrations" "github.com/go-shiori/shiori/internal/model" "github.com/jmoiron/sqlx" "github.com/pkg/errors" @@ -61,6 +62,14 @@ var sqliteMigrations = []migration{ newFileMigration("0.3.0", "0.4.0", "sqlite/0002_denormalize_content"), newFileMigration("0.4.0", "0.5.0", "sqlite/0003_uniq_id"), newFileMigration("0.5.0", "0.6.0", "sqlite/0004_created_time"), + newFileMigration("0.6.0", "0.7.0", "sqlite/0005_bookmark_archiver"), + newFuncMigration("0.7.0", "0.8.0", func(db *sql.DB) error { + return migrations.MigrateArchiverMigration(db, "sqlite") + }), +} + +func sqliteDatabaseFromDB(db *sqlx.DB) *SQLiteDatabase { + return &SQLiteDatabase{dbbase: dbbase{db}} } // SQLiteDatabase is implementation of Database interface @@ -128,15 +137,16 @@ func (db *SQLiteDatabase) SaveBookmarks(ctx context.Context, create bool, bookma // Prepare statement stmtInsertBook, err := tx.PreparexContext(ctx, `INSERT INTO bookmark - (url, title, 
excerpt, author, public, modified_at, has_content, created_at) - VALUES(?, ?, ?, ?, ?, ?, ?, ?) RETURNING id`) + (url, title, excerpt, author, public, modified_at, created_at, has_content, archiver, archive_path) + VALUES(?, ?, ?, ?, ?, ?, ?, ?, ?, ?) RETURNING id`) if err != nil { return errors.WithStack(err) } stmtUpdateBook, err := tx.PreparexContext(ctx, `UPDATE bookmark SET url = ?, title = ?, excerpt = ?, author = ?, - public = ?, modified_at = ?, has_content = ? + public = ?, modified_at = ?, has_content = ?, + archiver = ?, archive_path = ? WHERE id = ?`) if err != nil { return errors.WithStack(err) @@ -205,10 +215,10 @@ func (db *SQLiteDatabase) SaveBookmarks(ctx context.Context, create bool, bookma if create { book.CreatedAt = modifiedTime err = stmtInsertBook.QueryRowContext(ctx, - book.URL, book.Title, book.Excerpt, book.Author, book.Public, book.ModifiedAt, hasContent, book.CreatedAt).Scan(&book.ID) + book.URL, book.Title, book.Excerpt, book.Author, book.Public, book.ModifiedAt, book.CreatedAt, hasContent, book.Archiver, book.ArchivePath).Scan(&book.ID) } else { _, err = stmtUpdateBook.ExecContext(ctx, - book.URL, book.Title, book.Excerpt, book.Author, book.Public, book.ModifiedAt, hasContent, book.ID) + book.URL, book.Title, book.Excerpt, book.Author, book.Public, book.ModifiedAt, hasContent, book.Archiver, book.ArchivePath, book.ID) } if err != nil { return errors.WithStack(err) @@ -302,7 +312,9 @@ func (db *SQLiteDatabase) GetBookmarks(ctx context.Context, opts GetBookmarksOpt b.public, b.created_at, b.modified_at, - b.has_content + b.has_content, + b.archiver, + b.archive_path FROM bookmark b WHERE 1` @@ -673,7 +685,8 @@ func (db *SQLiteDatabase) GetBookmark(ctx context.Context, id int, url string) ( args := []interface{}{id} query := `SELECT b.id, b.url, b.title, b.excerpt, b.author, b.public, b.modified_at, - bc.content, bc.html, b.has_content, b.created_at + bc.content, bc.html, b.has_content, b.created_at, + b.archiver, b.archive_path FROM 
bookmark b LEFT JOIN bookmark_content bc ON bc.docid = b.id WHERE b.id = ?` diff --git a/internal/database/sqlite_noncgo.go b/internal/database/sqlite_noncgo.go index f2ee6b9c1..10b39b239 100644 --- a/internal/database/sqlite_noncgo.go +++ b/internal/database/sqlite_noncgo.go @@ -20,6 +20,5 @@ func OpenSQLiteDatabase(ctx context.Context, databasePath string) (sqliteDB *SQL return nil, errors.WithStack(err) } - sqliteDB = &SQLiteDatabase{dbbase: dbbase{db}} - return sqliteDB, nil + return sqliteDatabaseFromDB(db), nil } diff --git a/internal/database/sqlite_openbsd.go b/internal/database/sqlite_openbsd.go index 64d9c7d00..404017f79 100644 --- a/internal/database/sqlite_openbsd.go +++ b/internal/database/sqlite_openbsd.go @@ -21,6 +21,5 @@ func OpenSQLiteDatabase(ctx context.Context, databasePath string) (sqliteDB *SQL return nil, errors.WithStack(err) } - sqliteDB = &SQLiteDatabase{dbbase: dbbase{db}} - return sqliteDB, nil + return sqliteDatabaseFromDB(db), nil } diff --git a/internal/domains/archiver.go b/internal/domains/archiver.go index 358523b54..69206f4f3 100644 --- a/internal/domains/archiver.go +++ b/internal/domains/archiver.go @@ -1,55 +1,101 @@ package domains import ( + "context" "fmt" - "path/filepath" + "io" + "github.com/go-shiori/shiori/internal/archiver" "github.com/go-shiori/shiori/internal/core" "github.com/go-shiori/shiori/internal/dependencies" "github.com/go-shiori/shiori/internal/model" - "github.com/go-shiori/warc" ) type ArchiverDomain struct { - deps *dependencies.Dependencies + deps *dependencies.Dependencies + archivers map[string]model.Archiver } -func (d *ArchiverDomain) DownloadBookmarkArchive(book model.BookmarkDTO) (*model.BookmarkDTO, error) { +func (d *ArchiverDomain) GenerateBookmarkArchive(book model.BookmarkDTO) (*model.BookmarkDTO, error) { content, contentType, err := core.DownloadBookmark(book.URL) if err != nil { return nil, fmt.Errorf("error downloading url: %s", err) } - processRequest := core.ProcessRequest{ - DataDir: 
d.deps.Config.Storage.DataDir, - Bookmark: book, - Content: content, - ContentType: contentType, + contentBytes, err := io.ReadAll(content) + if err != nil { + return nil, fmt.Errorf("error reading content: %s", err) } - - result, isFatalErr, err := core.ProcessBookmark(d.deps, processRequest) content.Close() - if err != nil && isFatalErr { - return nil, fmt.Errorf("failed to process: %v", err) + archiverReq := model.NewArchiverRequest(book, contentType, contentBytes) + + processedBookmark, err := d.ProcessBookmarkArchive(archiverReq) + if err != nil { + return nil, fmt.Errorf("error processing bookmark archive: %w", err) + } + + saved, err := d.deps.Database.SaveBookmarks(context.Background(), false, *processedBookmark) + if err != nil { + return nil, fmt.Errorf("error saving bookmark: %w", err) } - return &result, nil + return &saved[0], nil } -func (d *ArchiverDomain) GetBookmarkArchive(book *model.BookmarkDTO) (*warc.Archive, error) { - archivePath := model.GetArchivePath(book) +func (d *ArchiverDomain) GenerateBookmarkEbook(request model.EbookProcessRequest) error { + _, err := core.GenerateEbook(d.deps, request) + if err != nil { + return fmt.Errorf("error generating ebook: %s", err) + } + + return nil +} - if !d.deps.Domains.Storage.FileExists(archivePath) { - return nil, fmt.Errorf("archive for bookmark %d doesn't exist", book.ID) +func (d *ArchiverDomain) ProcessBookmarkArchive(archiverRequest *model.ArchiverRequest) (*model.BookmarkDTO, error) { + for _, archiver := range d.archivers { + if archiver.Matches(archiverRequest) { + book, err := archiver.Archive(archiverRequest) + if err != nil { + d.deps.Log.Errorf("Error archiving bookmark with archviver: %s", err) + continue + } + return book, nil + } } - // FIXME: This only works in local filesystem - return warc.Open(filepath.Join(d.deps.Config.Storage.DataDir, archivePath)) + return nil, fmt.Errorf("no archiver found for request: %s", archiverRequest.String()) +} + +func (d *ArchiverDomain) 
GetBookmarkArchiveFile(book *model.BookmarkDTO, resourcePath string) (*model.ArchiveFile, error) { + archiver, err := d.GetArchiver(book.Archiver) + if err != nil { + return nil, err + } + + archiveFile, err := archiver.GetArchiveFile(*book, resourcePath) + if err != nil { + return nil, fmt.Errorf("error getting archive file: %w", err) + } + + return archiveFile, nil +} + +func (d *ArchiverDomain) GetArchiver(name string) (model.Archiver, error) { + archiver, ok := d.archivers[name] + if !ok { + return nil, fmt.Errorf("archiver %s not found", name) + } + return archiver, nil } func NewArchiverDomain(deps *dependencies.Dependencies) *ArchiverDomain { + archivers := map[string]model.Archiver{ + model.ArchiverPDF: archiver.NewPDFArchiver(deps), + model.ArchiverWARC: archiver.NewWARCArchiver(deps), + } return &ArchiverDomain{ - deps: deps, + deps: deps, + archivers: archivers, } } diff --git a/internal/domains/bookmarks_test.go b/internal/domains/bookmarks_test.go index c02f16960..60dabaf89 100644 --- a/internal/domains/bookmarks_test.go +++ b/internal/domains/bookmarks_test.go @@ -4,9 +4,6 @@ import ( "context" "testing" - "github.com/go-shiori/shiori/internal/config" - "github.com/go-shiori/shiori/internal/database" - "github.com/go-shiori/shiori/internal/dependencies" "github.com/go-shiori/shiori/internal/domains" "github.com/go-shiori/shiori/internal/model" "github.com/go-shiori/shiori/internal/testutil" @@ -17,17 +14,10 @@ import ( func TestBookmarkDomain(t *testing.T) { fs := afero.NewMemMapFs() + ctx := context.Background() + logger := logrus.New() - db, err := database.OpenSQLiteDatabase(context.TODO(), ":memory:") - require.NoError(t, err) - require.NoError(t, db.Migrate(context.TODO())) - - deps := &dependencies.Dependencies{ - Database: db, - Config: config.ParseServerConfiguration(context.TODO(), logrus.New()), - Log: logrus.New(), - Domains: &dependencies.Domains{}, - } + _, deps := testutil.GetTestConfigurationAndDependencies(t, ctx, logger) 
deps.Domains.Storage = domains.NewStorageDomain(deps, fs) fs.MkdirAll("thumb", 0755) diff --git a/internal/domains/storage.go b/internal/domains/storage.go index 9cb49dd40..c4c0c71d0 100644 --- a/internal/domains/storage.go +++ b/internal/domains/storage.go @@ -97,3 +97,26 @@ func (d *StorageDomain) WriteFile(dst string, tmpFile *os.File) error { return nil } + +// WriteReader writes a reader to storage. +func (d *StorageDomain) WriteReader(dst string, reader io.Reader) error { + if dst != "" && !d.DirExists(dst) { + err := d.fs.MkdirAll(filepath.Dir(dst), model.DataDirPerm) + if err != nil { + return fmt.Errorf("failed to create destination dir: %v", err) + } + } + + dstFile, err := d.fs.Create(dst) + if err != nil { + return fmt.Errorf("failed to create destination file: %v", err) + } + defer dstFile.Close() + + _, err = io.Copy(dstFile, reader) + if err != nil { + return fmt.Errorf("failed to copy file to the destination") + } + + return nil +} diff --git a/internal/http/routes/api/v1/bookmarks.go b/internal/http/routes/api/v1/bookmarks.go index a95ec538b..4bf97ff50 100644 --- a/internal/http/routes/api/v1/bookmarks.go +++ b/internal/http/routes/api/v1/bookmarks.go @@ -3,13 +3,10 @@ package api_v1 import ( "fmt" "net/http" - "os" - fp "path/filepath" "strconv" "sync" "github.com/gin-gonic/gin" - "github.com/go-shiori/shiori/internal/core" "github.com/go-shiori/shiori/internal/database" "github.com/go-shiori/shiori/internal/dependencies" "github.com/go-shiori/shiori/internal/http/context" @@ -186,35 +183,7 @@ func (r *BookmarksAPIRoutes) updateCache(c *gin.Context) { <-semaphore }() - // Download data from internet - content, contentType, err := core.DownloadBookmark(book.URL) - if err != nil { - chProblem <- book.ID - return - } - - request := core.ProcessRequest{ - DataDir: r.deps.Config.Storage.DataDir, - Bookmark: book, - Content: content, - ContentType: contentType, - KeepTitle: keep_metadata, - KeepExcerpt: keep_metadata, - } - - if payload.SkipExist && 
book.CreateEbook { - strID := strconv.Itoa(book.ID) - ebookPath := fp.Join(request.DataDir, "ebook", strID+".epub") - _, err = os.Stat(ebookPath) - if err == nil { - request.Bookmark.CreateEbook = false - request.Bookmark.HasEbook = true - } - } - - book, _, err = core.ProcessBookmark(r.deps, request) - content.Close() - + result, err := r.deps.Domains.Archiver.GenerateBookmarkArchive(book) if err != nil { r.logger.WithFields(logrus.Fields{ "bookmark_id": book.ID, @@ -225,6 +194,29 @@ func (r *BookmarksAPIRoutes) updateCache(c *gin.Context) { return } + // If user want to keep metadata, restore it + if keep_metadata { + result.Title = book.Title + result.Excerpt = book.Excerpt + } + + // Create ebook if needed + if payload.CreateEbook { + err = r.deps.Domains.Archiver.GenerateBookmarkEbook(model.EbookProcessRequest{ + Bookmark: *result, + SkipExisting: payload.SkipExist, + }) + if err != nil { + r.logger.WithFields(logrus.Fields{ + "bookmark_id": book.ID, + "url": book.URL, + "error": err, + }).Error("error generating ebook") + chProblem <- book.ID + return + } + } + // Update list of bookmarks mx.Lock() bookmarks[i] = book diff --git a/internal/http/routes/bookmark.go b/internal/http/routes/bookmark.go index e0073dcdd..6a23d3695 100644 --- a/internal/http/routes/bookmark.go +++ b/internal/http/routes/bookmark.go @@ -123,33 +123,26 @@ func (r *BookmarkRoutes) bookmarkArchiveFileHandler(c *gin.Context) { resourcePath, _ := c.Params.Get("filepath") resourcePath = strings.TrimPrefix(resourcePath, "/") - archive, err := r.deps.Domains.Archiver.GetBookmarkArchive(bookmark) + archiveFile, err := r.deps.Domains.Archiver.GetBookmarkArchiveFile(bookmark, resourcePath) if err != nil { - r.logger.WithError(err).Error("error opening archive") + r.logger.WithError(err).Error("error getting archive file") response.SendInternalServerError(c) return } - defer archive.Close() - if !archive.HasResource(resourcePath) { - response.NotFound(c) - return - } - - content, 
resourceContentType, err := archive.Read(resourcePath) - if err != nil { - r.logger.WithError(err).Error("error reading archive file") - response.SendInternalServerError(c) - return - } + r.logger.Warn(archiveFile) // Generate weak ETAG shioriUUID := uuid.NewV5(uuid.NamespaceURL, model.ShioriURLNamespace) - c.Header("Etag", fmt.Sprintf("W/%s", uuid.NewV5(shioriUUID, fmt.Sprintf("%x-%x-%x", bookmark.ID, resourcePath, len(content))))) + c.Header("Etag", fmt.Sprintf("W/%s", uuid.NewV5(shioriUUID, fmt.Sprintf("%x-%x-%x", bookmark.ID, resourcePath, archiveFile.Size())))) c.Header("Cache-Control", "max-age=31536000") - c.Header("Content-Encoding", "gzip") - c.Data(http.StatusOK, resourceContentType, content) + c.DataFromReader( + http.StatusOK, + archiveFile.Size(), + archiveFile.ContentType(), + archiveFile.Reader(), + archiveFile.AsHTTPHeaders()) } func (r *BookmarkRoutes) bookmarkThumbnailHandler(c *gin.Context) { diff --git a/internal/http/routes/bookmark_test.go b/internal/http/routes/bookmark_test.go index 400dd95b2..3b72cbea7 100644 --- a/internal/http/routes/bookmark_test.go +++ b/internal/http/routes/bookmark_test.go @@ -155,7 +155,7 @@ func TestBookmarkFileHandlers(t *testing.T) { bookmarks, err := deps.Database.SaveBookmarks(context.TODO(), true, *bookmark) require.NoError(t, err) - bookmark, err = deps.Domains.Archiver.DownloadBookmarkArchive(bookmarks[0]) + bookmark, err = deps.Domains.Archiver.GenerateBookmarkArchive(bookmarks[0]) require.NoError(t, err) bookmarks, err = deps.Database.SaveBookmarks(context.TODO(), false, *bookmark) diff --git a/internal/model/archiver.go b/internal/model/archiver.go new file mode 100644 index 000000000..0f068b101 --- /dev/null +++ b/internal/model/archiver.go @@ -0,0 +1,89 @@ +package model + +import ( + "fmt" + "io" + "strconv" +) + +const ( + ArchiverPDF = "pdf" + ArchiverWARC = "warc" +) + +type ArchiverRequest struct { + Bookmark BookmarkDTO + Content []byte + ContentType string +} + +func (a *ArchiverRequest) String() 
string { + return fmt.Sprintf("ArchiverRequest{ContentType: %s}", a.ContentType) +} + +func NewArchiverRequest(bookmark BookmarkDTO, contentType string, content []byte) *ArchiverRequest { + return &ArchiverRequest{ + Bookmark: bookmark, + Content: content, + ContentType: contentType, + } +} + +type ArchiveFile struct { + reader io.Reader + contentType string + size int64 // bytes + encoding string +} + +func (a *ArchiveFile) Reader() io.Reader { + return a.reader +} + +func (a *ArchiveFile) ContentType() string { + return a.contentType +} + +func (a *ArchiveFile) Size() int64 { + return a.size +} + +func (a *ArchiveFile) Encoding() string { + return a.encoding +} + +func (a *ArchiveFile) AsHTTPHeaders() map[string]string { + headers := map[string]string{ + "Content-Type": a.contentType, + } + + if a.size > 0 { + headers["Content-Length"] = strconv.FormatInt(a.size, 10) + } + + if a.encoding != "" { + headers["Content-Encoding"] = a.encoding + } + + return headers +} + +func NewArchiveFile(reader io.Reader, contentType, encoding string, size int64) *ArchiveFile { + return &ArchiveFile{ + reader: reader, + contentType: contentType, + encoding: encoding, + size: size, + } +} + +type EbookProcessRequest struct { + Bookmark BookmarkDTO + SkipExisting bool +} + +type Archiver interface { + Archive(*ArchiverRequest) (*BookmarkDTO, error) + Matches(*ArchiverRequest) bool + GetArchiveFile(bookmark BookmarkDTO, resourcePath string) (*ArchiveFile, error) +} diff --git a/internal/model/bookmark.go b/internal/model/bookmark.go index 12400ab5b..0e7816699 100644 --- a/internal/model/bookmark.go +++ b/internal/model/bookmark.go @@ -21,6 +21,8 @@ type BookmarkDTO struct { ImageURL string `db:"image_url" json:"imageURL"` HasContent bool `db:"has_content" json:"hasContent"` Tags []Tag `json:"tags"` + Archiver string `db:"archiver" json:"archiver"` + ArchivePath string `db:"archive_path" json:"archivePath"` HasArchive bool `json:"hasArchive"` HasEbook bool `json:"hasEbook"` 
CreateArchive bool `json:"create_archive"` // TODO: migrate outside the DTO diff --git a/internal/model/domains.go b/internal/model/domains.go index 8b8fefa2c..d52c25834 100644 --- a/internal/model/domains.go +++ b/internal/model/domains.go @@ -2,11 +2,11 @@ package model import ( "context" + "io" "io/fs" "os" "time" - "github.com/go-shiori/warc" "github.com/spf13/afero" ) @@ -25,15 +25,19 @@ type AccountsDomain interface { } type ArchiverDomain interface { - DownloadBookmarkArchive(book BookmarkDTO) (*BookmarkDTO, error) - GetBookmarkArchive(book *BookmarkDTO) (*warc.Archive, error) + GenerateBookmarkArchive(book BookmarkDTO) (*BookmarkDTO, error) + GenerateBookmarkEbook(book EbookProcessRequest) error + ProcessBookmarkArchive(*ArchiverRequest) (*BookmarkDTO, error) + GetBookmarkArchiveFile(book *BookmarkDTO, archivePath string) (*ArchiveFile, error) } type StorageDomain interface { + // Open(name string) (os.File, error) Stat(name string) (fs.FileInfo, error) FS() afero.Fs FileExists(path string) bool DirExists(path string) bool WriteData(dst string, data []byte) error WriteFile(dst string, src *os.File) error + WriteReader(dst string, src io.Reader) error } diff --git a/internal/testutil/shiori.go b/internal/testutil/shiori.go index 737266dc4..473577328 100644 --- a/internal/testutil/shiori.go +++ b/internal/testutil/shiori.go @@ -50,8 +50,9 @@ func GetTestConfigurationAndDependencies(t *testing.T, ctx context.Context, logg func GetValidBookmark() *model.BookmarkDTO { uuidV4, _ := uuid.NewV4() return &model.BookmarkDTO{ - URL: "https://github.com/go-shiori/shiori#" + uuidV4.String(), - Title: "Shiori repository", + URL: "https://github.com/go-shiori/shiori#" + uuidV4.String(), + Title: "Shiori repository", + Archiver: model.ArchiverWARC, } } diff --git a/internal/webserver/handler-api-ext.go b/internal/webserver/handler-api-ext.go index cf9270490..f062ab95c 100644 --- a/internal/webserver/handler-api-ext.go +++ b/internal/webserver/handler-api-ext.go @@ -1,10 
+1,8 @@ package webserver import ( - "bytes" "encoding/json" "fmt" - "io" "log" "net/http" "os" @@ -59,19 +57,6 @@ func (h *Handler) ApiInsertViaExtension(w http.ResponseWriter, r *http.Request, request.Title = request.URL } - // Since we are using extension, the extension might send the HTML content - // so no need to download it again here. However, if it's empty, it might be not HTML file - // so we download it here. - var contentType string - var contentBuffer io.Reader - - if request.HTML == "" { - contentBuffer, contentType, _ = core.DownloadBookmark(request.URL) - } else { - contentType = "text/html; charset=UTF-8" - contentBuffer = bytes.NewBufferString(request.HTML) - } - // Save the bookmark with whatever we already have downloaded // since we need the ID in order to download the archive // Only when old bookmark is not exists. @@ -82,38 +67,44 @@ func (h *Handler) ApiInsertViaExtension(w http.ResponseWriter, r *http.Request, return } book = books[0] + } else { + books, err := h.DB.SaveBookmarks(ctx, false, book) + if err != nil { + log.Printf("error saving bookmark before downloading content: %s", err) + return + } + book = books[0] } // At this point the web page already downloaded. // Time to process it. 
- if contentBuffer != nil { - book.CreateArchive = true - request := core.ProcessRequest{ - DataDir: h.DataDir, - Bookmark: book, - Content: contentBuffer, - ContentType: contentType, - } - - var isFatalErr bool - book, isFatalErr, err = core.ProcessBookmark(h.dependencies, request) - - if tmp, ok := contentBuffer.(io.ReadCloser); ok { - tmp.Close() - } + var result *model.BookmarkDTO + var errArchiver error + if request.HTML != "" { + archiverReq := model.NewArchiverRequest(book, "text/html; charset=UTF-8", []byte(request.HTML)) + result, errArchiver = h.dependencies.Domains.Archiver.ProcessBookmarkArchive(archiverReq) + } else { + result, errArchiver = h.dependencies.Domains.Archiver.GenerateBookmarkArchive(book) + } + if errArchiver != nil { + log.Printf("error downloading bookmark cache: %s", errArchiver) + w.WriteHeader(http.StatusInternalServerError) + return + } - // If we can't process or update the saved bookmark, just log it and continue on with the - // request. - if err != nil && isFatalErr { - log.Printf("failed to process bookmark: %v", err) - } else if _, err := h.DB.SaveBookmarks(ctx, false, book); err != nil { - log.Printf("error saving bookmark after downloading content: %s", err) - } + // Save the bookmark with whatever we already have downloaded + // since we need the ID in order to download the archive + books, err := h.DB.SaveBookmarks(ctx, request.ID == 0, *result) + if err != nil { + log.Printf("error saving bookmark from extension downloading content: %s", err) + w.WriteHeader(http.StatusInternalServerError) + return } + book = books[0] // Return the new bookmark w.Header().Set("Content-Type", "application/json") - err = json.NewEncoder(w).Encode(&book) + err = json.NewEncoder(w).Encode(&result) checkError(err) } diff --git a/internal/webserver/handler-api.go b/internal/webserver/handler-api.go index 0d71a8a3c..33d239c47 100644 --- a/internal/webserver/handler-api.go +++ b/internal/webserver/handler-api.go @@ -1,7 +1,6 @@ package webserver 
import ( - "context" "encoding/json" "fmt" "log" @@ -21,29 +20,21 @@ import ( "golang.org/x/crypto/bcrypt" ) -func downloadBookmarkContent(deps *dependencies.Dependencies, book *model.BookmarkDTO, dataDir string, request *http.Request, keepTitle, keepExcerpt bool) (*model.BookmarkDTO, error) { - content, contentType, err := core.DownloadBookmark(book.URL) +func downloadBookmarkContent(deps *dependencies.Dependencies, book *model.BookmarkDTO, keepTitle, keepExcerpt bool) (*model.BookmarkDTO, error) { + result, err := deps.Domains.Archiver.GenerateBookmarkArchive(*book) if err != nil { - return nil, fmt.Errorf("error downloading url: %s", err) + return nil, fmt.Errorf("error archiving url: %s", err) } - processRequest := core.ProcessRequest{ - DataDir: dataDir, - Bookmark: *book, - Content: content, - ContentType: contentType, - KeepTitle: keepTitle, - KeepExcerpt: keepExcerpt, + if keepTitle { + result.Title = book.Title } - result, isFatalErr, err := core.ProcessBookmark(deps, processRequest) - content.Close() - - if err != nil && isFatalErr { - return nil, fmt.Errorf("failed to process: %v", err) + if keepExcerpt { + result.Excerpt = book.Excerpt } - return &result, err + return result, err } // ApiLogout is handler for POST /api/logout @@ -240,29 +231,25 @@ func (h *Handler) ApiInsertBookmark(w http.ResponseWriter, r *http.Request, ps h if payload.Async { go func() { - bookmark, err := downloadBookmarkContent(h.dependencies, book, h.DataDir, r, userHasDefinedTitle, book.Excerpt != "") + book, err = downloadBookmarkContent(h.dependencies, book, userHasDefinedTitle, book.Excerpt != "") if err != nil { log.Printf("error downloading boorkmark: %s", err) return } - if _, err := h.DB.SaveBookmarks(context.Background(), false, *bookmark); err != nil { - log.Printf("failed to save bookmark: %s", err) - } + }() } else { // Workaround. 
Download content after saving the bookmark so we have the proper database // id already set in the object regardless of the database engine. - book, err = downloadBookmarkContent(h.dependencies, book, h.DataDir, r, userHasDefinedTitle, book.Excerpt != "") + book, err = downloadBookmarkContent(h.dependencies, book, userHasDefinedTitle, book.Excerpt != "") if err != nil { log.Printf("error downloading boorkmark: %s", err) - } else if _, err := h.DB.SaveBookmarks(ctx, false, *book); err != nil { - log.Printf("failed to save bookmark: %s", err) } } // Return the new bookmark w.Header().Set("Content-Type", "application/json") - err = json.NewEncoder(w).Encode(results[0]) + err = json.NewEncoder(w).Encode(book) checkError(err) }