Skip to content

Commit

Permalink
Add support for publish to database
Browse files Browse the repository at this point in the history
  • Loading branch information
waybackarchiver committed Sep 1, 2024
1 parent b15a77d commit e1aeeb6
Show file tree
Hide file tree
Showing 29 changed files with 689 additions and 36 deletions.
12 changes: 7 additions & 5 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -117,17 +117,19 @@ Examples:
Flags:
--chatid string Telegram channel id
-c, --config string Configuration file path, defaults: ./wayback.conf, ~/wayback.conf, /etc/wayback.conf
-d, --daemon strings Run as daemon service, supported services are telegram, web, mastodon, twitter, discord, slack, irc, xmpp
-d, --daemon strings Run as daemon service, supported services are telegram, web, mastodon, twitter, discord, slack, irc
--debug Enable debug mode (default mode is false)
--ga Wayback webpages to Ghostarchive (default true)
-h, --help help for wayback
--ia Wayback webpages to Internet Archive
--ia Wayback webpages to Internet Archive (default true)
--info Show application information
--ip Wayback webpages to IPFS
--ip Wayback webpages to IPFS (default true)
--ipfs-host string IPFS daemon host, do not require, unless enable ipfs (default "127.0.0.1")
-m, --ipfs-mode string IPFS mode (default "pinner")
-p, --ipfs-port uint IPFS daemon port (default 5001)
--is Wayback webpages to Archive Today
--ph Wayback webpages to Telegraph
--is Wayback webpages to Archive Today (default true)
--migrate Run SQL migrations
--ph Wayback webpages to Telegraph (default true)
--print Show application configurations
-t, --token string Telegram Bot API Token
--tor Snapshot webpage via Tor anonymity network
Expand Down
3 changes: 3 additions & 0 deletions cmd/wayback/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,8 @@ var (

configFile string

migrate bool

rootCmd = &cobra.Command{
Use: "wayback",
Short: "A command-line tool and daemon service for archiving webpages.",
Expand Down Expand Up @@ -88,6 +90,7 @@ func init() {
rootCmd.Flags().BoolVarP(&debug, "debug", "", false, "Enable debug mode (default mode is false)")
rootCmd.Flags().BoolVarP(&info, "info", "", false, "Show application information")
rootCmd.Flags().BoolVarP(&print, "print", "", false, "Show application configurations")
rootCmd.Flags().BoolVarP(&migrate, "migrate", "", false, "Run SQL migrations")
}

func checkRequiredFlags(cmd *cobra.Command) error {
Expand Down
26 changes: 25 additions & 1 deletion cmd/wayback/serve.go
Original file line number Diff line number Diff line change
Expand Up @@ -26,12 +26,36 @@ import (
var signalChan chan (os.Signal) = make(chan os.Signal, 1)

func serve(_ *cobra.Command, opts *config.Options, _ []string) {
store, err := storage.Open(opts, "")
db, err := storage.NewConnectionPool(
opts.DatabaseURL(),
opts.DatabaseMinConns(),
opts.DatabaseMaxConns(),
opts.DatabaseConnectionLifetime(),
)
if err != nil {
logger.Fatal("unable to connect to database: %v", err)
}
defer db.Close()

bolt, err := storage.Open(opts, "")
if err != nil {
logger.Fatal("open storage failed: %v", err)
}
store := storage.NewStorage(db, bolt)
defer store.Close()

if !opts.IsDefaultDatabaseURL() {
if err = store.Ping(); err != nil {
logger.Fatal("ping database failed: %v", err)
}

if migrate {
if err = storage.Migrate(db); err != nil {
logger.Fatal("migrate database failed: %v", err)
}
}
}

cfg := []pooling.Option{
pooling.Capacity(opts.PoolingSize()),
pooling.Timeout(opts.WaybackTimeout()),
Expand Down
180 changes: 180 additions & 0 deletions config/config_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -243,6 +243,186 @@ func TestIPFSMode(t *testing.T) {
}
}

// TestDatabaseURL checks that the value placed in WAYBACK_DATABASE_URL is the
// value reported by DatabaseURL after parsing the environment.
func TestDatabaseURL(t *testing.T) {
	cases := []struct {
		url      string
		expected string
	}{
		{url: defDatabaseURL, expected: defDatabaseURL},
		{url: "foo bar", expected: "foo bar"},
	}

	for idx, tc := range cases {
		name := strconv.Itoa(idx)
		t.Run(name, func(t *testing.T) {
			// Start from an empty environment so only the variable under test is set.
			os.Clearenv()
			os.Setenv("WAYBACK_DATABASE_URL", tc.url)

			opts, err := NewParser().ParseEnvironmentVariables()
			if err != nil {
				t.Fatalf(`Parsing environment variables failed: %v`, err)
			}

			if got := opts.DatabaseURL(); got != tc.expected {
				t.Errorf(`Unexpected database URL, got %v instead of %s`, got, tc.expected)
			}
		})
	}
}

// TestIsDefaultDatabaseURL checks that IsDefaultDatabaseURL is true when
// WAYBACK_DATABASE_URL holds the default value and false for another value.
func TestIsDefaultDatabaseURL(t *testing.T) {
	cases := []struct {
		url      string
		expected bool
	}{
		{url: defDatabaseURL, expected: true},
		{url: "foo bar", expected: false},
	}

	for idx, tc := range cases {
		name := strconv.Itoa(idx)
		t.Run(name, func(t *testing.T) {
			// Start from an empty environment so only the variable under test is set.
			os.Clearenv()
			os.Setenv("WAYBACK_DATABASE_URL", tc.url)

			opts, err := NewParser().ParseEnvironmentVariables()
			if err != nil {
				t.Fatalf(`Parsing environment variables failed: %v`, err)
			}

			if got := opts.IsDefaultDatabaseURL(); got != tc.expected {
				t.Errorf(`Unexpected default database URL, got %t instead of %t`, got, tc.expected)
			}
		})
	}
}

// TestDatabaseMaxConns checks that the integer placed in
// WAYBACK_DATABASE_MAX_CONNS is the value reported by DatabaseMaxConns.
func TestDatabaseMaxConns(t *testing.T) {
	cases := []struct {
		maxConns int
		expected int
	}{
		{maxConns: defDatabaseMaxConns, expected: defDatabaseMaxConns},
		{maxConns: 100, expected: 100},
	}

	for idx, tc := range cases {
		name := strconv.Itoa(idx)
		t.Run(name, func(t *testing.T) {
			// Start from an empty environment so only the variable under test is set.
			os.Clearenv()
			os.Setenv("WAYBACK_DATABASE_MAX_CONNS", strconv.Itoa(tc.maxConns))

			opts, err := NewParser().ParseEnvironmentVariables()
			if err != nil {
				t.Fatalf(`Parsing environment variables failed: %v`, err)
			}

			if got := opts.DatabaseMaxConns(); got != tc.expected {
				t.Errorf(`Unexpected maxConns, got %v instead of %d`, got, tc.expected)
			}
		})
	}
}

// TestDatabaseMinConns checks that the integer placed in
// WAYBACK_DATABASE_MIN_CONNS is the value reported by DatabaseMinConns.
func TestDatabaseMinConns(t *testing.T) {
	cases := []struct {
		minConns int
		expected int
	}{
		{minConns: defDatabaseMinConns, expected: defDatabaseMinConns},
		{minConns: 100, expected: 100},
	}

	for idx, tc := range cases {
		name := strconv.Itoa(idx)
		t.Run(name, func(t *testing.T) {
			// Start from an empty environment so only the variable under test is set.
			os.Clearenv()
			os.Setenv("WAYBACK_DATABASE_MIN_CONNS", strconv.Itoa(tc.minConns))

			opts, err := NewParser().ParseEnvironmentVariables()
			if err != nil {
				t.Fatalf(`Parsing environment variables failed: %v`, err)
			}

			if got := opts.DatabaseMinConns(); got != tc.expected {
				t.Errorf(`Unexpected minConns, got %v instead of %d`, got, tc.expected)
			}
		})
	}
}

// TestDatabaseConnectionLifetime checks that the integer placed in
// WAYBACK_DATABASE_CONNECTION_LIFETIME is reported by
// DatabaseConnectionLifetime as that many minutes.
func TestDatabaseConnectionLifetime(t *testing.T) {
	var tests = []struct {
		connectionLifetime int
		expected           time.Duration
	}{
		{
			connectionLifetime: defDatabaseConnectionLifetime,
			expected:           defDatabaseConnectionLifetime * time.Minute,
		},
		{
			connectionLifetime: 100,
			expected:           100 * time.Minute,
		},
	}

	for i, test := range tests {
		t.Run(strconv.Itoa(i), func(t *testing.T) {
			// Start from an empty environment so only the variable under test is set.
			os.Clearenv()
			os.Setenv("WAYBACK_DATABASE_CONNECTION_LIFETIME", strconv.Itoa(test.connectionLifetime))

			parser := NewParser()
			opts, err := parser.ParseEnvironmentVariables()
			if err != nil {
				t.Fatalf(`Parsing environment variables failed: %v`, err)
			}

			expected := test.expected
			got := opts.DatabaseConnectionLifetime()

			if got != expected {
				// Both values are time.Duration; format both with %v so the
				// message reads e.g. "got 5m0s instead of 1h40m0s" rather than
				// mixing a duration string with a raw nanosecond count.
				t.Errorf(`Unexpected connection lifetime, got %v instead of %v`, got, expected)
			}
		})
	}
}

func TestIPFSTarget(t *testing.T) {
var tests = []struct {
token string // managed ipfs token
Expand Down
45 changes: 45 additions & 0 deletions config/options.go
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,12 @@ const (

defOmnivoreApikey = ""

defRunMigrations = false
defDatabaseURL = "user=postgres password=postgres dbname=wayback sslmode=disable"
defDatabaseMaxConns = 20
defDatabaseMinConns = 1
defDatabaseConnectionLifetime = 5

maxAttachSizeTelegram = 50000000 // 50MB
maxAttachSizeDiscord = 8000000 // 8MB
maxAttachSizeSlack = 5000000000 // 5GB
Expand Down Expand Up @@ -131,6 +137,7 @@ type Options struct {

ipfs *ipfs
slots map[string]bool
database *database
telegram *telegram
mastodon *mastodon
discord *discord
Expand Down Expand Up @@ -160,6 +167,13 @@ type Options struct {
waybackFallback bool
}

// database groups the database connection settings populated from the
// WAYBACK_DATABASE_* configuration keys.
type database struct {
// url is the database connection string.
url string
// maxConns is the maximum number of database connections.
maxConns int
// minConns is the minimum number of database connections.
minConns int
// connectionLifetime is the connection lifetime expressed in minutes;
// DatabaseConnectionLifetime converts it to a time.Duration.
connectionLifetime int
}

type ipfs struct {
host string
port int
Expand Down Expand Up @@ -282,6 +296,12 @@ func NewOptions() *Options {
waybackMaxRetries: defWaybackMaxRetries,
waybackUserAgent: defWaybackUserAgent,
waybackFallback: defWaybackFallback,
database: &database{
url: defDatabaseURL,
maxConns: defDatabaseMaxConns,
minConns: defDatabaseMinConns,
connectionLifetime: defDatabaseConnectionLifetime,
},
ipfs: &ipfs{
host: defIPFSHost,
port: defIPFSPort,
Expand Down Expand Up @@ -443,6 +463,31 @@ func (o *Options) EnabledMetrics() bool {
return o.metrics
}

// IsDefaultDatabaseURL reports whether the configured database URL equals
// the built-in default value.
func (o *Options) IsDefaultDatabaseURL() bool {
	current := o.database.url
	return current == defDatabaseURL
}

// DatabaseURL returns the configured database URL.
func (o *Options) DatabaseURL() string {
	db := o.database
	return db.url
}

// DatabaseMaxConns returns the configured maximum number of database
// connections.
func (o *Options) DatabaseMaxConns() int {
	limit := o.database.maxConns
	return limit
}

// DatabaseMinConns returns the configured minimum number of database
// connections.
func (o *Options) DatabaseMinConns() int {
	floor := o.database.minConns
	return floor
}

// DatabaseConnectionLifetime returns the maximum amount of time a connection
// may be reused. The stored setting is a number of minutes and is converted
// to a time.Duration here.
func (o *Options) DatabaseConnectionLifetime() time.Duration {
	minutes := time.Duration(o.database.connectionLifetime)
	return minutes * time.Minute
}

// IPFSHost returns the host of IPFS daemon service.
func (o *Options) IPFSHost() string {
return o.ipfs.host
Expand Down
8 changes: 8 additions & 0 deletions config/parser.go
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,14 @@ func (p *Parser) parseLines(lines []string) (err error) {
p.opts.chromeRemoteAddr = parseString(val, defChromeRemoteAddr)
case "WAYBACK_PROXY":
p.opts.proxy = parseString(val, defProxy)
case "WAYBACK_DATABASE_URL":
p.opts.database.url = parseString(val, defDatabaseURL)
case "WAYBACK_DATABASE_MAX_CONNS":
p.opts.database.maxConns = parseInt(val, defDatabaseMaxConns)
case "WAYBACK_DATABASE_MIN_CONNS":
p.opts.database.minConns = parseInt(val, defDatabaseMinConns)
case "WAYBACK_DATABASE_CONNECTION_LIFETIME":
p.opts.database.connectionLifetime = parseInt(val, defDatabaseConnectionLifetime)
case "WAYBACK_IPFS_HOST":
p.opts.ipfs.host = parseString(val, defIPFSHost)
case "WAYBACK_IPFS_PORT":
Expand Down
4 changes: 4 additions & 0 deletions docs/environment.md
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,10 @@ Use the `-c` / `--config` option to specify the build definition file to use.
| - | `WAYBACK_MEILI_INDEXING` | `capsules` | Meilisearch indexing name |
| - | `WAYBACK_MEILI_APIKEY` | - | Meilisearch admin API key |
| - | `WAYBACK_OMNIVORE_APIKEY` | - | Omnivore API key |
| - | `WAYBACK_DATABASE_URL` | - | The URL of the Postgres database |
| - | `WAYBACK_DATABASE_MAX_CONNS` | `20` | Maximum connections of the Postgres database |
| - | `WAYBACK_DATABASE_MIN_CONNS` | `1` | Minimum connections of the Postgres database |
| - | `WAYBACK_DATABASE_CONNECTION_LIFETIME` | `5` | Connection lifetime of the Postgres database, in minutes |
| `-d`, `--daemon` | - | - | Run as daemon service, e.g. `telegram`, `web`, `mastodon`, `twitter`, `discord` |
| `--ia` | `WAYBACK_ENABLE_IA` | `true` | Wayback webpages to **Internet Archive** |
| `--is` | `WAYBACK_ENABLE_IS` | `true` | Wayback webpages to **Archive Today** |
Expand Down
13 changes: 13 additions & 0 deletions docs/integrations/datastore.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
---
title: Publish to Database
---

Note: Only Postgres is supported.

## Configuration

- `WAYBACK_DATABASE_URL`: The URL of the Postgres database, e.g. `user=postgres password=postgres dbname=wayback sslmode=disable`.
- `WAYBACK_DATABASE_MAX_CONNS`: Maximum connections of the Postgres database (optional, defaults to `20`).
- `WAYBACK_DATABASE_MIN_CONNS`: Minimum connections of the Postgres database (optional, defaults to `1`).
- `WAYBACK_DATABASE_CONNECTION_LIFETIME`: Connection lifetime of the Postgres database, in minutes (optional, defaults to `5`).

Loading

0 comments on commit e1aeeb6

Please sign in to comment.