Skip to content

Commit

Permalink
simplify the export by using reflection and dynamic parquet schema ge…
Browse files Browse the repository at this point in the history
…neration

Signed-off-by: Bruno Calza <[email protected]>
  • Loading branch information
brunocalza committed May 9, 2024
1 parent e5ba4ff commit 8f862a7
Show file tree
Hide file tree
Showing 16 changed files with 482 additions and 413 deletions.
2 changes: 0 additions & 2 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,5 +1,3 @@
*.db*
schemas.go
schemas.so
sqlite-exporter
output
63 changes: 63 additions & 0 deletions .golangci.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
linters-settings:
revive:
ignore-generated-header: false
severity: warning
confidence: 0.8
errorCode: 0
warningCode: 0
rules:
- name: blank-imports
- name: context-as-argument
- name: context-keys-type
- name: dot-imports
- name: error-return
- name: error-strings
- name: error-naming
- name: exported
- name: if-return
- name: increment-decrement
- name: var-naming
- name: var-declaration
- name: package-comments
disabled: true
- name: range
- name: receiver-naming
- name: time-naming
- name: unexported-return
- name: indent-error-flow
- name: errorf
- name: empty-block
- name: superfluous-else
- name: unused-parameter
- name: unreachable-code
- name: redefines-builtin-id
misspell:
locale: US

linters:
enable:
- revive
- misspell
- bodyclose
- unconvert
- goconst
- goimports
- unparam
- whitespace
- godot
- lll
- sqlclosecheck
- gofumpt

issues:
exclude-use-default: false

exclude:
- stutters

run:
timeout: 30m

skip-dirs:
- "pkg/database/db"
- "internal/router/controllers/apiv1"
14 changes: 14 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
# Lint
lint:
go run github.com/golangci/golangci-lint/cmd/[email protected] run
.PHONY: lint

# Test
test:
go test ./... -short -race -timeout 1m
.PHONY: test

# Build
build:
go build -o sqlite-exporter .
.PHONY: build
19 changes: 4 additions & 15 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,23 +5,17 @@ Exports SQLite tables to Basin. This tool is used to export Tableland tables to
## How it works?

Basin Exporter scans a SQLite database, exports the tables to Parquet files, and uploads them to Basin.
In order to export the tables to Parquet files, you must first generate the Go structs for each one of the tables. You do that by running

```bash
go run . generate [DB_PATH]
```

This will generate the structs in `schemas.go`.
Then you build a shared object out of that file by running
Build it by running

```bash
go build -buildmode=plugin -o schemas.so schemas.go
make build
```

Now, you can export the all tables to Parquet by running
Now, export all the tables to Parquet by running

```bash
go run . export [DB_PATH]
./sqlite-exporter export -o ./output [DB_PATH]
```

This command does not upload the files to Basin. It exports Parquet files inside the `output` directory. You can also choose specific tables to export by using the `tables` flag.
Expand All @@ -31,8 +25,3 @@ To export and push the files to Basin you must the machine identifier, together
```bash
go run . export --upload --machine=[HASH] [DB_PATH]
```

## Issues

- You have to generate the struct schemas. Gotta figure out a way of exporting without doing that.
- How to deal with tables that have GENERATED COLUMNS?
121 changes: 20 additions & 101 deletions cmd.go
Original file line number Diff line number Diff line change
@@ -1,94 +1,15 @@
package main

import (
"context"
"log"
"os"
"text/template"
"unicode"

"github.com/urfave/cli/v2"
)

// newGenerateCommand builds the "generate" CLI command, which renders one Go
// struct per SQLite table into schemas.go using the schemas.tmpl template.
//
// The database path is read from the first positional argument and the
// optional --tables flag is forwarded to NewSQLite. Failures while opening the
// database, listing tables, parsing the template, or writing the output
// terminate the process via log.Fatal. A failure to read the columns of a
// single table is logged and that table is skipped.
func newGenerateCommand() *cli.Command {
	return &cli.Command{
		Name:        "generate",
		Usage:       "Generate structs",
		ArgsUsage:   "<db_path>",
		Description: "Generate structs for each one of the SQLite table",
		Flags: []cli.Flag{
			&cli.StringSliceFlag{
				Name:        "tables",
				Category:    "OPTIONAL:",
				Usage:       "The tables you want to export",
				DefaultText: "empty",
			},
		},
		Action: func(cCtx *cli.Context) error {
			dbPath := cCtx.Args().First()

			db, err := NewSQLite(dbPath, cCtx.StringSlice("tables"))
			if err != nil {
				log.Fatal(err)
			}

			templateData := TemplateData{
				Structs: make([]Struct, 0),
				Tables:  make([]string, 0),
			}

			iter, err := db.GetTables(context.Background())
			if err != nil {
				log.Fatal(err)
			}
			defer iter.Close()

			for {
				table, hasNext := iter.Next()
				if !hasNext {
					break
				}

				columns, err := db.GetColumnsByTable(context.Background(), table)
				if err != nil {
					// Without its columns the table cannot be described, so skip
					// it instead of emitting a struct built from a failed lookup.
					log.Print(err)
					continue
				}

				// The table name is used as an identifier in the generated
				// code, so prefix it when it does not start with a letter.
				// Inspect the first rune (not the first byte) so multi-byte
				// names are classified correctly, and guard the empty name,
				// which would otherwise panic on indexing.
				if name := []rune(table); len(name) == 0 || !unicode.IsLetter(name[0]) {
					table = "X" + table
				}

				templateData.Tables = append(templateData.Tables, table)
				templateData.Structs = append(templateData.Structs, Struct{
					Table:  table,
					Schema: columns,
				})
			}

			funcMap := template.FuncMap{
				"CamelCase": camelCaseString,
			}

			// A missing or malformed template must be reported here; ignoring
			// the error would leave tmpl nil and panic inside Execute.
			tmpl, err := template.New("schemas.tmpl").Funcs(funcMap).ParseFiles("schemas.tmpl")
			if err != nil {
				log.Fatal(err)
			}

			// Truncate any previous output so a shorter regeneration does not
			// leave stale trailing content, and create the file as a regular
			// non-executable source file.
			f, err := os.OpenFile("schemas.go", os.O_CREATE|os.O_RDWR|os.O_TRUNC, 0o644)
			if err != nil {
				log.Fatal(err)
			}
			defer f.Close()

			if err := tmpl.Execute(f, templateData); err != nil {
				log.Fatal(err)
			}

			return nil
		},
	}
}

func newExportCommand() *cli.Command {
var upload bool
var tables *cli.StringSlice
var machine string
var machine, output string
return &cli.Command{
Name: "export",
Usage: "Export tables",
Expand All @@ -111,6 +32,15 @@ func newExportCommand() *cli.Command {
Destination: tables,
Value: nil,
},
&cli.StringFlag{
Name: "output",
Aliases: []string{"o"},
Category: "REQUIRED:",
Usage: "The path of the exported Parquet file on disk",
DefaultText: ".",
Destination: &output,
Value: ".",
},
&cli.StringFlag{
Name: "machine",
Category: "REQUIRED:",
Expand All @@ -123,7 +53,7 @@ func newExportCommand() *cli.Command {
Action: func(cCtx *cli.Context) error {
dbPath := cCtx.Args().First()

db, err := NewSQLite(dbPath, cCtx.StringSlice("tables"))
db, err := NewSQLite(dbPath)
if err != nil {
log.Fatal(err)
}
Expand All @@ -136,30 +66,19 @@ func newExportCommand() *cli.Command {
}
}

// TODO: make them configurable
pool := NewPool(10, 1000)
pool.Start(context.Background())

iter, err := db.GetTables(context.Background())
if err != nil {
log.Fatal(err)
}
defer iter.Close()

for {
table, hasNext := iter.Next()
if !hasNext {
break
}
tables := cCtx.StringSlice("tables")

task, err := NewTableExporter(db, table, sink)
if err != nil {
log.Print(err)
exporter := NewDatabaseExporter(db, sink, output)
if len(tables) > 0 {
if err := exporter.ExportTables(cCtx.Context, tables); err != nil {
log.Fatal(err)
}
pool.AddTask(task)
return nil
}

pool.Close()
if err := exporter.ExportAll(cCtx.Context); err != nil {
log.Fatal(err)
}

return nil
},
Expand Down
38 changes: 0 additions & 38 deletions cmd/main.go

This file was deleted.

Loading

0 comments on commit 8f862a7

Please sign in to comment.