Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Runtime: duckdb sqlite extensions based connector for sqlite #3018

Merged
merged 14 commits into from
Sep 26, 2023
Merged
2 changes: 2 additions & 0 deletions runtime/drivers/duckdb/duckdb.go
Original file line number Diff line number Diff line change
Expand Up @@ -274,6 +274,8 @@ func (c *connection) reopenDB() error {
"LOAD 'parquet'",
"INSTALL 'httpfs'",
"LOAD 'httpfs'",
"INSTALL 'sqlite'",
"LOAD 'sqlite'",
"SET max_expression_depth TO 250",
"SET timezone='UTC'",
}
Expand Down
51 changes: 51 additions & 0 deletions runtime/drivers/duckdb/transporter/sqlite_to_duckDB_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
package transporter

import (
	"context"
	"database/sql"
	"fmt"
	"path/filepath"
	"testing"

	"github.com/rilldata/rill/runtime/drivers"
	_ "github.com/rilldata/rill/runtime/drivers/sqlite"
	"github.com/rilldata/rill/runtime/pkg/activity"
	"github.com/stretchr/testify/require"
	"go.uber.org/zap"
	_ "modernc.org/sqlite"
)

// Test_sqliteToDuckDB_Transfer creates a small SQLite database on disk, ingests
// it into DuckDB through the sqlite_scan table function using the
// duckDBToDuckDB transporter, and verifies that both rows arrive in the target
// table.
func Test_sqliteToDuckDB_Transfer(t *testing.T) {
	// Keep the database file inside the per-test temp directory so that it is
	// removed automatically when the test finishes. Appending ".db" to the
	// directory name would create a sibling file that is never cleaned up.
	dbPath := filepath.Join(t.TempDir(), "test.db")

	db, err := sql.Open("sqlite", dbPath)
	require.NoError(t, err)

	_, err = db.Exec(`
	drop table if exists t;
	create table t(i);
	insert into t values(42), (314);
	`)
	require.NoError(t, err)
	// Close before DuckDB scans the file; a failed close would mean the
	// fixture may not be fully written.
	require.NoError(t, db.Close())

	to, err := drivers.Open("duckdb", map[string]any{"dsn": ""}, false, activity.NewNoopClient(), zap.NewNop())
	require.NoError(t, err)
	t.Cleanup(func() { require.NoError(t, to.Close()) })

	olap, ok := to.AsOLAP("")
	require.True(t, ok, "duckdb handle should expose an OLAP store")

	tr := &duckDBToDuckDB{
		to:     olap,
		logger: zap.NewNop(),
	}
	query := fmt.Sprintf("SELECT * FROM sqlite_scan('%s', 't');", dbPath)
	err = tr.Transfer(context.Background(), map[string]any{"sql": query}, map[string]any{"table": "test"}, &drivers.TransferOptions{Progress: drivers.NoOpProgress{}})
	require.NoError(t, err)

	res, err := olap.Execute(context.Background(), &drivers.Statement{Query: "SELECT count(*) from test"})
	require.NoError(t, err)
	// Fail with a clear message if the count query yields no row, instead of
	// surfacing a confusing Scan error.
	require.True(t, res.Next(), "count query returned no rows")
	var count int
	err = res.Scan(&count)
	require.NoError(t, err)
	require.Equal(t, 2, count)
	require.NoError(t, res.Close())
}
19 changes: 17 additions & 2 deletions runtime/drivers/sqlite/sqlite.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ import (

// init registers the sqlite driver with the drivers package under the name
// "sqlite", both through Register and through RegisterAsConnector.
func init() {
drivers.Register("sqlite", driver{})
drivers.RegisterAsConnector("sqlite", driver{})
}

type driver struct{}
Expand Down Expand Up @@ -55,11 +56,25 @@ func (d driver) Drop(config map[string]any, logger *zap.Logger) error {
}

func (d driver) Spec() drivers.Spec {
return drivers.Spec{}
return drivers.Spec{
DisplayName: "SQLite",
Description: "Import data from SQLite table to DuckDB using duckdb SQLite scanner.",
k-anshul marked this conversation as resolved.
Show resolved Hide resolved
SourceProperties: []drivers.PropertySchema{
{
Key: "sql",
Type: drivers.StringPropertyType,
Required: true,
DisplayName: "SQL",
Description: "Query to extract data from SQLite",
Placeholder: "SELECT * FROM sqlite_scan('sqlite.db', 'film');",
begelundmuller marked this conversation as resolved.
Show resolved Hide resolved
Hint: "https://duckdb.org/docs/extensions/sqlite_scanner#querying-individual-tables",
},
},
}
}

func (d driver) HasAnonymousSourceAccess(ctx context.Context, src map[string]any, logger *zap.Logger) (bool, error) {
return false, nil
return true, nil
}

type connection struct {
Expand Down
2 changes: 1 addition & 1 deletion runtime/pkg/duckdbsql/ast_traversal.go
Original file line number Diff line number Diff line change
Expand Up @@ -175,7 +175,7 @@ func (a *AST) traverseTableFunction(parent astNode, childKey string) {
case "read_csv_auto", "read_csv",
"read_parquet",
"read_json", "read_json_auto", "read_json_objects", "read_json_objects_auto",
"read_ndjson_objects", "read_ndjson", "read_ndjson_auto":
"read_ndjson_objects", "read_ndjson", "read_ndjson_auto", "sqlite_scan":
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This seems weird – the value passed to sqlite_scan isn't really a path. Should we handle it as a separate case? And also check the callers to duckdbsql that they can handle sqlite table functions (they might assume it's a file right now).

ref.Paths = getListOfValues[string](arguments[0])
default:
// only read_... are supported for now
Expand Down
2 changes: 2 additions & 0 deletions runtime/services/catalog/migrator/sources/sources.go
Original file line number Diff line number Diff line change
Expand Up @@ -385,6 +385,8 @@ func connectorVariables(src *runtimev1.Source, env map[string]string, repoRoot s
case "motherduck":
vars["token"] = env["token"]
vars["dsn"] = ""
case "sqlite":
vars["dsn"] = ""
k-anshul marked this conversation as resolved.
Show resolved Hide resolved
case "local_file":
vars["dsn"] = repoRoot
case "bigquery":
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@
// athena
"motherduck",
"postgres",
"sqlite",
"local_file",
"https",
];
Expand Down
11 changes: 11 additions & 0 deletions web-common/src/features/sources/modal/yupSchemas.ts
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,17 @@ export function getYupSchema(connector: V1ConnectorSpec) {
)
.required("Source name is required"),
});
case "sqlite":
return yup.object().shape({
sql: yup.string().required("sql is required"),
sourceName: yup
.string()
.matches(
/^[a-zA-Z_][a-zA-Z0-9_]*$/,
"Source name must start with a letter or underscore and contain only letters, numbers, and underscores"
)
.required("Source name is required"),
});
case "bigquery":
return yup.object().shape({
sql: yup.string().required("sql is required"),
Expand Down
3 changes: 3 additions & 0 deletions web-common/src/features/sources/sourceUtils.ts
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,9 @@ export function compileCreateSourceYAML(
values.sql = buildDuckDbQuery(values.path as string);
delete values.path;
break;
case "sqlite":
connectorName = "duckdb";
break;
k-anshul marked this conversation as resolved.
Show resolved Hide resolved
}

const compiledKeyValues = Object.entries(values)
Expand Down
Loading