From 0dce4e555dfb03cfcf9c41f7a0435897f458bfa2 Mon Sep 17 00:00:00 2001 From: Anshul Khandelwal Date: Mon, 4 Sep 2023 17:24:26 +0530 Subject: [PATCH 01/10] sqlite and postgres extensions based connectors --- runtime/compilers/rillv1/connectors.go | 4 ++ runtime/compilers/rillv1beta/connector.go | 4 ++ runtime/drivers/duckdb/duckdb.go | 72 ++++++++++++++----- .../transporter/sqlextension_to_duckDB.go | 54 ++++++++++++++ .../sqlextension_to_duckDB_test.go | 52 ++++++++++++++ runtime/reconcilers/source.go | 18 +++++ .../catalog/migrator/sources/sources.go | 22 ++++++ .../sources/modal/AddSourceModal.svelte | 4 +- .../src/features/sources/modal/yupSchemas.ts | 22 ++++++ 9 files changed, 234 insertions(+), 18 deletions(-) create mode 100644 runtime/drivers/duckdb/transporter/sqlextension_to_duckDB.go create mode 100644 runtime/drivers/duckdb/transporter/sqlextension_to_duckDB_test.go diff --git a/runtime/compilers/rillv1/connectors.go b/runtime/compilers/rillv1/connectors.go index f72cbb514bd..4e9f1c42c29 100644 --- a/runtime/compilers/rillv1/connectors.go +++ b/runtime/compilers/rillv1/connectors.go @@ -128,6 +128,10 @@ func driverSourceForAnonAccessCheck(connector string, src *runtimev1.SourceSpec) } case "motherduck": return &drivers.DatabaseSource{} + case "postgres_ext": + return &drivers.DatabaseSource{} + case "sqlite_ext": + return &drivers.DatabaseSource{} default: return nil } diff --git a/runtime/compilers/rillv1beta/connector.go b/runtime/compilers/rillv1beta/connector.go index eeec8f9608f..519adb0b810 100644 --- a/runtime/compilers/rillv1beta/connector.go +++ b/runtime/compilers/rillv1beta/connector.go @@ -180,6 +180,10 @@ func source(connector string, src *runtimev1.Source) drivers.Source { } case "motherduck": return &drivers.DatabaseSource{} + case "postgres_ext": + return &drivers.DatabaseSource{} + case "sqlite_ext": + return &drivers.DatabaseSource{} case "bigquery": return &drivers.DatabaseSource{ Props: props, diff --git a/runtime/drivers/duckdb/duckdb.go b/runtime/drivers/duckdb/duckdb.go index de28593cfc6..d15c67bd6a6 100644 --- a/runtime/drivers/duckdb/duckdb.go +++ b/runtime/drivers/duckdb/duckdb.go @@ -26,27 +26,62 @@ import ( func init() { drivers.Register("duckdb", Driver{name: "duckdb"}) drivers.Register("motherduck", Driver{name: "motherduck"}) + drivers.Register("postgres_ext", Driver{name: "postgres_ext"}) + drivers.Register("sqlite_ext", Driver{name: "sqlite_ext"}) drivers.RegisterAsConnector("motherduck", Driver{name: "motherduck"}) + drivers.RegisterAsConnector("postgres_ext", Driver{name: "postgres_ext"}) + drivers.RegisterAsConnector("sqlite_ext", Driver{name: "sqlite_ext"}) } -// spec for duckdb as motherduck connector -var spec = drivers.Spec{ - DisplayName: "MotherDuck", - Description: "Import data from MotherDuck.", - SourceProperties: []drivers.PropertySchema{ - { - Key: "sql", - Type: drivers.StringPropertyType, - Required: true, - DisplayName: "SQL", - Description: "Query to extract data from MotherDuck.", - Placeholder: "select * from my_db.my_table;", +var specs map[string]drivers.Spec = map[string]drivers.Spec{ + "motherduck": { + DisplayName: "MotherDuck", + Description: "Import data from MotherDuck.", + SourceProperties: []drivers.PropertySchema{ + { + Key: "sql", + Type: drivers.StringPropertyType, + Required: true, + DisplayName: "SQL", + Description: "Query to extract data from MotherDuck.", + Placeholder: "select * from my_db.my_table;", + }, + }, + ConfigProperties: []drivers.PropertySchema{ + { + Key: "token", + Secret: true, + }, + }, + }, + "postgres_ext": { + DisplayName: "Postgres", + Description: "Import data from Postgres table to DuckDB.", + SourceProperties: []drivers.PropertySchema{ + { + Key: "sql", + Type: drivers.StringPropertyType, + Required: true, + DisplayName: "SQL", + Description: "Query to extract data from postgres", + Placeholder: "SELECT * FROM postgres_scan('dbname=postgres user=postgres password=*** host=127.0.0.1', 'public', 'users');", + Hint: "https://duckdb.org/docs/extensions/postgres_scanner.html#querying-individual-tables", + }, }, }, - ConfigProperties: []drivers.PropertySchema{ - { - Key: "token", - Secret: true, + "sqlite_ext": { + DisplayName: "SQLite", + Description: "Import data from SQLite table to DuckDB.", + SourceProperties: []drivers.PropertySchema{ + { + Key: "sql", + Type: drivers.StringPropertyType, + Required: true, + DisplayName: "SQL", + Description: "Query to extract data from SQLite", + Placeholder: "SELECT * FROM sqlite_scan('sakila.db', 'film');", + Hint: "https://duckdb.org/docs/extensions/sqlite_scanner#querying-individual-tables", + }, }, }, } @@ -144,7 +179,7 @@ func (d Driver) Drop(config map[string]any, logger *zap.Logger) error { } func (d Driver) Spec() drivers.Spec { - return spec + return specs[d.name] } func (d Driver) HasAnonymousSourceAccess(ctx context.Context, src drivers.Source, logger *zap.Logger) (bool, error) { @@ -243,6 +278,9 @@ func (c *connection) AsTransporter(from, to drivers.Handle) (drivers.Transporter if from == to { return transporter.NewDuckDBToDuckDB(olap, c.logger), true } + if from.Driver() == "postgres_ext" || from.Driver() == "sqlite_ext" { + return transporter.NewSQLExtensionToDuckDB(from, olap, c.logger), true + } if from.Driver() == "motherduck" { return transporter.NewMotherduckToDuckDB(from, olap, c.logger), true } diff --git a/runtime/drivers/duckdb/transporter/sqlextension_to_duckDB.go b/runtime/drivers/duckdb/transporter/sqlextension_to_duckDB.go new file mode 100644 index 00000000000..efd0b07bf99 --- /dev/null +++ b/runtime/drivers/duckdb/transporter/sqlextension_to_duckDB.go @@ -0,0 +1,54 @@ +package transporter + +import ( + "context" + "fmt" + "strings" + + "github.com/rilldata/rill/runtime/drivers" + "go.uber.org/zap" +) + +type sqlextensionToDuckDB struct { + to drivers.OLAPStore + from drivers.Handle + logger *zap.Logger +} + +var _ drivers.Transporter = &sqlextensionToDuckDB{} + +// NewSQLExtensionToDuckDB returns a transporter to transfer data to duckdb from a sql extension supported by duckdb. +func NewSQLExtensionToDuckDB(from drivers.Handle, to drivers.OLAPStore, logger *zap.Logger) drivers.Transporter { + return &sqlextensionToDuckDB{ + to: to, + from: from, + logger: logger, + } +} + +func (t *sqlextensionToDuckDB) Transfer(ctx context.Context, source drivers.Source, sink drivers.Sink, opts *drivers.TransferOpts, p drivers.Progress) error { + src, ok := source.DatabaseSource() + if !ok { + return fmt.Errorf("type of source should be `drivers.DatabaseSource`") + } + fSink, ok := sink.DatabaseSink() + if !ok { + return fmt.Errorf("type of source should be `drivers.DatabaseSink`") + } + + extensionName := extName(t.from.Driver()) + if err := t.to.Exec(ctx, &drivers.Statement{Query: fmt.Sprintf("INSTALL '%s'; LOAD '%s';", extensionName, extensionName)}); err != nil { + return fmt.Errorf("failed to load %s extension %w", extensionName, err) + } + + userQuery := strings.TrimSpace(src.SQL) + userQuery, _ = strings.CutSuffix(userQuery, ";") // trim trailing semi colon + query := fmt.Sprintf("CREATE OR REPLACE TABLE %s AS (%s);", safeName(fSink.Table), userQuery) + + return t.to.Exec(ctx, &drivers.Statement{Query: query, Priority: 1}) +} + +func extName(name string) string { + before, _ := strings.CutSuffix(name, "_ext") + return before +} diff --git a/runtime/drivers/duckdb/transporter/sqlextension_to_duckDB_test.go b/runtime/drivers/duckdb/transporter/sqlextension_to_duckDB_test.go new file mode 100644 index 00000000000..209460f951d --- /dev/null +++ b/runtime/drivers/duckdb/transporter/sqlextension_to_duckDB_test.go @@ -0,0 +1,52 @@ +package transporter + +import ( + "context" + "database/sql" + "fmt" + "testing" + + "github.com/rilldata/rill/runtime/drivers" + "github.com/stretchr/testify/require" + "go.uber.org/zap" + _ "modernc.org/sqlite" +) + +func Test_sqlextensionToDuckDB_Transfer(t *testing.T) { + tempDir := t.TempDir() + + dbPath := fmt.Sprintf("%s.db", tempDir) + db, err := sql.Open("sqlite", dbPath) + require.NoError(t, err) + + _, err = db.Exec(` + drop table if exists t; + create table t(i); + insert into t values(42), (314); + `) + require.NoError(t, err) + db.Close() + + from, err := drivers.Open("sqlite_ext", map[string]any{"dsn": ""}, false, zap.NewNop()) + require.NoError(t, err) + to, err := drivers.Open("duckdb", map[string]any{"dsn": ""}, false, zap.NewNop()) + require.NoError(t, err) + olap, _ := to.AsOLAP("") + + tr := &sqlextensionToDuckDB{ + to: olap, + from: from, + logger: zap.NewNop(), + } + query := fmt.Sprintf("SELECT * FROM sqlite_scan('%s', 't');", dbPath) + err = tr.Transfer(context.Background(), &drivers.DatabaseSource{SQL: query}, &drivers.DatabaseSink{Table: "test"}, &drivers.TransferOpts{}, drivers.NoOpProgress{}) + require.NoError(t, err) + + res, err := olap.Execute(context.Background(), &drivers.Statement{Query: "SELECT count(*) from test"}) + require.NoError(t, err) + res.Next() + var count int + err = res.Scan(&count) + require.NoError(t, err) + require.Equal(t, 2, count) +} diff --git a/runtime/reconcilers/source.go b/runtime/reconcilers/source.go index 2c8f9632720..a955180205f 100644 --- a/runtime/reconcilers/source.go +++ b/runtime/reconcilers/source.go @@ -398,6 +398,24 @@ func driversSource(conn drivers.Handle, propsPB *structpb.Struct) (drivers.Sourc SQL: query, Database: db, }, nil + case "postgres_ext": + query, ok := props["sql"].(string) + if !ok { + return nil, fmt.Errorf("property \"sql\" is mandatory for connector \"postgres\"") + } + + return &drivers.DatabaseSource{ + SQL: query, + }, nil + case "sqlite_ext": + query, ok := props["sql"].(string) + if !ok { + return nil, fmt.Errorf("property \"sql\" is mandatory for connector \"sqlite\"") + } + + return &drivers.DatabaseSource{ + SQL: query, + }, nil case "duckdb": query, ok := props["sql"].(string) if !ok { diff --git a/runtime/services/catalog/migrator/sources/sources.go b/runtime/services/catalog/migrator/sources/sources.go index f5c32984e92..e14d0e2d66c 100644 --- a/runtime/services/catalog/migrator/sources/sources.go +++ b/runtime/services/catalog/migrator/sources/sources.go @@ -402,6 +402,24 @@ func source(connector string, src *runtimev1.Source) (drivers.Source, error) { SQL: query, Props: props, }, nil + case "postgres_ext": + query, ok := props["sql"].(string) + if !ok { + return nil, fmt.Errorf("property \"sql\" is mandatory for connector \"postgres\"") + } + + return &drivers.DatabaseSource{ + SQL: query, + }, nil + case "sqlite_ext": + query, ok := props["sql"].(string) + if !ok { + return nil, fmt.Errorf("property \"sql\" is mandatory for connector \"sqlite\"") + } + + return &drivers.DatabaseSource{ + SQL: query, + }, nil default: return nil, fmt.Errorf("connector %v not supported", connector) } @@ -433,6 +451,10 @@ func connectorVariables(src *runtimev1.Source, env map[string]string, repoRoot s case "motherduck": vars["token"] = env["token"] vars["dsn"] = "" + case "postgres_ext": + vars["dsn"] = "" + case "sqlite_ext": + vars["dsn"] = "" case "local_file": vars["dsn"] = repoRoot case "bigquery": diff --git a/web-common/src/features/sources/modal/AddSourceModal.svelte b/web-common/src/features/sources/modal/AddSourceModal.svelte index d85e7ba84d3..d60d216bec5 100644 --- a/web-common/src/features/sources/modal/AddSourceModal.svelte +++ b/web-common/src/features/sources/modal/AddSourceModal.svelte @@ -27,6 +27,8 @@ "https", "local_file", "motherduck", + "postgres_ext", + "sqlite_ext", "bigquery", ]; @@ -91,7 +93,7 @@
- {#if selectedConnector?.name === "gcs" || selectedConnector?.name === "s3" || selectedConnector?.name === "https" || selectedConnector?.name === "motherduck" || selectedConnector?.name === "bigquery"} + {#if selectedConnector?.name === "gcs" || selectedConnector?.name === "s3" || selectedConnector?.name === "https" || selectedConnector?.name === "motherduck" || selectedConnector?.name === "postgres_ext" || selectedConnector?.name === "sqlite_ext" || selectedConnector?.name === "bigquery"} {#key selectedConnector} {/key} diff --git a/web-common/src/features/sources/modal/yupSchemas.ts b/web-common/src/features/sources/modal/yupSchemas.ts index 3eb7b57be8e..5ea06c87c23 100644 --- a/web-common/src/features/sources/modal/yupSchemas.ts +++ b/web-common/src/features/sources/modal/yupSchemas.ts @@ -57,6 +57,28 @@ export function getYupSchema(connector: V1ConnectorSpec) { ) .required("Source name is required"), }); + case "postgres_ext": + return yup.object().shape({ + sql: yup.string().required("sql is required"), + sourceName: yup + .string() + .matches( + /^[a-zA-Z_][a-zA-Z0-9_]*$/, + "Source name must start with a letter or underscore and contain only letters, numbers, and underscores" + ) + .required("Source name is required"), + }); + case "sqlite_ext": + return yup.object().shape({ + sql: yup.string().required("sql is required"), + sourceName: yup + .string() + .matches( + /^[a-zA-Z_][a-zA-Z0-9_]*$/, + "Source name must start with a letter or underscore and contain only letters, numbers, and underscores" + ) + .required("Source name is required"), + }); case "bigquery": return yup.object().shape({ sql: yup.string().required("sql is required"), From ea178e01dcd5705a31aa2044928b7c329939ea10 Mon Sep 17 00:00:00 2001 From: Anshul Khandelwal Date: Fri, 8 Sep 2023 16:57:11 +0530 Subject: [PATCH 02/10] removing postgres connector --- runtime/compilers/rillv1/connectors.go | 4 +- runtime/compilers/rillv1beta/connector.go | 4 +- runtime/drivers/duckdb/duckdb.go | 71 +++++-------------- .../transporter/sqlextension_to_duckDB.go | 1 + .../sqlextension_to_duckDB_test.go | 6 +- runtime/drivers/sqlite/sqlite.go | 17 ++++- runtime/reconcilers/source.go | 11 +-- .../catalog/migrator/sources/sources.go | 15 +--- .../sources/modal/AddSourceModal.svelte | 5 +- .../src/features/sources/modal/yupSchemas.ts | 13 +--- 10 files changed, 47 insertions(+), 100 deletions(-) diff --git a/runtime/compilers/rillv1/connectors.go b/runtime/compilers/rillv1/connectors.go index 4e9f1c42c29..63d3b62f936 100644 --- a/runtime/compilers/rillv1/connectors.go +++ b/runtime/compilers/rillv1/connectors.go @@ -128,9 +128,7 @@ func driverSourceForAnonAccessCheck(connector string, src *runtimev1.SourceSpec) } case "motherduck": return &drivers.DatabaseSource{} - case "postgres_ext": - return &drivers.DatabaseSource{} - case "sqlite_ext": + case "sqlite": return &drivers.DatabaseSource{} default: return nil diff --git a/runtime/compilers/rillv1beta/connector.go b/runtime/compilers/rillv1beta/connector.go index 519adb0b810..4df147113cc 100644 --- a/runtime/compilers/rillv1beta/connector.go +++ b/runtime/compilers/rillv1beta/connector.go @@ -180,9 +180,7 @@ func source(connector string, src *runtimev1.Source) drivers.Source { } case "motherduck": return &drivers.DatabaseSource{} - case "postgres_ext": - return &drivers.DatabaseSource{} - case "sqlite_ext": + case "sqlite": return &drivers.DatabaseSource{} case "bigquery": return &drivers.DatabaseSource{ diff --git a/runtime/drivers/duckdb/duckdb.go b/runtime/drivers/duckdb/duckdb.go index 45366adcb4b..e3a41dafd42 100644 --- a/runtime/drivers/duckdb/duckdb.go +++ b/runtime/drivers/duckdb/duckdb.go @@ -26,62 +26,27 @@ import ( func init() { drivers.Register("duckdb", Driver{name: "duckdb"}) drivers.Register("motherduck", Driver{name: "motherduck"}) - drivers.Register("postgres_ext", Driver{name: "postgres_ext"}) - drivers.Register("sqlite_ext", Driver{name: "sqlite_ext"}) drivers.RegisterAsConnector("motherduck", Driver{name: "motherduck"}) - drivers.RegisterAsConnector("postgres_ext", Driver{name: "postgres_ext"}) - drivers.RegisterAsConnector("sqlite_ext", Driver{name: "sqlite_ext"}) } -var specs map[string]drivers.Spec = map[string]drivers.Spec{ - "motherduck": { - DisplayName: "MotherDuck", - Description: "Import data from MotherDuck.", - SourceProperties: []drivers.PropertySchema{ - { - Key: "sql", - Type: drivers.StringPropertyType, - Required: true, - DisplayName: "SQL", - Description: "Query to extract data from MotherDuck.", - Placeholder: "select * from my_db.my_table;", - }, - }, - ConfigProperties: []drivers.PropertySchema{ - { - Key: "token", - Secret: true, - }, - }, - }, - "postgres_ext": { - DisplayName: "Postgres", - Description: "Import data from Postgres table to DuckDB.", - SourceProperties: []drivers.PropertySchema{ - { - Key: "sql", - Type: drivers.StringPropertyType, - Required: true, - DisplayName: "SQL", - Description: "Query to extract data from postgres", - Placeholder: "SELECT * FROM postgres_scan('dbname=postgres user=postgres password=*** host=127.0.0.1', 'public', 'users');", - Hint: "https://duckdb.org/docs/extensions/postgres_scanner.html#querying-individual-tables", - }, +// spec for duckdb as motherduck connector +var spec = drivers.Spec{ + DisplayName: "MotherDuck", + Description: "Import data from MotherDuck.", + SourceProperties: []drivers.PropertySchema{ + { + Key: "sql", + Type: drivers.StringPropertyType, + Required: true, + DisplayName: "SQL", + Description: "Query to extract data from MotherDuck.", + Placeholder: "select * from my_db.my_table;", }, }, - "sqlite_ext": { - DisplayName: "SQLite", - Description: "Import data from SQLite table to DuckDB.", - SourceProperties: []drivers.PropertySchema{ - { - Key: "sql", - Type: drivers.StringPropertyType, - Required: true, - DisplayName: "SQL", - Description: "Query to extract data from SQLite", - Placeholder: "SELECT * FROM sqlite_scan('sakila.db', 'film');", - Hint: "https://duckdb.org/docs/extensions/sqlite_scanner#querying-individual-tables", - }, + ConfigProperties: []drivers.PropertySchema{ + { + Key: "token", + Secret: true, }, }, } @@ -170,7 +135,7 @@ func (d Driver) Drop(config map[string]any, logger *zap.Logger) error { } func (d Driver) Spec() drivers.Spec { - return specs[d.name] + return spec } func (d Driver) HasAnonymousSourceAccess(ctx context.Context, src drivers.Source, logger *zap.Logger) (bool, error) { @@ -269,7 +234,7 @@ func (c *connection) AsTransporter(from, to drivers.Handle) (drivers.Transporter if from == to { return transporter.NewDuckDBToDuckDB(olap, c.logger), true } - if from.Driver() == "postgres_ext" || from.Driver() == "sqlite_ext" { + if from.Driver() == "sqlite" { return transporter.NewSQLExtensionToDuckDB(from, olap, c.logger), true } if from.Driver() == "motherduck" { diff --git a/runtime/drivers/duckdb/transporter/sqlextension_to_duckDB.go b/runtime/drivers/duckdb/transporter/sqlextension_to_duckDB.go index efd0b07bf99..46591081c8c 100644 --- a/runtime/drivers/duckdb/transporter/sqlextension_to_duckDB.go +++ b/runtime/drivers/duckdb/transporter/sqlextension_to_duckDB.go @@ -18,6 +18,7 @@ type sqlextensionToDuckDB struct { var _ drivers.Transporter = &sqlextensionToDuckDB{} // NewSQLExtensionToDuckDB returns a transporter to transfer data to duckdb from a sql extension supported by duckdb. +// Currently only sqlite extension is supported. Postgres is not supported due to licensing issues. func NewSQLExtensionToDuckDB(from drivers.Handle, to drivers.OLAPStore, logger *zap.Logger) drivers.Transporter { return &sqlextensionToDuckDB{ to: to, diff --git a/runtime/drivers/duckdb/transporter/sqlextension_to_duckDB_test.go b/runtime/drivers/duckdb/transporter/sqlextension_to_duckDB_test.go index 209460f951d..59571b7b439 100644 --- a/runtime/drivers/duckdb/transporter/sqlextension_to_duckDB_test.go +++ b/runtime/drivers/duckdb/transporter/sqlextension_to_duckDB_test.go @@ -7,6 +7,8 @@ import ( "testing" "github.com/rilldata/rill/runtime/drivers" + _ "github.com/rilldata/rill/runtime/drivers/sqlite" + "github.com/rilldata/rill/runtime/pkg/activity" "github.com/stretchr/testify/require" "go.uber.org/zap" _ "modernc.org/sqlite" @@ -27,9 +29,9 @@ func Test_sqlextensionToDuckDB_Transfer(t *testing.T) { require.NoError(t, err) db.Close() - from, err := drivers.Open("sqlite_ext", map[string]any{"dsn": ""}, false, zap.NewNop()) + from, err := drivers.Open("sqlite", map[string]any{"dsn": ""}, false, activity.NewNoopClient(), zap.NewNop()) require.NoError(t, err) - to, err := drivers.Open("duckdb", map[string]any{"dsn": ""}, false, zap.NewNop()) + to, err := drivers.Open("duckdb", map[string]any{"dsn": ""}, false, activity.NewNoopClient(), zap.NewNop()) require.NoError(t, err) olap, _ := to.AsOLAP("") diff --git a/runtime/drivers/sqlite/sqlite.go b/runtime/drivers/sqlite/sqlite.go index c1961b4f91f..98288f203d8 100644 --- a/runtime/drivers/sqlite/sqlite.go +++ b/runtime/drivers/sqlite/sqlite.go @@ -17,6 +17,7 @@ import ( func init() { drivers.Register("sqlite", driver{}) + drivers.RegisterAsConnector("sqlite", driver{}) } type driver struct{} @@ -55,7 +56,21 @@ func (d driver) Drop(config map[string]any, logger *zap.Logger) error { } func (d driver) Spec() drivers.Spec { - return drivers.Spec{} + return drivers.Spec{ + DisplayName: "SQLite", + Description: "Import data from SQLite table to DuckDB.", + SourceProperties: []drivers.PropertySchema{ + { + Key: "sql", + Type: drivers.StringPropertyType, + Required: true, + DisplayName: "SQL", + Description: "Query to extract data from SQLite", + Placeholder: "SELECT * FROM sqlite_scan('/Users/kanshul/.config/gcloud/access_tokens.db', 'film');", + Hint: "https://duckdb.org/docs/extensions/sqlite_scanner#querying-individual-tables", + }, + }, + } } func (d driver) HasAnonymousSourceAccess(ctx context.Context, src drivers.Source, logger *zap.Logger) (bool, error) { diff --git a/runtime/reconcilers/source.go b/runtime/reconcilers/source.go index a955180205f..efbb7b3de31 100644 --- a/runtime/reconcilers/source.go +++ b/runtime/reconcilers/source.go @@ -398,16 +398,7 @@ func driversSource(conn drivers.Handle, propsPB *structpb.Struct) (drivers.Sourc SQL: query, Database: db, }, nil - case "postgres_ext": - query, ok := props["sql"].(string) - if !ok { - return nil, fmt.Errorf("property \"sql\" is mandatory for connector \"postgres\"") - } - - return &drivers.DatabaseSource{ - SQL: query, - }, nil - case "sqlite_ext": + case "sqlite": query, ok := props["sql"].(string) if !ok { return nil, fmt.Errorf("property \"sql\" is mandatory for connector \"sqlite\"") diff --git a/runtime/services/catalog/migrator/sources/sources.go b/runtime/services/catalog/migrator/sources/sources.go index 2c0e110dbbe..9f389393466 100644 --- a/runtime/services/catalog/migrator/sources/sources.go +++ b/runtime/services/catalog/migrator/sources/sources.go @@ -403,16 +403,7 @@ func source(connector string, src *runtimev1.Source) (drivers.Source, error) { SQL: query, Props: props, }, nil - case "postgres_ext": - query, ok := props["sql"].(string) - if !ok { - return nil, fmt.Errorf("property \"sql\" is mandatory for connector \"postgres\"") - } - - return &drivers.DatabaseSource{ - SQL: query, - }, nil - case "sqlite_ext": + case "sqlite": query, ok := props["sql"].(string) if !ok { return nil, fmt.Errorf("property \"sql\" is mandatory for connector \"sqlite\"") @@ -452,9 +443,7 @@ func connectorVariables(src *runtimev1.Source, env map[string]string, repoRoot s case "motherduck": vars["token"] = env["token"] vars["dsn"] = "" - case "postgres_ext": - vars["dsn"] = "" - case "sqlite_ext": + case "sqlite": vars["dsn"] = "" case "local_file": vars["dsn"] = repoRoot diff --git a/web-common/src/features/sources/modal/AddSourceModal.svelte b/web-common/src/features/sources/modal/AddSourceModal.svelte index d60d216bec5..de50a5ab278 100644 --- a/web-common/src/features/sources/modal/AddSourceModal.svelte +++ b/web-common/src/features/sources/modal/AddSourceModal.svelte @@ -27,8 +27,7 @@ "https", "local_file", "motherduck", - "postgres_ext", - "sqlite_ext", + "sqlite", "bigquery", ]; @@ -93,7 +92,7 @@
- {#if selectedConnector?.name === "gcs" || selectedConnector?.name === "s3" || selectedConnector?.name === "https" || selectedConnector?.name === "motherduck" || selectedConnector?.name === "postgres_ext" || selectedConnector?.name === "sqlite_ext" || selectedConnector?.name === "bigquery"} + {#if selectedConnector?.name === "gcs" || selectedConnector?.name === "s3" || selectedConnector?.name === "https" || selectedConnector?.name === "motherduck" || selectedConnector?.name === "postgres_ext" || selectedConnector?.name === "sqlite"} {#key selectedConnector} {/key} diff --git a/web-common/src/features/sources/modal/yupSchemas.ts b/web-common/src/features/sources/modal/yupSchemas.ts index 5ea06c87c23..2c4962946e5 100644 --- a/web-common/src/features/sources/modal/yupSchemas.ts +++ b/web-common/src/features/sources/modal/yupSchemas.ts @@ -57,18 +57,7 @@ export function getYupSchema(connector: V1ConnectorSpec) { ) .required("Source name is required"), }); - case "postgres_ext": - return yup.object().shape({ - sql: yup.string().required("sql is required"), - sourceName: yup - .string() - .matches( - /^[a-zA-Z_][a-zA-Z0-9_]*$/, - "Source name must start with a letter or underscore and contain only letters, numbers, and underscores" - ) - .required("Source name is required"), - }); - case "sqlite_ext": + case "sqlite": return yup.object().shape({ sql: yup.string().required("sql is required"), sourceName: yup From 15a3a5ced5c1462129f4e167aaccc2e0135b1bf6 Mon Sep 17 00:00:00 2001 From: Anshul Khandelwal Date: Fri, 8 Sep 2023 17:00:39 +0530 Subject: [PATCH 03/10] self review --- .../drivers/duckdb/transporter/sqlextension_to_duckDB.go | 7 +------ runtime/drivers/sqlite/sqlite.go | 2 +- .../src/features/sources/modal/AddSourceModal.svelte | 2 +- 3 files changed, 3 insertions(+), 8 deletions(-) diff --git a/runtime/drivers/duckdb/transporter/sqlextension_to_duckDB.go b/runtime/drivers/duckdb/transporter/sqlextension_to_duckDB.go index 46591081c8c..c162e0b0e80 100644 --- a/runtime/drivers/duckdb/transporter/sqlextension_to_duckDB.go +++ b/runtime/drivers/duckdb/transporter/sqlextension_to_duckDB.go @@ -37,7 +37,7 @@ func (t *sqlextensionToDuckDB) Transfer(ctx context.Context, source drivers.Sour return fmt.Errorf("type of source should be `drivers.DatabaseSink`") } - extensionName := extName(t.from.Driver()) + extensionName := t.from.Driver() if err := t.to.Exec(ctx, &drivers.Statement{Query: fmt.Sprintf("INSTALL '%s'; LOAD '%s';", extensionName, extensionName)}); err != nil { return fmt.Errorf("failed to load %s extension %w", extensionName, err) } @@ -48,8 +48,3 @@ func (t *sqlextensionToDuckDB) Transfer(ctx context.Context, source drivers.Sour return t.to.Exec(ctx, &drivers.Statement{Query: query, Priority: 1}) } - -func extName(name string) string { - before, _ := strings.CutSuffix(name, "_ext") - return before -} diff --git a/runtime/drivers/sqlite/sqlite.go b/runtime/drivers/sqlite/sqlite.go index 98288f203d8..c3877bd96aa 100644 --- a/runtime/drivers/sqlite/sqlite.go +++ b/runtime/drivers/sqlite/sqlite.go @@ -66,7 +66,7 @@ func (d driver) Spec() drivers.Spec { Required: true, DisplayName: "SQL", Description: "Query to extract data from SQLite", - Placeholder: "SELECT * FROM sqlite_scan('/Users/kanshul/.config/gcloud/access_tokens.db', 'film');", + Placeholder: "SELECT * FROM sqlite_scan('sqlite.db', 'film');", Hint: "https://duckdb.org/docs/extensions/sqlite_scanner#querying-individual-tables", }, }, diff --git a/web-common/src/features/sources/modal/AddSourceModal.svelte b/web-common/src/features/sources/modal/AddSourceModal.svelte index de50a5ab278..26d33fe8433 100644 --- a/web-common/src/features/sources/modal/AddSourceModal.svelte +++ b/web-common/src/features/sources/modal/AddSourceModal.svelte @@ -92,7 +92,7 @@
- {#if selectedConnector?.name === "gcs" || selectedConnector?.name === "s3" || selectedConnector?.name === "https" || selectedConnector?.name === "motherduck" || selectedConnector?.name === "postgres_ext" || selectedConnector?.name === "sqlite"} + {#if selectedConnector?.name === "gcs" || selectedConnector?.name === "s3" || selectedConnector?.name === "https" || selectedConnector?.name === "motherduck" || selectedConnector?.name === "bigquery" || selectedConnector?.name === "sqlite"} {#key selectedConnector} {/key} From 928e3d1ab0bccab73dc7180790a083a8b24dbc9f Mon Sep 17 00:00:00 2001 From: Anshul Khandelwal Date: Thu, 14 Sep 2023 11:34:18 +0530 Subject: [PATCH 04/10] review comments --- .../transporter/sqlextension_to_duckDB.go | 32 +++++++++++-------- .../sqlextension_to_duckDB_test.go | 2 +- 2 files changed, 19 insertions(+), 15 deletions(-) diff --git a/runtime/drivers/duckdb/transporter/sqlextension_to_duckDB.go b/runtime/drivers/duckdb/transporter/sqlextension_to_duckDB.go index c162e0b0e80..34667d15617 100644 --- a/runtime/drivers/duckdb/transporter/sqlextension_to_duckDB.go +++ b/runtime/drivers/duckdb/transporter/sqlextension_to_duckDB.go @@ -2,6 +2,7 @@ package transporter import ( "context" + "database/sql" "fmt" "strings" @@ -27,24 +28,27 @@ func NewSQLExtensionToDuckDB(from drivers.Handle, to drivers.OLAPStore, logger * } } -func (t *sqlextensionToDuckDB) Transfer(ctx context.Context, source drivers.Source, sink drivers.Sink, opts *drivers.TransferOpts, p drivers.Progress) error { - src, ok := source.DatabaseSource() - if !ok { - return fmt.Errorf("type of source should be `drivers.DatabaseSource`") +func (t *sqlextensionToDuckDB) Transfer(ctx context.Context, srcProps, sinkProps map[string]any, opts *drivers.TransferOpts, p drivers.Progress) error { + srcCfg, err := parseDBSourceProperties(srcProps) + if err != nil { + return err } - fSink, ok := sink.DatabaseSink() - if !ok { - return fmt.Errorf("type of source should be `drivers.DatabaseSink`") + + sinkCfg, err := parseSinkProperties(sinkProps) + if err != nil { + return err } extensionName := t.from.Driver() - if err := t.to.Exec(ctx, &drivers.Statement{Query: fmt.Sprintf("INSTALL '%s'; LOAD '%s';", extensionName, extensionName)}); err != nil { - return fmt.Errorf("failed to load %s extension %w", extensionName, err) - } + return t.to.WithConnection(ctx, 1, true, false, func(ctx, ensuredCtx context.Context, _ *sql.Conn) error { + if err := t.to.Exec(ctx, &drivers.Statement{Query: fmt.Sprintf("INSTALL '%s'; LOAD '%s';", extensionName, extensionName)}); err != nil { + return fmt.Errorf("failed to load %s extension %w", extensionName, err) + } - userQuery := strings.TrimSpace(src.SQL) - userQuery, _ = strings.CutSuffix(userQuery, ";") // trim trailing semi colon - query := fmt.Sprintf("CREATE OR REPLACE TABLE %s AS (%s);", safeName(fSink.Table), userQuery) + userQuery := strings.TrimSpace(srcCfg.SQL) + userQuery, _ = strings.CutSuffix(userQuery, ";") // trim trailing semi colon + query := fmt.Sprintf("CREATE OR REPLACE TABLE %s AS (%s);", safeName(sinkCfg.Table), userQuery) - return t.to.Exec(ctx, &drivers.Statement{Query: query, Priority: 1}) + return t.to.Exec(ctx, &drivers.Statement{Query: query, Priority: 1}) + }) } diff --git a/runtime/drivers/duckdb/transporter/sqlextension_to_duckDB_test.go b/runtime/drivers/duckdb/transporter/sqlextension_to_duckDB_test.go index 59571b7b439..aea4981bd2b 100644 --- a/runtime/drivers/duckdb/transporter/sqlextension_to_duckDB_test.go +++ b/runtime/drivers/duckdb/transporter/sqlextension_to_duckDB_test.go @@ -41,7 +41,7 @@ func Test_sqlextensionToDuckDB_Transfer(t *testing.T) { logger: zap.NewNop(), } query := fmt.Sprintf("SELECT * FROM sqlite_scan('%s', 't');", dbPath) - err = tr.Transfer(context.Background(), &drivers.DatabaseSource{SQL: query}, &drivers.DatabaseSink{Table: "test"}, &drivers.TransferOpts{}, drivers.NoOpProgress{}) + err = tr.Transfer(context.Background(), map[string]any{"sql": query}, map[string]any{"table": "test"}, &drivers.TransferOpts{}, drivers.NoOpProgress{}) require.NoError(t, err) res, err := olap.Execute(context.Background(), &drivers.Statement{Query: "SELECT count(*) from test"}) From aae84382e3fe686062b8a2209366804172902731 Mon Sep 17 00:00:00 2001 From: Anshul Khandelwal Date: Mon, 25 Sep 2023 16:01:48 +0530 Subject: [PATCH 05/10] auto load sqlite connector --- runtime/drivers/duckdb/duckdb.go | 5 +- .../transporter/sqlextension_to_duckDB.go | 54 ------------------- .../sqlextension_to_duckDB_test.go | 7 +-- runtime/drivers/sqlite/sqlite.go | 2 +- runtime/pkg/duckdbsql/ast_traversal.go | 2 +- .../sources/modal/AddSourceModal.svelte | 1 + .../src/features/sources/sourceUtils.ts | 3 ++ 7 files changed, 10 insertions(+), 64 deletions(-) delete mode 100644 runtime/drivers/duckdb/transporter/sqlextension_to_duckDB.go diff --git a/runtime/drivers/duckdb/duckdb.go b/runtime/drivers/duckdb/duckdb.go index 54bffabc62d..a15c6474b4f 100644 --- a/runtime/drivers/duckdb/duckdb.go +++ b/runtime/drivers/duckdb/duckdb.go @@ -233,9 +233,6 @@ func (c *connection) AsTransporter(from, to drivers.Handle) (drivers.Transporter if from == to { return transporter.NewDuckDBToDuckDB(olap, c.logger), true } - if from.Driver() == "sqlite" { - return transporter.NewSQLExtensionToDuckDB(from, olap, c.logger), true - } if from.Driver() == "motherduck" { return transporter.NewMotherduckToDuckDB(from, olap, c.logger), true } @@ -277,6 +274,8 @@ func (c *connection) reopenDB() error { "LOAD 'parquet'", "INSTALL 'httpfs'", "LOAD 'httpfs'", + "INSTALL 'sqlite'", + "LOAD 'sqlite'", "SET max_expression_depth TO 250", "SET timezone='UTC'", } diff --git a/runtime/drivers/duckdb/transporter/sqlextension_to_duckDB.go b/runtime/drivers/duckdb/transporter/sqlextension_to_duckDB.go deleted file mode 100644 index 34667d15617..00000000000 --- a/runtime/drivers/duckdb/transporter/sqlextension_to_duckDB.go +++ /dev/null @@ -1,54 +0,0 @@ -package transporter - -import ( - "context" - "database/sql" - "fmt" - "strings" - - "github.com/rilldata/rill/runtime/drivers" - "go.uber.org/zap" -) - -type sqlextensionToDuckDB struct { - to drivers.OLAPStore - from drivers.Handle - logger *zap.Logger -} - -var _ drivers.Transporter = &sqlextensionToDuckDB{} - -// NewSQLExtensionToDuckDB returns a transporter to transfer data to duckdb from a sql extension supported by duckdb. -// Currently only sqlite extension is supported. Postgres is not supported due to licensing issues. -func NewSQLExtensionToDuckDB(from drivers.Handle, to drivers.OLAPStore, logger *zap.Logger) drivers.Transporter { - return &sqlextensionToDuckDB{ - to: to, - from: from, - logger: logger, - } -} - -func (t *sqlextensionToDuckDB) Transfer(ctx context.Context, srcProps, sinkProps map[string]any, opts *drivers.TransferOpts, p drivers.Progress) error { - srcCfg, err := parseDBSourceProperties(srcProps) - if err != nil { - return err - } - - sinkCfg, err := parseSinkProperties(sinkProps) - if err != nil { - return err - } - - extensionName := t.from.Driver() - return t.to.WithConnection(ctx, 1, true, false, func(ctx, ensuredCtx context.Context, _ *sql.Conn) error { - if err := t.to.Exec(ctx, &drivers.Statement{Query: fmt.Sprintf("INSTALL '%s'; LOAD '%s';", extensionName, extensionName)}); err != nil { - return fmt.Errorf("failed to load %s extension %w", extensionName, err) - } - - userQuery := strings.TrimSpace(srcCfg.SQL) - userQuery, _ = strings.CutSuffix(userQuery, ";") // trim trailing semi colon - query := fmt.Sprintf("CREATE OR REPLACE TABLE %s AS (%s);", safeName(sinkCfg.Table), userQuery) - - return t.to.Exec(ctx, &drivers.Statement{Query: query, Priority: 1}) - }) -} diff --git a/runtime/drivers/duckdb/transporter/sqlextension_to_duckDB_test.go b/runtime/drivers/duckdb/transporter/sqlextension_to_duckDB_test.go index aea4981bd2b..b86c5209510 100644 --- a/runtime/drivers/duckdb/transporter/sqlextension_to_duckDB_test.go +++ b/runtime/drivers/duckdb/transporter/sqlextension_to_duckDB_test.go @@ -29,19 +29,16 @@ func Test_sqlextensionToDuckDB_Transfer(t *testing.T) { require.NoError(t, err) db.Close() - from, err := drivers.Open("sqlite", map[string]any{"dsn": ""}, false, activity.NewNoopClient(), zap.NewNop()) - require.NoError(t, err) to, err := drivers.Open("duckdb", map[string]any{"dsn": ""}, false, activity.NewNoopClient(), zap.NewNop()) require.NoError(t, err) olap, _ := to.AsOLAP("") - tr := &sqlextensionToDuckDB{ + tr := &duckDBToDuckDB{ to: olap, - from: from, logger: zap.NewNop(), } query := fmt.Sprintf("SELECT * FROM sqlite_scan('%s', 't');", dbPath) - err = tr.Transfer(context.Background(), map[string]any{"sql": query}, map[string]any{"table": "test"}, &drivers.TransferOpts{}, drivers.NoOpProgress{}) + err = tr.Transfer(context.Background(), map[string]any{"sql": query}, map[string]any{"table": "test"}, &drivers.TransferOptions{Progress: drivers.NoOpProgress{}}) require.NoError(t, err) res, err := olap.Execute(context.Background(), &drivers.Statement{Query: "SELECT count(*) from test"}) diff --git a/runtime/drivers/sqlite/sqlite.go b/runtime/drivers/sqlite/sqlite.go index df83b721162..e35056171be 100644 --- a/runtime/drivers/sqlite/sqlite.go +++ b/runtime/drivers/sqlite/sqlite.go @@ -74,7 +74,7 @@ func (d driver) Spec() drivers.Spec { } func (d driver) HasAnonymousSourceAccess(ctx context.Context, src map[string]any, logger *zap.Logger) (bool, error) { - return false, nil + return true, nil } type connection struct { diff --git a/runtime/pkg/duckdbsql/ast_traversal.go b/runtime/pkg/duckdbsql/ast_traversal.go index eb70ac87627..723898e6715 100644 --- a/runtime/pkg/duckdbsql/ast_traversal.go +++ b/runtime/pkg/duckdbsql/ast_traversal.go @@ -175,7 +175,7 @@ func (a *AST) traverseTableFunction(parent astNode, childKey string) { case "read_csv_auto", "read_csv", "read_parquet", "read_json", "read_json_auto", "read_json_objects", "read_json_objects_auto", - "read_ndjson_objects", "read_ndjson", "read_ndjson_auto": + "read_ndjson_objects", "read_ndjson", "read_ndjson_auto", "sqlite_scan": ref.Paths = getListOfValues[string](arguments[0]) default: // only read_... are supported for now diff --git a/web-common/src/features/sources/modal/AddSourceModal.svelte b/web-common/src/features/sources/modal/AddSourceModal.svelte index 208cb6d029e..618ec75a4a7 100644 --- a/web-common/src/features/sources/modal/AddSourceModal.svelte +++ b/web-common/src/features/sources/modal/AddSourceModal.svelte @@ -37,6 +37,7 @@ "bigquery", // athena "motherduck", + "sqlite", // postgres "local_file", "https", diff --git a/web-common/src/features/sources/sourceUtils.ts b/web-common/src/features/sources/sourceUtils.ts index 11f1ffa611c..9289286d01d 100644 --- a/web-common/src/features/sources/sourceUtils.ts +++ b/web-common/src/features/sources/sourceUtils.ts @@ -30,6 +30,9 @@ export function compileCreateSourceYAML( values.sql = buildDuckDbQuery(values.path as string); delete values.path; break; + case "sqlite": + connectorName = "duckdb"; + break; } const compiledKeyValues = Object.entries(values) From 993bd04a54331136da188acc984522dcb548e8d4 Mon Sep 17 00:00:00 2001 From: Anshul Khandelwal Date: Mon, 25 Sep 2023 16:07:34 +0530 Subject: [PATCH 06/10] small self review --- ...{sqlextension_to_duckDB_test.go => sqlite_to_duckDB_test.go} | 2 +- runtime/drivers/sqlite/sqlite.go | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) rename runtime/drivers/duckdb/transporter/{sqlextension_to_duckDB_test.go => sqlite_to_duckDB_test.go} (95%) diff --git a/runtime/drivers/duckdb/transporter/sqlextension_to_duckDB_test.go b/runtime/drivers/duckdb/transporter/sqlite_to_duckDB_test.go similarity index 95% rename from runtime/drivers/duckdb/transporter/sqlextension_to_duckDB_test.go rename to runtime/drivers/duckdb/transporter/sqlite_to_duckDB_test.go index b86c5209510..d1e070f7c1e 100644 --- a/runtime/drivers/duckdb/transporter/sqlextension_to_duckDB_test.go +++ b/runtime/drivers/duckdb/transporter/sqlite_to_duckDB_test.go @@ -14,7 +14,7 @@ import ( _ "modernc.org/sqlite" ) -func Test_sqlextensionToDuckDB_Transfer(t *testing.T) { +func Test_sqliteToDuckDB_Transfer(t *testing.T) { tempDir := t.TempDir() dbPath := fmt.Sprintf("%s.db", tempDir) diff --git a/runtime/drivers/sqlite/sqlite.go b/runtime/drivers/sqlite/sqlite.go index e35056171be..9e745c213ba 100644 --- a/runtime/drivers/sqlite/sqlite.go +++ b/runtime/drivers/sqlite/sqlite.go @@ -58,7 +58,7 @@ func (d driver) Drop(config map[string]any, logger *zap.Logger) error { func (d driver) Spec() drivers.Spec { return drivers.Spec{ DisplayName: "SQLite", - Description: "Import data from SQLite table to DuckDB.", + Description: "Import data from SQLite table to DuckDB using duckdb SQLite scanner.", SourceProperties: []drivers.PropertySchema{ { Key: "sql", From d5594674c80d32cd75a3af31a686886fdba1ec83 Mon Sep 17 00:00:00 2001 From: Anshul Khandelwal <12948312+k-anshul@users.noreply.github.com> Date: Mon, 25 Sep 2023 16:33:21 +0530 Subject: [PATCH 07/10] Update runtime/drivers/sqlite/sqlite.go MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Benjamin Egelund-Müller --- runtime/drivers/sqlite/sqlite.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/runtime/drivers/sqlite/sqlite.go b/runtime/drivers/sqlite/sqlite.go index 9e745c213ba..1912da6ce6b 100644 --- a/runtime/drivers/sqlite/sqlite.go +++ b/runtime/drivers/sqlite/sqlite.go @@ -58,7 +58,7 @@ func (d driver) Drop(config map[string]any, logger *zap.Logger) error { func (d driver) Spec() drivers.Spec { return drivers.Spec{ DisplayName: "SQLite", - Description: "Import data from SQLite table to DuckDB using duckdb SQLite scanner.", + Description: "Import data from SQLite into DuckDB.", SourceProperties: []drivers.PropertySchema{ { Key: "sql", From 8c8c029c86d7d733131f782705e83e210129b597 Mon Sep 17 00:00:00 2001 From: Eric P Green Date: Mon, 25 Sep 2023 10:48:09 -0700 Subject: [PATCH 08/10] Add icon for SQLite connector --- .../components/icons/connectors/SQLite.svelte | 38 +++++++++++++++++++ .../sources/modal/AddSourceModal.svelte | 2 + 2 files changed, 40 insertions(+) create mode 100644 web-common/src/components/icons/connectors/SQLite.svelte diff --git a/web-common/src/components/icons/connectors/SQLite.svelte b/web-common/src/components/icons/connectors/SQLite.svelte new file mode 100644 index 00000000000..0866df04640 --- /dev/null +++ b/web-common/src/components/icons/connectors/SQLite.svelte @@ -0,0 +1,38 @@ + + + + + + + + + + + + + diff --git a/web-common/src/features/sources/modal/AddSourceModal.svelte b/web-common/src/features/sources/modal/AddSourceModal.svelte index 0905e9104df..b3454b55225 100644 --- a/web-common/src/features/sources/modal/AddSourceModal.svelte +++ b/web-common/src/features/sources/modal/AddSourceModal.svelte @@ -13,6 +13,7 @@ import LocalFile from "../../../components/icons/connectors/LocalFile.svelte"; import MotherDuck from "../../../components/icons/connectors/MotherDuck.svelte"; import Postgres from "../../../components/icons/connectors/Postgres.svelte"; + import SQLite from "../../../components/icons/connectors/SQLite.svelte"; import { appScreen } from "../../../layout/app-store"; import { behaviourEvent } from "../../../metrics/initMetrics"; import { @@ -53,6 +54,7 @@ // athena: AmazonAthena, motherduck: MotherDuck, postgres: Postgres, + sqlite: SQLite, local_file: LocalFile, https: Https, }; From 89f79e46b85a31c9949cbea4bd6fb665ef0eb029 Mon Sep 17 00:00:00 2001 From: Anshul Khandelwal Date: Tue, 26 Sep 2023 12:34:01 +0530 Subject: [PATCH 09/10] ast rewrite support for sqlite_scan --- .../duckdb/transporter/duckDB_to_duckDB.go | 1 + .../transporter/objectStore_to_duckDB.go | 1 + runtime/drivers/sqlite/sqlite.go | 17 +++++--- runtime/pkg/duckdbsql/ast.go | 8 ++++ runtime/pkg/duckdbsql/ast_rewrite.go | 43 +++++++++++++++++++ runtime/pkg/duckdbsql/ast_test.go | 25 +++++++++++ runtime/pkg/duckdbsql/ast_traversal.go | 23 +++++++++- .../catalog/migrator/sources/sources.go | 5 ++- .../src/features/sources/modal/yupSchemas.ts | 3 +- .../src/features/sources/sourceUtils.ts | 3 ++ 10 files changed, 120 insertions(+), 9 deletions(-) diff --git a/runtime/drivers/duckdb/transporter/duckDB_to_duckDB.go b/runtime/drivers/duckdb/transporter/duckDB_to_duckDB.go index ce1c707eaf2..1b10fe0806f 100644 --- a/runtime/drivers/duckdb/transporter/duckDB_to_duckDB.go +++ b/runtime/drivers/duckdb/transporter/duckDB_to_duckDB.go @@ -119,6 +119,7 @@ func rewriteLocalPaths(ast *duckdbsql.AST, basePath string, allowHostAccess bool Function: t.Function, Paths: res, Properties: t.Properties, + Params: t.Params, }, true }) if resolveErr != nil { diff --git a/runtime/drivers/duckdb/transporter/objectStore_to_duckDB.go b/runtime/drivers/duckdb/transporter/objectStore_to_duckDB.go index d2121bd64ed..66ff5c39e90 100644 --- a/runtime/drivers/duckdb/transporter/objectStore_to_duckDB.go +++ b/runtime/drivers/duckdb/transporter/objectStore_to_duckDB.go @@ -303,6 +303,7 @@ func (t *objectStoreToDuckDB) ingestDuckDBSQL(ctx context.Context, originalSQL s Paths: allFiles, Function: table.Function, Properties: table.Properties, + Params: table.Params, }, true }) if err != nil { diff --git a/runtime/drivers/sqlite/sqlite.go b/runtime/drivers/sqlite/sqlite.go index 1912da6ce6b..7d36d2e5c38 100644 --- a/runtime/drivers/sqlite/sqlite.go +++ b/runtime/drivers/sqlite/sqlite.go @@ -61,13 +61,20 @@ func (d driver) Spec() drivers.Spec { Description: "Import data from SQLite into DuckDB.", SourceProperties: []drivers.PropertySchema{ { - Key: "sql", + Key: "db", Type: drivers.StringPropertyType, Required: true, - DisplayName: "SQL", - Description: "Query to extract data from SQLite", - Placeholder: "SELECT * FROM sqlite_scan('sqlite.db', 'film');", - Hint: "https://duckdb.org/docs/extensions/sqlite_scanner#querying-individual-tables", + DisplayName: "DB", + Description: "Path to SQLite db file", + Placeholder: "sqlite.db", + }, + { + Key: "table", + Type: drivers.StringPropertyType, + Required: true, + DisplayName: "Table", + Description: "SQLite table name", + Placeholder: "table", }, }, } diff --git a/runtime/pkg/duckdbsql/ast.go b/runtime/pkg/duckdbsql/ast.go index 50caa9380e7..f1dc06d1e82 100644 --- a/runtime/pkg/duckdbsql/ast.go +++ b/runtime/pkg/duckdbsql/ast.go @@ -82,6 +82,12 @@ func (a *AST) RewriteTableRefs(fn func(table *TableRef) (*TableRef, bool)) error } } else if newRef.Function != "" { switch newRef.Function { + case "sqlite_scan": + newRef.Params[0] = newRef.Paths[0] + err := node.rewriteToSqliteScanFunction(newRef.Function, newRef.Params) + if err != nil { + return err + } case "read_csv_auto", "read_csv", "read_parquet", "read_json", "read_json_auto", "read_json_objects", "read_json_objects_auto", @@ -155,6 +161,8 @@ type TableRef struct { Paths []string Properties map[string]any LocalAlias bool + // Params passed to sqlite_scan + Params []string } // ColumnRef has information about a column in the select list of a DuckDB SQL statement diff --git a/runtime/pkg/duckdbsql/ast_rewrite.go b/runtime/pkg/duckdbsql/ast_rewrite.go index fbe76349f1b..33f0298eb94 100644 --- a/runtime/pkg/duckdbsql/ast_rewrite.go +++ b/runtime/pkg/duckdbsql/ast_rewrite.go @@ -62,6 +62,15 @@ func (sn *selectNode) rewriteLimit(limit, offset int) error { return nil } +func (fn *fromNode) rewriteToSqliteScanFunction(name string, params []string) error { + baseTable, err := createSqliteScanTableFunction(params, fn.ast) + if err != nil { + return err + } + fn.parent[fn.childKey] = baseTable + return nil +} + func createBaseTable(name string, ast astNode) (astNode, error) { // TODO: validation and fill in other fields from ast var n astNode @@ -357,3 +366,37 @@ func createFunctionCall(key, name, schema string) (astNode, error) { }`, key, name, schema)), &n) return n, err } + +func createSqliteScanTableFunction(params []string, ast astNode) (astNode, error) { + var n astNode + err := json.Unmarshal([]byte(`{ + "type": "TABLE_FUNCTION", + "alias": "", + "sample": null, + "function": {}, + "column_name_alias": [] +}`), &n) + if err != nil { + return nil, err + } + + fn, err := createFunctionCall("", "sqlite_scan", "") + if err != nil { + return nil, err + } + n[astKeyFunction] = fn + + var list []astNode + for _, v := range params { + vn, err := createGenericValue("", v) + if err != nil { + return nil, err + } + if vn == nil { + continue + } + list = append(list, vn) + } + fn[astKeyChildren] = list + return n, nil +} diff --git a/runtime/pkg/duckdbsql/ast_test.go b/runtime/pkg/duckdbsql/ast_test.go index 088f44d05e6..3dfd0825f6f 100644 --- a/runtime/pkg/duckdbsql/ast_test.go +++ b/runtime/pkg/duckdbsql/ast_test.go @@ -126,6 +126,18 @@ select * from read_json( {Name: "tbl3", LocalAlias: true}, }, }, + { + "sqlite_scan", + `select * from sqlite_scan('mydatabase.db', 'table')`, + []*TableRef{ + { + Function: "sqlite_scan", + Paths: []string{"mydatabase.db"}, + Params: []string{"mydatabase.db", "table"}, + Properties: make(map[string]any), + }, + }, + }, { "other table functions", `select * from generate_series(TIMESTAMP '2001-04-10', TIMESTAMP '2001-04-11', INTERVAL 30 MINUTE)`, @@ -470,6 +482,19 @@ func TestAST_RewriteWithFunctionRef(t *testing.T) { }, `SELECT * FROM read_csv(main.list_value('/path/to/AdBids.csv'), ("columns" = main.struct_pack(L := main.list_value('INT32', 'INT64'))))`, }, + { + "sqlite_scan", + `select * from AdBids`, + []*TableRef{ + { + Function: "sqlite_scan", + Paths: []string{"/path/to/data.db"}, + Properties: map[string]any{}, + Params: []string{"/path/to/data.db", "table"}, + }, + }, + `SELECT * FROM sqlite_scan('/path/to/data.db', 'table')`, + }, } for _, tt := range sqlVariations { diff --git a/runtime/pkg/duckdbsql/ast_traversal.go b/runtime/pkg/duckdbsql/ast_traversal.go index 723898e6715..4f96ec9e8bb 100644 --- a/runtime/pkg/duckdbsql/ast_traversal.go +++ b/runtime/pkg/duckdbsql/ast_traversal.go @@ -172,10 +172,31 @@ func (a *AST) traverseTableFunction(parent astNode, childKey string) { // TODO: add to local alias switch functionName { + case "sqlite_scan": + a.newFromNode(node, parent, childKey, ref) + ref.Params = make([]string, 0) + for _, argument := range arguments { + typ := toString(argument, astKeyType) + switch typ { + case "VALUE_CONSTANT": + ref.Params = append(ref.Params, getListOfValues[string](argument)...) + case "COLUMN_REF": + columnNames := toArray(argument, astKeyColumnNames) + for _, column := range columnNames { + ref.Params = append(ref.Params, column.(string)) + } + default: + } + } + if len(ref.Params) >= 1 { + // first param is path to local db file + ref.Paths = ref.Params[:1] + } + return case "read_csv_auto", "read_csv", "read_parquet", "read_json", "read_json_auto", "read_json_objects", "read_json_objects_auto", - "read_ndjson_objects", "read_ndjson", "read_ndjson_auto", "sqlite_scan": + "read_ndjson_objects", "read_ndjson", "read_ndjson_auto": ref.Paths = getListOfValues[string](arguments[0]) default: // only read_... are supported for now diff --git a/runtime/services/catalog/migrator/sources/sources.go b/runtime/services/catalog/migrator/sources/sources.go index 8cd3791e64b..79f35639bae 100644 --- a/runtime/services/catalog/migrator/sources/sources.go +++ b/runtime/services/catalog/migrator/sources/sources.go @@ -290,6 +290,8 @@ func mergeFromParsedQuery(apiSource *runtimev1.Source, env map[string]string, re return errors.New("invalid source, only a single path for source is supported") } + // TODO :: it looks at path to determine connector which is not correct for sqlite_scan + // but it works since behaviour is same as local_file p, c, ok := parseEmbeddedSourceConnector(ref.Paths[0]) if !ok { return errors.New("unknown source") @@ -334,6 +336,7 @@ func rewriteLocalRelativePath(ast *duckdbsql.AST, repoRoot string, allowRootAcce Function: table.Function, Paths: newPaths, Properties: table.Properties, + Params: table.Params, }, true }) if resolveErr != nil { @@ -385,8 +388,6 @@ func connectorVariables(src *runtimev1.Source, env map[string]string, repoRoot s case "motherduck": vars["token"] = env["token"] vars["dsn"] = "" - case "sqlite": - vars["dsn"] = "" case "local_file": vars["dsn"] = repoRoot case "bigquery": diff --git a/web-common/src/features/sources/modal/yupSchemas.ts b/web-common/src/features/sources/modal/yupSchemas.ts index 9d1b5931ab0..ce10fcfb671 100644 --- a/web-common/src/features/sources/modal/yupSchemas.ts +++ b/web-common/src/features/sources/modal/yupSchemas.ts @@ -59,7 +59,8 @@ export function getYupSchema(connector: V1ConnectorSpec) { }); case "sqlite": return yup.object().shape({ - sql: yup.string().required("sql is required"), + db: yup.string().required("db is required"), + table: yup.string().required("table is required"), sourceName: yup .string() .matches( diff --git a/web-common/src/features/sources/sourceUtils.ts b/web-common/src/features/sources/sourceUtils.ts index 9289286d01d..f8fcfe50168 100644 --- a/web-common/src/features/sources/sourceUtils.ts +++ b/web-common/src/features/sources/sourceUtils.ts @@ -32,6 +32,9 @@ export function compileCreateSourceYAML( break; case "sqlite": connectorName = "duckdb"; + values.sql = `SELECT * FROM sqlite_scan('${values.db as string}', '${values.table as string}');` + delete values.db; + delete values.table; break; } From b32bbe9a3f46ed649c14c24926518d69e12c9ed3 Mon Sep 17 00:00:00 2001 From: Anshul Khandelwal Date: Tue, 26 Sep 2023 12:40:36 +0530 Subject: [PATCH 10/10] npx prettier write --- web-common/src/features/sources/sourceUtils.ts | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/web-common/src/features/sources/sourceUtils.ts b/web-common/src/features/sources/sourceUtils.ts index f8fcfe50168..f534ff1424a 100644 --- a/web-common/src/features/sources/sourceUtils.ts +++ b/web-common/src/features/sources/sourceUtils.ts @@ -32,7 +32,9 @@ export function compileCreateSourceYAML( break; case "sqlite": connectorName = "duckdb"; - values.sql = `SELECT * FROM sqlite_scan('${values.db as string}', '${values.table as string}');` + values.sql = `SELECT * FROM sqlite_scan('${values.db as string}', '${ + values.table as string + }');`; delete values.db; delete values.table; break;