Merge branch 'master' into redshift-ddl
Tang8330 authored Oct 7, 2024
2 parents 5389c23 + b808151 commit e78558a
Showing 10 changed files with 46 additions and 31 deletions.
5 changes: 5 additions & 0 deletions clients/bigquery/bigquery.go
@@ -212,6 +212,11 @@ func (s *Store) Dedupe(tableID sql.TableIdentifier, primaryKeys []string, includ
return destination.ExecStatements(s, dedupeQueries)
}

+func (s *Store) SweepTemporaryTables() error {
+// BigQuery doesn't need to sweep temporary tables, since it supports setting a TTL on temporary tables.
+return nil
+}
+
func LoadBigQuery(cfg config.Config, _store *db.Store) (*Store, error) {
if _store != nil {
// Used for tests.
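The no-op above works because BigQuery staging tables can be created with an expiration and age out on their own. A minimal sketch using the cloud.google.com/go/bigquery client; the project, dataset, and table names are assumptions, not values from this repo:

package main

import (
	"context"
	"log"
	"time"

	"cloud.google.com/go/bigquery"
)

func main() {
	ctx := context.Background()

	client, err := bigquery.NewClient(ctx, "my-project") // hypothetical project ID
	if err != nil {
		log.Fatal(err)
	}
	defer client.Close()

	// Create a staging table that BigQuery deletes on its own after 6 hours,
	// so no explicit sweep is needed later.
	tbl := client.Dataset("my_dataset").Table("__artie_staging_example") // hypothetical names
	err = tbl.Create(ctx, &bigquery.TableMetadata{
		Schema:         bigquery.Schema{{Name: "id", Type: bigquery.StringFieldType}},
		ExpirationTime: time.Now().Add(6 * time.Hour),
	})
	if err != nil {
		log.Fatal(err)
	}
}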
12 changes: 12 additions & 0 deletions clients/databricks/dialect/dialect.go
@@ -4,6 +4,8 @@ import (
"fmt"
"strings"

+dbsql "github.com/databricks/databricks-sql-go"
+
"github.com/artie-labs/transfer/lib/config/constants"
"github.com/artie-labs/transfer/lib/sql"
"github.com/artie-labs/transfer/lib/typing"
@@ -143,6 +145,16 @@ WHEN NOT MATCHED AND IFNULL(%s, false) = false THEN INSERT (%s) VALUES (%s);`,
)}, nil
}

+func (d DatabricksDialect) BuildSweepQuery(dbName, schemaName string) (string, []any) {
+return fmt.Sprintf(`
+SELECT
+table_schema, table_name
+FROM
+%s.information_schema.tables
+WHERE
+UPPER(table_schema) = UPPER(:p_schema) AND table_name ILIKE :p_artie_prefix`, d.QuoteIdentifier(dbName)), []any{dbsql.Parameter{Name: "p_schema", Value: schemaName}, dbsql.Parameter{Name: "p_artie_prefix", Value: "%" + constants.ArtiePrefix + "%"}}
+}
+
func (d DatabricksDialect) GetDefaultValueStrategy() sql.DefaultValueStrategy {
return sql.Native
}
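For reference, the values returned above are databricks-sql-go dbsql.Parameter instances, which the driver binds to :p_schema and :p_artie_prefix when the query runs through database/sql. A small, hypothetical caller (catalog and schema names made up) that just renders the query:

package main

import (
	"fmt"

	"github.com/artie-labs/transfer/clients/databricks/dialect"
)

func main() {
	d := dialect.DatabricksDialect{}

	// Hypothetical catalog/schema names, only to show the generated SQL and its parameters.
	query, args := d.BuildSweepQuery("my_catalog", "my_schema")

	fmt.Println(query)        // information_schema.tables query with :p_schema / :p_artie_prefix placeholders
	fmt.Printf("%+v\n", args) // []any holding the two dbsql.Parameter values
}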
14 changes: 14 additions & 0 deletions clients/databricks/store.go
@@ -49,6 +49,10 @@ func (s Store) IdentifierFor(topicConfig kafkalib.TopicConfig, table string) sql
}

func (s Store) Dialect() sql.Dialect {
+return s.dialect()
+}
+
+func (s Store) dialect() dialect.DatabricksDialect {
return dialect.DatabricksDialect{}
}

@@ -184,6 +188,16 @@ func (s Store) writeTemporaryTableFile(tableData *optimization.TableData, newTab
return fp, writer.Error()
}

+func (s Store) SweepTemporaryTables() error {
+// TODO: We should also remove old volumes
+tcs, err := s.cfg.TopicConfigs()
+if err != nil {
+return err
+}
+
+return shared.Sweep(s, tcs, s.dialect().BuildSweepQuery)
+}
+
func LoadStore(cfg config.Config) (Store, error) {
store, err := db.Open("databricks", cfg.Databricks.DSN())
if err != nil {
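shared.Sweep itself is not part of this diff. As a rough sketch of the shape it presumably has (every name, field, and import path below is an assumption): it walks the topic configs, runs the dialect's sweep query per schema, and drops whatever staging tables come back.

// Hypothetical sketch, not the repo's actual shared.Sweep implementation.
package shared // package placement assumed

import (
	"fmt"

	"github.com/artie-labs/transfer/lib/destination"
	"github.com/artie-labs/transfer/lib/kafkalib"
)

func sweepSketch(
	dwh destination.DataWarehouse,
	tcs []*kafkalib.TopicConfig,
	buildSweepQuery func(dbName, schemaName string) (string, []any),
) error {
	for _, tc := range tcs {
		query, args := buildSweepQuery(tc.Database, tc.Schema) // field names assumed
		rows, err := dwh.Query(query, args...)
		if err != nil {
			return err
		}

		// Collect matches first so the result set is closed before issuing DROPs.
		var staging [][2]string
		for rows.Next() {
			var schemaName, tableName string
			if err = rows.Scan(&schemaName, &tableName); err != nil {
				rows.Close()
				return err
			}
			staging = append(staging, [2]string{schemaName, tableName})
		}
		if err = rows.Err(); err != nil {
			rows.Close()
			return err
		}
		rows.Close()

		for _, st := range staging {
			// Everything returned by the sweep query carries the Artie prefix, so it is a
			// leftover staging table; real code would quote identifiers via the dialect.
			if _, err = dwh.Exec(fmt.Sprintf("DROP TABLE IF EXISTS %s.%s", st[0], st[1])); err != nil {
				return err
			}
		}
	}

	return nil
}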
2 changes: 1 addition & 1 deletion clients/mssql/store.go
@@ -56,7 +56,7 @@ func (s *Store) IdentifierFor(topicConfig kafkalib.TopicConfig, table string) sq
return s.specificIdentifierFor(topicConfig, table)
}

-func (s *Store) Sweep() error {
+func (s *Store) SweepTemporaryTables() error {
tcs, err := s.config.TopicConfigs()
if err != nil {
return err
2 changes: 1 addition & 1 deletion clients/redshift/redshift.go
@@ -73,7 +73,7 @@ func (s *Store) GetTableConfig(tableData *optimization.TableData) (*types.DwhTab
}.GetTableConfig()
}

-func (s *Store) Sweep() error {
+func (s *Store) SweepTemporaryTables() error {
tcs, err := s.config.TopicConfigs()
if err != nil {
return err
2 changes: 1 addition & 1 deletion clients/snowflake/snowflake.go
@@ -40,7 +40,7 @@ func (s *Store) GetTableConfig(tableData *optimization.TableData) (*types.DwhTab
}.GetTableConfig()
}

-func (s *Store) Sweep() error {
+func (s *Store) SweepTemporaryTables() error {
tcs, err := s.config.TopicConfigs()
if err != nil {
return err
1 change: 1 addition & 0 deletions lib/destination/dwh.go
@@ -17,6 +17,7 @@ type DataWarehouse interface {
// SQL specific commands
Dialect() sqllib.Dialect
Dedupe(tableID sqllib.TableIdentifier, primaryKeys []string, includeArtieUpdatedAt bool) error
+SweepTemporaryTables() error
Exec(query string, args ...any) (sql.Result, error)
Query(query string, args ...any) (*sql.Rows, error)
Begin() (*sql.Tx, error)
27 changes: 3 additions & 24 deletions lib/destination/utils/load.go
@@ -36,36 +36,15 @@ func LoadBaseline(cfg config.Config) (destination.Baseline, error) {
func LoadDataWarehouse(cfg config.Config, store *db.Store) (destination.DataWarehouse, error) {
switch cfg.Output {
case constants.Snowflake:
-s, err := snowflake.LoadSnowflake(cfg, store)
-if err != nil {
-return nil, err
-}
-if err = s.Sweep(); err != nil {
-return nil, fmt.Errorf("failed to clean up Snowflake: %w", err)
-}
-return s, nil
+return snowflake.LoadSnowflake(cfg, store)
case constants.BigQuery:
return bigquery.LoadBigQuery(cfg, store)
case constants.Databricks:
return databricks.LoadStore(cfg)
case constants.MSSQL:
-s, err := mssql.LoadStore(cfg)
-if err != nil {
-return nil, err
-}
-if err = s.Sweep(); err != nil {
-return nil, fmt.Errorf("failed to clean up MS SQL: %w", err)
-}
-return s, nil
+return mssql.LoadStore(cfg)
case constants.Redshift:
-s, err := redshift.LoadRedshift(cfg, store)
-if err != nil {
-return nil, err
-}
-if err = s.Sweep(); err != nil {
-return nil, fmt.Errorf("failed to clean up Redshift: %w", err)
-}
-return s, nil
+return redshift.LoadRedshift(cfg, store)
}

return nil, fmt.Errorf("invalid data warehouse output source specified: %q", cfg.Output)
10 changes: 7 additions & 3 deletions main.go
@@ -39,8 +39,6 @@ func main() {
)

ctx := context.Background()
-
-// Loading telemetry
metricsClient := metrics.LoadExporter(settings.Config)
var dest destination.Baseline
if utils.IsOutputBaseline(settings.Config) {
@@ -49,10 +47,16 @@
logger.Fatal("Unable to load baseline destination", slog.Any("err", err))
}
} else {
-dest, err = utils.LoadDataWarehouse(settings.Config, nil)
+dwh, err := utils.LoadDataWarehouse(settings.Config, nil)
if err != nil {
logger.Fatal("Unable to load data warehouse destination", slog.Any("err", err))
}
+
+if err = dwh.SweepTemporaryTables(); err != nil {
+logger.Fatal("Failed to clean up temporary tables", slog.Any("err", err))
+}
+
+dest = dwh
}

inMemDB := models.NewMemoryDB()
2 changes: 1 addition & 1 deletion processes/consumer/flush.go
@@ -62,7 +62,7 @@ func Flush(ctx context.Context, inMemDB *models.DatabaseData, dest destination.B
return
}

-retryCfg, err := retry.NewJitterRetryConfig(500, 30_000, 10, retry.AlwaysRetry)
+retryCfg, err := retry.NewJitterRetryConfig(1_000, 30_000, 15, retry.AlwaysRetry)
if err != nil {
slog.Error("Failed to create retry config", slog.Any("err", err))
return
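The retry change above widens the backoff window: a 1s base delay instead of 500ms and 15 attempts instead of 10, still capped at 30s. The exact jitter formula lives in the repo's lib/retry package and is not shown in this diff; below is a minimal sketch of one common "full jitter" reading of those three numbers, using only the standard library.

package main

import (
	"fmt"
	"math/rand"
	"time"
)

// jitterDelay is an illustrative full-jitter backoff: exponential growth from
// baseMs, capped at maxMs, with a uniformly random delay below the cap.
// This is an assumption about the semantics, not the repo's lib/retry code.
func jitterDelay(baseMs, maxMs int64, attempt int) time.Duration {
	capMs := baseMs << attempt
	if capMs > maxMs || capMs <= 0 {
		capMs = maxMs
	}
	return time.Duration(rand.Int63n(capMs)) * time.Millisecond
}

func main() {
	// Roughly what NewJitterRetryConfig(1_000, 30_000, 15, ...) could imply per attempt.
	for attempt := 0; attempt < 15; attempt++ {
		fmt.Printf("attempt %2d: sleep %v\n", attempt, jitterDelay(1_000, 30_000, attempt))
	}
}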
