diff --git a/README.md b/README.md index 896669539..ea1c4f8d9 100644 --- a/README.md +++ b/README.md @@ -2,12 +2,12 @@

Artie Transfer

⚡️ Blazing fast data replication between OLTP and OLAP databases ⚡️

- - + +
- Learn more » + Learn more »
@@ -51,7 +51,7 @@ To run Artie Transfer's stack locally, please refer to the [examples folder](htt ## Getting started -[Getting started guide](https://docs.artie.so/running-transfer/overview) +[Getting started guide](https://docs.artie.com/running-transfer/overview) ## What is currently supported? Transfer is aiming to provide coverage across all OLTPs and OLAPs databases. Currently Transfer supports: @@ -60,14 +60,14 @@ Transfer is aiming to provide coverage across all OLTPs and OLAPs databases. Cur - Kafka (default) - Google Pub/Sub -- [Destinations](https://docs.artie.so/real-time-destinations/overview): +- [Destinations](https://docs.artie.com/real-time-destinations/overview): - Snowflake - BigQuery - Redshift - Microsoft SQL Server - S3 -- [Sources](https://docs.artie.so/real-time-sources/overview): +- [Sources](https://docs.artie.com/real-time-sources/overview): - MongoDB - DocumentDB - PostgreSQL @@ -77,15 +77,15 @@ Transfer is aiming to provide coverage across all OLTPs and OLAPs databases. Cur _If the database you are using is not on the list, feel free to file for a [feature request](https://github.com/artie-labs/transfer/issues/new)._ ## Configuration File -* [Artie Transfer configuration file guide](https://docs.artie.so/running-transfer/options) -* [Examples of configuration files](https://docs.artie.so/running-transfer/examples) +* [Artie Transfer configuration file guide](https://docs.artie.com/running-transfer/options) +* [Examples of configuration files](https://docs.artie.com/running-transfer/examples) ## Telemetry -[Artie Transfer's telemetry guide](https://docs.artie.so/telemetry/overview) +[Artie Transfer's telemetry guide](https://docs.artie.com/telemetry/overview) ## Tests -Transfer is written in Go and uses [counterfeiter](https://github.com/maxbrunsfeld/counterfeiter) to mock. +Transfer is written in Go and uses [counterfeiter](https://github.com/maxbrunsfeld/counterfeiter) to mock. To run the tests, run the following commands: ```sh diff --git a/clients/bigquery/append.go b/clients/bigquery/append.go deleted file mode 100644 index 8e4e221b0..000000000 --- a/clients/bigquery/append.go +++ /dev/null @@ -1,12 +0,0 @@ -package bigquery - -import ( - "github.com/artie-labs/transfer/clients/shared" - "github.com/artie-labs/transfer/lib/destination/types" - "github.com/artie-labs/transfer/lib/optimization" -) - -func (s *Store) Append(tableData *optimization.TableData) error { - tableID := s.IdentifierFor(tableData.TopicConfig(), tableData.Name()) - return shared.Append(s, tableData, types.AppendOpts{TempTableID: tableID}) -} diff --git a/clients/bigquery/bigquery.go b/clients/bigquery/bigquery.go index 51078d4ce..5d6fec060 100644 --- a/clients/bigquery/bigquery.go +++ b/clients/bigquery/bigquery.go @@ -41,17 +41,20 @@ type Store struct { db.Store } +func (s *Store) Append(tableData *optimization.TableData) error { + return shared.Append(s, tableData, types.AdditionalSettings{}) +} + func (s *Store) PrepareTemporaryTable(tableData *optimization.TableData, tableConfig *types.DwhTableConfig, tempTableID types.TableIdentifier, _ types.AdditionalSettings, createTempTable bool) error { if createTempTable { tempAlterTableArgs := ddl.AlterTableArgs{ - Dwh: s, - Tc: tableConfig, - TableID: tempTableID, - CreateTable: true, - TemporaryTable: true, - ColumnOp: constants.Add, - UppercaseEscNames: ptr.ToBool(s.ShouldUppercaseEscapedNames()), - Mode: tableData.Mode(), + Dwh: s, + Tc: tableConfig, + TableID: tempTableID, + CreateTable: true, + TemporaryTable: true, + ColumnOp: constants.Add, + Mode: tableData.Mode(), } if err := tempAlterTableArgs.AlterTable(tableData.ReadOnlyInMemoryCols().GetColumns()...); err != nil { @@ -119,10 +122,6 @@ func (s *Store) Dialect() sql.Dialect { return sql.BigQueryDialect{} } -func (s *Store) ShouldUppercaseEscapedNames() bool { - return false -} - func (s *Store) GetClient(ctx context.Context) *bigquery.Client { client, err := bigquery.NewClient(ctx, s.config.BigQuery.ProjectID) if err != nil { diff --git a/clients/mssql/staging.go b/clients/mssql/staging.go index f373fbfb6..c70813d1c 100644 --- a/clients/mssql/staging.go +++ b/clients/mssql/staging.go @@ -9,20 +9,18 @@ import ( "github.com/artie-labs/transfer/lib/destination/ddl" "github.com/artie-labs/transfer/lib/destination/types" "github.com/artie-labs/transfer/lib/optimization" - "github.com/artie-labs/transfer/lib/ptr" ) func (s *Store) PrepareTemporaryTable(tableData *optimization.TableData, tableConfig *types.DwhTableConfig, tempTableID types.TableIdentifier, _ types.AdditionalSettings, createTempTable bool) error { if createTempTable { tempAlterTableArgs := ddl.AlterTableArgs{ - Dwh: s, - Tc: tableConfig, - TableID: tempTableID, - CreateTable: true, - TemporaryTable: true, - ColumnOp: constants.Add, - UppercaseEscNames: ptr.ToBool(s.ShouldUppercaseEscapedNames()), - Mode: tableData.Mode(), + Dwh: s, + Tc: tableConfig, + TableID: tempTableID, + CreateTable: true, + TemporaryTable: true, + ColumnOp: constants.Add, + Mode: tableData.Mode(), } if err := tempAlterTableArgs.AlterTable(tableData.ReadOnlyInMemoryCols().GetColumns()...); err != nil { diff --git a/clients/mssql/store.go b/clients/mssql/store.go index f48c26d40..81265d188 100644 --- a/clients/mssql/store.go +++ b/clients/mssql/store.go @@ -36,11 +36,7 @@ func (s *Store) Label() constants.DestinationKind { } func (s *Store) Dialect() sql.Dialect { - return sql.DefaultDialect{} -} - -func (s *Store) ShouldUppercaseEscapedNames() bool { - return false + return sql.MSSQLDialect{} } func (s *Store) Merge(tableData *optimization.TableData) error { @@ -48,8 +44,7 @@ func (s *Store) Merge(tableData *optimization.TableData) error { } func (s *Store) Append(tableData *optimization.TableData) error { - tableID := s.IdentifierFor(tableData.TopicConfig(), tableData.Name()) - return shared.Append(s, tableData, types.AppendOpts{TempTableID: tableID}) + return shared.Append(s, tableData, types.AdditionalSettings{}) } // specificIdentifierFor returns a MS SQL [TableIdentifier] for a [TopicConfig] + table name. diff --git a/clients/mssql/tableid.go b/clients/mssql/tableid.go index b5026d723..ddfe37a2f 100644 --- a/clients/mssql/tableid.go +++ b/clients/mssql/tableid.go @@ -7,7 +7,7 @@ import ( "github.com/artie-labs/transfer/lib/sql" ) -var dialect = sql.DefaultDialect{} +var dialect = sql.MSSQLDialect{} type TableIdentifier struct { schema string diff --git a/clients/redshift/redshift.go b/clients/redshift/redshift.go index 952d89007..4626f6cc3 100644 --- a/clients/redshift/redshift.go +++ b/clients/redshift/redshift.go @@ -30,6 +30,19 @@ type Store struct { db.Store } +func (s *Store) Append(tableData *optimization.TableData) error { + return shared.Append(s, tableData, types.AdditionalSettings{}) +} + +func (s *Store) Merge(tableData *optimization.TableData) error { + return shared.Merge(s, tableData, s.config, types.MergeOpts{ + UseMergeParts: true, + // We are adding SELECT DISTINCT here for the temporary table as an extra guardrail. + // Redshift does not enforce any row uniqueness and there could be potential LOAD errors which will cause duplicate rows to arise. + SubQueryDedupe: true, + }) +} + func (s *Store) IdentifierFor(topicConfig kafkalib.TopicConfig, table string) types.TableIdentifier { return NewTableIdentifier(topicConfig.Schema, table) } @@ -50,10 +63,6 @@ func (s *Store) Dialect() sql.Dialect { return sql.RedshiftDialect{} } -func (s *Store) ShouldUppercaseEscapedNames() bool { - return false -} - func (s *Store) GetTableConfig(tableData *optimization.TableData) (*types.DwhTableConfig, error) { const ( describeNameCol = "column_name" diff --git a/clients/redshift/staging.go b/clients/redshift/staging.go index bb6614baa..68d8311d2 100644 --- a/clients/redshift/staging.go +++ b/clients/redshift/staging.go @@ -12,25 +12,24 @@ import ( "github.com/artie-labs/transfer/lib/destination/ddl" "github.com/artie-labs/transfer/lib/destination/types" "github.com/artie-labs/transfer/lib/optimization" - "github.com/artie-labs/transfer/lib/ptr" "github.com/artie-labs/transfer/lib/s3lib" ) -func (s *Store) PrepareTemporaryTable(tableData *optimization.TableData, tableConfig *types.DwhTableConfig, tempTableID types.TableIdentifier, _ types.AdditionalSettings, _ bool) error { - // Redshift always creates a temporary table. - tempAlterTableArgs := ddl.AlterTableArgs{ - Dwh: s, - Tc: tableConfig, - TableID: tempTableID, - CreateTable: true, - TemporaryTable: true, - ColumnOp: constants.Add, - UppercaseEscNames: ptr.ToBool(s.ShouldUppercaseEscapedNames()), - Mode: tableData.Mode(), - } +func (s *Store) PrepareTemporaryTable(tableData *optimization.TableData, tableConfig *types.DwhTableConfig, tempTableID types.TableIdentifier, _ types.AdditionalSettings, createTempTable bool) error { + if createTempTable { + tempAlterTableArgs := ddl.AlterTableArgs{ + Dwh: s, + Tc: tableConfig, + TableID: tempTableID, + CreateTable: true, + TemporaryTable: true, + ColumnOp: constants.Add, + Mode: tableData.Mode(), + } - if err := tempAlterTableArgs.AlterTable(tableData.ReadOnlyInMemoryCols().GetColumns()...); err != nil { - return fmt.Errorf("failed to create temp table: %w", err) + if err := tempAlterTableArgs.AlterTable(tableData.ReadOnlyInMemoryCols().GetColumns()...); err != nil { + return fmt.Errorf("failed to create temp table: %w", err) + } } fp, err := s.loadTemporaryTable(tableData, tempTableID) diff --git a/clients/redshift/writes.go b/clients/redshift/writes.go deleted file mode 100644 index fcaf08e67..000000000 --- a/clients/redshift/writes.go +++ /dev/null @@ -1,34 +0,0 @@ -package redshift - -import ( - "fmt" - - "github.com/artie-labs/transfer/clients/shared" - "github.com/artie-labs/transfer/lib/destination/types" - "github.com/artie-labs/transfer/lib/optimization" -) - -func (s *Store) Append(tableData *optimization.TableData) error { - tableID := s.IdentifierFor(tableData.TopicConfig(), tableData.Name()) - - // Redshift is slightly different, we'll load and create the temporary table via shared.Append - // Then, we'll invoke `ALTER TABLE target APPEND FROM staging` to combine the diffs. - temporaryTableID := shared.TempTableID(tableID, tableData.TempTableSuffix()) - if err := shared.Append(s, tableData, types.AppendOpts{TempTableID: temporaryTableID}); err != nil { - return err - } - - _, err := s.Exec( - fmt.Sprintf(`ALTER TABLE %s APPEND FROM %s;`, tableID.FullyQualifiedName(), temporaryTableID.FullyQualifiedName()), - ) - return err -} - -func (s *Store) Merge(tableData *optimization.TableData) error { - return shared.Merge(s, tableData, s.config, types.MergeOpts{ - UseMergeParts: true, - // We are adding SELECT DISTINCT here for the temporary table as an extra guardrail. - // Redshift does not enforce any row uniqueness and there could be potential LOAD errors which will cause duplicate rows to arise. - SubQueryDedupe: true, - }) -} diff --git a/clients/shared/append.go b/clients/shared/append.go index 06f1e2172..d3d9bfecb 100644 --- a/clients/shared/append.go +++ b/clients/shared/append.go @@ -8,35 +8,38 @@ import ( "github.com/artie-labs/transfer/lib/destination/ddl" "github.com/artie-labs/transfer/lib/destination/types" "github.com/artie-labs/transfer/lib/optimization" - "github.com/artie-labs/transfer/lib/ptr" "github.com/artie-labs/transfer/lib/typing/columns" ) -func Append(dwh destination.DataWarehouse, tableData *optimization.TableData, opts types.AppendOpts) error { +func Append(dwh destination.DataWarehouse, tableData *optimization.TableData, opts types.AdditionalSettings) error { if tableData.ShouldSkipUpdate() { return nil } - tableID := dwh.IdentifierFor(tableData.TopicConfig(), tableData.Name()) tableConfig, err := dwh.GetTableConfig(tableData) if err != nil { return fmt.Errorf("failed to get table config: %w", err) } // We don't care about srcKeysMissing because we don't drop columns when we append. - _, targetKeysMissing := columns.Diff(tableData.ReadOnlyInMemoryCols(), tableConfig.Columns(), - tableData.TopicConfig().SoftDelete, tableData.TopicConfig().IncludeArtieUpdatedAt, - tableData.TopicConfig().IncludeDatabaseUpdatedAt, tableData.Mode()) + _, targetKeysMissing := columns.Diff( + tableData.ReadOnlyInMemoryCols(), + tableConfig.Columns(), + tableData.TopicConfig().SoftDelete, + tableData.TopicConfig().IncludeArtieUpdatedAt, + tableData.TopicConfig().IncludeDatabaseUpdatedAt, + tableData.Mode(), + ) + tableID := dwh.IdentifierFor(tableData.TopicConfig(), tableData.Name()) createAlterTableArgs := ddl.AlterTableArgs{ - Dwh: dwh, - Tc: tableConfig, - TableID: tableID, - CreateTable: tableConfig.CreateTable(), - ColumnOp: constants.Add, - CdcTime: tableData.LatestCDCTs, - UppercaseEscNames: ptr.ToBool(dwh.ShouldUppercaseEscapedNames()), - Mode: tableData.Mode(), + Dwh: dwh, + Tc: tableConfig, + TableID: tableID, + CreateTable: tableConfig.CreateTable(), + ColumnOp: constants.Add, + CdcTime: tableData.LatestCDCTs, + Mode: tableData.Mode(), } // Keys that exist in CDC stream, but not in DWH @@ -48,9 +51,11 @@ func Append(dwh destination.DataWarehouse, tableData *optimization.TableData, op return fmt.Errorf("failed to merge columns from destination: %w", err) } - additionalSettings := types.AdditionalSettings{ - AdditionalCopyClause: opts.AdditionalCopyClause, - } - - return dwh.PrepareTemporaryTable(tableData, tableConfig, opts.TempTableID, additionalSettings, false) + return dwh.PrepareTemporaryTable( + tableData, + tableConfig, + tableID, + opts, + false, + ) } diff --git a/clients/shared/merge.go b/clients/shared/merge.go index 970fc3652..7b3f06927 100644 --- a/clients/shared/merge.go +++ b/clients/shared/merge.go @@ -35,14 +35,13 @@ func Merge(dwh destination.DataWarehouse, tableData *optimization.TableData, cfg tableID := dwh.IdentifierFor(tableData.TopicConfig(), tableData.Name()) createAlterTableArgs := ddl.AlterTableArgs{ - Dwh: dwh, - Tc: tableConfig, - TableID: tableID, - CreateTable: tableConfig.CreateTable(), - ColumnOp: constants.Add, - CdcTime: tableData.LatestCDCTs, - UppercaseEscNames: ptr.ToBool(dwh.ShouldUppercaseEscapedNames()), - Mode: tableData.Mode(), + Dwh: dwh, + Tc: tableConfig, + TableID: tableID, + CreateTable: tableConfig.CreateTable(), + ColumnOp: constants.Add, + CdcTime: tableData.LatestCDCTs, + Mode: tableData.Mode(), } // Columns that are missing in DWH, but exist in our CDC stream. @@ -60,7 +59,6 @@ func Merge(dwh destination.DataWarehouse, tableData *optimization.TableData, cfg ColumnOp: constants.Delete, ContainOtherOperations: tableData.ContainOtherOperations(), CdcTime: tableData.LatestCDCTs, - UppercaseEscNames: ptr.ToBool(dwh.ShouldUppercaseEscapedNames()), Mode: tableData.Mode(), } @@ -95,7 +93,7 @@ func Merge(dwh destination.DataWarehouse, tableData *optimization.TableData, cfg for attempts := 0; attempts < backfillMaxRetries; attempts++ { backfillErr = BackfillColumn(cfg, dwh, col, tableID) if backfillErr == nil { - tableConfig.Columns().UpsertColumn(col.RawName(), columns.UpsertColumnArg{ + tableConfig.Columns().UpsertColumn(col.Name(), columns.UpsertColumnArg{ Backfilled: ptr.ToBool(true), }) break @@ -112,7 +110,7 @@ func Merge(dwh destination.DataWarehouse, tableData *optimization.TableData, cfg } if backfillErr != nil { - return fmt.Errorf("failed to backfill col: %s, default value: %v, err: %w", col.RawName(), col.RawDefaultValue(), backfillErr) + return fmt.Errorf("failed to backfill col: %s, default value: %v, err: %w", col.Name(), col.RawDefaultValue(), backfillErr) } } @@ -125,12 +123,11 @@ func Merge(dwh destination.DataWarehouse, tableData *optimization.TableData, cfg TableID: tableID, SubQuery: subQuery, IdempotentKey: tableData.TopicConfig().IdempotentKey, - PrimaryKeys: tableData.PrimaryKeys(dwh.ShouldUppercaseEscapedNames(), dwh.Label()), + PrimaryKeys: tableData.PrimaryKeys(), Columns: tableData.ReadOnlyInMemoryCols(), SoftDelete: tableData.TopicConfig().SoftDelete, DestKind: dwh.Label(), Dialect: dwh.Dialect(), - UppercaseEscNames: ptr.ToBool(dwh.ShouldUppercaseEscapedNames()), ContainsHardDeletes: ptr.ToBool(tableData.ContainsHardDeletes()), } diff --git a/clients/shared/table_config_test.go b/clients/shared/table_config_test.go index db45a710e..c85b9c053 100644 --- a/clients/shared/table_config_test.go +++ b/clients/shared/table_config_test.go @@ -75,7 +75,6 @@ func (MockDWH) PrepareTemporaryTable(tableData *optimization.TableData, tableCon func (MockDWH) IdentifierFor(topicConfig kafkalib.TopicConfig, name string) types.TableIdentifier { panic("not implemented") } -func (MockDWH) ShouldUppercaseEscapedNames() bool { return true } type MockTableIdentifier struct{ fqName string } diff --git a/clients/shared/utils.go b/clients/shared/utils.go index b3b236604..45e971ccf 100644 --- a/clients/shared/utils.go +++ b/clients/shared/utils.go @@ -25,12 +25,12 @@ func BackfillColumn(cfg config.Config, dwh destination.DataWarehouse, column col } additionalDateFmts := cfg.SharedTransferConfig.TypingSettings.AdditionalDateFormats - defaultVal, err := column.DefaultValue(&columns.DefaultValueArgs{Escape: true, DestKind: dwh.Label()}, additionalDateFmts) + defaultVal, err := column.DefaultValue(dwh.Dialect(), additionalDateFmts) if err != nil { return fmt.Errorf("failed to escape default value: %w", err) } - escapedCol := column.Name(dwh.ShouldUppercaseEscapedNames(), dwh.Label()) + escapedCol := dwh.Dialect().QuoteIdentifier(column.Name()) // TODO: This is added because `default` is not technically a column that requires escaping, but it is required when it's in the where clause. // Once we escape everything by default, we can remove this patch of code. @@ -45,7 +45,7 @@ func BackfillColumn(cfg config.Config, dwh destination.DataWarehouse, column col tableID.FullyQualifiedName(), escapedCol, defaultVal, additionalEscapedCol, ) slog.Info("Backfilling column", - slog.String("colName", column.RawName()), + slog.String("colName", column.Name()), slog.String("query", query), slog.String("table", tableID.FullyQualifiedName()), ) diff --git a/clients/snowflake/ddl_test.go b/clients/snowflake/ddl_test.go index c2e8a3e77..61c607576 100644 --- a/clients/snowflake/ddl_test.go +++ b/clients/snowflake/ddl_test.go @@ -41,7 +41,7 @@ func (s *SnowflakeTestSuite) TestMutateColumnsWithMemoryCacheDeletions() { nameCol := columns.NewColumn("name", typing.String) tc := s.stageStore.configMap.TableConfig(tableID) - val := tc.ShouldDeleteColumn(nameCol.RawName(), time.Now().Add(-1*6*time.Hour), true) + val := tc.ShouldDeleteColumn(nameCol.Name(), time.Now().Add(-1*6*time.Hour), true) assert.False(s.T(), val, "should not try to delete this column") assert.Equal(s.T(), len(s.stageStore.configMap.TableConfig(tableID).ReadOnlyColumnsToDelete()), 1) @@ -68,23 +68,23 @@ func (s *SnowflakeTestSuite) TestShouldDeleteColumn() { nameCol := columns.NewColumn("name", typing.String) // Let's try to delete name. - allowed := s.stageStore.configMap.TableConfig(tableID).ShouldDeleteColumn(nameCol.RawName(), + allowed := s.stageStore.configMap.TableConfig(tableID).ShouldDeleteColumn(nameCol.Name(), time.Now().Add(-1*(6*time.Hour)), true) assert.Equal(s.T(), allowed, false, "should not be allowed to delete") // Process tried to delete, but it's lagged. - allowed = s.stageStore.configMap.TableConfig(tableID).ShouldDeleteColumn(nameCol.RawName(), + allowed = s.stageStore.configMap.TableConfig(tableID).ShouldDeleteColumn(nameCol.Name(), time.Now().Add(-1*(6*time.Hour)), true) assert.Equal(s.T(), allowed, false, "should not be allowed to delete") // Process now caught up, and is asking if we can delete, should still be no. - allowed = s.stageStore.configMap.TableConfig(tableID).ShouldDeleteColumn(nameCol.RawName(), time.Now(), true) + allowed = s.stageStore.configMap.TableConfig(tableID).ShouldDeleteColumn(nameCol.Name(), time.Now(), true) assert.Equal(s.T(), allowed, false, "should not be allowed to delete still") // Process is finally ahead, has permission to delete now. - allowed = s.stageStore.configMap.TableConfig(tableID).ShouldDeleteColumn(nameCol.RawName(), + allowed = s.stageStore.configMap.TableConfig(tableID).ShouldDeleteColumn(nameCol.Name(), time.Now().Add(2*constants.DeletionConfidencePadding), true) assert.Equal(s.T(), allowed, true, "should now be allowed to delete") diff --git a/clients/snowflake/snowflake.go b/clients/snowflake/snowflake.go index eec3b4dda..3cf716612 100644 --- a/clients/snowflake/snowflake.go +++ b/clients/snowflake/snowflake.go @@ -78,11 +78,7 @@ func (s *Store) Label() constants.DestinationKind { } func (s *Store) Dialect() sql.Dialect { - return sql.SnowflakeDialect{UppercaseEscNames: s.ShouldUppercaseEscapedNames()} -} - -func (s *Store) ShouldUppercaseEscapedNames() bool { - return s.config.SharedDestinationConfig.UppercaseEscapedNames + return sql.SnowflakeDialect{LegacyMode: !s.config.SharedDestinationConfig.UppercaseEscapedNames} } func (s *Store) GetConfigMap() *types.DwhToTablesConfigMap { @@ -132,14 +128,11 @@ func (s *Store) reestablishConnection() error { } func (s *Store) generateDedupeQueries(tableID, stagingTableID types.TableIdentifier, primaryKeys []string, topicConfig kafkalib.TopicConfig) []string { - var primaryKeysEscaped []string - for _, pk := range primaryKeys { - primaryKeysEscaped = append(primaryKeysEscaped, sql.EscapeNameIfNecessaryUsingDialect(pk, s.Dialect())) - } + primaryKeysEscaped := sql.QuoteIdentifiers(primaryKeys, s.Dialect()) orderColsToIterate := primaryKeysEscaped if topicConfig.IncludeArtieUpdatedAt { - orderColsToIterate = append(orderColsToIterate, sql.EscapeNameIfNecessaryUsingDialect(constants.UpdateColumnMarker, s.Dialect())) + orderColsToIterate = append(orderColsToIterate, s.Dialect().QuoteIdentifier(constants.UpdateColumnMarker)) } var orderByCols []string diff --git a/clients/snowflake/snowflake_dedupe_test.go b/clients/snowflake/snowflake_dedupe_test.go index b9b5aed83..549d39f17 100644 --- a/clients/snowflake/snowflake_dedupe_test.go +++ b/clients/snowflake/snowflake_dedupe_test.go @@ -20,10 +20,10 @@ func (s *SnowflakeTestSuite) TestGenerateDedupeQueries() { assert.Len(s.T(), parts, 3) assert.Equal( s.T(), - fmt.Sprintf(`CREATE OR REPLACE TRANSIENT TABLE %s AS (SELECT * FROM db.public."CUSTOMERS" QUALIFY ROW_NUMBER() OVER (PARTITION BY id ORDER BY id ASC) = 2)`, stagingTableID.FullyQualifiedName()), + fmt.Sprintf(`CREATE OR REPLACE TRANSIENT TABLE %s AS (SELECT * FROM db.public."CUSTOMERS" QUALIFY ROW_NUMBER() OVER (PARTITION BY "ID" ORDER BY "ID" ASC) = 2)`, stagingTableID.FullyQualifiedName()), parts[0], ) - assert.Equal(s.T(), fmt.Sprintf(`DELETE FROM db.public."CUSTOMERS" t1 USING %s t2 WHERE t1.id = t2.id`, stagingTableID.FullyQualifiedName()), parts[1]) + assert.Equal(s.T(), fmt.Sprintf(`DELETE FROM db.public."CUSTOMERS" t1 USING %s t2 WHERE t1."ID" = t2."ID"`, stagingTableID.FullyQualifiedName()), parts[1]) assert.Equal(s.T(), fmt.Sprintf(`INSERT INTO db.public."CUSTOMERS" SELECT * FROM %s`, stagingTableID.FullyQualifiedName()), parts[2]) } { @@ -35,10 +35,10 @@ func (s *SnowflakeTestSuite) TestGenerateDedupeQueries() { assert.Len(s.T(), parts, 3) assert.Equal( s.T(), - fmt.Sprintf(`CREATE OR REPLACE TRANSIENT TABLE %s AS (SELECT * FROM db.public."CUSTOMERS" QUALIFY ROW_NUMBER() OVER (PARTITION BY id ORDER BY id ASC, __artie_updated_at ASC) = 2)`, stagingTableID.FullyQualifiedName()), + fmt.Sprintf(`CREATE OR REPLACE TRANSIENT TABLE %s AS (SELECT * FROM db.public."CUSTOMERS" QUALIFY ROW_NUMBER() OVER (PARTITION BY "ID" ORDER BY "ID" ASC, "__ARTIE_UPDATED_AT" ASC) = 2)`, stagingTableID.FullyQualifiedName()), parts[0], ) - assert.Equal(s.T(), fmt.Sprintf(`DELETE FROM db.public."CUSTOMERS" t1 USING %s t2 WHERE t1.id = t2.id`, stagingTableID.FullyQualifiedName()), parts[1]) + assert.Equal(s.T(), fmt.Sprintf(`DELETE FROM db.public."CUSTOMERS" t1 USING %s t2 WHERE t1."ID" = t2."ID"`, stagingTableID.FullyQualifiedName()), parts[1]) assert.Equal(s.T(), fmt.Sprintf(`INSERT INTO db.public."CUSTOMERS" SELECT * FROM %s`, stagingTableID.FullyQualifiedName()), parts[2]) } { @@ -50,10 +50,10 @@ func (s *SnowflakeTestSuite) TestGenerateDedupeQueries() { assert.Len(s.T(), parts, 3) assert.Equal( s.T(), - fmt.Sprintf(`CREATE OR REPLACE TRANSIENT TABLE %s AS (SELECT * FROM db.public."USER_SETTINGS" QUALIFY ROW_NUMBER() OVER (PARTITION BY user_id, settings ORDER BY user_id ASC, settings ASC) = 2)`, stagingTableID.FullyQualifiedName()), + fmt.Sprintf(`CREATE OR REPLACE TRANSIENT TABLE %s AS (SELECT * FROM db.public."USER_SETTINGS" QUALIFY ROW_NUMBER() OVER (PARTITION BY "USER_ID", "SETTINGS" ORDER BY "USER_ID" ASC, "SETTINGS" ASC) = 2)`, stagingTableID.FullyQualifiedName()), parts[0], ) - assert.Equal(s.T(), fmt.Sprintf(`DELETE FROM db.public."USER_SETTINGS" t1 USING %s t2 WHERE t1.user_id = t2.user_id AND t1.settings = t2.settings`, stagingTableID.FullyQualifiedName()), parts[1]) + assert.Equal(s.T(), fmt.Sprintf(`DELETE FROM db.public."USER_SETTINGS" t1 USING %s t2 WHERE t1."USER_ID" = t2."USER_ID" AND t1."SETTINGS" = t2."SETTINGS"`, stagingTableID.FullyQualifiedName()), parts[1]) assert.Equal(s.T(), fmt.Sprintf(`INSERT INTO db.public."USER_SETTINGS" SELECT * FROM %s`, stagingTableID.FullyQualifiedName()), parts[2]) } { @@ -65,10 +65,10 @@ func (s *SnowflakeTestSuite) TestGenerateDedupeQueries() { assert.Len(s.T(), parts, 3) assert.Equal( s.T(), - fmt.Sprintf(`CREATE OR REPLACE TRANSIENT TABLE %s AS (SELECT * FROM db.public."USER_SETTINGS" QUALIFY ROW_NUMBER() OVER (PARTITION BY user_id, settings ORDER BY user_id ASC, settings ASC, __artie_updated_at ASC) = 2)`, stagingTableID.FullyQualifiedName()), + fmt.Sprintf(`CREATE OR REPLACE TRANSIENT TABLE %s AS (SELECT * FROM db.public."USER_SETTINGS" QUALIFY ROW_NUMBER() OVER (PARTITION BY "USER_ID", "SETTINGS" ORDER BY "USER_ID" ASC, "SETTINGS" ASC, "__ARTIE_UPDATED_AT" ASC) = 2)`, stagingTableID.FullyQualifiedName()), parts[0], ) - assert.Equal(s.T(), fmt.Sprintf(`DELETE FROM db.public."USER_SETTINGS" t1 USING %s t2 WHERE t1.user_id = t2.user_id AND t1.settings = t2.settings`, stagingTableID.FullyQualifiedName()), parts[1]) + assert.Equal(s.T(), fmt.Sprintf(`DELETE FROM db.public."USER_SETTINGS" t1 USING %s t2 WHERE t1."USER_ID" = t2."USER_ID" AND t1."SETTINGS" = t2."SETTINGS"`, stagingTableID.FullyQualifiedName()), parts[1]) assert.Equal(s.T(), fmt.Sprintf(`INSERT INTO db.public."USER_SETTINGS" SELECT * FROM %s`, stagingTableID.FullyQualifiedName()), parts[2]) } } diff --git a/clients/snowflake/snowflake_suite_test.go b/clients/snowflake/snowflake_suite_test.go index db66cf5df..c60454908 100644 --- a/clients/snowflake/snowflake_suite_test.go +++ b/clients/snowflake/snowflake_suite_test.go @@ -25,7 +25,12 @@ func (s *SnowflakeTestSuite) ResetStore() { s.fakeStageStore = &mocks.FakeStore{} stageStore := db.Store(s.fakeStageStore) var err error - s.stageStore, err = LoadSnowflake(config.Config{}, &stageStore) + s.stageStore, err = LoadSnowflake(config.Config{ + Snowflake: &config.Snowflake{}, + SharedDestinationConfig: config.SharedDestinationConfig{ + UppercaseEscapedNames: true, + }, + }, &stageStore) assert.NoError(s.T(), err) } diff --git a/clients/snowflake/staging.go b/clients/snowflake/staging.go index 91e07e286..bac39ef83 100644 --- a/clients/snowflake/staging.go +++ b/clients/snowflake/staging.go @@ -12,7 +12,7 @@ import ( "github.com/artie-labs/transfer/lib/destination/ddl" "github.com/artie-labs/transfer/lib/destination/types" "github.com/artie-labs/transfer/lib/optimization" - "github.com/artie-labs/transfer/lib/ptr" + "github.com/artie-labs/transfer/lib/sql" "github.com/artie-labs/transfer/lib/typing" "github.com/artie-labs/transfer/lib/typing/columns" "github.com/artie-labs/transfer/lib/typing/values" @@ -49,14 +49,13 @@ func castColValStaging(colVal any, colKind columns.Column, additionalDateFmts [] func (s *Store) PrepareTemporaryTable(tableData *optimization.TableData, tableConfig *types.DwhTableConfig, tempTableID types.TableIdentifier, additionalSettings types.AdditionalSettings, createTempTable bool) error { if createTempTable { tempAlterTableArgs := ddl.AlterTableArgs{ - Dwh: s, - Tc: tableConfig, - TableID: tempTableID, - CreateTable: true, - TemporaryTable: true, - ColumnOp: constants.Add, - UppercaseEscNames: ptr.ToBool(s.ShouldUppercaseEscapedNames()), - Mode: tableData.Mode(), + Dwh: s, + Tc: tableConfig, + TableID: tempTableID, + CreateTable: true, + TemporaryTable: true, + ColumnOp: constants.Add, + Mode: tableData.Mode(), } if err := tempAlterTableArgs.AlterTable(tableData.ReadOnlyInMemoryCols().GetColumns()...); err != nil { @@ -85,7 +84,7 @@ func (s *Store) PrepareTemporaryTable(tableData *optimization.TableData, tableCo // COPY the CSV file (in Snowflake) into a table copyCommand := fmt.Sprintf("COPY INTO %s (%s) FROM (SELECT %s FROM @%s)", tempTableID.FullyQualifiedName(), - strings.Join(tableData.ReadOnlyInMemoryCols().GetEscapedColumnsToUpdate(s.ShouldUppercaseEscapedNames(), s.Label()), ","), + strings.Join(sql.QuoteIdentifiers(tableData.ReadOnlyInMemoryCols().GetColumnsToUpdate(), s.Dialect()), ","), escapeColumns(tableData.ReadOnlyInMemoryCols(), ","), addPrefixToTableName(tempTableID, "%")) if additionalSettings.AdditionalCopyClause != "" { diff --git a/clients/snowflake/staging_test.go b/clients/snowflake/staging_test.go index 8d9c09566..f75bb9a90 100644 --- a/clients/snowflake/staging_test.go +++ b/clients/snowflake/staging_test.go @@ -79,14 +79,14 @@ func (s *SnowflakeTestSuite) TestBackfillColumn() { { name: "col that has default value that needs to be backfilled", col: needsBackfillCol, - backfillSQL: `UPDATE db.public."TABLENAME" SET foo = true WHERE foo IS NULL;`, - commentSQL: `COMMENT ON COLUMN db.public."TABLENAME".foo IS '{"backfilled": true}';`, + backfillSQL: `UPDATE db.public."TABLENAME" SET "FOO" = true WHERE "FOO" IS NULL;`, + commentSQL: `COMMENT ON COLUMN db.public."TABLENAME"."FOO" IS '{"backfilled": true}';`, }, { name: "default col that has default value that needs to be backfilled", col: needsBackfillColDefault, - backfillSQL: `UPDATE db.public."TABLENAME" SET default = true WHERE "DEFAULT" IS NULL;`, - commentSQL: `COMMENT ON COLUMN db.public."TABLENAME".default IS '{"backfilled": true}';`, + backfillSQL: `UPDATE db.public."TABLENAME" SET "DEFAULT" = true WHERE "DEFAULT" IS NULL;`, + commentSQL: `COMMENT ON COLUMN db.public."TABLENAME"."DEFAULT" IS '{"backfilled": true}';`, }, } @@ -147,7 +147,7 @@ func (s *SnowflakeTestSuite) TestPrepareTempTable() { createQuery, _ := s.fakeStageStore.ExecArgsForCall(0) prefixQuery := fmt.Sprintf( - `CREATE TABLE IF NOT EXISTS %s (user_id string,first_name string,last_name string,dusty string) STAGE_COPY_OPTIONS = ( PURGE = TRUE ) STAGE_FILE_FORMAT = ( TYPE = 'csv' FIELD_DELIMITER= '\t' FIELD_OPTIONALLY_ENCLOSED_BY='"' NULL_IF='\\N' EMPTY_FIELD_AS_NULL=FALSE)`, tempTableName) + `CREATE TABLE IF NOT EXISTS %s ("USER_ID" string,"FIRST_NAME" string,"LAST_NAME" string,"DUSTY" string) STAGE_COPY_OPTIONS = ( PURGE = TRUE ) STAGE_FILE_FORMAT = ( TYPE = 'csv' FIELD_DELIMITER= '\t' FIELD_OPTIONALLY_ENCLOSED_BY='"' NULL_IF='\\N' EMPTY_FIELD_AS_NULL=FALSE)`, tempTableName) containsPrefix := strings.HasPrefix(createQuery, prefixQuery) assert.True(s.T(), containsPrefix, fmt.Sprintf("createQuery:%v, prefixQuery:%s", createQuery, prefixQuery)) resourceName := addPrefixToTableName(tempTableID, "%") @@ -157,7 +157,7 @@ func (s *SnowflakeTestSuite) TestPrepareTempTable() { assert.Contains(s.T(), putQuery, fmt.Sprintf("@%s AUTO_COMPRESS=TRUE", resourceName)) // Third call is a COPY INTO copyQuery, _ := s.fakeStageStore.ExecArgsForCall(2) - assert.Equal(s.T(), fmt.Sprintf(`COPY INTO %s (user_id,first_name,last_name,dusty) FROM (SELECT $1,$2,$3,$4 FROM @%s)`, + assert.Equal(s.T(), fmt.Sprintf(`COPY INTO %s ("USER_ID","FIRST_NAME","LAST_NAME","DUSTY") FROM (SELECT $1,$2,$3,$4 FROM @%s)`, tempTableName, resourceName), copyQuery) } { diff --git a/clients/snowflake/tableid.go b/clients/snowflake/tableid.go index 662b97f75..ec9cbb2db 100644 --- a/clients/snowflake/tableid.go +++ b/clients/snowflake/tableid.go @@ -7,7 +7,7 @@ import ( "github.com/artie-labs/transfer/lib/sql" ) -var dialect = sql.SnowflakeDialect{UppercaseEscNames: true} +var dialect = sql.SnowflakeDialect{} type TableIdentifier struct { database string diff --git a/clients/snowflake/writes.go b/clients/snowflake/writes.go index f5f48f0b9..b5ca06e98 100644 --- a/clients/snowflake/writes.go +++ b/clients/snowflake/writes.go @@ -25,10 +25,8 @@ func (s *Store) Append(tableData *optimization.TableData) error { } } - tableID := s.IdentifierFor(tableData.TopicConfig(), tableData.Name()) // TODO: For history mode - in the future, we could also have a separate stage name for history mode so we can enable parallel processing. - err = shared.Append(s, tableData, types.AppendOpts{ - TempTableID: tableID, + err = shared.Append(s, tableData, types.AdditionalSettings{ AdditionalCopyClause: `FILE_FORMAT = (TYPE = 'csv' FIELD_DELIMITER= '\t' FIELD_OPTIONALLY_ENCLOSED_BY='"' NULL_IF='\\N' EMPTY_FIELD_AS_NULL=FALSE) PURGE = TRUE`, }) } diff --git a/examples/mongodb/README.md b/examples/mongodb/README.md index ec7c92e57..cd3730feb 100644 --- a/examples/mongodb/README.md +++ b/examples/mongodb/README.md @@ -30,8 +30,8 @@ docker-compose -f docker-compose.yaml exec mongodb bash -c '/usr/local/bin/init- # Now, if you want to connect to the Mongo shell and insert more data, go right ahead docker-compose -f docker-compose.yaml exec mongodb bash -c 'mongo -u $MONGODB_USER -p $MONGODB_PASSWORD --authenticationDatabase admin inventory' db.customers.insert([ - { _id : NumberLong("1020"), first_name : 'Robin', - last_name : 'Tang', email : 'robin@artie.so', unique_id : UUID(), + { _id : NumberLong("1020"), first_name : 'Robin', + last_name : 'Tang', email : 'robin@example.com', unique_id : UUID(), test_bool_false: false, test_bool_true: true, new_id: ObjectId(), test_decimal: NumberDecimal("13.37"), test_int: NumberInt("1337"), test_decimal_2: 13.37, test_list: [1, 2, 3, 4, "hello"], test_null: null, test_ts: Timestamp(42, 1), test_nested_object: {a: { b: { c: "hello"}}}} diff --git a/examples/pubsub_postgres/README.md b/examples/pubsub_postgres/README.md index 8ae4e77b4..bd56163c0 100644 --- a/examples/pubsub_postgres/README.md +++ b/examples/pubsub_postgres/README.md @@ -1,5 +1,5 @@ # Postgres Example -This example requires additional configuration on the Pub/Sub side. +This example requires additional configuration on the Pub/Sub side. -Please see https://docs.artie.so/tutorials/setting-up-pub-sub for further details. +Please see https://docs.artie.com/tutorials/setting-up-pub-sub for further details. diff --git a/lib/array/strings.go b/lib/array/strings.go index 4d66c844b..69269c75d 100644 --- a/lib/array/strings.go +++ b/lib/array/strings.go @@ -47,7 +47,7 @@ func InterfaceToArrayString(val any, recastAsArray bool) ([]string, error) { vals = append(vals, string(bytes)) } else { - vals = append(vals, stringutil.Wrap(value, true)) + vals = append(vals, stringutil.EscapeBackslashes(fmt.Sprint(value))) } } diff --git a/lib/cdc/mongo/debezium_test.go b/lib/cdc/mongo/debezium_test.go index d01e21726..eba0dc9a2 100644 --- a/lib/cdc/mongo/debezium_test.go +++ b/lib/cdc/mongo/debezium_test.go @@ -142,7 +142,7 @@ func (p *MongoTestSuite) TestMongoDBEventCustomer() { "schema": {}, "payload": { "before": null, - "after": "{\"_id\": {\"$numberLong\": \"1003\"},\"first_name\": \"Robin\",\"last_name\": \"Tang\",\"email\": \"robin@artie.so\", \"nested\": {\"object\": \"foo\"}}", + "after": "{\"_id\": {\"$numberLong\": \"1003\"},\"first_name\": \"Robin\",\"last_name\": \"Tang\",\"email\": \"robin@example.com\", \"nested\": {\"object\": \"foo\"}}", "patch": null, "filter": null, "updateDescription": null, @@ -176,7 +176,7 @@ func (p *MongoTestSuite) TestMongoDBEventCustomer() { assert.Equal(p.T(), evtData["_id"], 1003) assert.Equal(p.T(), evtData["first_name"], "Robin") assert.Equal(p.T(), evtData["last_name"], "Tang") - assert.Equal(p.T(), evtData["email"], "robin@artie.so") + assert.Equal(p.T(), evtData["email"], "robin@example.com") evtDataWithIncludedAt, err := evt.GetData(map[string]any{"_id": 1003}, &kafkalib.TopicConfig{}) assert.NoError(p.T(), err) diff --git a/lib/cdc/mysql/debezium_test.go b/lib/cdc/mysql/debezium_test.go index 783ab83c8..49f6fb760 100644 --- a/lib/cdc/mysql/debezium_test.go +++ b/lib/cdc/mysql/debezium_test.go @@ -353,7 +353,7 @@ func (m *MySQLTestSuite) TestGetEventFromBytes() { col, isOk := cols.GetColumn("abcdef") assert.True(m.T(), isOk) - assert.Equal(m.T(), "abcdef", col.RawName()) + assert.Equal(m.T(), "abcdef", col.Name()) for key := range evtData { if strings.Contains(key, constants.ArtiePrefix) { continue @@ -361,6 +361,6 @@ func (m *MySQLTestSuite) TestGetEventFromBytes() { col, isOk = cols.GetColumn(strings.ToLower(key)) assert.Equal(m.T(), true, isOk, key) - assert.Equal(m.T(), typing.Invalid, col.KindDetails, fmt.Sprintf("colName: %v, evtData key: %v", col.RawName(), key)) + assert.Equal(m.T(), typing.Invalid, col.KindDetails, fmt.Sprintf("colName: %v, evtData key: %v", col.Name(), key)) } } diff --git a/lib/cdc/util/relational_data_test.go b/lib/cdc/util/relational_data_test.go index beeb05186..f85253e13 100644 --- a/lib/cdc/util/relational_data_test.go +++ b/lib/cdc/util/relational_data_test.go @@ -3,7 +3,7 @@ package util const ( MySQLDelete = `{"schema":{"type":"struct","fields":[{"type":"struct","fields":[{"type":"int32","optional":false,"field":"id"},{"type":"string","optional":false,"field":"first_name"},{"type":"string","optional":false,"field":"last_name"},{"type":"string","optional":false,"field":"email"}],"optional":true,"name":"dbserver1.inventory.customers.Value","field":"before"},{"type":"struct","fields":[{"type":"int32","optional":false,"field":"id"},{"type":"string","optional":false,"field":"first_name"},{"type":"string","optional":false,"field":"last_name"},{"type":"string","optional":false,"field":"email"}],"optional":true,"name":"dbserver1.inventory.customers.Value","field":"after"},{"type":"struct","fields":[{"type":"string","optional":false,"field":"version"},{"type":"string","optional":false,"field":"connector"},{"type":"string","optional":false,"field":"name"},{"type":"int64","optional":false,"field":"ts_ms"},{"type":"string","optional":true,"name":"io.debezium.data.Enum","version":1,"parameters":{"allowed":"true,last,false,incremental"},"default":"false","field":"snapshot"},{"type":"string","optional":false,"field":"db"},{"type":"string","optional":true,"field":"sequence"},{"type":"string","optional":true,"field":"table"},{"type":"int64","optional":false,"field":"server_id"},{"type":"string","optional":true,"field":"gtid"},{"type":"string","optional":false,"field":"file"},{"type":"int64","optional":false,"field":"pos"},{"type":"int32","optional":false,"field":"row"},{"type":"int64","optional":true,"field":"thread"},{"type":"string","optional":true,"field":"query"}],"optional":false,"name":"io.debezium.connector.mysql.Source","field":"source"},{"type":"string","optional":false,"field":"op"},{"type":"int64","optional":true,"field":"ts_ms"},{"type":"struct","fields":[{"type":"string","optional":false,"field":"id"},{"type":"int64","optional":false,"field":"total_order"},{"type":"int64","optional":false,"field":"data_collection_order"}],"optional":true,"name":"event.block","version":1,"field":"transaction"}],"optional":false,"name":"dbserver1.inventory.customers.Envelope","version":1},"payload":{"before":{"id":1004,"first_name":"Anne","last_name":"Kretchmar","email":"annek@noanswer.org"},"after":null,"source":{"version":"2.0.1.Final","connector":"mysql","name":"dbserver1","ts_ms":1711381272000,"snapshot":"false","db":"inventory","sequence":null,"table":"customers","server_id":223344,"gtid":null,"file":"mysql-bin.000003","pos":569,"row":0,"thread":11,"query":null},"op":"d","ts_ms":1711381272702,"transaction":null}}` MySQLUpdate = `{"schema":{"type":"struct","fields":[{"type":"struct","fields":[{"type":"int32","optional":false,"field":"id"},{"type":"string","optional":false,"field":"first_name"},{"type":"string","optional":false,"field":"last_name"},{"type":"string","optional":false,"field":"email"}],"optional":true,"name":"dbserver1.inventory.customers.Value","field":"before"},{"type":"struct","fields":[{"type":"int32","optional":false,"field":"id"},{"type":"string","optional":false,"field":"first_name"},{"type":"string","optional":false,"field":"last_name"},{"type":"string","optional":false,"field":"email"}],"optional":true,"name":"dbserver1.inventory.customers.Value","field":"after"},{"type":"struct","fields":[{"type":"string","optional":false,"field":"version"},{"type":"string","optional":false,"field":"connector"},{"type":"string","optional":false,"field":"name"},{"type":"int64","optional":false,"field":"ts_ms"},{"type":"string","optional":true,"name":"io.debezium.data.Enum","version":1,"parameters":{"allowed":"true,last,false,incremental"},"default":"false","field":"snapshot"},{"type":"string","optional":false,"field":"db"},{"type":"string","optional":true,"field":"sequence"},{"type":"string","optional":true,"field":"table"},{"type":"int64","optional":false,"field":"server_id"},{"type":"string","optional":true,"field":"gtid"},{"type":"string","optional":false,"field":"file"},{"type":"int64","optional":false,"field":"pos"},{"type":"int32","optional":false,"field":"row"},{"type":"int64","optional":true,"field":"thread"},{"type":"string","optional":true,"field":"query"}],"optional":false,"name":"io.debezium.connector.mysql.Source","field":"source"},{"type":"string","optional":false,"field":"op"},{"type":"int64","optional":true,"field":"ts_ms"},{"type":"struct","fields":[{"type":"string","optional":false,"field":"id"},{"type":"int64","optional":false,"field":"total_order"},{"type":"int64","optional":false,"field":"data_collection_order"}],"optional":true,"name":"event.block","version":1,"field":"transaction"}],"optional":false,"name":"dbserver1.inventory.customers.Envelope","version":1},"payload":{"before":{"id":1003,"first_name":"Edward","last_name":"Walker","email":"ed@walker.com"},"after":{"id":1003,"first_name":"Dusty","last_name":"Walker","email":"ed@walker.com"},"source":{"version":"2.0.1.Final","connector":"mysql","name":"dbserver1","ts_ms":1711381320000,"snapshot":"false","db":"inventory","sequence":null,"table":"customers","server_id":223344,"gtid":null,"file":"mysql-bin.000003","pos":912,"row":0,"thread":11,"query":null},"op":"u","ts_ms":1711381320962,"transaction":null}}` - MySQLInsert = `{"schema":{"type":"struct","fields":[{"type":"struct","fields":[{"type":"int32","optional":false,"field":"id"},{"type":"string","optional":false,"field":"first_name"},{"type":"string","optional":false,"field":"last_name"},{"type":"string","optional":false,"field":"email"}],"optional":true,"name":"dbserver1.inventory.customers.Value","field":"before"},{"type":"struct","fields":[{"type":"int32","optional":false,"field":"id"},{"type":"string","optional":false,"field":"first_name"},{"type":"string","optional":false,"field":"last_name"},{"type":"string","optional":false,"field":"email"}],"optional":true,"name":"dbserver1.inventory.customers.Value","field":"after"},{"type":"struct","fields":[{"type":"string","optional":false,"field":"version"},{"type":"string","optional":false,"field":"connector"},{"type":"string","optional":false,"field":"name"},{"type":"int64","optional":false,"field":"ts_ms"},{"type":"string","optional":true,"name":"io.debezium.data.Enum","version":1,"parameters":{"allowed":"true,last,false,incremental"},"default":"false","field":"snapshot"},{"type":"string","optional":false,"field":"db"},{"type":"string","optional":true,"field":"sequence"},{"type":"string","optional":true,"field":"table"},{"type":"int64","optional":false,"field":"server_id"},{"type":"string","optional":true,"field":"gtid"},{"type":"string","optional":false,"field":"file"},{"type":"int64","optional":false,"field":"pos"},{"type":"int32","optional":false,"field":"row"},{"type":"int64","optional":true,"field":"thread"},{"type":"string","optional":true,"field":"query"}],"optional":false,"name":"io.debezium.connector.mysql.Source","field":"source"},{"type":"string","optional":false,"field":"op"},{"type":"int64","optional":true,"field":"ts_ms"},{"type":"struct","fields":[{"type":"string","optional":false,"field":"id"},{"type":"int64","optional":false,"field":"total_order"},{"type":"int64","optional":false,"field":"data_collection_order"}],"optional":true,"name":"event.block","version":1,"field":"transaction"}],"optional":false,"name":"dbserver1.inventory.customers.Envelope","version":1},"payload":{"before":null,"after":{"id":1005,"first_name":"The Dust","last_name":"Tang","email":"dusty@artie.so"},"source":{"version":"2.0.1.Final","connector":"mysql","name":"dbserver1","ts_ms":1711381357000,"snapshot":"false","db":"inventory","sequence":null,"table":"customers","server_id":223344,"gtid":null,"file":"mysql-bin.000003","pos":1276,"row":0,"thread":11,"query":null},"op":"c","ts_ms":1711381357622,"transaction":null}}` + MySQLInsert = `{"schema":{"type":"struct","fields":[{"type":"struct","fields":[{"type":"int32","optional":false,"field":"id"},{"type":"string","optional":false,"field":"first_name"},{"type":"string","optional":false,"field":"last_name"},{"type":"string","optional":false,"field":"email"}],"optional":true,"name":"dbserver1.inventory.customers.Value","field":"before"},{"type":"struct","fields":[{"type":"int32","optional":false,"field":"id"},{"type":"string","optional":false,"field":"first_name"},{"type":"string","optional":false,"field":"last_name"},{"type":"string","optional":false,"field":"email"}],"optional":true,"name":"dbserver1.inventory.customers.Value","field":"after"},{"type":"struct","fields":[{"type":"string","optional":false,"field":"version"},{"type":"string","optional":false,"field":"connector"},{"type":"string","optional":false,"field":"name"},{"type":"int64","optional":false,"field":"ts_ms"},{"type":"string","optional":true,"name":"io.debezium.data.Enum","version":1,"parameters":{"allowed":"true,last,false,incremental"},"default":"false","field":"snapshot"},{"type":"string","optional":false,"field":"db"},{"type":"string","optional":true,"field":"sequence"},{"type":"string","optional":true,"field":"table"},{"type":"int64","optional":false,"field":"server_id"},{"type":"string","optional":true,"field":"gtid"},{"type":"string","optional":false,"field":"file"},{"type":"int64","optional":false,"field":"pos"},{"type":"int32","optional":false,"field":"row"},{"type":"int64","optional":true,"field":"thread"},{"type":"string","optional":true,"field":"query"}],"optional":false,"name":"io.debezium.connector.mysql.Source","field":"source"},{"type":"string","optional":false,"field":"op"},{"type":"int64","optional":true,"field":"ts_ms"},{"type":"struct","fields":[{"type":"string","optional":false,"field":"id"},{"type":"int64","optional":false,"field":"total_order"},{"type":"int64","optional":false,"field":"data_collection_order"}],"optional":true,"name":"event.block","version":1,"field":"transaction"}],"optional":false,"name":"dbserver1.inventory.customers.Envelope","version":1},"payload":{"before":null,"after":{"id":1005,"first_name":"The Dust","last_name":"Tang","email":"dusty@example.com"},"source":{"version":"2.0.1.Final","connector":"mysql","name":"dbserver1","ts_ms":1711381357000,"snapshot":"false","db":"inventory","sequence":null,"table":"customers","server_id":223344,"gtid":null,"file":"mysql-bin.000003","pos":1276,"row":0,"thread":11,"query":null},"op":"c","ts_ms":1711381357622,"transaction":null}}` PostgresDelete = `{"schema":{"type":"struct","fields":[{"type":"struct","fields":[{"type":"int32","optional":false,"default":0,"field":"id"},{"type":"string","optional":false,"field":"first_name"},{"type":"string","optional":false,"field":"last_name"},{"type":"string","optional":false,"field":"email"}],"optional":true,"name":"dbserver1.inventory.customers.Value","field":"before"},{"type":"struct","fields":[{"type":"int32","optional":false,"default":0,"field":"id"},{"type":"string","optional":false,"field":"first_name"},{"type":"string","optional":false,"field":"last_name"},{"type":"string","optional":false,"field":"email"}],"optional":true,"name":"dbserver1.inventory.customers.Value","field":"after"},{"type":"struct","fields":[{"type":"string","optional":false,"field":"version"},{"type":"string","optional":false,"field":"connector"},{"type":"string","optional":false,"field":"name"},{"type":"int64","optional":false,"field":"ts_ms"},{"type":"string","optional":true,"name":"io.debezium.data.Enum","version":1,"parameters":{"allowed":"true,last,false,incremental"},"default":"false","field":"snapshot"},{"type":"string","optional":false,"field":"db"},{"type":"string","optional":true,"field":"sequence"},{"type":"string","optional":false,"field":"schema"},{"type":"string","optional":false,"field":"table"},{"type":"int64","optional":true,"field":"txId"},{"type":"int64","optional":true,"field":"lsn"},{"type":"int64","optional":true,"field":"xmin"}],"optional":false,"name":"io.debezium.connector.postgresql.Source","field":"source"},{"type":"string","optional":false,"field":"op"},{"type":"int64","optional":true,"field":"ts_ms"},{"type":"struct","fields":[{"type":"string","optional":false,"field":"id"},{"type":"int64","optional":false,"field":"total_order"},{"type":"int64","optional":false,"field":"data_collection_order"}],"optional":true,"name":"event.block","version":1,"field":"transaction"}],"optional":false,"name":"dbserver1.inventory.customers.Envelope","version":1},"payload":{"before":{"id":1004,"first_name":"Anne","last_name":"Kretchmar","email":"annek@noanswer.org"},"after":null,"source":{"version":"2.5.0.Final","connector":"postgresql","name":"dbserver1","ts_ms":1711381709158,"snapshot":"false","db":"postgres","sequence":"[null,\"36450928\"]","schema":"inventory","table":"customers","txId":792,"lsn":36450928,"xmin":null},"op":"d","ts_ms":1711381709586,"transaction":null}}` PostgresUpdate = `{"schema":{"type":"struct","fields":[{"type":"struct","fields":[{"type":"int32","optional":false,"default":0,"field":"id"},{"type":"string","optional":false,"field":"first_name"},{"type":"string","optional":false,"field":"last_name"},{"type":"string","optional":false,"field":"email"},{"type":"boolean","optional":true,"field":"boolean_test"},{"type":"boolean","optional":true,"field":"bool_test"},{"type":"boolean","optional":true,"field":"bit_test"},{"type":"struct","fields":[{"type":"int32","optional":false,"field":"scale"},{"type":"bytes","optional":false,"field":"value"}],"optional":true,"name":"io.debezium.data.VariableScaleDecimal","version":1,"doc":"Variable scaled decimal","field":"numeric_test"},{"type":"bytes","optional":true,"name":"org.apache.kafka.connect.data.Decimal","version":1,"parameters":{"scale":"0","connect.decimal.precision":"5"},"field":"numeric_5"},{"type":"bytes","optional":true,"name":"org.apache.kafka.connect.data.Decimal","version":1,"parameters":{"scale":"2","connect.decimal.precision":"5"},"field":"numeric_5_2"},{"type":"bytes","optional":true,"name":"org.apache.kafka.connect.data.Decimal","version":1,"parameters":{"scale":"6","connect.decimal.precision":"5"},"field":"numeric_5_6"},{"type":"bytes","optional":true,"name":"org.apache.kafka.connect.data.Decimal","version":1,"parameters":{"scale":"0","connect.decimal.precision":"5"},"field":"numeric_5_0"},{"type":"bytes","optional":true,"name":"org.apache.kafka.connect.data.Decimal","version":1,"parameters":{"scale":"0","connect.decimal.precision":"39"},"field":"numeric_39_0"},{"type":"bytes","optional":true,"name":"org.apache.kafka.connect.data.Decimal","version":1,"parameters":{"scale":"2","connect.decimal.precision":"39"},"field":"numeric_39_2"},{"type":"bytes","optional":true,"name":"org.apache.kafka.connect.data.Decimal","version":1,"parameters":{"scale":"6","connect.decimal.precision":"39"},"field":"numeric_39_6"}],"optional":true,"name":"dbserver1.inventory.customers.Value","field":"before"},{"type":"struct","fields":[{"type":"int32","optional":false,"default":0,"field":"id"},{"type":"string","optional":false,"field":"first_name"},{"type":"string","optional":false,"field":"last_name"},{"type":"string","optional":false,"field":"email"},{"type":"boolean","optional":true,"field":"boolean_test"},{"type":"boolean","optional":true,"field":"bool_test"},{"type":"boolean","optional":true,"field":"bit_test"},{"type":"struct","fields":[{"type":"int32","optional":false,"field":"scale"},{"type":"bytes","optional":false,"field":"value"}],"optional":true,"name":"io.debezium.data.VariableScaleDecimal","version":1,"doc":"Variable scaled decimal","field":"numeric_test"},{"type":"bytes","optional":true,"name":"org.apache.kafka.connect.data.Decimal","version":1,"parameters":{"scale":"0","connect.decimal.precision":"5"},"field":"numeric_5"},{"type":"bytes","optional":true,"name":"org.apache.kafka.connect.data.Decimal","version":1,"parameters":{"scale":"2","connect.decimal.precision":"5"},"field":"numeric_5_2"},{"type":"bytes","optional":true,"name":"org.apache.kafka.connect.data.Decimal","version":1,"parameters":{"scale":"6","connect.decimal.precision":"5"},"field":"numeric_5_6"},{"type":"bytes","optional":true,"name":"org.apache.kafka.connect.data.Decimal","version":1,"parameters":{"scale":"0","connect.decimal.precision":"5"},"field":"numeric_5_0"},{"type":"bytes","optional":true,"name":"org.apache.kafka.connect.data.Decimal","version":1,"parameters":{"scale":"0","connect.decimal.precision":"39"},"field":"numeric_39_0"},{"type":"bytes","optional":true,"name":"org.apache.kafka.connect.data.Decimal","version":1,"parameters":{"scale":"2","connect.decimal.precision":"39"},"field":"numeric_39_2"},{"type":"bytes","optional":true,"name":"org.apache.kafka.connect.data.Decimal","version":1,"parameters":{"scale":"6","connect.decimal.precision":"39"},"field":"numeric_39_6"}],"optional":true,"name":"dbserver1.inventory.customers.Value","field":"after"},{"type":"struct","fields":[{"type":"string","optional":false,"field":"version"},{"type":"string","optional":false,"field":"connector"},{"type":"string","optional":false,"field":"name"},{"type":"int64","optional":false,"field":"ts_ms"},{"type":"string","optional":true,"name":"io.debezium.data.Enum","version":1,"parameters":{"allowed":"true,last,false,incremental"},"default":"false","field":"snapshot"},{"type":"string","optional":false,"field":"db"},{"type":"string","optional":true,"field":"sequence"},{"type":"string","optional":false,"field":"schema"},{"type":"string","optional":false,"field":"table"},{"type":"int64","optional":true,"field":"txId"},{"type":"int64","optional":true,"field":"lsn"},{"type":"int64","optional":true,"field":"xmin"}],"optional":false,"name":"io.debezium.connector.postgresql.Source","field":"source"},{"type":"string","optional":false,"field":"op"},{"type":"int64","optional":true,"field":"ts_ms"},{"type":"struct","fields":[{"type":"string","optional":false,"field":"id"},{"type":"int64","optional":false,"field":"total_order"},{"type":"int64","optional":false,"field":"data_collection_order"}],"optional":true,"name":"event.block","version":1,"field":"transaction"}],"optional":false,"name":"dbserver1.inventory.customers.Envelope","version":1},"payload":{"before":{"id":1001,"first_name":"Sally","last_name":"Thomas","email":"sally.thomas@acme.com","boolean_test":true,"bool_test":false,"bit_test":false,"numeric_test":null,"numeric_5":null,"numeric_5_2":null,"numeric_5_6":null,"numeric_5_0":null,"numeric_39_0":null,"numeric_39_2":null,"numeric_39_6":null},"after":{"id":1001,"first_name":"Sally","last_name":"Thomas","email":"sally.thomas@acme.com","boolean_test":true,"bool_test":false,"bit_test":false,"numeric_test":{"scale":3,"value":"B1vNFQ=="},"numeric_5":"BNI=","numeric_5_2":"AOHJ","numeric_5_6":"W6A=","numeric_5_0":"BQ==","numeric_39_0":"LA//uAAAAAAAAAAAAAAAAA==","numeric_39_2":"LBAD0S5LtA8eEEfNAAAAFg==","numeric_39_6":"HOB1x8wbGatWdikB4kA="},"source":{"version":"2.5.0.Final","connector":"postgresql","name":"dbserver1","ts_ms":1711381838401,"snapshot":"false","db":"postgres","sequence":"[\"37133376\",\"37158360\"]","schema":"inventory","table":"customers","txId":806,"lsn":37158360,"xmin":null},"op":"u","ts_ms":1711381838845,"transaction":null}}` diff --git a/lib/cdc/util/relational_event_test.go b/lib/cdc/util/relational_event_test.go index e5a421ed7..72d903864 100644 --- a/lib/cdc/util/relational_event_test.go +++ b/lib/cdc/util/relational_event_test.go @@ -74,8 +74,8 @@ func TestSource_GetOptionalSchema(t *testing.T) { for _, _col := range cols.GetColumns() { // All the other columns do not have a default value. - if _col.RawName() != "boolean_column" { - assert.Nil(t, _col.RawDefaultValue(), _col.RawName()) + if _col.Name() != "boolean_column" { + assert.Nil(t, _col.RawDefaultValue(), _col.Name()) } } } diff --git a/lib/destination/ddl/ddl.go b/lib/destination/ddl/ddl.go index 723238ac6..675dc5941 100644 --- a/lib/destination/ddl/ddl.go +++ b/lib/destination/ddl/ddl.go @@ -44,7 +44,6 @@ type AlterTableArgs struct { TableID types.TableIdentifier CreateTable bool TemporaryTable bool - UppercaseEscNames *bool ColumnOp constants.ColumnOperation Mode config.Mode @@ -69,10 +68,6 @@ func (a AlterTableArgs) Validate() error { } } - if a.UppercaseEscNames == nil { - return fmt.Errorf("uppercaseEscNames cannot be nil") - } - return nil } @@ -96,7 +91,7 @@ func (a AlterTableArgs) AlterTable(cols ...columns.Column) error { } if a.ColumnOp == constants.Delete { - if !a.Tc.ShouldDeleteColumn(col.RawName(), a.CdcTime, a.ContainOtherOperations) { + if !a.Tc.ShouldDeleteColumn(col.Name(), a.CdcTime, a.ContainOtherOperations) { continue } } @@ -104,7 +99,7 @@ func (a AlterTableArgs) AlterTable(cols ...columns.Column) error { mutateCol = append(mutateCol, col) switch a.ColumnOp { case constants.Add: - colName := col.Name(*a.UppercaseEscNames, a.Dwh.Label()) + colName := a.Dwh.Dialect().QuoteIdentifier(col.Name()) if col.PrimaryKey() && a.Mode != config.History { // Don't create a PK for history mode because it's append-only, so the primary key should not be enforced. @@ -113,7 +108,7 @@ func (a AlterTableArgs) AlterTable(cols ...columns.Column) error { colSQLParts = append(colSQLParts, fmt.Sprintf(`%s %s`, colName, typing.KindToDWHType(col.KindDetails, a.Dwh.Label(), col.PrimaryKey()))) case constants.Delete: - colSQLParts = append(colSQLParts, col.Name(*a.UppercaseEscNames, a.Dwh.Label())) + colSQLParts = append(colSQLParts, a.Dwh.Dialect().QuoteIdentifier(col.Name())) } } diff --git a/lib/destination/ddl/ddl_alter_delete_test.go b/lib/destination/ddl/ddl_alter_delete_test.go index 02a15cc07..aa0f2c699 100644 --- a/lib/destination/ddl/ddl_alter_delete_test.go +++ b/lib/destination/ddl/ddl_alter_delete_test.go @@ -2,12 +2,11 @@ package ddl_test import ( "fmt" + "strings" "time" "github.com/artie-labs/transfer/lib/config" - "github.com/artie-labs/transfer/lib/ptr" - "github.com/artie-labs/transfer/lib/config/constants" "github.com/artie-labs/transfer/lib/destination/ddl" "github.com/artie-labs/transfer/lib/destination/types" @@ -68,7 +67,6 @@ func (d *DDLTestSuite) TestAlterDelete_Complete() { ContainOtherOperations: true, ColumnOp: constants.Delete, CdcTime: ts, - UppercaseEscNames: ptr.ToBool(false), Mode: config.Replication, } @@ -89,7 +87,6 @@ func (d *DDLTestSuite) TestAlterDelete_Complete() { ContainOtherOperations: true, ColumnOp: constants.Delete, CdcTime: ts, - UppercaseEscNames: ptr.ToBool(false), Mode: config.Replication, } @@ -111,7 +108,6 @@ func (d *DDLTestSuite) TestAlterDelete_Complete() { ContainOtherOperations: true, ColumnOp: constants.Delete, CdcTime: ts, - UppercaseEscNames: ptr.ToBool(false), Mode: config.Replication, } @@ -146,7 +142,6 @@ func (d *DDLTestSuite) TestAlterDelete_Complete() { ContainOtherOperations: false, ColumnOp: constants.Delete, CdcTime: ts, - UppercaseEscNames: ptr.ToBool(false), Mode: config.Replication, } @@ -167,7 +162,6 @@ func (d *DDLTestSuite) TestAlterDelete_Complete() { ContainOtherOperations: false, ColumnOp: constants.Delete, CdcTime: ts, - UppercaseEscNames: ptr.ToBool(false), Mode: config.Replication, } @@ -188,7 +182,6 @@ func (d *DDLTestSuite) TestAlterDelete_Complete() { ContainOtherOperations: false, ColumnOp: constants.Delete, CdcTime: ts, - UppercaseEscNames: ptr.ToBool(false), Mode: config.Replication, } @@ -225,7 +218,6 @@ func (d *DDLTestSuite) TestAlterDelete_Complete() { ContainOtherOperations: true, ColumnOp: constants.Delete, CdcTime: ts, - UppercaseEscNames: ptr.ToBool(false), Mode: config.Replication, } @@ -242,7 +234,6 @@ func (d *DDLTestSuite) TestAlterDelete_Complete() { ContainOtherOperations: true, ColumnOp: constants.Delete, CdcTime: ts, - UppercaseEscNames: ptr.ToBool(false), Mode: config.Replication, } @@ -259,7 +250,6 @@ func (d *DDLTestSuite) TestAlterDelete_Complete() { ContainOtherOperations: true, ColumnOp: constants.Delete, CdcTime: ts, - UppercaseEscNames: ptr.ToBool(false), Mode: config.Replication, } @@ -284,7 +274,6 @@ func (d *DDLTestSuite) TestAlterDelete_Complete() { ContainOtherOperations: true, ColumnOp: constants.Delete, CdcTime: ts.Add(2 * constants.DeletionConfidencePadding), - UppercaseEscNames: ptr.ToBool(false), Mode: config.Replication, } @@ -299,7 +288,6 @@ func (d *DDLTestSuite) TestAlterDelete_Complete() { ContainOtherOperations: true, ColumnOp: constants.Delete, CdcTime: ts.Add(2 * constants.DeletionConfidencePadding), - UppercaseEscNames: ptr.ToBool(false), Mode: config.Replication, } @@ -314,7 +302,6 @@ func (d *DDLTestSuite) TestAlterDelete_Complete() { ContainOtherOperations: true, ColumnOp: constants.Delete, CdcTime: ts.Add(2 * constants.DeletionConfidencePadding), - UppercaseEscNames: ptr.ToBool(false), Mode: config.Replication, } @@ -339,7 +326,7 @@ func (d *DDLTestSuite) TestAlterDelete_Complete() { execQuery, _ := d.fakeSnowflakeStagesStore.ExecArgsForCall(0) var found bool for key := range allColsMap { - if execQuery == fmt.Sprintf("ALTER TABLE %s drop COLUMN %s", snowflakeName, key) { + if execQuery == fmt.Sprintf(`ALTER TABLE %s drop COLUMN "%s"`, snowflakeName, strings.ToUpper(key)) { found = true } } diff --git a/lib/destination/ddl/ddl_bq_test.go b/lib/destination/ddl/ddl_bq_test.go index 62c6a19bc..babea1977 100644 --- a/lib/destination/ddl/ddl_bq_test.go +++ b/lib/destination/ddl/ddl_bq_test.go @@ -9,8 +9,6 @@ import ( "github.com/artie-labs/transfer/clients/bigquery" "github.com/artie-labs/transfer/lib/config" - "github.com/artie-labs/transfer/lib/ptr" - "github.com/artie-labs/transfer/lib/typing/columns" "github.com/stretchr/testify/assert" @@ -61,7 +59,6 @@ func (d *DDLTestSuite) TestAlterTableDropColumnsBigQuery() { ColumnOp: constants.Delete, ContainOtherOperations: true, CdcTime: ts, - UppercaseEscNames: ptr.ToBool(false), Mode: config.Replication, } @@ -84,13 +81,12 @@ func (d *DDLTestSuite) TestAlterTableDropColumnsBigQuery() { ColumnOp: constants.Delete, ContainOtherOperations: true, CdcTime: ts.Add(2 * constants.DeletionConfidencePadding), - UppercaseEscNames: ptr.ToBool(false), Mode: config.Replication, } assert.NoError(d.T(), alterTableArgs.AlterTable(column)) query, _ := d.fakeBigQueryStore.ExecArgsForCall(callIdx) - assert.Equal(d.T(), fmt.Sprintf("ALTER TABLE %s drop COLUMN %s", fqName, column.Name(false, d.bigQueryStore.Label())), query) + assert.Equal(d.T(), fmt.Sprintf("ALTER TABLE %s drop COLUMN %s", fqName, d.bigQueryStore.Dialect().QuoteIdentifier(column.Name())), query) callIdx += 1 } @@ -134,21 +130,20 @@ func (d *DDLTestSuite) TestAlterTableAddColumns() { tc := d.bigQueryStore.GetConfigMap().TableConfig(tableID) for name, kind := range newCols { alterTableArgs := ddl.AlterTableArgs{ - Dwh: d.bigQueryStore, - Tc: tc, - TableID: tableID, - CreateTable: tc.CreateTable(), - ColumnOp: constants.Add, - CdcTime: ts, - UppercaseEscNames: ptr.ToBool(false), - Mode: config.Replication, + Dwh: d.bigQueryStore, + Tc: tc, + TableID: tableID, + CreateTable: tc.CreateTable(), + ColumnOp: constants.Add, + CdcTime: ts, + Mode: config.Replication, } col := columns.NewColumn(name, kind) assert.NoError(d.T(), alterTableArgs.AlterTable(col)) query, _ := d.fakeBigQueryStore.ExecArgsForCall(callIdx) - assert.Equal(d.T(), fmt.Sprintf("ALTER TABLE %s %s COLUMN %s %s", fqName, constants.Add, col.Name(false, d.bigQueryStore.Label()), + assert.Equal(d.T(), fmt.Sprintf("ALTER TABLE %s %s COLUMN %s %s", fqName, constants.Add, d.bigQueryStore.Dialect().QuoteIdentifier(col.Name()), typing.KindToDWHType(kind, d.bigQueryStore.Label(), false)), query) callIdx += 1 } @@ -157,10 +152,10 @@ func (d *DDLTestSuite) TestAlterTableAddColumns() { assert.Equal(d.T(), newColsLen+existingColsLen, len(d.bigQueryStore.GetConfigMap().TableConfig(tableID).Columns().GetColumns()), d.bigQueryStore.GetConfigMap().TableConfig(tableID).Columns()) // Check by iterating over the columns for _, column := range d.bigQueryStore.GetConfigMap().TableConfig(tableID).Columns().GetColumns() { - existingCol, isOk := existingCols.GetColumn(column.RawName()) + existingCol, isOk := existingCols.GetColumn(column.Name()) if !isOk { // Check new cols? - existingCol.KindDetails, isOk = newCols[column.RawName()] + existingCol.KindDetails, isOk = newCols[column.Name()] } assert.True(d.T(), isOk) @@ -196,19 +191,18 @@ func (d *DDLTestSuite) TestAlterTableAddColumnsSomeAlreadyExist() { // BQ returning the same error because the column already exists. d.fakeBigQueryStore.ExecReturnsOnCall(0, sqlResult, errors.New("Column already exists: _string at [1:39]")) alterTableArgs := ddl.AlterTableArgs{ - Dwh: d.bigQueryStore, - Tc: tc, - TableID: tableID, - CreateTable: tc.CreateTable(), - ColumnOp: constants.Add, - CdcTime: ts, - UppercaseEscNames: ptr.ToBool(false), - Mode: config.Replication, + Dwh: d.bigQueryStore, + Tc: tc, + TableID: tableID, + CreateTable: tc.CreateTable(), + ColumnOp: constants.Add, + CdcTime: ts, + Mode: config.Replication, } assert.NoError(d.T(), alterTableArgs.AlterTable(column)) query, _ := d.fakeBigQueryStore.ExecArgsForCall(callIdx) - assert.Equal(d.T(), fmt.Sprintf("ALTER TABLE %s %s COLUMN %s %s", fqName, constants.Add, column.Name(false, d.bigQueryStore.Label()), + assert.Equal(d.T(), fmt.Sprintf("ALTER TABLE %s %s COLUMN %s %s", fqName, constants.Add, d.bigQueryStore.Dialect().QuoteIdentifier(column.Name()), typing.KindToDWHType(column.KindDetails, d.bigQueryStore.Label(), false)), query) callIdx += 1 } @@ -217,7 +211,7 @@ func (d *DDLTestSuite) TestAlterTableAddColumnsSomeAlreadyExist() { assert.Equal(d.T(), existingColsLen, len(d.bigQueryStore.GetConfigMap().TableConfig(tableID).Columns().GetColumns()), d.bigQueryStore.GetConfigMap().TableConfig(tableID).Columns()) // Check by iterating over the columns for _, column := range d.bigQueryStore.GetConfigMap().TableConfig(tableID).Columns().GetColumns() { - existingCol, isOk := existingCols.GetColumn(column.RawName()) + existingCol, isOk := existingCols.GetColumn(column.Name()) assert.True(d.T(), isOk) assert.Equal(d.T(), column.KindDetails, existingCol.KindDetails) } @@ -250,14 +244,13 @@ func (d *DDLTestSuite) TestAlterTableDropColumnsBigQuerySafety() { assert.Equal(d.T(), 0, len(d.bigQueryStore.GetConfigMap().TableConfig(tableID).ReadOnlyColumnsToDelete()), d.bigQueryStore.GetConfigMap().TableConfig(tableID).ReadOnlyColumnsToDelete()) for _, column := range cols.GetColumns() { alterTableArgs := ddl.AlterTableArgs{ - Dwh: d.bigQueryStore, - Tc: tc, - TableID: tableID, - CreateTable: tc.CreateTable(), - ColumnOp: constants.Delete, - CdcTime: ts, - UppercaseEscNames: ptr.ToBool(false), - Mode: config.Replication, + Dwh: d.bigQueryStore, + Tc: tc, + TableID: tableID, + CreateTable: tc.CreateTable(), + ColumnOp: constants.Delete, + CdcTime: ts, + Mode: config.Replication, } assert.NoError(d.T(), alterTableArgs.AlterTable(column)) } @@ -268,14 +261,13 @@ func (d *DDLTestSuite) TestAlterTableDropColumnsBigQuerySafety() { // Now try to delete again and with an increased TS. It should now be all deleted. for _, column := range cols.GetColumns() { alterTableArgs := ddl.AlterTableArgs{ - Dwh: d.bigQueryStore, - Tc: tc, - TableID: tableID, - CreateTable: tc.CreateTable(), - ColumnOp: constants.Delete, - CdcTime: ts.Add(2 * constants.DeletionConfidencePadding), - UppercaseEscNames: ptr.ToBool(false), - Mode: config.Replication, + Dwh: d.bigQueryStore, + Tc: tc, + TableID: tableID, + CreateTable: tc.CreateTable(), + ColumnOp: constants.Delete, + CdcTime: ts.Add(2 * constants.DeletionConfidencePadding), + Mode: config.Replication, } assert.NoError(d.T(), alterTableArgs.AlterTable(column)) diff --git a/lib/destination/ddl/ddl_create_table_test.go b/lib/destination/ddl/ddl_create_table_test.go index 44209ecfc..f67f988b0 100644 --- a/lib/destination/ddl/ddl_create_table_test.go +++ b/lib/destination/ddl/ddl_create_table_test.go @@ -4,21 +4,18 @@ import ( "fmt" "time" + "github.com/stretchr/testify/assert" + "github.com/artie-labs/transfer/clients/bigquery" "github.com/artie-labs/transfer/clients/snowflake" "github.com/artie-labs/transfer/lib/config" - - "github.com/artie-labs/transfer/lib/ptr" - - "github.com/artie-labs/transfer/lib/typing/columns" - "github.com/artie-labs/transfer/lib/config/constants" "github.com/artie-labs/transfer/lib/destination" "github.com/artie-labs/transfer/lib/destination/ddl" "github.com/artie-labs/transfer/lib/destination/types" "github.com/artie-labs/transfer/lib/mocks" "github.com/artie-labs/transfer/lib/typing" - "github.com/stretchr/testify/assert" + "github.com/artie-labs/transfer/lib/typing/columns" ) func (d *DDLTestSuite) Test_CreateTable() { @@ -52,17 +49,16 @@ func (d *DDLTestSuite) Test_CreateTable() { _dwh: d.snowflakeStagesStore, _tableConfig: snowflakeStagesTc, _fakeStore: d.fakeSnowflakeStagesStore, - _expectedQuery: fmt.Sprintf("CREATE TABLE IF NOT EXISTS %s (name string)", snowflakeTableID.FullyQualifiedName()), + _expectedQuery: fmt.Sprintf(`CREATE TABLE IF NOT EXISTS %s ("NAME" string)`, snowflakeTableID.FullyQualifiedName()), }, } { alterTableArgs := ddl.AlterTableArgs{ - Dwh: dwhTc._dwh, - Tc: dwhTc._tableConfig, - TableID: dwhTc._tableID, - CreateTable: dwhTc._tableConfig.CreateTable(), - ColumnOp: constants.Add, - UppercaseEscNames: ptr.ToBool(false), - Mode: config.Replication, + Dwh: dwhTc._dwh, + Tc: dwhTc._tableConfig, + TableID: dwhTc._tableID, + CreateTable: dwhTc._tableConfig.CreateTable(), + ColumnOp: constants.Add, + Mode: config.Replication, } assert.NoError(d.T(), alterTableArgs.AlterTable(columns.NewColumn("name", typing.String))) @@ -102,17 +98,17 @@ func (d *DDLTestSuite) TestCreateTable() { { name: "happy path", cols: happyPathCols, - expectedQuery: `CREATE TABLE IF NOT EXISTS demo.public."EXPERIMENTS" (user_id string)`, + expectedQuery: `CREATE TABLE IF NOT EXISTS demo.public."EXPERIMENTS" ("USER_ID" string)`, }, { name: "happy path + enabled", cols: twoCols, - expectedQuery: `CREATE TABLE IF NOT EXISTS demo.public."EXPERIMENTS" (user_id string,enabled boolean)`, + expectedQuery: `CREATE TABLE IF NOT EXISTS demo.public."EXPERIMENTS" ("USER_ID" string,"ENABLED" boolean)`, }, { name: "complex table creation", cols: bunchOfCols, - expectedQuery: `CREATE TABLE IF NOT EXISTS demo.public."EXPERIMENTS" (user_id string,enabled_boolean boolean,array array,struct variant)`, + expectedQuery: `CREATE TABLE IF NOT EXISTS demo.public."EXPERIMENTS" ("USER_ID" string,"ENABLED_BOOLEAN" boolean,"ARRAY" array,"STRUCT" variant)`, }, } @@ -122,14 +118,13 @@ func (d *DDLTestSuite) TestCreateTable() { tc := d.snowflakeStagesStore.GetConfigMap().TableConfig(tableID) alterTableArgs := ddl.AlterTableArgs{ - Dwh: d.snowflakeStagesStore, - Tc: tc, - TableID: tableID, - CreateTable: tc.CreateTable(), - ColumnOp: constants.Add, - CdcTime: time.Now().UTC(), - UppercaseEscNames: ptr.ToBool(false), - Mode: config.Replication, + Dwh: d.snowflakeStagesStore, + Tc: tc, + TableID: tableID, + CreateTable: tc.CreateTable(), + ColumnOp: constants.Add, + CdcTime: time.Now().UTC(), + Mode: config.Replication, } assert.NoError(d.T(), alterTableArgs.AlterTable(testCase.cols...), testCase.name) diff --git a/lib/destination/ddl/ddl_sflk_test.go b/lib/destination/ddl/ddl_sflk_test.go index 2305929d2..deae9dfd8 100644 --- a/lib/destination/ddl/ddl_sflk_test.go +++ b/lib/destination/ddl/ddl_sflk_test.go @@ -5,19 +5,15 @@ import ( "fmt" "time" - "github.com/artie-labs/transfer/clients/snowflake" - "github.com/artie-labs/transfer/lib/config" - - "github.com/artie-labs/transfer/lib/ptr" - - "github.com/artie-labs/transfer/lib/typing/columns" - "github.com/stretchr/testify/assert" + "github.com/artie-labs/transfer/clients/snowflake" + "github.com/artie-labs/transfer/lib/config" "github.com/artie-labs/transfer/lib/config/constants" "github.com/artie-labs/transfer/lib/destination/ddl" "github.com/artie-labs/transfer/lib/destination/types" "github.com/artie-labs/transfer/lib/typing" + "github.com/artie-labs/transfer/lib/typing/columns" "github.com/artie-labs/transfer/lib/typing/ext" ) @@ -34,20 +30,19 @@ func (d *DDLTestSuite) TestAlterComplexObjects() { tc := d.snowflakeStagesStore.GetConfigMap().TableConfig(tableID) alterTableArgs := ddl.AlterTableArgs{ - Dwh: d.snowflakeStagesStore, - Tc: tc, - TableID: tableID, - ColumnOp: constants.Add, - CdcTime: time.Now().UTC(), - UppercaseEscNames: ptr.ToBool(false), - Mode: config.Replication, + Dwh: d.snowflakeStagesStore, + Tc: tc, + TableID: tableID, + ColumnOp: constants.Add, + CdcTime: time.Now().UTC(), + Mode: config.Replication, } assert.NoError(d.T(), alterTableArgs.AlterTable(cols...)) for i := 0; i < len(cols); i++ { execQuery, _ := d.fakeSnowflakeStagesStore.ExecArgsForCall(i) assert.Equal(d.T(), fmt.Sprintf("ALTER TABLE %s add COLUMN %s %s", `shop.public."COMPLEX_COLUMNS"`, - cols[i].Name(false, d.snowflakeStagesStore.Label()), + d.snowflakeStagesStore.Dialect().QuoteIdentifier(cols[i].Name()), typing.KindToDWHType(cols[i].KindDetails, d.snowflakeStagesStore.Label(), false)), execQuery) } @@ -68,13 +63,12 @@ func (d *DDLTestSuite) TestAlterIdempotency() { d.fakeSnowflakeStagesStore.ExecReturns(nil, errors.New("column 'order_name' already exists")) alterTableArgs := ddl.AlterTableArgs{ - Dwh: d.snowflakeStagesStore, - Tc: tc, - TableID: tableID, - ColumnOp: constants.Add, - CdcTime: time.Now().UTC(), - UppercaseEscNames: ptr.ToBool(false), - Mode: config.Replication, + Dwh: d.snowflakeStagesStore, + Tc: tc, + TableID: tableID, + ColumnOp: constants.Add, + CdcTime: time.Now().UTC(), + Mode: config.Replication, } assert.NoError(d.T(), alterTableArgs.AlterTable(cols...)) @@ -98,13 +92,12 @@ func (d *DDLTestSuite) TestAlterTableAdd() { tc := d.snowflakeStagesStore.GetConfigMap().TableConfig(tableID) alterTableArgs := ddl.AlterTableArgs{ - Dwh: d.snowflakeStagesStore, - Tc: tc, - TableID: tableID, - ColumnOp: constants.Add, - CdcTime: time.Now().UTC(), - UppercaseEscNames: ptr.ToBool(false), - Mode: config.Replication, + Dwh: d.snowflakeStagesStore, + Tc: tc, + TableID: tableID, + ColumnOp: constants.Add, + CdcTime: time.Now().UTC(), + Mode: config.Replication, } assert.NoError(d.T(), alterTableArgs.AlterTable(cols...)) @@ -115,15 +108,15 @@ func (d *DDLTestSuite) TestAlterTableAdd() { for _, column := range tableConfig.Columns().GetColumns() { var found bool for _, expCol := range cols { - if found = column.RawName() == expCol.RawName(); found { - assert.Equal(d.T(), column.KindDetails, expCol.KindDetails, fmt.Sprintf("wrong col kind, col: %s", column.RawName())) + if found = column.Name() == expCol.Name(); found { + assert.Equal(d.T(), column.KindDetails, expCol.KindDetails, fmt.Sprintf("wrong col kind, col: %s", column.Name())) break } } assert.True(d.T(), found, fmt.Sprintf("Col not found: %s, actual list: %v, expected list: %v", - column.RawName(), tableConfig.Columns(), cols)) + column.Name(), tableConfig.Columns(), cols)) } } @@ -146,7 +139,6 @@ func (d *DDLTestSuite) TestAlterTableDeleteDryRun() { ContainOtherOperations: true, ColumnOp: constants.Delete, CdcTime: time.Now().UTC(), - UppercaseEscNames: ptr.ToBool(false), Mode: config.Replication, } @@ -158,7 +150,7 @@ func (d *DDLTestSuite) TestAlterTableDeleteDryRun() { for col := range tableConfig.ReadOnlyColumnsToDelete() { var found bool for _, expCol := range cols { - if found = col == expCol.RawName(); found { + if found = col == expCol.Name(); found { break } } @@ -169,7 +161,7 @@ func (d *DDLTestSuite) TestAlterTableDeleteDryRun() { } for i := 0; i < len(cols); i++ { - colToActuallyDelete := cols[i].RawName() + colToActuallyDelete := cols[i].Name() // Now let's check the timestamp assert.True(d.T(), tableConfig.ReadOnlyColumnsToDelete()[colToActuallyDelete].After(time.Now())) // Now let's actually try to dial the time back, and it should actually try to delete. @@ -180,7 +172,8 @@ func (d *DDLTestSuite) TestAlterTableDeleteDryRun() { execArg, _ := d.fakeSnowflakeStagesStore.ExecArgsForCall(i) assert.Equal(d.T(), execArg, fmt.Sprintf("ALTER TABLE %s %s COLUMN %s", `shop.public."USERS"`, constants.Delete, - cols[i].Name(false, d.snowflakeStagesStore.Label()))) + d.snowflakeStagesStore.Dialect().QuoteIdentifier(cols[i].Name()), + )) } } @@ -211,7 +204,6 @@ func (d *DDLTestSuite) TestAlterTableDelete() { ColumnOp: constants.Delete, ContainOtherOperations: true, CdcTime: time.Now(), - UppercaseEscNames: ptr.ToBool(false), Mode: config.Replication, } @@ -223,7 +215,7 @@ func (d *DDLTestSuite) TestAlterTableDelete() { for col := range tableConfig.ReadOnlyColumnsToDelete() { var found bool for _, expCol := range cols { - if found = col == expCol.RawName(); found { + if found = col == expCol.Name(); found { break } } diff --git a/lib/destination/ddl/ddl_suite_test.go b/lib/destination/ddl/ddl_suite_test.go index 9741c649b..299ea10ad 100644 --- a/lib/destination/ddl/ddl_suite_test.go +++ b/lib/destination/ddl/ddl_suite_test.go @@ -29,10 +29,6 @@ type DDLTestSuite struct { } func (d *DDLTestSuite) SetupTest() { - cfg := config.Config{ - Redshift: &config.Redshift{}, - } - d.bigQueryCfg = config.Config{ BigQuery: &config.BigQuery{ ProjectID: "artie-project", @@ -48,12 +44,21 @@ func (d *DDLTestSuite) SetupTest() { d.fakeSnowflakeStagesStore = &mocks.FakeStore{} snowflakeStagesStore := db.Store(d.fakeSnowflakeStagesStore) - d.snowflakeStagesStore, err = snowflake.LoadSnowflake(cfg, &snowflakeStagesStore) + snowflakeCfg := config.Config{ + Snowflake: &config.Snowflake{}, + SharedDestinationConfig: config.SharedDestinationConfig{ + UppercaseEscapedNames: true, + }, + } + d.snowflakeStagesStore, err = snowflake.LoadSnowflake(snowflakeCfg, &snowflakeStagesStore) assert.NoError(d.T(), err) d.fakeRedshiftStore = &mocks.FakeStore{} redshiftStore := db.Store(d.fakeRedshiftStore) - d.redshiftStore, err = redshift.LoadRedshift(cfg, &redshiftStore) + redshiftCfg := config.Config{ + Redshift: &config.Redshift{}, + } + d.redshiftStore, err = redshift.LoadRedshift(redshiftCfg, &redshiftStore) assert.NoError(d.T(), err) } diff --git a/lib/destination/ddl/ddl_temp_test.go b/lib/destination/ddl/ddl_temp_test.go index efa3a6940..57d901dce 100644 --- a/lib/destination/ddl/ddl_temp_test.go +++ b/lib/destination/ddl/ddl_temp_test.go @@ -3,27 +3,23 @@ package ddl_test import ( "time" + "github.com/stretchr/testify/assert" + "github.com/artie-labs/transfer/clients/bigquery" "github.com/artie-labs/transfer/clients/snowflake" "github.com/artie-labs/transfer/lib/config" - - "github.com/artie-labs/transfer/lib/ptr" - - "github.com/artie-labs/transfer/lib/typing/columns" - "github.com/artie-labs/transfer/lib/config/constants" "github.com/artie-labs/transfer/lib/destination/ddl" "github.com/artie-labs/transfer/lib/destination/types" "github.com/artie-labs/transfer/lib/typing" - "github.com/stretchr/testify/assert" + "github.com/artie-labs/transfer/lib/typing/columns" ) func (d *DDLTestSuite) TestValidate_AlterTableArgs() { a := &ddl.AlterTableArgs{ - ColumnOp: constants.Delete, - CreateTable: true, - UppercaseEscNames: ptr.ToBool(false), - Mode: config.Replication, + ColumnOp: constants.Delete, + CreateTable: true, + Mode: config.Replication, } assert.Contains(d.T(), a.Validate().Error(), "incompatible operation - cannot drop columns and create table at the same time") @@ -39,15 +35,14 @@ func (d *DDLTestSuite) TestCreateTemporaryTable_Errors() { d.snowflakeStagesStore.GetConfigMap().AddTableToConfig(tableID, types.NewDwhTableConfig(&columns.Columns{}, nil, true, true)) snowflakeTc := d.snowflakeStagesStore.GetConfigMap().TableConfig(tableID) args := ddl.AlterTableArgs{ - Dwh: d.snowflakeStagesStore, - Tc: snowflakeTc, - TableID: tableID, - CreateTable: true, - TemporaryTable: true, - ColumnOp: constants.Add, - CdcTime: time.Time{}, - UppercaseEscNames: ptr.ToBool(true), - Mode: config.Replication, + Dwh: d.snowflakeStagesStore, + Tc: snowflakeTc, + TableID: tableID, + CreateTable: true, + TemporaryTable: true, + ColumnOp: constants.Add, + CdcTime: time.Time{}, + Mode: config.Replication, } // No columns. @@ -74,15 +69,14 @@ func (d *DDLTestSuite) TestCreateTemporaryTable() { d.snowflakeStagesStore.GetConfigMap().AddTableToConfig(tableID, types.NewDwhTableConfig(&columns.Columns{}, nil, true, true)) sflkStageTc := d.snowflakeStagesStore.GetConfigMap().TableConfig(tableID) args := ddl.AlterTableArgs{ - Dwh: d.snowflakeStagesStore, - Tc: sflkStageTc, - TableID: tableID, - CreateTable: true, - TemporaryTable: true, - ColumnOp: constants.Add, - CdcTime: time.Time{}, - UppercaseEscNames: ptr.ToBool(true), - Mode: config.Replication, + Dwh: d.snowflakeStagesStore, + Tc: sflkStageTc, + TableID: tableID, + CreateTable: true, + TemporaryTable: true, + ColumnOp: constants.Add, + CdcTime: time.Time{}, + Mode: config.Replication, } assert.NoError(d.T(), args.AlterTable(columns.NewColumn("foo", typing.String), columns.NewColumn("bar", typing.Float), columns.NewColumn("start", typing.String))) @@ -100,15 +94,14 @@ func (d *DDLTestSuite) TestCreateTemporaryTable() { d.bigQueryStore.GetConfigMap().AddTableToConfig(tableID, types.NewDwhTableConfig(&columns.Columns{}, nil, true, true)) bqTc := d.bigQueryStore.GetConfigMap().TableConfig(tableID) args := ddl.AlterTableArgs{ - Dwh: d.bigQueryStore, - Tc: bqTc, - TableID: tableID, - CreateTable: true, - TemporaryTable: true, - ColumnOp: constants.Add, - CdcTime: time.Time{}, - UppercaseEscNames: ptr.ToBool(false), - Mode: config.Replication, + Dwh: d.bigQueryStore, + Tc: bqTc, + TableID: tableID, + CreateTable: true, + TemporaryTable: true, + ColumnOp: constants.Add, + CdcTime: time.Time{}, + Mode: config.Replication, } assert.NoError(d.T(), args.AlterTable(columns.NewColumn("foo", typing.String), columns.NewColumn("bar", typing.Float), columns.NewColumn("select", typing.String))) diff --git a/lib/destination/dml/merge.go b/lib/destination/dml/merge.go index 715f7ab5a..2c5b67eb2 100644 --- a/lib/destination/dml/merge.go +++ b/lib/destination/dml/merge.go @@ -3,6 +3,7 @@ package dml import ( "errors" "fmt" + "slices" "strings" "github.com/artie-labs/transfer/lib/array" @@ -17,7 +18,7 @@ type MergeArgument struct { TableID types.TableIdentifier SubQuery string IdempotentKey string - PrimaryKeys []columns.Wrapper + PrimaryKeys []columns.Column // AdditionalEqualityStrings is used for handling BigQuery partitioned table merges AdditionalEqualityStrings []string @@ -30,7 +31,6 @@ type MergeArgument struct { // ContainsHardDeletes is only used for Redshift and MergeStatementParts, // where we do not issue a DELETE statement if there are no hard deletes in the batch ContainsHardDeletes *bool - UppercaseEscNames *bool Dialect sql.Dialect } @@ -55,10 +55,6 @@ func (m *MergeArgument) Valid() error { return fmt.Errorf("subQuery cannot be empty") } - if m.UppercaseEscNames == nil { - return fmt.Errorf("uppercaseEscNames cannot be nil") - } - if !constants.IsValidDestination(m.DestKind) { return fmt.Errorf("invalid destination: %s", m.DestKind) } @@ -70,6 +66,12 @@ func (m *MergeArgument) Valid() error { return nil } +func removeDeleteColumnMarker(columns []string) ([]string, bool) { + origLength := len(columns) + columns = slices.DeleteFunc(columns, func(col string) bool { return col == constants.DeleteColumnMarker }) + return columns, len(columns) != origLength +} + func (m *MergeArgument) GetParts() ([]string, error) { if err := m.Valid(); err != nil { return nil, err @@ -99,32 +101,34 @@ func (m *MergeArgument) GetParts() ([]string, error) { var equalitySQLParts []string for _, primaryKey := range m.PrimaryKeys { // We'll need to escape the primary key as well. - equalitySQL := fmt.Sprintf("c.%s = cc.%s", primaryKey.EscapedName(), primaryKey.EscapedName()) + quotedPrimaryKey := m.Dialect.QuoteIdentifier(primaryKey.Name()) + equalitySQL := fmt.Sprintf("c.%s = cc.%s", quotedPrimaryKey, quotedPrimaryKey) equalitySQLParts = append(equalitySQLParts, equalitySQL) } - cols := m.Columns.GetEscapedColumnsToUpdate(*m.UppercaseEscNames, m.DestKind) + columns := m.Columns.GetColumnsToUpdate() if m.SoftDelete { return []string{ // INSERT fmt.Sprintf(`INSERT INTO %s (%s) SELECT %s FROM %s as cc LEFT JOIN %s as c on %s WHERE c.%s IS NULL;`, // insert into target (col1, col2, col3) - m.TableID.FullyQualifiedName(), strings.Join(cols, ","), + m.TableID.FullyQualifiedName(), strings.Join(sql.QuoteIdentifiers(columns, m.Dialect), ","), // SELECT cc.col1, cc.col2, ... FROM staging as CC array.StringsJoinAddPrefix(array.StringsJoinAddPrefixArgs{ - Vals: cols, + Vals: sql.QuoteIdentifiers(columns, m.Dialect), Separator: ",", Prefix: "cc.", }), m.SubQuery, // LEFT JOIN table on pk(s) m.TableID.FullyQualifiedName(), strings.Join(equalitySQLParts, " and "), // Where PK is NULL (we only need to specify one primary key since it's covered with equalitySQL parts) - m.PrimaryKeys[0].EscapedName()), + m.Dialect.QuoteIdentifier(m.PrimaryKeys[0].Name()), + ), // UPDATE fmt.Sprintf(`UPDATE %s as c SET %s FROM %s as cc WHERE %s%s;`, // UPDATE table set col1 = cc. col1 - m.TableID.FullyQualifiedName(), m.Columns.UpdateQuery(m.DestKind, *m.UppercaseEscNames, false), + m.TableID.FullyQualifiedName(), m.Columns.UpdateQuery(m.Dialect, false), // FROM table (temp) WHERE join on PK(s) m.SubQuery, strings.Join(equalitySQLParts, " and "), idempotentClause, ), @@ -133,42 +137,36 @@ func (m *MergeArgument) GetParts() ([]string, error) { // We also need to remove __artie flags since it does not exist in the destination table var removed bool - for idx, col := range cols { - if col == sql.EscapeNameIfNecessaryUsingDialect(constants.DeleteColumnMarker, m.Dialect) { - cols = append(cols[:idx], cols[idx+1:]...) - removed = true - break - } - } - + columns, removed = removeDeleteColumnMarker(columns) if !removed { return nil, errors.New("artie delete flag doesn't exist") } var pks []string for _, pk := range m.PrimaryKeys { - pks = append(pks, pk.EscapedName()) + pks = append(pks, m.Dialect.QuoteIdentifier(pk.Name())) } parts := []string{ // INSERT fmt.Sprintf(`INSERT INTO %s (%s) SELECT %s FROM %s as cc LEFT JOIN %s as c on %s WHERE c.%s IS NULL;`, // insert into target (col1, col2, col3) - m.TableID.FullyQualifiedName(), strings.Join(cols, ","), + m.TableID.FullyQualifiedName(), strings.Join(sql.QuoteIdentifiers(columns, m.Dialect), ","), // SELECT cc.col1, cc.col2, ... FROM staging as CC array.StringsJoinAddPrefix(array.StringsJoinAddPrefixArgs{ - Vals: cols, + Vals: sql.QuoteIdentifiers(columns, m.Dialect), Separator: ",", Prefix: "cc.", }), m.SubQuery, // LEFT JOIN table on pk(s) m.TableID.FullyQualifiedName(), strings.Join(equalitySQLParts, " and "), // Where PK is NULL (we only need to specify one primary key since it's covered with equalitySQL parts) - m.PrimaryKeys[0].EscapedName()), + m.Dialect.QuoteIdentifier(m.PrimaryKeys[0].Name()), + ), // UPDATE fmt.Sprintf(`UPDATE %s as c SET %s FROM %s as cc WHERE %s%s AND COALESCE(cc.%s, false) = false;`, // UPDATE table set col1 = cc. col1 - m.TableID.FullyQualifiedName(), m.Columns.UpdateQuery(m.DestKind, *m.UppercaseEscNames, true), + m.TableID.FullyQualifiedName(), m.Columns.UpdateQuery(m.Dialect, true), // FROM staging WHERE join on PK(s) m.SubQuery, strings.Join(equalitySQLParts, " and "), idempotentClause, constants.DeleteColumnMarker, ), @@ -212,15 +210,17 @@ func (m *MergeArgument) GetStatement() (string, error) { var equalitySQLParts []string for _, primaryKey := range m.PrimaryKeys { // We'll need to escape the primary key as well. - equalitySQL := fmt.Sprintf("c.%s = cc.%s", primaryKey.EscapedName(), primaryKey.EscapedName()) - pkCol, isOk := m.Columns.GetColumn(primaryKey.RawName()) + quotedPrimaryKey := m.Dialect.QuoteIdentifier(primaryKey.Name()) + + equalitySQL := fmt.Sprintf("c.%s = cc.%s", quotedPrimaryKey, quotedPrimaryKey) + pkCol, isOk := m.Columns.GetColumn(primaryKey.Name()) if !isOk { - return "", fmt.Errorf("column: %s does not exist in columnToType: %v", primaryKey.RawName(), m.Columns) + return "", fmt.Errorf("column: %s does not exist in columnToType: %v", primaryKey.Name(), m.Columns) } if m.DestKind == constants.BigQuery && pkCol.KindDetails.Kind == typing.Struct.Kind { // BigQuery requires special casting to compare two JSON objects. - equalitySQL = fmt.Sprintf("TO_JSON_STRING(c.%s) = TO_JSON_STRING(cc.%s)", primaryKey.EscapedName(), primaryKey.EscapedName()) + equalitySQL = fmt.Sprintf("TO_JSON_STRING(c.%s) = TO_JSON_STRING(cc.%s)", quotedPrimaryKey, quotedPrimaryKey) } equalitySQLParts = append(equalitySQLParts, equalitySQL) @@ -235,7 +235,7 @@ func (m *MergeArgument) GetStatement() (string, error) { equalitySQLParts = append(equalitySQLParts, m.AdditionalEqualityStrings...) } - cols := m.Columns.GetEscapedColumnsToUpdate(*m.UppercaseEscNames, m.DestKind) + columns := m.Columns.GetColumnsToUpdate() if m.SoftDelete { return fmt.Sprintf(` @@ -244,11 +244,11 @@ WHEN MATCHED %sTHEN UPDATE SET %s WHEN NOT MATCHED AND IFNULL(cc.%s, false) = false THEN INSERT (%s) VALUES (%s);`, m.TableID.FullyQualifiedName(), subQuery, strings.Join(equalitySQLParts, " and "), // Update + Soft Deletion - idempotentClause, m.Columns.UpdateQuery(m.DestKind, *m.UppercaseEscNames, false), + idempotentClause, m.Columns.UpdateQuery(m.Dialect, false), // Insert - constants.DeleteColumnMarker, strings.Join(cols, ","), + constants.DeleteColumnMarker, strings.Join(sql.QuoteIdentifiers(columns, m.Dialect), ","), array.StringsJoinAddPrefix(array.StringsJoinAddPrefixArgs{ - Vals: cols, + Vals: sql.QuoteIdentifiers(columns, m.Dialect), Separator: ",", Prefix: "cc.", })), nil @@ -256,14 +256,7 @@ WHEN NOT MATCHED AND IFNULL(cc.%s, false) = false THEN INSERT (%s) VALUES (%s);` // We also need to remove __artie flags since it does not exist in the destination table var removed bool - for idx, col := range cols { - if col == sql.EscapeNameIfNecessaryUsingDialect(constants.DeleteColumnMarker, m.Dialect) { - cols = append(cols[:idx], cols[idx+1:]...) - removed = true - break - } - } - + columns, removed = removeDeleteColumnMarker(columns) if !removed { return "", errors.New("artie delete flag doesn't exist") } @@ -277,11 +270,11 @@ WHEN NOT MATCHED AND IFNULL(cc.%s, false) = false THEN INSERT (%s) VALUES (%s);` // Delete constants.DeleteColumnMarker, // Update - constants.DeleteColumnMarker, idempotentClause, m.Columns.UpdateQuery(m.DestKind, *m.UppercaseEscNames, true), + constants.DeleteColumnMarker, idempotentClause, m.Columns.UpdateQuery(m.Dialect, true), // Insert - constants.DeleteColumnMarker, strings.Join(cols, ","), + constants.DeleteColumnMarker, strings.Join(sql.QuoteIdentifiers(columns, m.Dialect), ","), array.StringsJoinAddPrefix(array.StringsJoinAddPrefixArgs{ - Vals: cols, + Vals: sql.QuoteIdentifiers(columns, m.Dialect), Separator: ",", Prefix: "cc.", })), nil @@ -300,11 +293,12 @@ func (m *MergeArgument) GetMSSQLStatement() (string, error) { var equalitySQLParts []string for _, primaryKey := range m.PrimaryKeys { // We'll need to escape the primary key as well. - equalitySQL := fmt.Sprintf("c.%s = cc.%s", primaryKey.EscapedName(), primaryKey.EscapedName()) + quotedPrimaryKey := m.Dialect.QuoteIdentifier(primaryKey.Name()) + equalitySQL := fmt.Sprintf("c.%s = cc.%s", quotedPrimaryKey, quotedPrimaryKey) equalitySQLParts = append(equalitySQLParts, equalitySQL) } - cols := m.Columns.GetEscapedColumnsToUpdate(*m.UppercaseEscNames, m.DestKind) + columns := m.Columns.GetColumnsToUpdate() if m.SoftDelete { return fmt.Sprintf(` @@ -314,11 +308,11 @@ WHEN MATCHED %sTHEN UPDATE SET %s WHEN NOT MATCHED AND COALESCE(cc.%s, 0) = 0 THEN INSERT (%s) VALUES (%s);`, m.TableID.FullyQualifiedName(), m.SubQuery, strings.Join(equalitySQLParts, " and "), // Update + Soft Deletion - idempotentClause, m.Columns.UpdateQuery(m.DestKind, *m.UppercaseEscNames, false), + idempotentClause, m.Columns.UpdateQuery(m.Dialect, false), // Insert - constants.DeleteColumnMarker, strings.Join(cols, ","), + constants.DeleteColumnMarker, strings.Join(sql.QuoteIdentifiers(columns, m.Dialect), ","), array.StringsJoinAddPrefix(array.StringsJoinAddPrefixArgs{ - Vals: cols, + Vals: sql.QuoteIdentifiers(columns, m.Dialect), Separator: ",", Prefix: "cc.", })), nil @@ -326,14 +320,7 @@ WHEN NOT MATCHED AND COALESCE(cc.%s, 0) = 0 THEN INSERT (%s) VALUES (%s);`, // We also need to remove __artie flags since it does not exist in the destination table var removed bool - for idx, col := range cols { - if col == sql.EscapeNameIfNecessaryUsingDialect(constants.DeleteColumnMarker, m.Dialect) { - cols = append(cols[:idx], cols[idx+1:]...) - removed = true - break - } - } - + columns, removed = removeDeleteColumnMarker(columns) if !removed { return "", errors.New("artie delete flag doesn't exist") } @@ -348,11 +335,11 @@ WHEN NOT MATCHED AND COALESCE(cc.%s, 1) = 0 THEN INSERT (%s) VALUES (%s);`, // Delete constants.DeleteColumnMarker, // Update - constants.DeleteColumnMarker, idempotentClause, m.Columns.UpdateQuery(m.DestKind, *m.UppercaseEscNames, true), + constants.DeleteColumnMarker, idempotentClause, m.Columns.UpdateQuery(m.Dialect, true), // Insert - constants.DeleteColumnMarker, strings.Join(cols, ","), + constants.DeleteColumnMarker, strings.Join(sql.QuoteIdentifiers(columns, m.Dialect), ","), array.StringsJoinAddPrefix(array.StringsJoinAddPrefixArgs{ - Vals: cols, + Vals: sql.QuoteIdentifiers(columns, m.Dialect), Separator: ",", Prefix: "cc.", })), nil diff --git a/lib/destination/dml/merge_bigquery_test.go b/lib/destination/dml/merge_bigquery_test.go index 38524abb9..355c8229e 100644 --- a/lib/destination/dml/merge_bigquery_test.go +++ b/lib/destination/dml/merge_bigquery_test.go @@ -4,7 +4,6 @@ import ( "testing" "github.com/artie-labs/transfer/lib/config/constants" - "github.com/artie-labs/transfer/lib/ptr" "github.com/artie-labs/transfer/lib/sql" "github.com/artie-labs/transfer/lib/typing" "github.com/artie-labs/transfer/lib/typing/columns" @@ -18,14 +17,13 @@ func TestMergeStatement_TempTable(t *testing.T) { cols.AddColumn(columns.NewColumn(constants.DeleteColumnMarker, typing.Boolean)) mergeArg := &MergeArgument{ - TableID: MockTableIdentifier{"customers.orders"}, - SubQuery: "customers.orders_tmp", - PrimaryKeys: []columns.Wrapper{columns.NewWrapper(columns.NewColumn("order_id", typing.Invalid), false, constants.BigQuery)}, - Columns: &cols, - DestKind: constants.BigQuery, - Dialect: sql.BigQueryDialect{}, - SoftDelete: false, - UppercaseEscNames: ptr.ToBool(false), + TableID: MockTableIdentifier{"customers.orders"}, + SubQuery: "customers.orders_tmp", + PrimaryKeys: []columns.Column{columns.NewColumn("order_id", typing.Invalid)}, + Columns: &cols, + DestKind: constants.BigQuery, + Dialect: sql.BigQueryDialect{}, + SoftDelete: false, } mergeSQL, err := mergeArg.GetStatement() @@ -41,14 +39,13 @@ func TestMergeStatement_JSONKey(t *testing.T) { cols.AddColumn(columns.NewColumn(constants.DeleteColumnMarker, typing.Boolean)) mergeArg := &MergeArgument{ - TableID: MockTableIdentifier{"customers.orders"}, - SubQuery: "customers.orders_tmp", - PrimaryKeys: []columns.Wrapper{columns.NewWrapper(columns.NewColumn("order_oid", typing.Invalid), false, constants.BigQuery)}, - Columns: &cols, - DestKind: constants.BigQuery, - Dialect: sql.BigQueryDialect{}, - SoftDelete: false, - UppercaseEscNames: ptr.ToBool(false), + TableID: MockTableIdentifier{"customers.orders"}, + SubQuery: "customers.orders_tmp", + PrimaryKeys: []columns.Column{columns.NewColumn("order_oid", typing.Invalid)}, + Columns: &cols, + DestKind: constants.BigQuery, + Dialect: sql.BigQueryDialect{}, + SoftDelete: false, } mergeSQL, err := mergeArg.GetStatement() diff --git a/lib/destination/dml/merge_mssql_test.go b/lib/destination/dml/merge_mssql_test.go index b64455140..15613eb7f 100644 --- a/lib/destination/dml/merge_mssql_test.go +++ b/lib/destination/dml/merge_mssql_test.go @@ -7,7 +7,6 @@ import ( "time" "github.com/artie-labs/transfer/lib/config/constants" - "github.com/artie-labs/transfer/lib/ptr" "github.com/artie-labs/transfer/lib/sql" "github.com/artie-labs/transfer/lib/typing" "github.com/artie-labs/transfer/lib/typing/columns" @@ -42,15 +41,14 @@ func Test_GetMSSQLStatement(t *testing.T) { strings.Join(cols, ","), strings.Join(tableValues, ","), "_tbl", strings.Join(cols, ",")) mergeArg := MergeArgument{ - TableID: MockTableIdentifier{fqTable}, - SubQuery: subQuery, - IdempotentKey: "", - PrimaryKeys: []columns.Wrapper{columns.NewWrapper(columns.NewColumn("id", typing.Invalid), false, constants.MSSQL)}, - Columns: &_cols, - DestKind: constants.MSSQL, - Dialect: sql.DefaultDialect{}, - SoftDelete: false, - UppercaseEscNames: ptr.ToBool(false), + TableID: MockTableIdentifier{fqTable}, + SubQuery: subQuery, + IdempotentKey: "", + PrimaryKeys: []columns.Column{columns.NewColumn("id", typing.Invalid)}, + Columns: &_cols, + DestKind: constants.MSSQL, + Dialect: sql.MSSQLDialect{}, + SoftDelete: false, } mergeSQL, err := mergeArg.GetMSSQLStatement() diff --git a/lib/destination/dml/merge_parts_test.go b/lib/destination/dml/merge_parts_test.go index d6be3ce5c..7c7f3ce28 100644 --- a/lib/destination/dml/merge_parts_test.go +++ b/lib/destination/dml/merge_parts_test.go @@ -25,7 +25,7 @@ func TestMergeStatementPartsValidation(t *testing.T) { } type result struct { - PrimaryKeys []columns.Wrapper + PrimaryKeys []columns.Column ColumnsToTypes columns.Columns } @@ -47,11 +47,11 @@ func getBasicColumnsForTest(compositeKey bool) result { cols.AddColumn(textToastCol) cols.AddColumn(columns.NewColumn(constants.DeleteColumnMarker, typing.Boolean)) - var pks []columns.Wrapper - pks = append(pks, columns.NewWrapper(idCol, false, constants.Redshift)) + var pks []columns.Column + pks = append(pks, idCol) if compositeKey { - pks = append(pks, columns.NewWrapper(emailCol, false, constants.Redshift)) + pks = append(pks, emailCol) } return result{ @@ -75,7 +75,6 @@ func TestMergeStatementParts_SkipDelete(t *testing.T) { DestKind: constants.Redshift, Dialect: sql.RedshiftDialect{}, ContainsHardDeletes: ptr.ToBool(false), - UppercaseEscNames: ptr.ToBool(false), } parts, err := mergeArg.GetParts() @@ -103,7 +102,6 @@ func TestMergeStatementPartsSoftDelete(t *testing.T) { DestKind: constants.Redshift, Dialect: sql.RedshiftDialect{}, SoftDelete: true, - UppercaseEscNames: ptr.ToBool(false), ContainsHardDeletes: ptr.ToBool(false), } @@ -144,7 +142,6 @@ func TestMergeStatementPartsSoftDeleteComposite(t *testing.T) { DestKind: constants.Redshift, Dialect: sql.RedshiftDialect{}, SoftDelete: true, - UppercaseEscNames: ptr.ToBool(false), ContainsHardDeletes: ptr.ToBool(false), } @@ -188,7 +185,6 @@ func TestMergeStatementParts(t *testing.T) { DestKind: constants.Redshift, Dialect: sql.RedshiftDialect{}, ContainsHardDeletes: ptr.ToBool(true), - UppercaseEscNames: ptr.ToBool(false), } parts, err := mergeArg.GetParts() @@ -216,7 +212,6 @@ func TestMergeStatementParts(t *testing.T) { Dialect: sql.RedshiftDialect{}, IdempotentKey: "created_at", ContainsHardDeletes: ptr.ToBool(true), - UppercaseEscNames: ptr.ToBool(false), } parts, err = mergeArg.GetParts() @@ -248,7 +243,6 @@ func TestMergeStatementPartsCompositeKey(t *testing.T) { DestKind: constants.Redshift, Dialect: sql.RedshiftDialect{}, ContainsHardDeletes: ptr.ToBool(true), - UppercaseEscNames: ptr.ToBool(false), } parts, err := mergeArg.GetParts() @@ -276,7 +270,6 @@ func TestMergeStatementPartsCompositeKey(t *testing.T) { Dialect: sql.RedshiftDialect{}, ContainsHardDeletes: ptr.ToBool(true), IdempotentKey: "created_at", - UppercaseEscNames: ptr.ToBool(false), } parts, err = mergeArg.GetParts() diff --git a/lib/destination/dml/merge_test.go b/lib/destination/dml/merge_test.go index de3b30b2f..0f0081bf3 100644 --- a/lib/destination/dml/merge_test.go +++ b/lib/destination/dml/merge_test.go @@ -10,7 +10,6 @@ import ( "github.com/artie-labs/transfer/lib/config/constants" "github.com/artie-labs/transfer/lib/destination/types" - "github.com/artie-labs/transfer/lib/ptr" "github.com/artie-labs/transfer/lib/sql" "github.com/artie-labs/transfer/lib/typing" "github.com/artie-labs/transfer/lib/typing/columns" @@ -33,6 +32,39 @@ func (m MockTableIdentifier) FullyQualifiedName() string { return m.fqName } +func TestRemoveDeleteColumnMarker(t *testing.T) { + { + columns, removed := removeDeleteColumnMarker([]string{}) + assert.Empty(t, columns) + assert.False(t, removed) + } + { + columns, removed := removeDeleteColumnMarker([]string{"a"}) + assert.Equal(t, []string{"a"}, columns) + assert.False(t, removed) + } + { + columns, removed := removeDeleteColumnMarker([]string{"a", "b"}) + assert.Equal(t, []string{"a", "b"}, columns) + assert.False(t, removed) + } + { + columns, removed := removeDeleteColumnMarker([]string{constants.DeleteColumnMarker}) + assert.True(t, removed) + assert.Empty(t, columns) + } + { + columns, removed := removeDeleteColumnMarker([]string{"a", constants.DeleteColumnMarker, "b"}) + assert.True(t, removed) + assert.Equal(t, []string{"a", "b"}, columns) + } + { + columns, removed := removeDeleteColumnMarker([]string{"a", constants.DeleteColumnMarker, "b", constants.DeleteColumnMarker, "c"}) + assert.True(t, removed) + assert.Equal(t, []string{"a", "b", "c"}, columns) + } +} + func TestMergeStatementSoftDelete(t *testing.T) { // No idempotent key fqTable := "database.schema.table" @@ -59,15 +91,14 @@ func TestMergeStatementSoftDelete(t *testing.T) { for _, idempotentKey := range []string{"", "updated_at"} { mergeArg := MergeArgument{ - TableID: MockTableIdentifier{fqTable}, - SubQuery: subQuery, - IdempotentKey: idempotentKey, - PrimaryKeys: []columns.Wrapper{columns.NewWrapper(columns.NewColumn("id", typing.Invalid), false, constants.Snowflake)}, - Columns: &_cols, - DestKind: constants.Snowflake, - Dialect: sql.SnowflakeDialect{UppercaseEscNames: true}, - SoftDelete: true, - UppercaseEscNames: ptr.ToBool(true), + TableID: MockTableIdentifier{fqTable}, + SubQuery: subQuery, + IdempotentKey: idempotentKey, + PrimaryKeys: []columns.Column{columns.NewColumn("id", typing.Invalid)}, + Columns: &_cols, + DestKind: constants.Snowflake, + Dialect: sql.SnowflakeDialect{}, + SoftDelete: true, } mergeSQL, err := mergeArg.GetStatement() @@ -109,15 +140,14 @@ func TestMergeStatement(t *testing.T) { strings.Join(cols, ","), strings.Join(tableValues, ","), "_tbl", strings.Join(cols, ",")) mergeArg := MergeArgument{ - TableID: MockTableIdentifier{fqTable}, - SubQuery: subQuery, - IdempotentKey: "", - PrimaryKeys: []columns.Wrapper{columns.NewWrapper(columns.NewColumn("id", typing.Invalid), true, constants.Snowflake)}, - Columns: &_cols, - DestKind: constants.Snowflake, - Dialect: sql.SnowflakeDialect{UppercaseEscNames: true}, - SoftDelete: false, - UppercaseEscNames: ptr.ToBool(true), + TableID: MockTableIdentifier{fqTable}, + SubQuery: subQuery, + IdempotentKey: "", + PrimaryKeys: []columns.Column{columns.NewColumn("id", typing.Invalid)}, + Columns: &_cols, + DestKind: constants.Snowflake, + Dialect: sql.SnowflakeDialect{}, + SoftDelete: false, } mergeSQL, err := mergeArg.GetStatement() @@ -158,15 +188,14 @@ func TestMergeStatementIdempotentKey(t *testing.T) { _cols.AddColumn(columns.NewColumn(constants.DeleteColumnMarker, typing.Boolean)) mergeArg := MergeArgument{ - TableID: MockTableIdentifier{fqTable}, - SubQuery: subQuery, - IdempotentKey: "updated_at", - PrimaryKeys: []columns.Wrapper{columns.NewWrapper(columns.NewColumn("id", typing.Invalid), false, constants.Snowflake)}, - Columns: &_cols, - DestKind: constants.Snowflake, - Dialect: sql.SnowflakeDialect{UppercaseEscNames: true}, - SoftDelete: false, - UppercaseEscNames: ptr.ToBool(true), + TableID: MockTableIdentifier{fqTable}, + SubQuery: subQuery, + IdempotentKey: "updated_at", + PrimaryKeys: []columns.Column{columns.NewColumn("id", typing.Invalid)}, + Columns: &_cols, + DestKind: constants.Snowflake, + Dialect: sql.SnowflakeDialect{}, + SoftDelete: false, } mergeSQL, err := mergeArg.GetStatement() @@ -204,22 +233,21 @@ func TestMergeStatementCompositeKey(t *testing.T) { TableID: MockTableIdentifier{fqTable}, SubQuery: subQuery, IdempotentKey: "updated_at", - PrimaryKeys: []columns.Wrapper{ - columns.NewWrapper(columns.NewColumn("id", typing.Invalid), false, constants.Snowflake), - columns.NewWrapper(columns.NewColumn("another_id", typing.Invalid), false, constants.Snowflake), + PrimaryKeys: []columns.Column{ + columns.NewColumn("id", typing.Invalid), + columns.NewColumn("another_id", typing.Invalid), }, - Columns: &_cols, - DestKind: constants.Snowflake, - Dialect: sql.SnowflakeDialect{UppercaseEscNames: true}, - SoftDelete: false, - UppercaseEscNames: ptr.ToBool(true), + Columns: &_cols, + DestKind: constants.Snowflake, + Dialect: sql.SnowflakeDialect{}, + SoftDelete: false, } mergeSQL, err := mergeArg.GetStatement() assert.NoError(t, err) assert.Contains(t, mergeSQL, fmt.Sprintf("MERGE INTO %s", fqTable), mergeSQL) assert.Contains(t, mergeSQL, fmt.Sprintf("cc.%s >= c.%s", "updated_at", "updated_at"), fmt.Sprintf("Idempotency key: %s", mergeSQL)) - assert.Contains(t, mergeSQL, "cc ON c.id = cc.id and c.another_id = cc.another_id", mergeSQL) + assert.Contains(t, mergeSQL, `cc ON c."ID" = cc."ID" and c."ANOTHER_ID" = cc."ANOTHER_ID"`, mergeSQL) } func TestMergeStatementEscapePrimaryKeys(t *testing.T) { @@ -254,15 +282,14 @@ func TestMergeStatementEscapePrimaryKeys(t *testing.T) { TableID: MockTableIdentifier{fqTable}, SubQuery: subQuery, IdempotentKey: "", - PrimaryKeys: []columns.Wrapper{ - columns.NewWrapper(columns.NewColumn("id", typing.Invalid), true, constants.Snowflake), - columns.NewWrapper(columns.NewColumn("group", typing.Invalid), true, constants.Snowflake), + PrimaryKeys: []columns.Column{ + columns.NewColumn("id", typing.Invalid), + columns.NewColumn("group", typing.Invalid), }, - Columns: &_cols, - DestKind: constants.Snowflake, - Dialect: sql.SnowflakeDialect{UppercaseEscNames: true}, - SoftDelete: false, - UppercaseEscNames: ptr.ToBool(true), + Columns: &_cols, + DestKind: constants.Snowflake, + Dialect: sql.SnowflakeDialect{}, + SoftDelete: false, } mergeSQL, err := mergeArg.GetStatement() diff --git a/lib/destination/dml/merge_valid_test.go b/lib/destination/dml/merge_valid_test.go index ae2a335e3..fc7114762 100644 --- a/lib/destination/dml/merge_valid_test.go +++ b/lib/destination/dml/merge_valid_test.go @@ -6,7 +6,6 @@ import ( "github.com/artie-labs/transfer/lib/config/constants" "github.com/artie-labs/transfer/lib/sql" - "github.com/artie-labs/transfer/lib/ptr" "github.com/artie-labs/transfer/lib/typing" "github.com/artie-labs/transfer/lib/typing/columns" @@ -14,8 +13,8 @@ import ( ) func TestMergeArgument_Valid(t *testing.T) { - primaryKeys := []columns.Wrapper{ - columns.NewWrapper(columns.NewColumn("id", typing.Integer), false, constants.Snowflake), + primaryKeys := []columns.Column{ + columns.NewColumn("id", typing.Integer), } var cols columns.Columns @@ -71,48 +70,35 @@ func TestMergeArgument_Valid(t *testing.T) { expectedErr: "subQuery cannot be empty", }, { - name: "did not pass in uppercase esc col", + name: "missing dest kind", mergeArg: &MergeArgument{ PrimaryKeys: primaryKeys, Columns: &cols, - TableID: MockTableIdentifier{"schema.tableName"}, SubQuery: "schema.tableName", - }, - expectedErr: "uppercaseEscNames cannot be nil", - }, - { - name: "missing dest kind", - mergeArg: &MergeArgument{ - PrimaryKeys: primaryKeys, - Columns: &cols, - SubQuery: "schema.tableName", - TableID: MockTableIdentifier{"schema.tableName"}, - UppercaseEscNames: ptr.ToBool(false), + TableID: MockTableIdentifier{"schema.tableName"}, }, expectedErr: "invalid destination", }, { name: "missing dialect kind", mergeArg: &MergeArgument{ - PrimaryKeys: primaryKeys, - Columns: &cols, - SubQuery: "schema.tableName", - TableID: MockTableIdentifier{"schema.tableName"}, - UppercaseEscNames: ptr.ToBool(false), - DestKind: constants.BigQuery, + PrimaryKeys: primaryKeys, + Columns: &cols, + SubQuery: "schema.tableName", + TableID: MockTableIdentifier{"schema.tableName"}, + DestKind: constants.BigQuery, }, expectedErr: "dialect cannot be nil", }, { name: "everything exists", mergeArg: &MergeArgument{ - PrimaryKeys: primaryKeys, - Columns: &cols, - SubQuery: "schema.tableName", - TableID: MockTableIdentifier{"schema.tableName"}, - UppercaseEscNames: ptr.ToBool(false), - DestKind: constants.BigQuery, - Dialect: sql.BigQueryDialect{}, + PrimaryKeys: primaryKeys, + Columns: &cols, + SubQuery: "schema.tableName", + TableID: MockTableIdentifier{"schema.tableName"}, + DestKind: constants.BigQuery, + Dialect: sql.BigQueryDialect{}, }, }, } diff --git a/lib/destination/dwh.go b/lib/destination/dwh.go index 00a86b959..5685f9f03 100644 --- a/lib/destination/dwh.go +++ b/lib/destination/dwh.go @@ -21,7 +21,6 @@ type DataWarehouse interface { Begin() (*sql.Tx, error) // Helper functions for merge - ShouldUppercaseEscapedNames() bool IsRetryableError(err error) bool IdentifierFor(topicConfig kafkalib.TopicConfig, table string) types.TableIdentifier GetTableConfig(tableData *optimization.TableData) (*types.DwhTableConfig, error) diff --git a/lib/destination/types/table_config.go b/lib/destination/types/table_config.go index 755f5b66c..79d6515a0 100644 --- a/lib/destination/types/table_config.go +++ b/lib/destination/types/table_config.go @@ -64,15 +64,15 @@ func (d *DwhTableConfig) MutateInMemoryColumns(createTable bool, columnOp consta for _, col := range cols { d.columns.AddColumn(col) // Delete from the permissions table, if exists. - delete(d.columnsToDelete, col.RawName()) + delete(d.columnsToDelete, col.Name()) } d.createTable = createTable case constants.Delete: for _, col := range cols { // Delete from the permissions and in-memory table - d.columns.DeleteColumn(col.RawName()) - delete(d.columnsToDelete, col.RawName()) + d.columns.DeleteColumn(col.Name()) + delete(d.columnsToDelete, col.Name()) } } } @@ -91,7 +91,7 @@ func (d *DwhTableConfig) AuditColumnsToDelete(colsToDelete []columns.Column) { for colName := range d.columnsToDelete { var found bool for _, col := range colsToDelete { - if found = col.RawName() == colName; found { + if found = col.Name() == colName; found { break } } diff --git a/lib/destination/types/types.go b/lib/destination/types/types.go index 79f5df7a8..9b79b62d8 100644 --- a/lib/destination/types/types.go +++ b/lib/destination/types/types.go @@ -43,13 +43,6 @@ type AdditionalSettings struct { AdditionalCopyClause string } -type AppendOpts struct { - // TempTableID - sometimes the destination requires 2 steps to append to the table (e.g. Redshift), so we'll create and load the data into a staging table - // Redshift then has a separate step after `shared.Append(...)` to merge the two tables together. - TempTableID TableIdentifier - AdditionalCopyClause string -} - type TableIdentifier interface { Table() string WithTable(table string) TableIdentifier diff --git a/lib/optimization/event_bench_test.go b/lib/optimization/event_bench_test.go index 20a6d45d2..fc7987918 100644 --- a/lib/optimization/event_bench_test.go +++ b/lib/optimization/event_bench_test.go @@ -31,7 +31,7 @@ func BenchmarkTableData_ApproxSize_WideTable(b *testing.B) { "favorite_fruits": []string{"strawberry", "kiwi", "oranges"}, "random": false, "team": []string{"charlie", "jacqueline"}, - "email": "robin@artie.so", + "email": "robin@example.com", "favorite_languages": []string{"go", "sql"}, "favorite_databases": []string{"postgres", "bigtable"}, "created_at": time.Now(), diff --git a/lib/optimization/event_update_test.go b/lib/optimization/event_update_test.go index 2d5fa5e78..a61b04e2e 100644 --- a/lib/optimization/event_update_test.go +++ b/lib/optimization/event_update_test.go @@ -74,16 +74,16 @@ func TestTableData_UpdateInMemoryColumnsFromDestination(t *testing.T) { // Testing backfill for _, inMemoryCol := range tableData.inMemoryColumns.GetColumns() { - assert.False(t, inMemoryCol.Backfilled(), inMemoryCol.RawName()) + assert.False(t, inMemoryCol.Backfilled(), inMemoryCol.Name()) } backfilledCol := columns.NewColumn("bool_backfill", typing.Boolean) backfilledCol.SetBackfilled(true) assert.NoError(t, tableData.MergeColumnsFromDestination(backfilledCol)) for _, inMemoryCol := range tableData.inMemoryColumns.GetColumns() { - if inMemoryCol.RawName() == backfilledCol.RawName() { - assert.True(t, inMemoryCol.Backfilled(), inMemoryCol.RawName()) + if inMemoryCol.Name() == backfilledCol.Name() { + assert.True(t, inMemoryCol.Backfilled(), inMemoryCol.Name()) } else { - assert.False(t, inMemoryCol.Backfilled(), inMemoryCol.RawName()) + assert.False(t, inMemoryCol.Backfilled(), inMemoryCol.Name()) } } diff --git a/lib/optimization/table_data.go b/lib/optimization/table_data.go index e47839528..09017077e 100644 --- a/lib/optimization/table_data.go +++ b/lib/optimization/table_data.go @@ -66,10 +66,10 @@ func (t *TableData) ContainOtherOperations() bool { return t.containOtherOperations } -func (t *TableData) PrimaryKeys(uppercaseEscNames bool, destKind constants.DestinationKind) []columns.Wrapper { - var pks []columns.Wrapper +func (t *TableData) PrimaryKeys() []columns.Column { + var pks []columns.Column for _, pk := range t.primaryKeys { - pks = append(pks, columns.NewWrapper(columns.NewColumn(pk, typing.Invalid), uppercaseEscNames, destKind)) + pks = append(pks, columns.NewColumn(pk, typing.Invalid)) } return pks @@ -257,9 +257,9 @@ func (t *TableData) MergeColumnsFromDestination(destCols ...columns.Column) erro var foundColumn columns.Column var found bool for _, destCol := range destCols { - if destCol.RawName() == strings.ToLower(inMemoryCol.RawName()) { + if destCol.Name() == strings.ToLower(inMemoryCol.Name()) { if destCol.KindDetails.Kind == typing.Invalid.Kind { - return fmt.Errorf("column %q is invalid", destCol.RawName()) + return fmt.Errorf("column %q is invalid", destCol.Name()) } foundColumn = destCol diff --git a/lib/optimization/table_data_test.go b/lib/optimization/table_data_test.go index 74cea7f52..5a4e8f449 100644 --- a/lib/optimization/table_data_test.go +++ b/lib/optimization/table_data_test.go @@ -145,7 +145,7 @@ func TestTableData_UpdateInMemoryColumns(t *testing.T) { assert.True(t, isOk) extCol.KindDetails.ExtendedTimeDetails.Format = time.RFC3339Nano - tableData.inMemoryColumns.UpdateColumn(columns.NewColumn(extCol.RawName(), extCol.KindDetails)) + tableData.inMemoryColumns.UpdateColumn(columns.NewColumn(extCol.Name(), extCol.KindDetails)) for name, colKindDetails := range map[string]typing.KindDetails{ "foo": typing.String, diff --git a/lib/parquetutil/generate_schema.go b/lib/parquetutil/generate_schema.go index dfbbf1490..8f02657d2 100644 --- a/lib/parquetutil/generate_schema.go +++ b/lib/parquetutil/generate_schema.go @@ -12,7 +12,7 @@ func GenerateJSONSchema(columns []columns.Column) (string, error) { var fields []typing.Field for _, column := range columns { // We don't need to escape the column name here. - field, err := column.KindDetails.ParquetAnnotation(column.RawName()) + field, err := column.KindDetails.ParquetAnnotation(column.Name()) if err != nil { return "", err } diff --git a/lib/size/size_bench_test.go b/lib/size/size_bench_test.go index 423800c15..5031cd8f7 100644 --- a/lib/size/size_bench_test.go +++ b/lib/size/size_bench_test.go @@ -31,7 +31,7 @@ func BenchmarkGetApproxSize_WideTable(b *testing.B) { "favorite_fruits": []string{"strawberry", "kiwi", "oranges"}, "random": false, "team": []string{"charlie", "jacqueline"}, - "email": "robin@artie.so", + "email": "robin@example.com", "favorite_languages": []string{"go", "sql"}, "favorite_databases": []string{"postgres", "bigtable"}, "created_at": time.Now(), diff --git a/lib/sql/dialect.go b/lib/sql/dialect.go index 4eee6297d..f2d1e97cb 100644 --- a/lib/sql/dialect.go +++ b/lib/sql/dialect.go @@ -4,83 +4,74 @@ import ( "fmt" "log/slog" "slices" - "strconv" "strings" "github.com/artie-labs/transfer/lib/config/constants" ) type Dialect interface { - NeedsEscaping(identifier string) bool // TODO: Remove this when we escape everything QuoteIdentifier(identifier string) string + EscapeStruct(value string) string } -type DefaultDialect struct{} +type BigQueryDialect struct{} -func (DefaultDialect) NeedsEscaping(_ string) bool { return true } +func (BigQueryDialect) QuoteIdentifier(identifier string) string { + // BigQuery needs backticks to quote. + return fmt.Sprintf("`%s`", identifier) +} -func (DefaultDialect) QuoteIdentifier(identifier string) string { - return fmt.Sprintf(`"%s"`, identifier) +func (BigQueryDialect) EscapeStruct(value string) string { + return "JSON" + QuoteLiteral(value) } -type BigQueryDialect struct{} +type MSSQLDialect struct{} -func (BigQueryDialect) NeedsEscaping(_ string) bool { return true } +func (MSSQLDialect) QuoteIdentifier(identifier string) string { + return fmt.Sprintf(`"%s"`, identifier) +} -func (BigQueryDialect) QuoteIdentifier(identifier string) string { - // BigQuery needs backticks to quote. - return fmt.Sprintf("`%s`", identifier) +func (MSSQLDialect) EscapeStruct(value string) string { + panic("not implemented") // We don't currently support backfills for MS SQL. } type RedshiftDialect struct{} -func (RedshiftDialect) NeedsEscaping(_ string) bool { return true } - func (rd RedshiftDialect) QuoteIdentifier(identifier string) string { // Preserve the existing behavior of Redshift identifiers being lowercased due to not being quoted. return fmt.Sprintf(`"%s"`, strings.ToLower(identifier)) } -type SnowflakeDialect struct { - UppercaseEscNames bool +func (RedshiftDialect) EscapeStruct(value string) string { + return fmt.Sprintf("JSON_PARSE(%s)", QuoteLiteral(value)) } -// symbolsToEscape are additional keywords that we need to escape -var symbolsToEscape = []string{":"} +type SnowflakeDialect struct { + LegacyMode bool +} -func (sd SnowflakeDialect) NeedsEscaping(name string) bool { - if sd.UppercaseEscNames { - // If uppercaseEscNames is true then we will escape all identifiers that do not start with the Artie priefix. - // Since they will be uppercased afer they are escaped then they will result in the same value as if we - // we were to use them in a query without any escaping at all. - return true - } else { - if slices.Contains(constants.ReservedKeywords, name) { - return true - } - // If it does not contain any reserved words, does it contain any symbols that need to be escaped? - for _, symbol := range symbolsToEscape { - if strings.Contains(name, symbol) { - return true - } - } - // If it still doesn't need to be escaped, we should check if it's a number. - if _, err := strconv.Atoi(name); err == nil { - return true - } - return false - } +func (sd SnowflakeDialect) legacyNeedsEscaping(name string) bool { + return slices.Contains(constants.ReservedKeywords, name) || strings.Contains(name, ":") } func (sd SnowflakeDialect) QuoteIdentifier(identifier string) string { - if sd.UppercaseEscNames { - identifier = strings.ToUpper(identifier) + if sd.LegacyMode { + if sd.legacyNeedsEscaping(identifier) { + // In legacy mode we would have escaped this identifier which would have caused it to be lowercase. + slog.Warn("Escaped Snowflake identifier is not being uppercased", + slog.String("name", identifier), + ) + } else { + // Since this identifier wasn't previously escaped it will have been used uppercase. + identifier = strings.ToUpper(identifier) + } } else { - slog.Warn("Escaped Snowflake identifier is not being uppercased", - slog.String("name", identifier), - slog.Bool("uppercaseEscapedNames", sd.UppercaseEscNames), - ) + identifier = strings.ToUpper(identifier) } return fmt.Sprintf(`"%s"`, identifier) } + +func (SnowflakeDialect) EscapeStruct(value string) string { + return QuoteLiteral(value) +} diff --git a/lib/sql/dialect_test.go b/lib/sql/dialect_test.go index 9f1088077..c1fc565c5 100644 --- a/lib/sql/dialect_test.go +++ b/lib/sql/dialect_test.go @@ -6,8 +6,8 @@ import ( "github.com/stretchr/testify/assert" ) -func TestDefaultDialect_QuoteIdentifier(t *testing.T) { - dialect := DefaultDialect{} +func TestMSSQLDialect_QuoteIdentifier(t *testing.T) { + dialect := MSSQLDialect{} assert.Equal(t, `"foo"`, dialect.QuoteIdentifier("foo")) assert.Equal(t, `"FOO"`, dialect.QuoteIdentifier("FOO")) } @@ -24,39 +24,32 @@ func TestRedshiftDialect_QuoteIdentifier(t *testing.T) { assert.Equal(t, `"foo"`, dialect.QuoteIdentifier("FOO")) } -func TestSnowflakeDialect_NeedsEscaping(t *testing.T) { - { - // UppercaseEscNames enabled: - dialect := SnowflakeDialect{UppercaseEscNames: true} - - assert.True(t, dialect.NeedsEscaping("select")) // name that is reserved - assert.True(t, dialect.NeedsEscaping("foo")) // name that is not reserved - assert.True(t, dialect.NeedsEscaping("__artie_foo")) // Artie prefix - assert.True(t, dialect.NeedsEscaping("__artie_foo:bar")) // Artie prefix + symbol - } - - { - // UppercaseEscNames disabled: - dialect := SnowflakeDialect{UppercaseEscNames: false} - - assert.True(t, dialect.NeedsEscaping("select")) // name that is reserved - assert.False(t, dialect.NeedsEscaping("foo")) // name that is not reserved - assert.False(t, dialect.NeedsEscaping("__artie_foo")) // Artie prefix - assert.True(t, dialect.NeedsEscaping("__artie_foo:bar")) // Artie prefix + symbol - } +func TestSnowflakeDialect_LegacyNeedsEscaping(t *testing.T) { + dialect := SnowflakeDialect{} + assert.True(t, dialect.legacyNeedsEscaping("select")) // name that is reserved + assert.False(t, dialect.legacyNeedsEscaping("foo")) // name that is not reserved + assert.False(t, dialect.legacyNeedsEscaping("__artie_foo")) // Artie prefix + assert.True(t, dialect.legacyNeedsEscaping("__artie_foo:bar")) // Artie prefix + symbol } func TestSnowflakeDialect_QuoteIdentifier(t *testing.T) { { - // UppercaseEscNames enabled: - dialect := SnowflakeDialect{UppercaseEscNames: true} + // New mode: + dialect := SnowflakeDialect{LegacyMode: false} assert.Equal(t, `"FOO"`, dialect.QuoteIdentifier("foo")) assert.Equal(t, `"FOO"`, dialect.QuoteIdentifier("FOO")) + assert.Equal(t, `"SELECT"`, dialect.QuoteIdentifier("select")) + assert.Equal(t, `"GROUP"`, dialect.QuoteIdentifier("group")) } { - // UppercaseEscNames disabled: - dialect := SnowflakeDialect{UppercaseEscNames: false} - assert.Equal(t, `"foo"`, dialect.QuoteIdentifier("foo")) + // Legacy mode: + dialect := SnowflakeDialect{LegacyMode: true} + assert.Equal(t, `"FOO"`, dialect.QuoteIdentifier("foo")) assert.Equal(t, `"FOO"`, dialect.QuoteIdentifier("FOO")) + assert.Equal(t, `"abc:def"`, dialect.QuoteIdentifier("abc:def")) // symbol + assert.Equal(t, `"select"`, dialect.QuoteIdentifier("select")) // reserved name + assert.Equal(t, `"order"`, dialect.QuoteIdentifier("order")) // reserved name + assert.Equal(t, `"group"`, dialect.QuoteIdentifier("group")) // reserved name + assert.Equal(t, `"start"`, dialect.QuoteIdentifier("start")) // reserved name } } diff --git a/lib/sql/escape.go b/lib/sql/escape.go deleted file mode 100644 index 8ba1bf847..000000000 --- a/lib/sql/escape.go +++ /dev/null @@ -1,35 +0,0 @@ -package sql - -import ( - "github.com/artie-labs/transfer/lib/config/constants" -) - -func EscapeNameIfNecessary(name string, uppercaseEscNames bool, destKind constants.DestinationKind) string { - // TODO: Switch all calls of [EscapeNameIfNecessary] to [EscapeNameIfNecessaryUsingDialect] and kill this. - var dialect = dialectFor(destKind, uppercaseEscNames) - - if destKind != constants.S3 && dialect.NeedsEscaping(name) { - return dialect.QuoteIdentifier(name) - } - return name -} - -func EscapeNameIfNecessaryUsingDialect(name string, dialect Dialect) string { - if dialect.NeedsEscaping(name) { - return dialect.QuoteIdentifier(name) - } - return name -} - -func dialectFor(destKind constants.DestinationKind, uppercaseEscNames bool) Dialect { - switch destKind { - case constants.BigQuery: - return BigQueryDialect{} - case constants.Snowflake: - return SnowflakeDialect{UppercaseEscNames: uppercaseEscNames} - case constants.Redshift: - return RedshiftDialect{} - default: - return DefaultDialect{} - } -} diff --git a/lib/sql/escape_test.go b/lib/sql/escape_test.go deleted file mode 100644 index 1d5a83911..000000000 --- a/lib/sql/escape_test.go +++ /dev/null @@ -1,99 +0,0 @@ -package sql - -import ( - "testing" - - "github.com/artie-labs/transfer/lib/config/constants" - "github.com/stretchr/testify/assert" -) - -func TestEscapeNameIfNecessary(t *testing.T) { - type _testCase struct { - name string - nameToEscape string - destKind constants.DestinationKind - expectedName string - expectedNameWhenUpperCfg string - } - - testCases := []_testCase{ - { - name: "snowflake", - destKind: constants.Snowflake, - nameToEscape: "order", - expectedName: `"order"`, - expectedNameWhenUpperCfg: `"ORDER"`, - }, - { - name: "snowflake #2", - destKind: constants.Snowflake, - nameToEscape: "hello", - expectedName: `hello`, - expectedNameWhenUpperCfg: `"HELLO"`, - }, - { - name: "redshift", - destKind: constants.Redshift, - nameToEscape: "order", - expectedName: `"order"`, - expectedNameWhenUpperCfg: `"order"`, - }, - { - name: "redshift #2", - destKind: constants.Redshift, - nameToEscape: "hello", - expectedName: `"hello"`, - expectedNameWhenUpperCfg: `"hello"`, - }, - { - name: "bigquery", - destKind: constants.BigQuery, - nameToEscape: "order", - expectedName: "`order`", - expectedNameWhenUpperCfg: "`order`", - }, - { - name: "bigquery, #2", - destKind: constants.BigQuery, - nameToEscape: "hello", - expectedName: "`hello`", - expectedNameWhenUpperCfg: "`hello`", - }, - { - name: "redshift, #1 (delta)", - destKind: constants.Redshift, - nameToEscape: "delta", - expectedName: `"delta"`, - expectedNameWhenUpperCfg: `"delta"`, - }, - { - name: "snowflake, #1 (delta)", - destKind: constants.Snowflake, - nameToEscape: "delta", - expectedName: `delta`, - expectedNameWhenUpperCfg: `"DELTA"`, - }, - { - name: "redshift, symbols", - destKind: constants.Redshift, - nameToEscape: "receivedat:__", - expectedName: `"receivedat:__"`, - expectedNameWhenUpperCfg: `"receivedat:__"`, - }, - { - name: "redshift, numbers", - destKind: constants.Redshift, - nameToEscape: "0", - expectedName: `"0"`, - expectedNameWhenUpperCfg: `"0"`, - }, - } - - for _, testCase := range testCases { - actualName := EscapeNameIfNecessary(testCase.nameToEscape, false, testCase.destKind) - assert.Equal(t, testCase.expectedName, actualName, testCase.name) - - actualUpperName := EscapeNameIfNecessary(testCase.nameToEscape, true, testCase.destKind) - assert.Equal(t, testCase.expectedNameWhenUpperCfg, actualUpperName, testCase.name) - } -} diff --git a/lib/sql/util.go b/lib/sql/util.go new file mode 100644 index 000000000..b7dab812e --- /dev/null +++ b/lib/sql/util.go @@ -0,0 +1,23 @@ +package sql + +import ( + "fmt" + "strings" + + "github.com/artie-labs/transfer/lib/stringutil" +) + +// QuoteLiteral wraps a string with single quotes so that it can be used in a SQL query. +// If there are backslashes in the string, then they will be escaped to [\\]. +// After escaping backslashes, any remaining single quotes will be replaced with [\']. +func QuoteLiteral(value string) string { + return fmt.Sprintf("'%s'", strings.ReplaceAll(stringutil.EscapeBackslashes(value), "'", `\'`)) +} + +func QuoteIdentifiers(identifiers []string, dialect Dialect) []string { + result := make([]string, len(identifiers)) + for i, identifier := range identifiers { + result[i] = dialect.QuoteIdentifier(identifier) + } + return result +} diff --git a/lib/sql/util_test.go b/lib/sql/util_test.go new file mode 100644 index 000000000..1be71dfb9 --- /dev/null +++ b/lib/sql/util_test.go @@ -0,0 +1,45 @@ +package sql + +import ( + "testing" + + "github.com/stretchr/testify/assert" +) + +func TestQuoteLiteral(t *testing.T) { + testCases := []struct { + name string + colVal string + expected string + }{ + { + name: "string", + colVal: "hello", + expected: "'hello'", + }, + { + name: "string that requires escaping", + colVal: "bobby o'reilly", + expected: `'bobby o\'reilly'`, + }, + { + name: "string with line breaks", + colVal: "line1 \n line 2", + expected: "'line1 \n line 2'", + }, + { + name: "string with existing backslash", + colVal: `hello \ there \ hh`, + expected: `'hello \\ there \\ hh'`, + }, + } + + for _, testCase := range testCases { + assert.Equal(t, testCase.expected, QuoteLiteral(testCase.colVal), testCase.name) + } +} + +func TestQuoteIdentifiers(t *testing.T) { + assert.Equal(t, []string{}, QuoteIdentifiers([]string{}, BigQueryDialect{})) + assert.Equal(t, []string{"`a`", "`b`", "`c`"}, QuoteIdentifiers([]string{"a", "b", "c"}, BigQueryDialect{})) +} diff --git a/lib/stringutil/strings.go b/lib/stringutil/strings.go index c5083f884..fbff8b649 100644 --- a/lib/stringutil/strings.go +++ b/lib/stringutil/strings.go @@ -1,7 +1,6 @@ package stringutil import ( - "fmt" "math/rand" "strings" ) @@ -26,16 +25,8 @@ func Override(vals ...string) string { return retVal } -func Wrap(colVal any, noQuotes bool) string { - colVal = strings.ReplaceAll(fmt.Sprint(colVal), `\`, `\\`) - // The normal string escape is to do for O'Reilly is O\\'Reilly, but Snowflake escapes via \' - if noQuotes { - return fmt.Sprint(colVal) - } - - // When there is quote wrapping `foo -> 'foo'`, we'll need to escape `'` so the value compiles. - // However, if there are no quote wrapping, we should not need to escape. - return fmt.Sprintf("'%s'", strings.ReplaceAll(fmt.Sprint(colVal), "'", `\'`)) +func EscapeBackslashes(value string) string { + return strings.ReplaceAll(value, `\`, `\\`) } func Empty(vals ...string) bool { diff --git a/lib/stringutil/strings_test.go b/lib/stringutil/strings_test.go index 344e5dd69..b2d728a02 100644 --- a/lib/stringutil/strings_test.go +++ b/lib/stringutil/strings_test.go @@ -68,69 +68,36 @@ func TestOverride(t *testing.T) { } } -func TestWrap(t *testing.T) { - type _testCase struct { +func TestEscapeBackslashes(t *testing.T) { + testCases := []struct { name string - colVal any - noQuotes bool + colVal string expectedString string - } - - testCases := []_testCase{ + }{ { name: "string", colVal: "hello", - expectedString: "'hello'", - }, - { - name: "string (no quotes)", - colVal: "hello", - noQuotes: true, expectedString: "hello", }, { - name: "string (no quotes)", + name: "string", colVal: "bobby o'reilly", - noQuotes: true, expectedString: "bobby o'reilly", }, - { - name: "string that requires escaping", - colVal: "bobby o'reilly", - expectedString: `'bobby o\'reilly'`, - }, - { - name: "string that requires escaping (no quotes)", - colVal: "bobby o'reilly", - expectedString: `bobby o'reilly`, - noQuotes: true, - }, { name: "string with line breaks", colVal: "line1 \n line 2", - expectedString: "'line1 \n line 2'", - }, - { - name: "string with line breaks (no quotes)", - colVal: "line1 \n line 2", expectedString: "line1 \n line 2", - noQuotes: true, }, { name: "string with existing backslash", colVal: `hello \ there \ hh`, - expectedString: `'hello \\ there \\ hh'`, - }, - { - name: "string with existing backslash (no quotes)", - colVal: `hello \ there \ hh`, expectedString: `hello \\ there \\ hh`, - noQuotes: true, }, } for _, testCase := range testCases { - assert.Equal(t, testCase.expectedString, Wrap(testCase.colVal, testCase.noQuotes), testCase.name) + assert.Equal(t, testCase.expectedString, EscapeBackslashes(testCase.colVal), testCase.name) } } diff --git a/lib/telemetry/README.md b/lib/telemetry/README.md index e2a5de654..9fbb4541d 100644 --- a/lib/telemetry/README.md +++ b/lib/telemetry/README.md @@ -1,3 +1,3 @@ # Telemetry -Artie Transfer's docs have moved! Please visit this link to see documentation regarding Transfer's telemetry package. https://docs.artie.so/telemetry/overview +Artie Transfer's docs have moved! Please visit this link to see documentation regarding Transfer's telemetry package. https://docs.artie.com/telemetry/overview diff --git a/lib/typing/columns/columns.go b/lib/typing/columns/columns.go index 2ed85ff2f..19ffef0c3 100644 --- a/lib/typing/columns/columns.go +++ b/lib/typing/columns/columns.go @@ -79,17 +79,10 @@ func (c *Column) ShouldBackfill() bool { return c.defaultValue != nil && !c.backfilled } -func (c *Column) RawName() string { +func (c *Column) Name() string { return c.name } -// Name will give you c.name -// Plus we will escape it if the column name is part of the reserved words from destinations. -// If so, it'll change from `start` => `"start"` as suggested by Snowflake. -func (c *Column) Name(uppercaseEscNames bool, destKind constants.DestinationKind) string { - return sql.EscapeNameIfNecessary(c.name, uppercaseEscNames, destKind) -} - type Columns struct { columns []Column sync.RWMutex @@ -190,29 +183,7 @@ func (c *Columns) GetColumnsToUpdate() []string { continue } - cols = append(cols, col.RawName()) - } - - return cols -} - -// GetEscapedColumnsToUpdate will filter all the `Invalid` columns so that we do not update it. -// It will escape the returned columns. -func (c *Columns) GetEscapedColumnsToUpdate(uppercaseEscNames bool, destKind constants.DestinationKind) []string { - if c == nil { - return []string{} - } - - c.RLock() - defer c.RUnlock() - - var cols []string - for _, col := range c.columns { - if col.KindDetails == typing.Invalid { - continue - } - - cols = append(cols, col.Name(uppercaseEscNames, destKind)) + cols = append(cols, col.Name()) } return cols @@ -257,7 +228,7 @@ func (c *Columns) DeleteColumn(name string) { } // UpdateQuery will parse the columns and then returns a list of strings like: cc.first_name=c.first_name,cc.last_name=c.last_name,cc.email=c.email -func (c *Columns) UpdateQuery(destKind constants.DestinationKind, uppercaseEscNames bool, skipDeleteCol bool) string { +func (c *Columns) UpdateQuery(dialect sql.Dialect, skipDeleteCol bool) string { var cols []string for _, column := range c.GetColumns() { if column.ShouldSkip() { @@ -265,16 +236,16 @@ func (c *Columns) UpdateQuery(destKind constants.DestinationKind, uppercaseEscNa } // skipDeleteCol is useful because we don't want to copy the deleted column over to the source table if we're doing a hard row delete. - if skipDeleteCol && column.RawName() == constants.DeleteColumnMarker { + if skipDeleteCol && column.Name() == constants.DeleteColumnMarker { continue } - colName := column.Name(uppercaseEscNames, destKind) + colName := dialect.QuoteIdentifier(column.Name()) if column.ToastColumn { if column.KindDetails == typing.Struct { - cols = append(cols, processToastStructCol(colName, destKind)) + cols = append(cols, processToastStructCol(colName, dialect)) } else { - cols = append(cols, processToastCol(colName, destKind)) + cols = append(cols, processToastCol(colName, dialect)) } } else { @@ -286,16 +257,16 @@ func (c *Columns) UpdateQuery(destKind constants.DestinationKind, uppercaseEscNa return strings.Join(cols, ",") } -func processToastStructCol(colName string, destKind constants.DestinationKind) string { - switch destKind { - case constants.BigQuery: +func processToastStructCol(colName string, dialect sql.Dialect) string { + switch dialect.(type) { + case sql.BigQueryDialect: return fmt.Sprintf(`%s= CASE WHEN COALESCE(TO_JSON_STRING(cc.%s) != '{"key":"%s"}', true) THEN cc.%s ELSE c.%s END`, colName, colName, constants.ToastUnavailableValuePlaceholder, colName, colName) - case constants.Redshift: + case sql.RedshiftDialect: return fmt.Sprintf(`%s= CASE WHEN COALESCE(cc.%s != JSON_PARSE('{"key":"%s"}'), true) THEN cc.%s ELSE c.%s END`, colName, colName, constants.ToastUnavailableValuePlaceholder, colName, colName) - case constants.MSSQL: + case sql.MSSQLDialect: // Microsoft SQL Server doesn't allow boolean expressions to be in the COALESCE statement. return fmt.Sprintf("%s= CASE WHEN COALESCE(cc.%s, {}) != {'key': '%s'} THEN cc.%s ELSE c.%s END", colName, colName, constants.ToastUnavailableValuePlaceholder, colName, colName) @@ -306,8 +277,8 @@ func processToastStructCol(colName string, destKind constants.DestinationKind) s } } -func processToastCol(colName string, destKind constants.DestinationKind) string { - if destKind == constants.MSSQL { +func processToastCol(colName string, dialect sql.Dialect) string { + if _, ok := dialect.(sql.MSSQLDialect); ok { // Microsoft SQL Server doesn't allow boolean expressions to be in the COALESCE statement. return fmt.Sprintf("%s= CASE WHEN COALESCE(cc.%s, '') != '%s' THEN cc.%s ELSE c.%s END", colName, colName, constants.ToastUnavailableValuePlaceholder, colName, colName) diff --git a/lib/typing/columns/columns_test.go b/lib/typing/columns/columns_test.go index cb2e903a4..0ed03b152 100644 --- a/lib/typing/columns/columns_test.go +++ b/lib/typing/columns/columns_test.go @@ -6,6 +6,7 @@ import ( "github.com/artie-labs/transfer/lib/config/constants" "github.com/artie-labs/transfer/lib/ptr" + "github.com/artie-labs/transfer/lib/sql" "github.com/artie-labs/transfer/lib/typing" "github.com/stretchr/testify/assert" ) @@ -131,49 +132,6 @@ func TestColumn_ShouldBackfill(t *testing.T) { } } -func TestColumn_Name(t *testing.T) { - type _testCase struct { - colName string - expectedName string - // Snowflake - expectedNameEsc string - // BigQuery - expectedNameEscBq string - } - - testCases := []_testCase{ - { - colName: "start", - expectedName: "start", - expectedNameEsc: `"START"`, // since this is a reserved word. - expectedNameEscBq: "`start`", // BQ escapes via backticks. - }, - { - colName: "foo", - expectedName: "foo", - expectedNameEsc: `"FOO"`, - expectedNameEscBq: "`foo`", - }, - { - colName: "bar", - expectedName: "bar", - expectedNameEsc: `"BAR"`, - expectedNameEscBq: "`bar`", - }, - } - - for _, testCase := range testCases { - col := &Column{ - name: testCase.colName, - } - - assert.Equal(t, testCase.expectedName, col.RawName(), testCase.colName) - - assert.Equal(t, testCase.expectedNameEsc, col.Name(true, constants.Snowflake), testCase.colName) - assert.Equal(t, testCase.expectedNameEscBq, col.Name(false, constants.BigQuery), testCase.colName) - } -} - func TestColumns_GetColumnsToUpdate(t *testing.T) { type _testCase struct { name string @@ -228,64 +186,6 @@ func TestColumns_GetColumnsToUpdate(t *testing.T) { } } -func TestColumns_GetEscapedColumnsToUpdate(t *testing.T) { - type _testCase struct { - name string - cols []Column - expectedColsEsc []string - expectedColsEscBq []string - } - - var ( - happyPathCols = []Column{ - { - name: "hi", - KindDetails: typing.String, - }, - { - name: "bye", - KindDetails: typing.String, - }, - { - name: "start", - KindDetails: typing.String, - }, - } - ) - - extraCols := happyPathCols - for i := 0; i < 100; i++ { - extraCols = append(extraCols, Column{ - name: fmt.Sprintf("hello_%v", i), - KindDetails: typing.Invalid, - }) - } - - testCases := []_testCase{ - { - name: "happy path", - cols: happyPathCols, - expectedColsEsc: []string{`"HI"`, `"BYE"`, `"START"`}, - expectedColsEscBq: []string{"`hi`", "`bye`", "`start`"}, - }, - { - name: "happy path + extra col", - cols: extraCols, - expectedColsEsc: []string{`"HI"`, `"BYE"`, `"START"`}, - expectedColsEscBq: []string{"`hi`", "`bye`", "`start`"}, - }, - } - - for _, testCase := range testCases { - cols := &Columns{ - columns: testCase.cols, - } - - assert.Equal(t, testCase.expectedColsEsc, cols.GetEscapedColumnsToUpdate(true, constants.Snowflake), testCase.name) - assert.Equal(t, testCase.expectedColsEscBq, cols.GetEscapedColumnsToUpdate(false, constants.BigQuery), testCase.name) - } -} - func TestColumns_UpsertColumns(t *testing.T) { keys := []string{"a", "b", "c", "d", "e"} var cols Columns @@ -396,7 +296,7 @@ func TestColumnsUpdateQuery(t *testing.T) { name string columns Columns expectedString string - destKind constants.DestinationKind + dialect sql.Dialect skipDeleteCol bool } @@ -479,39 +379,39 @@ func TestColumnsUpdateQuery(t *testing.T) { { name: "happy path", columns: happyPathCols, - destKind: constants.Redshift, + dialect: sql.RedshiftDialect{}, expectedString: `"foo"=cc."foo","bar"=cc."bar"`, }, { name: "string and toast", columns: stringAndToastCols, - destKind: constants.Snowflake, - expectedString: "foo= CASE WHEN COALESCE(cc.foo != '__debezium_unavailable_value', true) THEN cc.foo ELSE c.foo END,bar=cc.bar", + dialect: sql.SnowflakeDialect{}, + expectedString: `"FOO"= CASE WHEN COALESCE(cc."FOO" != '__debezium_unavailable_value', true) THEN cc."FOO" ELSE c."FOO" END,"BAR"=cc."BAR"`, }, { name: "struct, string and toast string", columns: lastCaseColTypes, - destKind: constants.Redshift, + dialect: sql.RedshiftDialect{}, expectedString: `"a1"= CASE WHEN COALESCE(cc."a1" != JSON_PARSE('{"key":"__debezium_unavailable_value"}'), true) THEN cc."a1" ELSE c."a1" END,"b2"= CASE WHEN COALESCE(cc."b2" != '__debezium_unavailable_value', true) THEN cc."b2" ELSE c."b2" END,"c3"=cc."c3"`, }, { name: "struct, string and toast string (bigquery)", columns: lastCaseColTypes, - destKind: constants.BigQuery, + dialect: sql.BigQueryDialect{}, expectedString: "`a1`= CASE WHEN COALESCE(TO_JSON_STRING(cc.`a1`) != '{\"key\":\"__debezium_unavailable_value\"}', true) THEN cc.`a1` ELSE c.`a1` END,`b2`= CASE WHEN COALESCE(cc.`b2` != '__debezium_unavailable_value', true) THEN cc.`b2` ELSE c.`b2` END,`c3`=cc.`c3`", }, { - name: "struct, string and toast string (bigquery) w/ reserved keywords", - columns: lastCaseEscapeTypes, - destKind: constants.BigQuery, + name: "struct, string and toast string (bigquery) w/ reserved keywords", + columns: lastCaseEscapeTypes, + dialect: sql.BigQueryDialect{}, expectedString: fmt.Sprintf("`a1`= CASE WHEN COALESCE(TO_JSON_STRING(cc.`a1`) != '%s', true) THEN cc.`a1` ELSE c.`a1` END,`b2`= CASE WHEN COALESCE(cc.`b2` != '__debezium_unavailable_value', true) THEN cc.`b2` ELSE c.`b2` END,`c3`=cc.`c3`,%s,%s", key, fmt.Sprintf("`start`= CASE WHEN COALESCE(TO_JSON_STRING(cc.`start`) != '%s', true) THEN cc.`start` ELSE c.`start` END", key), "`select`=cc.`select`"), skipDeleteCol: true, }, { - name: "struct, string and toast string (bigquery) w/ reserved keywords", - columns: lastCaseEscapeTypes, - destKind: constants.BigQuery, + name: "struct, string and toast string (bigquery) w/ reserved keywords", + columns: lastCaseEscapeTypes, + dialect: sql.BigQueryDialect{}, expectedString: fmt.Sprintf("`a1`= CASE WHEN COALESCE(TO_JSON_STRING(cc.`a1`) != '%s', true) THEN cc.`a1` ELSE c.`a1` END,`b2`= CASE WHEN COALESCE(cc.`b2` != '__debezium_unavailable_value', true) THEN cc.`b2` ELSE c.`b2` END,`c3`=cc.`c3`,%s,%s", key, fmt.Sprintf("`start`= CASE WHEN COALESCE(TO_JSON_STRING(cc.`start`) != '%s', true) THEN cc.`start` ELSE c.`start` END", key), "`select`=cc.`select`,`__artie_delete`=cc.`__artie_delete`"), skipDeleteCol: false, @@ -519,7 +419,7 @@ func TestColumnsUpdateQuery(t *testing.T) { } for _, _testCase := range testCases { - actualQuery := _testCase.columns.UpdateQuery(_testCase.destKind, false, _testCase.skipDeleteCol) + actualQuery := _testCase.columns.UpdateQuery(_testCase.dialect, _testCase.skipDeleteCol) assert.Equal(t, _testCase.expectedString, actualQuery, _testCase.name) } } diff --git a/lib/typing/columns/columns_toast_test.go b/lib/typing/columns/columns_toast_test.go index 6056eb5f5..47ac0fa00 100644 --- a/lib/typing/columns/columns_toast_test.go +++ b/lib/typing/columns/columns_toast_test.go @@ -3,20 +3,20 @@ package columns import ( "testing" - "github.com/artie-labs/transfer/lib/config/constants" + "github.com/artie-labs/transfer/lib/sql" "github.com/stretchr/testify/assert" ) func TestProcessToastStructCol(t *testing.T) { - assert.Equal(t, `foo= CASE WHEN COALESCE(cc.foo != JSON_PARSE('{"key":"__debezium_unavailable_value"}'), true) THEN cc.foo ELSE c.foo END`, processToastStructCol("foo", constants.Redshift)) - assert.Equal(t, `foo= CASE WHEN COALESCE(TO_JSON_STRING(cc.foo) != '{"key":"__debezium_unavailable_value"}', true) THEN cc.foo ELSE c.foo END`, processToastStructCol("foo", constants.BigQuery)) - assert.Equal(t, `foo= CASE WHEN COALESCE(cc.foo != {'key': '__debezium_unavailable_value'}, true) THEN cc.foo ELSE c.foo END`, processToastStructCol("foo", constants.Snowflake)) - assert.Equal(t, `foo= CASE WHEN COALESCE(cc.foo, {}) != {'key': '__debezium_unavailable_value'} THEN cc.foo ELSE c.foo END`, processToastStructCol("foo", constants.MSSQL)) + assert.Equal(t, `foo= CASE WHEN COALESCE(cc.foo != JSON_PARSE('{"key":"__debezium_unavailable_value"}'), true) THEN cc.foo ELSE c.foo END`, processToastStructCol("foo", sql.RedshiftDialect{})) + assert.Equal(t, `foo= CASE WHEN COALESCE(TO_JSON_STRING(cc.foo) != '{"key":"__debezium_unavailable_value"}', true) THEN cc.foo ELSE c.foo END`, processToastStructCol("foo", sql.BigQueryDialect{})) + assert.Equal(t, `foo= CASE WHEN COALESCE(cc.foo != {'key': '__debezium_unavailable_value'}, true) THEN cc.foo ELSE c.foo END`, processToastStructCol("foo", sql.SnowflakeDialect{})) + assert.Equal(t, `foo= CASE WHEN COALESCE(cc.foo, {}) != {'key': '__debezium_unavailable_value'} THEN cc.foo ELSE c.foo END`, processToastStructCol("foo", sql.MSSQLDialect{})) } func TestProcessToastCol(t *testing.T) { - assert.Equal(t, `bar= CASE WHEN COALESCE(cc.bar != '__debezium_unavailable_value', true) THEN cc.bar ELSE c.bar END`, processToastCol("bar", constants.Redshift)) - assert.Equal(t, `bar= CASE WHEN COALESCE(cc.bar != '__debezium_unavailable_value', true) THEN cc.bar ELSE c.bar END`, processToastCol("bar", constants.BigQuery)) - assert.Equal(t, `bar= CASE WHEN COALESCE(cc.bar != '__debezium_unavailable_value', true) THEN cc.bar ELSE c.bar END`, processToastCol("bar", constants.Snowflake)) - assert.Equal(t, `bar= CASE WHEN COALESCE(cc.bar, '') != '__debezium_unavailable_value' THEN cc.bar ELSE c.bar END`, processToastCol("bar", constants.MSSQL)) + assert.Equal(t, `bar= CASE WHEN COALESCE(cc.bar != '__debezium_unavailable_value', true) THEN cc.bar ELSE c.bar END`, processToastCol("bar", sql.RedshiftDialect{})) + assert.Equal(t, `bar= CASE WHEN COALESCE(cc.bar != '__debezium_unavailable_value', true) THEN cc.bar ELSE c.bar END`, processToastCol("bar", sql.BigQueryDialect{})) + assert.Equal(t, `bar= CASE WHEN COALESCE(cc.bar != '__debezium_unavailable_value', true) THEN cc.bar ELSE c.bar END`, processToastCol("bar", sql.SnowflakeDialect{})) + assert.Equal(t, `bar= CASE WHEN COALESCE(cc.bar, '') != '__debezium_unavailable_value' THEN cc.bar ELSE c.bar END`, processToastCol("bar", sql.MSSQLDialect{})) } diff --git a/lib/typing/columns/default.go b/lib/typing/columns/default.go index 45d2fa8f6..6624c4278 100644 --- a/lib/typing/columns/default.go +++ b/lib/typing/columns/default.go @@ -3,39 +3,26 @@ package columns import ( "fmt" - "github.com/artie-labs/transfer/lib/config/constants" + "github.com/artie-labs/transfer/lib/sql" "github.com/artie-labs/transfer/lib/typing/ext" - "github.com/artie-labs/transfer/lib/stringutil" "github.com/artie-labs/transfer/lib/typing" "github.com/artie-labs/transfer/lib/typing/decimal" ) -type DefaultValueArgs struct { - Escape bool - DestKind constants.DestinationKind -} - func (c *Column) RawDefaultValue() any { return c.defaultValue } -func (c *Column) DefaultValue(args *DefaultValueArgs, additionalDateFmts []string) (any, error) { - if args == nil || !args.Escape || c.defaultValue == nil { +func (c *Column) DefaultValue(dialect sql.Dialect, additionalDateFmts []string) (any, error) { + if c.defaultValue == nil { return c.defaultValue, nil } switch c.KindDetails.Kind { case typing.Struct.Kind, typing.Array.Kind: - switch args.DestKind { - case constants.BigQuery: - return "JSON" + stringutil.Wrap(c.defaultValue, false), nil - case constants.Redshift: - return fmt.Sprintf("JSON_PARSE(%s)", stringutil.Wrap(c.defaultValue, false)), nil - case constants.Snowflake: - return stringutil.Wrap(c.defaultValue, false), nil - } + return dialect.EscapeStruct(fmt.Sprint(c.defaultValue)), nil case typing.ETime.Kind: if c.KindDetails.ExtendedTimeDetails == nil { return nil, fmt.Errorf("column kind details for extended time is nil") @@ -48,9 +35,9 @@ func (c *Column) DefaultValue(args *DefaultValueArgs, additionalDateFmts []strin switch c.KindDetails.ExtendedTimeDetails.Type { case ext.TimeKindType: - return stringutil.Wrap(extTime.String(ext.PostgresTimeFormatNoTZ), false), nil + return sql.QuoteLiteral(extTime.String(ext.PostgresTimeFormatNoTZ)), nil default: - return stringutil.Wrap(extTime.String(c.KindDetails.ExtendedTimeDetails.Format), false), nil + return sql.QuoteLiteral(extTime.String(c.KindDetails.ExtendedTimeDetails.Format)), nil } case typing.EDecimal.Kind: val, isOk := c.defaultValue.(*decimal.Decimal) @@ -60,7 +47,7 @@ func (c *Column) DefaultValue(args *DefaultValueArgs, additionalDateFmts []strin return val.Value(), nil case typing.String.Kind: - return stringutil.Wrap(c.defaultValue, false), nil + return sql.QuoteLiteral(fmt.Sprint(c.defaultValue)), nil } return c.defaultValue, nil diff --git a/lib/typing/columns/default_test.go b/lib/typing/columns/default_test.go index 8c277b4df..0da9f8cfe 100644 --- a/lib/typing/columns/default_test.go +++ b/lib/typing/columns/default_test.go @@ -5,7 +5,7 @@ import ( "testing" "time" - "github.com/artie-labs/transfer/lib/config/constants" + "github.com/artie-labs/transfer/lib/sql" "github.com/artie-labs/transfer/lib/typing/ext" @@ -14,6 +14,12 @@ import ( "github.com/stretchr/testify/assert" ) +var dialects = []sql.Dialect{ + sql.BigQueryDialect{}, + sql.RedshiftDialect{}, + sql.SnowflakeDialect{}, +} + func TestColumn_DefaultValue(t *testing.T) { birthday := time.Date(2022, time.September, 6, 3, 19, 24, 942000000, time.UTC) birthdayExtDateTime, err := ext.ParseExtendedDateTime(birthday.Format(ext.ISO8601), nil) @@ -32,9 +38,9 @@ func TestColumn_DefaultValue(t *testing.T) { testCases := []struct { name string col *Column - args *DefaultValueArgs + dialect sql.Dialect expectedValue any - destKindToExpectedValueMap map[constants.DestinationKind]any + destKindToExpectedValueMap map[sql.Dialect]any }{ { name: "default value = nil", @@ -42,37 +48,14 @@ func TestColumn_DefaultValue(t *testing.T) { KindDetails: typing.String, defaultValue: nil, }, - args: &DefaultValueArgs{ - Escape: true, - }, expectedValue: nil, }, - { - name: "escaped args (nil)", - col: &Column{ - KindDetails: typing.String, - defaultValue: "abcdef", - }, - expectedValue: "abcdef", - }, - { - name: "escaped args (escaped = false)", - col: &Column{ - KindDetails: typing.String, - defaultValue: "abcdef", - }, - args: &DefaultValueArgs{}, - expectedValue: "abcdef", - }, { name: "string", col: &Column{ KindDetails: typing.String, defaultValue: "abcdef", }, - args: &DefaultValueArgs{ - Escape: true, - }, expectedValue: "'abcdef'", }, { @@ -81,14 +64,11 @@ func TestColumn_DefaultValue(t *testing.T) { KindDetails: typing.Struct, defaultValue: "{}", }, - args: &DefaultValueArgs{ - Escape: true, - }, expectedValue: `{}`, - destKindToExpectedValueMap: map[constants.DestinationKind]any{ - constants.BigQuery: "JSON'{}'", - constants.Redshift: `JSON_PARSE('{}')`, - constants.Snowflake: `'{}'`, + destKindToExpectedValueMap: map[sql.Dialect]any{ + dialects[0]: "JSON'{}'", + dialects[1]: `JSON_PARSE('{}')`, + dialects[2]: `'{}'`, }, }, { @@ -97,14 +77,11 @@ func TestColumn_DefaultValue(t *testing.T) { KindDetails: typing.Struct, defaultValue: "{\"age\": 0, \"membership_level\": \"standard\"}", }, - args: &DefaultValueArgs{ - Escape: true, - }, expectedValue: "{\"age\": 0, \"membership_level\": \"standard\"}", - destKindToExpectedValueMap: map[constants.DestinationKind]any{ - constants.BigQuery: "JSON'{\"age\": 0, \"membership_level\": \"standard\"}'", - constants.Redshift: "JSON_PARSE('{\"age\": 0, \"membership_level\": \"standard\"}')", - constants.Snowflake: "'{\"age\": 0, \"membership_level\": \"standard\"}'", + destKindToExpectedValueMap: map[sql.Dialect]any{ + dialects[0]: "JSON'{\"age\": 0, \"membership_level\": \"standard\"}'", + dialects[1]: "JSON_PARSE('{\"age\": 0, \"membership_level\": \"standard\"}')", + dialects[2]: "'{\"age\": 0, \"membership_level\": \"standard\"}'", }, }, { @@ -113,9 +90,6 @@ func TestColumn_DefaultValue(t *testing.T) { KindDetails: dateKind, defaultValue: birthdayExtDateTime, }, - args: &DefaultValueArgs{ - Escape: true, - }, expectedValue: "'2022-09-06'", }, { @@ -124,9 +98,6 @@ func TestColumn_DefaultValue(t *testing.T) { KindDetails: timeKind, defaultValue: birthdayExtDateTime, }, - args: &DefaultValueArgs{ - Escape: true, - }, expectedValue: "'03:19:24.942'", }, { @@ -135,29 +106,22 @@ func TestColumn_DefaultValue(t *testing.T) { KindDetails: dateTimeKind, defaultValue: birthdayExtDateTime, }, - args: &DefaultValueArgs{ - Escape: true, - }, expectedValue: "'2022-09-06T03:19:24.942Z'", }, } for _, testCase := range testCases { - for _, validDest := range constants.ValidDestinations { - if testCase.args != nil { - testCase.args.DestKind = validDest - } - - actualValue, actualErr := testCase.col.DefaultValue(testCase.args, nil) - assert.NoError(t, actualErr, fmt.Sprintf("%s %s", testCase.name, validDest)) + for _, dialect := range dialects { + actualValue, actualErr := testCase.col.DefaultValue(dialect, nil) + assert.NoError(t, actualErr, fmt.Sprintf("%s %s", testCase.name, dialect)) expectedValue := testCase.expectedValue - if potentialValue, isOk := testCase.destKindToExpectedValueMap[validDest]; isOk { + if potentialValue, isOk := testCase.destKindToExpectedValueMap[dialect]; isOk { // Not everything requires a destination specific value, so only use this if necessary. expectedValue = potentialValue } - assert.Equal(t, expectedValue, actualValue, fmt.Sprintf("%s %s", testCase.name, validDest)) + assert.Equal(t, expectedValue, actualValue, fmt.Sprintf("%s %s", testCase.name, dialect)) } } } diff --git a/lib/typing/columns/diff.go b/lib/typing/columns/diff.go index ac22c27d5..f717bca1f 100644 --- a/lib/typing/columns/diff.go +++ b/lib/typing/columns/diff.go @@ -40,7 +40,7 @@ func Diff(columnsInSource *Columns, columnsInDestination *Columns, softDelete bo targ := CloneColumns(columnsInDestination) var colsToDelete []Column for _, col := range src.GetColumns() { - _, isOk := targ.GetColumn(col.RawName()) + _, isOk := targ.GetColumn(col.Name()) if isOk { colsToDelete = append(colsToDelete, col) @@ -49,13 +49,13 @@ func Diff(columnsInSource *Columns, columnsInDestination *Columns, softDelete bo // We cannot delete inside a for-loop that is iterating over src.GetColumns() because we are messing up the array order. for _, colToDelete := range colsToDelete { - src.DeleteColumn(colToDelete.RawName()) - targ.DeleteColumn(colToDelete.RawName()) + src.DeleteColumn(colToDelete.Name()) + targ.DeleteColumn(colToDelete.Name()) } var targetColumnsMissing Columns for _, col := range src.GetColumns() { - if shouldSkipColumn(col.RawName(), softDelete, includeArtieUpdatedAt, includeDatabaseUpdatedAt, mode) { + if shouldSkipColumn(col.Name(), softDelete, includeArtieUpdatedAt, includeDatabaseUpdatedAt, mode) { continue } @@ -64,7 +64,7 @@ func Diff(columnsInSource *Columns, columnsInDestination *Columns, softDelete bo var sourceColumnsMissing Columns for _, col := range targ.GetColumns() { - if shouldSkipColumn(col.RawName(), softDelete, includeArtieUpdatedAt, includeDatabaseUpdatedAt, mode) { + if shouldSkipColumn(col.Name(), softDelete, includeArtieUpdatedAt, includeDatabaseUpdatedAt, mode) { continue } diff --git a/lib/typing/columns/diff_test.go b/lib/typing/columns/diff_test.go index 93a24e90e..8609cedeb 100644 --- a/lib/typing/columns/diff_test.go +++ b/lib/typing/columns/diff_test.go @@ -226,7 +226,7 @@ func TestDiffDeterministic(t *testing.T) { var key string for _, targetKeyMissing := range targetKeysMissing { - key += targetKeyMissing.RawName() + key += targetKeyMissing.Name() } retMap[key] = false diff --git a/lib/typing/columns/wrapper.go b/lib/typing/columns/wrapper.go deleted file mode 100644 index cb5f37643..000000000 --- a/lib/typing/columns/wrapper.go +++ /dev/null @@ -1,23 +0,0 @@ -package columns - -import "github.com/artie-labs/transfer/lib/config/constants" - -type Wrapper struct { - name string - escapedName string -} - -func NewWrapper(col Column, uppercaseEscNames bool, destKind constants.DestinationKind) Wrapper { - return Wrapper{ - name: col.name, - escapedName: col.Name(uppercaseEscNames, destKind), - } -} - -func (w Wrapper) EscapedName() string { - return w.escapedName -} - -func (w Wrapper) RawName() string { - return w.name -} diff --git a/lib/typing/columns/wrapper_test.go b/lib/typing/columns/wrapper_test.go deleted file mode 100644 index 38da079fb..000000000 --- a/lib/typing/columns/wrapper_test.go +++ /dev/null @@ -1,60 +0,0 @@ -package columns - -import ( - "testing" - - "github.com/stretchr/testify/assert" - - "github.com/artie-labs/transfer/lib/config/constants" - - "github.com/artie-labs/transfer/lib/typing" -) - -func TestWrapper_Complete(t *testing.T) { - type _testCase struct { - name string - expectedRawName string - expectedEscapedName string - expectedEscapedNameBQ string - } - - testCases := []_testCase{ - { - name: "happy", - expectedRawName: "happy", - expectedEscapedName: `"HAPPY"`, - expectedEscapedNameBQ: "`happy`", - }, - { - name: "user_id", - expectedRawName: "user_id", - expectedEscapedName: `"USER_ID"`, - expectedEscapedNameBQ: "`user_id`", - }, - { - name: "group", - expectedRawName: "group", - expectedEscapedName: `"GROUP"`, - expectedEscapedNameBQ: "`group`", - }, - } - - for _, testCase := range testCases { - // Snowflake escape - w := NewWrapper(NewColumn(testCase.name, typing.Invalid), true, constants.Snowflake) - - assert.Equal(t, testCase.expectedEscapedName, w.EscapedName(), testCase.name) - assert.Equal(t, testCase.expectedRawName, w.RawName(), testCase.name) - - // BigQuery escape - w = NewWrapper(NewColumn(testCase.name, typing.Invalid), false, constants.BigQuery) - - assert.Equal(t, testCase.expectedEscapedNameBQ, w.EscapedName(), testCase.name) - assert.Equal(t, testCase.expectedRawName, w.RawName(), testCase.name) - - for _, destKind := range []constants.DestinationKind{constants.Snowflake, constants.BigQuery} { - w = NewWrapper(NewColumn(testCase.name, typing.Invalid), false, destKind) - assert.Equal(t, testCase.expectedRawName, w.RawName(), testCase.name) - } - } -} diff --git a/lib/typing/values/string.go b/lib/typing/values/string.go index 9597717f0..60502fada 100644 --- a/lib/typing/values/string.go +++ b/lib/typing/values/string.go @@ -56,7 +56,7 @@ func ToString(colVal any, colKind columns.Column, additionalDateFmts []string) ( return string(colValBytes), nil } - return stringutil.Wrap(colVal, true), nil + return stringutil.EscapeBackslashes(fmt.Sprint(colVal)), nil case typing.Struct.Kind: if colKind.KindDetails == typing.Struct { if strings.Contains(fmt.Sprint(colVal), constants.ToastUnavailableValuePlaceholder) { diff --git a/models/event/event_save_test.go b/models/event/event_save_test.go index d37b7f2d0..6c7c98f01 100644 --- a/models/event/event_save_test.go +++ b/models/event/event_save_test.go @@ -48,7 +48,7 @@ func (e *EventsTestSuite) TestSaveEvent() { // Check the in-memory DB columns. var found int for _, col := range optimization.ReadOnlyInMemoryCols().GetColumns() { - if col.RawName() == expectedLowerCol || col.RawName() == anotherLowerCol { + if col.Name() == expectedLowerCol || col.Name() == anotherLowerCol { found += 1 } @@ -183,16 +183,16 @@ func (e *EventsTestSuite) TestEvent_SaveColumnsNoData() { td := e.db.GetOrCreateTableData("non_existent") var prevKey string for _, col := range td.ReadOnlyInMemoryCols().GetColumns() { - if col.RawName() == constants.DeleteColumnMarker { + if col.Name() == constants.DeleteColumnMarker { continue } if prevKey == "" { - prevKey = col.RawName() + prevKey = col.Name() continue } - currentKeyParsed, err := strconv.Atoi(col.RawName()) + currentKeyParsed, err := strconv.Atoi(col.Name()) assert.NoError(e.T(), err) prevKeyParsed, err := strconv.Atoi(prevKey) @@ -206,7 +206,7 @@ func (e *EventsTestSuite) TestEvent_SaveColumnsNoData() { evt.Columns.AddColumn(columns.NewColumn("foo", typing.Invalid)) var index int for idx, col := range evt.Columns.GetColumns() { - if col.RawName() == "foo" { + if col.Name() == "foo" { index = idx } }