From b79fa0a126ea96dcdc781506eefc1145dd566430 Mon Sep 17 00:00:00 2001 From: Robin Tang Date: Tue, 27 Jun 2023 15:36:23 -0700 Subject: [PATCH] Don't backfill or update table column if it's invalid (#140) --- clients/bigquery/merge.go | 4 ++++ clients/redshift/redshift.go | 4 ++++ clients/snowflake/staging.go | 4 ++++ lib/dwh/ddl/ddl.go | 2 +- lib/typing/columns/columns.go | 12 +++++++++-- lib/typing/columns/columns_test.go | 32 ++++++++++++++++++++++++++++++ 6 files changed, 55 insertions(+), 3 deletions(-) diff --git a/clients/bigquery/merge.go b/clients/bigquery/merge.go index 4e709fdca..4c4811271 100644 --- a/clients/bigquery/merge.go +++ b/clients/bigquery/merge.go @@ -185,6 +185,10 @@ func (s *Store) Merge(ctx context.Context, tableData *optimization.TableData) er // Backfill columns if necessary for _, col := range tableData.ReadOnlyInMemoryCols().GetColumns() { + if col.ShouldSkip() { + continue + } + var attempts int for { err = s.backfillColumn(ctx, col, tableData.ToFqName(ctx, s.Label())) diff --git a/clients/redshift/redshift.go b/clients/redshift/redshift.go index 94bd03fbd..6b66b0435 100644 --- a/clients/redshift/redshift.go +++ b/clients/redshift/redshift.go @@ -122,6 +122,10 @@ func (s *Store) Merge(ctx context.Context, tableData *optimization.TableData) er // Now iterate over all the in-memory cols and see which one requires backfill. for _, col := range tableData.ReadOnlyInMemoryCols().GetColumns() { + if col.ShouldSkip() { + continue + } + err = utils.BackfillColumn(ctx, s, col, tableData.ToFqName(ctx, s.Label())) if err != nil { defaultVal, _ := col.DefaultValue(nil) diff --git a/clients/snowflake/staging.go b/clients/snowflake/staging.go index 2edf0c0f9..eafb34a26 100644 --- a/clients/snowflake/staging.go +++ b/clients/snowflake/staging.go @@ -181,6 +181,10 @@ func (s *Store) mergeWithStages(ctx context.Context, tableData *optimization.Tab // Now iterate over all the in-memory cols and see which one requires backfill. for _, col := range tableData.ReadOnlyInMemoryCols().GetColumns() { + if col.ShouldSkip() { + continue + } + err = utils.BackfillColumn(ctx, s, col, tableData.ToFqName(ctx, s.Label())) if err != nil { defaultVal, _ := col.DefaultValue(nil) diff --git a/lib/dwh/ddl/ddl.go b/lib/dwh/ddl/ddl.go index 0d887907c..e3e43f4d6 100644 --- a/lib/dwh/ddl/ddl.go +++ b/lib/dwh/ddl/ddl.go @@ -74,7 +74,7 @@ func AlterTable(ctx context.Context, args AlterTableArgs, cols ...columns.Column // It's okay to combine since args.ColumnOp only takes one of: `Delete` or `Add` var colSQLParts []string for _, col := range cols { - if col.KindDetails == typing.Invalid { + if col.ShouldSkip() { // Let's not modify the table if the column kind is invalid continue } diff --git a/lib/typing/columns/columns.go b/lib/typing/columns/columns.go index d6b538814..57eff7ba6 100644 --- a/lib/typing/columns/columns.go +++ b/lib/typing/columns/columns.go @@ -23,6 +23,14 @@ type Column struct { backfilled bool } +func (c *Column) ShouldSkip() bool { + if c == nil || c.KindDetails.Kind == typing.Invalid.Kind { + return true + } + + return false +} + func UnescapeColumnName(escapedName string, destKind constants.DestinationKind) string { if destKind == constants.BigQuery { return strings.ReplaceAll(escapedName, "`", "") @@ -63,8 +71,8 @@ func (c *Column) ShouldBackfill() bool { return false } - if c.KindDetails.Kind == typing.Invalid.Kind { - // Don't backfill if the in-memory data is `INVALID` + if c.ShouldSkip() { + // Don't backfill return false } diff --git a/lib/typing/columns/columns_test.go b/lib/typing/columns/columns_test.go index 7f3fee23e..1bc914973 100644 --- a/lib/typing/columns/columns_test.go +++ b/lib/typing/columns/columns_test.go @@ -13,6 +13,38 @@ import ( "github.com/stretchr/testify/assert" ) +func TestColumn_ShouldSkip(t *testing.T) { + type _testCase struct { + name string + col *Column + expectedResult bool + } + + testCases := []_testCase{ + { + name: "col is nil", + expectedResult: true, + }, + { + name: "invalid column", + col: &Column{ + KindDetails: typing.Invalid, + }, + expectedResult: true, + }, + { + name: "normal column", + col: &Column{ + KindDetails: typing.String, + }, + }, + } + + for _, testCase := range testCases { + assert.Equal(t, testCase.expectedResult, testCase.col.ShouldSkip(), testCase.name) + } +} + func TestColumn_ShouldBackfill(t *testing.T) { type _testCase struct { name string