From c4bcf6b831867a99caaba7add2e387597670eb7c Mon Sep 17 00:00:00 2001 From: Robin Tang Date: Wed, 9 Oct 2024 17:53:01 -0700 Subject: [PATCH] Moving stuff around. --- clients/redshift/dialect/dialect.go | 110 ----------------- clients/redshift/dialect/dialect_test.go | 140 --------------------- clients/redshift/dialect/typing.go | 117 ++++++++++++++++++ clients/redshift/dialect/typing_test.go | 148 +++++++++++++++++++++++ lib/typing/decimal/base.go | 2 +- 5 files changed, 266 insertions(+), 251 deletions(-) create mode 100644 clients/redshift/dialect/typing.go create mode 100644 clients/redshift/dialect/typing_test.go diff --git a/clients/redshift/dialect/dialect.go b/clients/redshift/dialect/dialect.go index 3460e4a08..7755660b0 100644 --- a/clients/redshift/dialect/dialect.go +++ b/clients/redshift/dialect/dialect.go @@ -2,14 +2,10 @@ package dialect import ( "fmt" - "strconv" - "strings" - "github.com/artie-labs/transfer/lib/config/constants" "github.com/artie-labs/transfer/lib/sql" "github.com/artie-labs/transfer/lib/typing" "github.com/artie-labs/transfer/lib/typing/columns" - "github.com/artie-labs/transfer/lib/typing/ext" ) type RedshiftDialect struct{} @@ -23,112 +19,6 @@ func (RedshiftDialect) EscapeStruct(value string) string { return fmt.Sprintf("JSON_PARSE(%s)", sql.QuoteLiteral(value)) } -func (RedshiftDialect) DataTypeForKind(kd typing.KindDetails, _ bool) string { - switch kd.Kind { - case typing.Integer.Kind: - if kd.OptionalIntegerKind != nil { - switch *kd.OptionalIntegerKind { - case typing.SmallIntegerKind: - return "INT2" - case typing.IntegerKind: - return "INT4" - case typing.NotSpecifiedKind, typing.BigIntegerKind: - fallthrough - default: - // By default, we are using a larger data type to avoid the possibility of an integer overflow. - return "INT8" - } - } - - return "INT8" - case typing.Struct.Kind: - return "SUPER" - case typing.Array.Kind: - // Redshift does not have a built-in JSON type (which means we'll cast STRUCT and ARRAY kinds as TEXT). - // As a result, Artie will store this in JSON string and customers will need to extract this data out via SQL. - // Columns that are automatically created by Artie are created as VARCHAR(MAX). - // Rationale: https://github.com/artie-labs/transfer/pull/173 - return "VARCHAR(MAX)" - case typing.String.Kind: - if kd.OptionalStringPrecision != nil { - return fmt.Sprintf("VARCHAR(%d)", *kd.OptionalStringPrecision) - } - - return "VARCHAR(MAX)" - case typing.Boolean.Kind: - // We need to append `NULL` to let Redshift know that NULL is an acceptable data type. - return "BOOLEAN NULL" - case typing.ETime.Kind: - switch kd.ExtendedTimeDetails.Type { - case ext.TimestampTzKindType: - return "timestamp with time zone" - case ext.DateKindType: - return "date" - case ext.TimeKindType: - return "time" - } - case typing.EDecimal.Kind: - return kd.ExtendedDecimalDetails.RedshiftKind() - } - - return kd.Kind -} - -func (RedshiftDialect) KindForDataType(rawType string, stringPrecision string) (typing.KindDetails, error) { - rawType = strings.ToLower(rawType) - if strings.HasPrefix(rawType, "numeric") { - _, parameters, err := sql.ParseDataTypeDefinition(rawType) - if err != nil { - return typing.Invalid, err - } - return typing.ParseNumeric(parameters) - } - - if strings.Contains(rawType, "character varying") { - precision, err := strconv.ParseInt(stringPrecision, 10, 32) - if err != nil { - return typing.Invalid, fmt.Errorf("failed to parse string precision: %q, err: %w", stringPrecision, err) - } - - return typing.KindDetails{ - Kind: typing.String.Kind, - OptionalStringPrecision: typing.ToPtr(int32(precision)), - }, nil - } - - switch rawType { - case "super": - return typing.Struct, nil - case "smallint": - return typing.KindDetails{ - Kind: typing.Integer.Kind, - OptionalIntegerKind: typing.ToPtr(typing.SmallIntegerKind), - }, nil - case "integer": - return typing.KindDetails{ - Kind: typing.Integer.Kind, - OptionalIntegerKind: typing.ToPtr(typing.IntegerKind), - }, nil - case "bigint": - return typing.KindDetails{ - Kind: typing.Integer.Kind, - OptionalIntegerKind: typing.ToPtr(typing.BigIntegerKind), - }, nil - case "double precision": - return typing.Float, nil - case "timestamp with time zone", "timestamp without time zone": - return typing.NewKindDetailsFromTemplate(typing.ETime, ext.TimestampTzKindType), nil - case "time without time zone": - return typing.NewKindDetailsFromTemplate(typing.ETime, ext.TimeKindType), nil - case "date": - return typing.NewKindDetailsFromTemplate(typing.ETime, ext.DateKindType), nil - case "boolean": - return typing.Boolean, nil - } - - return typing.Invalid, fmt.Errorf("unsupported data type: %q", rawType) -} - func (RedshiftDialect) IsColumnAlreadyExistsErr(err error) bool { // Redshift's error: ERROR: column "foo" of relation "statement" already exists return strings.Contains(err.Error(), "already exists") diff --git a/clients/redshift/dialect/dialect_test.go b/clients/redshift/dialect/dialect_test.go index f285c7671..8084eee5e 100644 --- a/clients/redshift/dialect/dialect_test.go +++ b/clients/redshift/dialect/dialect_test.go @@ -10,7 +10,6 @@ import ( "github.com/artie-labs/transfer/lib/mocks" "github.com/artie-labs/transfer/lib/typing" "github.com/artie-labs/transfer/lib/typing/columns" - "github.com/artie-labs/transfer/lib/typing/ext" ) func TestRedshiftDialect_QuoteIdentifier(t *testing.T) { @@ -19,145 +18,6 @@ func TestRedshiftDialect_QuoteIdentifier(t *testing.T) { assert.Equal(t, `"foo"`, dialect.QuoteIdentifier("FOO")) } -func TestRedshiftDialect_DataTypeForKind(t *testing.T) { - { - // String - { - assert.Equal(t, "VARCHAR(MAX)", RedshiftDialect{}.DataTypeForKind(typing.String, true)) - } - { - assert.Equal(t, "VARCHAR(12345)", RedshiftDialect{}.DataTypeForKind(typing.KindDetails{Kind: typing.String.Kind, OptionalStringPrecision: typing.ToPtr(int32(12345))}, false)) - } - } - { - // Integers - { - // Small int - assert.Equal(t, "INT2", RedshiftDialect{}.DataTypeForKind(typing.KindDetails{Kind: typing.Integer.Kind, OptionalIntegerKind: typing.ToPtr(typing.SmallIntegerKind)}, false)) - } - { - // Integer - assert.Equal(t, "INT4", RedshiftDialect{}.DataTypeForKind(typing.KindDetails{Kind: typing.Integer.Kind, OptionalIntegerKind: typing.ToPtr(typing.IntegerKind)}, false)) - } - { - // Big integer - assert.Equal(t, "INT8", RedshiftDialect{}.DataTypeForKind(typing.KindDetails{Kind: typing.Integer.Kind, OptionalIntegerKind: typing.ToPtr(typing.BigIntegerKind)}, false)) - } - { - // Not specified - { - // Literal - assert.Equal(t, "INT8", RedshiftDialect{}.DataTypeForKind(typing.KindDetails{Kind: typing.Integer.Kind, OptionalIntegerKind: typing.ToPtr(typing.NotSpecifiedKind)}, false)) - } - { - assert.Equal(t, "INT8", RedshiftDialect{}.DataTypeForKind(typing.Integer, false)) - } - } - } -} - -func TestRedshiftDialect_KindForDataType(t *testing.T) { - dialect := RedshiftDialect{} - { - // Integers - { - // Small integer - kd, err := dialect.KindForDataType("smallint", "") - assert.NoError(t, err) - assert.Equal(t, typing.KindDetails{Kind: typing.Integer.Kind, OptionalIntegerKind: typing.ToPtr(typing.SmallIntegerKind)}, kd) - } - { - { - // Regular integers (upper) - kd, err := dialect.KindForDataType("INTEGER", "") - assert.NoError(t, err) - assert.Equal(t, typing.KindDetails{Kind: typing.Integer.Kind, OptionalIntegerKind: typing.ToPtr(typing.IntegerKind)}, kd) - } - { - // Regular integers (lower) - kd, err := dialect.KindForDataType("integer", "") - assert.NoError(t, err) - assert.Equal(t, typing.KindDetails{Kind: typing.Integer.Kind, OptionalIntegerKind: typing.ToPtr(typing.IntegerKind)}, kd) - } - } - { - // Big integer - kd, err := dialect.KindForDataType("bigint", "") - assert.NoError(t, err) - assert.Equal(t, typing.KindDetails{Kind: typing.Integer.Kind, OptionalIntegerKind: typing.ToPtr(typing.BigIntegerKind)}, kd) - } - } - { - // Double - { - kd, err := dialect.KindForDataType("double precision", "") - assert.NoError(t, err) - assert.Equal(t, typing.Float, kd) - } - { - kd, err := dialect.KindForDataType("DOUBLE precision", "") - assert.NoError(t, err) - assert.Equal(t, typing.Float, kd) - } - } - { - // Numeric - { - kd, err := dialect.KindForDataType("numeric(5,2)", "") - assert.NoError(t, err) - assert.Equal(t, typing.EDecimal.Kind, kd.Kind) - assert.Equal(t, int32(5), kd.ExtendedDecimalDetails.Precision()) - assert.Equal(t, int32(2), kd.ExtendedDecimalDetails.Scale()) - } - { - kd, err := dialect.KindForDataType("numeric(5,5)", "") - assert.NoError(t, err) - assert.Equal(t, typing.EDecimal.Kind, kd.Kind) - assert.Equal(t, int32(5), kd.ExtendedDecimalDetails.Precision()) - assert.Equal(t, int32(5), kd.ExtendedDecimalDetails.Scale()) - } - } - { - // Boolean - kd, err := dialect.KindForDataType("boolean", "") - assert.NoError(t, err) - assert.Equal(t, typing.Boolean, kd) - } - { - // String with precision - kd, err := dialect.KindForDataType("character varying", "65535") - assert.NoError(t, err) - assert.Equal(t, typing.KindDetails{Kind: typing.String.Kind, OptionalStringPrecision: typing.ToPtr(int32(65535))}, kd) - } - { - // Times - { - kd, err := dialect.KindForDataType("timestamp with time zone", "") - assert.NoError(t, err) - assert.Equal(t, typing.ETime.Kind, kd.Kind) - assert.Equal(t, ext.TimestampTzKindType, kd.ExtendedTimeDetails.Type) - } - { - kd, err := dialect.KindForDataType("timestamp without time zone", "") - assert.NoError(t, err) - assert.Equal(t, typing.ETime.Kind, kd.Kind) - assert.Equal(t, ext.TimestampTzKindType, kd.ExtendedTimeDetails.Type) - } - { - kd, err := dialect.KindForDataType("time without time zone", "") - assert.NoError(t, err) - assert.Equal(t, typing.ETime.Kind, kd.Kind) - assert.Equal(t, ext.TimeKindType, kd.ExtendedTimeDetails.Type) - } - { - kd, err := dialect.KindForDataType("date", "") - assert.NoError(t, err) - assert.Equal(t, typing.ETime.Kind, kd.Kind) - assert.Equal(t, ext.DateKindType, kd.ExtendedTimeDetails.Type) - } - } -} - func TestRedshiftDialect_IsColumnAlreadyExistsErr(t *testing.T) { { // Irrelevant error diff --git a/clients/redshift/dialect/typing.go b/clients/redshift/dialect/typing.go new file mode 100644 index 000000000..3394fd2c0 --- /dev/null +++ b/clients/redshift/dialect/typing.go @@ -0,0 +1,117 @@ +package dialect + +import ( + "fmt" + "strconv" + "strings" + + "github.com/artie-labs/transfer/lib/sql" + "github.com/artie-labs/transfer/lib/typing" + "github.com/artie-labs/transfer/lib/typing/ext" +) + +func (RedshiftDialect) DataTypeForKind(kd typing.KindDetails, _ bool) string { + switch kd.Kind { + case typing.Integer.Kind: + if kd.OptionalIntegerKind != nil { + switch *kd.OptionalIntegerKind { + case typing.SmallIntegerKind: + return "INT2" + case typing.IntegerKind: + return "INT4" + case typing.NotSpecifiedKind, typing.BigIntegerKind: + fallthrough + default: + // By default, we are using a larger data type to avoid the possibility of an integer overflow. + return "INT8" + } + } + + return "INT8" + case typing.Struct.Kind: + return "SUPER" + case typing.Array.Kind: + // Redshift does not have a built-in JSON type (which means we'll cast STRUCT and ARRAY kinds as TEXT). + // As a result, Artie will store this in JSON string and customers will need to extract this data out via SQL. + // Columns that are automatically created by Artie are created as VARCHAR(MAX). + // Rationale: https://github.com/artie-labs/transfer/pull/173 + return "VARCHAR(MAX)" + case typing.String.Kind: + if kd.OptionalStringPrecision != nil { + return fmt.Sprintf("VARCHAR(%d)", *kd.OptionalStringPrecision) + } + + return "VARCHAR(MAX)" + case typing.Boolean.Kind: + // We need to append `NULL` to let Redshift know that NULL is an acceptable data type. + return "BOOLEAN NULL" + case typing.ETime.Kind: + switch kd.ExtendedTimeDetails.Type { + case ext.TimestampTzKindType: + return "timestamp with time zone" + case ext.DateKindType: + return "date" + case ext.TimeKindType: + return "time" + } + case typing.EDecimal.Kind: + return kd.ExtendedDecimalDetails.RedshiftKind() + } + + return kd.Kind +} + +func (RedshiftDialect) KindForDataType(rawType string, stringPrecision string) (typing.KindDetails, error) { + rawType = strings.ToLower(rawType) + if strings.HasPrefix(rawType, "numeric") { + _, parameters, err := sql.ParseDataTypeDefinition(rawType) + if err != nil { + return typing.Invalid, err + } + return typing.ParseNumeric(parameters) + } + + if strings.Contains(rawType, "character varying") { + precision, err := strconv.ParseInt(stringPrecision, 10, 32) + if err != nil { + return typing.Invalid, fmt.Errorf("failed to parse string precision: %q, err: %w", stringPrecision, err) + } + + return typing.KindDetails{ + Kind: typing.String.Kind, + OptionalStringPrecision: typing.ToPtr(int32(precision)), + }, nil + } + + switch rawType { + case "super": + return typing.Struct, nil + case "smallint": + return typing.KindDetails{ + Kind: typing.Integer.Kind, + OptionalIntegerKind: typing.ToPtr(typing.SmallIntegerKind), + }, nil + case "integer": + return typing.KindDetails{ + Kind: typing.Integer.Kind, + OptionalIntegerKind: typing.ToPtr(typing.IntegerKind), + }, nil + case "bigint": + return typing.KindDetails{ + Kind: typing.Integer.Kind, + OptionalIntegerKind: typing.ToPtr(typing.BigIntegerKind), + }, nil + case "double precision": + return typing.Float, nil + case "timestamp with time zone", "timestamp without time zone": + return typing.NewKindDetailsFromTemplate(typing.ETime, ext.TimestampTzKindType), nil + case "time without time zone": + return typing.NewKindDetailsFromTemplate(typing.ETime, ext.TimeKindType), nil + case "date": + return typing.NewKindDetailsFromTemplate(typing.ETime, ext.DateKindType), nil + case "boolean": + return typing.Boolean, nil + } + + return typing.Invalid, fmt.Errorf("unsupported data type: %q", rawType) +} diff --git a/clients/redshift/dialect/typing_test.go b/clients/redshift/dialect/typing_test.go new file mode 100644 index 000000000..784eb76cc --- /dev/null +++ b/clients/redshift/dialect/typing_test.go @@ -0,0 +1,148 @@ +package dialect + +import ( + "testing" + + "github.com/artie-labs/transfer/lib/typing" + "github.com/artie-labs/transfer/lib/typing/ext" + "github.com/stretchr/testify/assert" +) + +func TestRedshiftDialect_DataTypeForKind(t *testing.T) { + { + // String + { + assert.Equal(t, "VARCHAR(MAX)", RedshiftDialect{}.DataTypeForKind(typing.String, true)) + } + { + assert.Equal(t, "VARCHAR(12345)", RedshiftDialect{}.DataTypeForKind(typing.KindDetails{Kind: typing.String.Kind, OptionalStringPrecision: typing.ToPtr(int32(12345))}, false)) + } + } + { + // Integers + { + // Small int + assert.Equal(t, "INT2", RedshiftDialect{}.DataTypeForKind(typing.KindDetails{Kind: typing.Integer.Kind, OptionalIntegerKind: typing.ToPtr(typing.SmallIntegerKind)}, false)) + } + { + // Integer + assert.Equal(t, "INT4", RedshiftDialect{}.DataTypeForKind(typing.KindDetails{Kind: typing.Integer.Kind, OptionalIntegerKind: typing.ToPtr(typing.IntegerKind)}, false)) + } + { + // Big integer + assert.Equal(t, "INT8", RedshiftDialect{}.DataTypeForKind(typing.KindDetails{Kind: typing.Integer.Kind, OptionalIntegerKind: typing.ToPtr(typing.BigIntegerKind)}, false)) + } + { + // Not specified + { + // Literal + assert.Equal(t, "INT8", RedshiftDialect{}.DataTypeForKind(typing.KindDetails{Kind: typing.Integer.Kind, OptionalIntegerKind: typing.ToPtr(typing.NotSpecifiedKind)}, false)) + } + { + assert.Equal(t, "INT8", RedshiftDialect{}.DataTypeForKind(typing.Integer, false)) + } + } + } +} + +func TestRedshiftDialect_KindForDataType(t *testing.T) { + dialect := RedshiftDialect{} + { + // Integers + { + // Small integer + kd, err := dialect.KindForDataType("smallint", "") + assert.NoError(t, err) + assert.Equal(t, typing.KindDetails{Kind: typing.Integer.Kind, OptionalIntegerKind: typing.ToPtr(typing.SmallIntegerKind)}, kd) + } + { + { + // Regular integers (upper) + kd, err := dialect.KindForDataType("INTEGER", "") + assert.NoError(t, err) + assert.Equal(t, typing.KindDetails{Kind: typing.Integer.Kind, OptionalIntegerKind: typing.ToPtr(typing.IntegerKind)}, kd) + } + { + // Regular integers (lower) + kd, err := dialect.KindForDataType("integer", "") + assert.NoError(t, err) + assert.Equal(t, typing.KindDetails{Kind: typing.Integer.Kind, OptionalIntegerKind: typing.ToPtr(typing.IntegerKind)}, kd) + } + } + { + // Big integer + kd, err := dialect.KindForDataType("bigint", "") + assert.NoError(t, err) + assert.Equal(t, typing.KindDetails{Kind: typing.Integer.Kind, OptionalIntegerKind: typing.ToPtr(typing.BigIntegerKind)}, kd) + } + } + { + // Double + { + kd, err := dialect.KindForDataType("double precision", "") + assert.NoError(t, err) + assert.Equal(t, typing.Float, kd) + } + { + kd, err := dialect.KindForDataType("DOUBLE precision", "") + assert.NoError(t, err) + assert.Equal(t, typing.Float, kd) + } + } + { + // Numeric + { + kd, err := dialect.KindForDataType("numeric(5,2)", "") + assert.NoError(t, err) + assert.Equal(t, typing.EDecimal.Kind, kd.Kind) + assert.Equal(t, int32(5), kd.ExtendedDecimalDetails.Precision()) + assert.Equal(t, int32(2), kd.ExtendedDecimalDetails.Scale()) + } + { + kd, err := dialect.KindForDataType("numeric(5,5)", "") + assert.NoError(t, err) + assert.Equal(t, typing.EDecimal.Kind, kd.Kind) + assert.Equal(t, int32(5), kd.ExtendedDecimalDetails.Precision()) + assert.Equal(t, int32(5), kd.ExtendedDecimalDetails.Scale()) + } + } + { + // Boolean + kd, err := dialect.KindForDataType("boolean", "") + assert.NoError(t, err) + assert.Equal(t, typing.Boolean, kd) + } + { + // String with precision + kd, err := dialect.KindForDataType("character varying", "65535") + assert.NoError(t, err) + assert.Equal(t, typing.KindDetails{Kind: typing.String.Kind, OptionalStringPrecision: typing.ToPtr(int32(65535))}, kd) + } + { + // Times + { + kd, err := dialect.KindForDataType("timestamp with time zone", "") + assert.NoError(t, err) + assert.Equal(t, typing.ETime.Kind, kd.Kind) + assert.Equal(t, ext.TimestampTzKindType, kd.ExtendedTimeDetails.Type) + } + { + kd, err := dialect.KindForDataType("timestamp without time zone", "") + assert.NoError(t, err) + assert.Equal(t, typing.ETime.Kind, kd.Kind) + assert.Equal(t, ext.TimestampTzKindType, kd.ExtendedTimeDetails.Type) + } + { + kd, err := dialect.KindForDataType("time without time zone", "") + assert.NoError(t, err) + assert.Equal(t, typing.ETime.Kind, kd.Kind) + assert.Equal(t, ext.TimeKindType, kd.ExtendedTimeDetails.Type) + } + { + kd, err := dialect.KindForDataType("date", "") + assert.NoError(t, err) + assert.Equal(t, typing.ETime.Kind, kd.Kind) + assert.Equal(t, ext.DateKindType, kd.ExtendedTimeDetails.Type) + } + } +} diff --git a/lib/typing/decimal/base.go b/lib/typing/decimal/base.go index 060eda28a..bc44242b9 100644 --- a/lib/typing/decimal/base.go +++ b/lib/typing/decimal/base.go @@ -39,7 +39,7 @@ func (d Details) toKind(maxPrecision int32, exceededKind string) string { return exceededKind } - return fmt.Sprintf("NUMERIC(%v, %v)", d.precision, d.scale) + return fmt.Sprintf("NUMERIC(%d, %d)", d.precision, d.scale) } func (d Details) toDecimalKind(maxPrecision int32, exceededKind string) string {