Skip to content

Commit

Permalink
Merge branch 'master' into redshift-string-precision
Browse files Browse the repository at this point in the history
  • Loading branch information
Tang8330 authored Oct 14, 2024
2 parents 0b4a39c + 1b1ddbe commit b7da0d7
Show file tree
Hide file tree
Showing 5 changed files with 266 additions and 249 deletions.
108 changes: 0 additions & 108 deletions clients/redshift/dialect/dialect.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,14 +2,12 @@ package dialect

import (
"fmt"
"strconv"
"strings"

"github.com/artie-labs/transfer/lib/config/constants"
"github.com/artie-labs/transfer/lib/sql"
"github.com/artie-labs/transfer/lib/typing"
"github.com/artie-labs/transfer/lib/typing/columns"
"github.com/artie-labs/transfer/lib/typing/ext"
)

type RedshiftDialect struct{}
Expand All @@ -23,112 +21,6 @@ func (RedshiftDialect) EscapeStruct(value string) string {
return fmt.Sprintf("JSON_PARSE(%s)", sql.QuoteLiteral(value))
}

// DataTypeForKind returns the Redshift column type used to store a value of the given kind.
// The boolean argument is ignored by this dialect.
// Kinds without an explicit mapping below (including time kinds whose sub-type is not
// timestamp-tz, date or time) fall back to returning kd.Kind as-is.
func (RedshiftDialect) DataTypeForKind(kd typing.KindDetails, _ bool) string {
	switch kd.Kind {
	case typing.Integer.Kind:
		// Only the explicitly narrower widths get a smaller column. Everything else
		// (unset, not specified, big integer, unknown) uses INT8 to avoid integer overflow.
		if width := kd.OptionalIntegerKind; width != nil {
			switch *width {
			case typing.SmallIntegerKind:
				return "INT2"
			case typing.IntegerKind:
				return "INT4"
			}
		}

		return "INT8"
	case typing.Struct.Kind:
		return "SUPER"
	case typing.Array.Kind:
		// Arrays are stored as a JSON string in a VARCHAR(MAX) column, so consumers
		// need to extract the data out via SQL.
		// Rationale: https://github.com/artie-labs/transfer/pull/173
		return "VARCHAR(MAX)"
	case typing.String.Kind:
		// Without a known precision we use the widest VARCHAR.
		if precision := kd.OptionalStringPrecision; precision != nil {
			return fmt.Sprintf("VARCHAR(%d)", *precision)
		}

		return "VARCHAR(MAX)"
	case typing.Boolean.Kind:
		// The trailing `NULL` lets Redshift know that NULL is acceptable for this column.
		return "BOOLEAN NULL"
	case typing.ETime.Kind:
		timeType := kd.ExtendedTimeDetails.Type
		switch timeType {
		case ext.TimestampTzKindType:
			return "timestamp with time zone"
		case ext.DateKindType:
			return "date"
		case ext.TimeKindType:
			return "time"
		}
	case typing.EDecimal.Kind:
		return kd.ExtendedDecimalDetails.RedshiftKind()
	}

	return kd.Kind
}

// KindForDataType maps a Redshift column type to its typing.KindDetails.
// rawType is matched case-insensitively. stringPrecision is only consulted for
// "character varying" columns, where it must parse as a base-10 integer that fits in 32 bits.
// An error is returned when the numeric definition or the string precision cannot be
// parsed, or when the data type is not one of the supported types.
func (RedshiftDialect) KindForDataType(rawType string, stringPrecision string) (typing.KindDetails, error) {
	lowered := strings.ToLower(rawType)

	// Parameterized types are recognized by pattern rather than by exact name.
	switch {
	case strings.HasPrefix(lowered, "numeric"):
		_, params, err := sql.ParseDataTypeDefinition(lowered)
		if err != nil {
			return typing.Invalid, err
		}

		return typing.ParseNumeric(params)
	case strings.Contains(lowered, "character varying"):
		parsed, err := strconv.ParseInt(stringPrecision, 10, 32)
		if err != nil {
			return typing.Invalid, fmt.Errorf("failed to parse string precision: %q, err: %w", stringPrecision, err)
		}

		return typing.KindDetails{Kind: typing.String.Kind, OptionalStringPrecision: typing.ToPtr(int32(parsed))}, nil
	}

	// Everything else is matched on the exact (lower-cased) type name.
	switch lowered {
	case "super":
		return typing.Struct, nil
	case "smallint":
		return typing.KindDetails{Kind: typing.Integer.Kind, OptionalIntegerKind: typing.ToPtr(typing.SmallIntegerKind)}, nil
	case "integer":
		return typing.KindDetails{Kind: typing.Integer.Kind, OptionalIntegerKind: typing.ToPtr(typing.IntegerKind)}, nil
	case "bigint":
		return typing.KindDetails{Kind: typing.Integer.Kind, OptionalIntegerKind: typing.ToPtr(typing.BigIntegerKind)}, nil
	case "double precision":
		return typing.Float, nil
	case "timestamp with time zone", "timestamp without time zone":
		// Both timestamp flavours are mapped to the timestamp-tz kind.
		return typing.NewKindDetailsFromTemplate(typing.ETime, ext.TimestampTzKindType), nil
	case "time without time zone":
		return typing.NewKindDetailsFromTemplate(typing.ETime, ext.TimeKindType), nil
	case "date":
		return typing.NewKindDetailsFromTemplate(typing.ETime, ext.DateKindType), nil
	case "boolean":
		return typing.Boolean, nil
	}

	return typing.Invalid, fmt.Errorf("unsupported data type: %q", lowered)
}

func (RedshiftDialect) IsColumnAlreadyExistsErr(err error) bool {
// Redshift's error: ERROR: column "foo" of relation "statement" already exists
return strings.Contains(err.Error(), "already exists")
Expand Down
140 changes: 0 additions & 140 deletions clients/redshift/dialect/dialect_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,6 @@ import (
"github.com/artie-labs/transfer/lib/mocks"
"github.com/artie-labs/transfer/lib/typing"
"github.com/artie-labs/transfer/lib/typing/columns"
"github.com/artie-labs/transfer/lib/typing/ext"
)

func TestRedshiftDialect_QuoteIdentifier(t *testing.T) {
Expand All @@ -19,145 +18,6 @@ func TestRedshiftDialect_QuoteIdentifier(t *testing.T) {
assert.Equal(t, `"foo"`, dialect.QuoteIdentifier("FOO"))
}

func TestRedshiftDialect_DataTypeForKind(t *testing.T) {
{
// String
{
assert.Equal(t, "VARCHAR(MAX)", RedshiftDialect{}.DataTypeForKind(typing.String, true))
}
{
assert.Equal(t, "VARCHAR(12345)", RedshiftDialect{}.DataTypeForKind(typing.KindDetails{Kind: typing.String.Kind, OptionalStringPrecision: typing.ToPtr(int32(12345))}, false))
}
}
{
// Integers
{
// Small int
assert.Equal(t, "INT2", RedshiftDialect{}.DataTypeForKind(typing.KindDetails{Kind: typing.Integer.Kind, OptionalIntegerKind: typing.ToPtr(typing.SmallIntegerKind)}, false))
}
{
// Integer
assert.Equal(t, "INT4", RedshiftDialect{}.DataTypeForKind(typing.KindDetails{Kind: typing.Integer.Kind, OptionalIntegerKind: typing.ToPtr(typing.IntegerKind)}, false))
}
{
// Big integer
assert.Equal(t, "INT8", RedshiftDialect{}.DataTypeForKind(typing.KindDetails{Kind: typing.Integer.Kind, OptionalIntegerKind: typing.ToPtr(typing.BigIntegerKind)}, false))
}
{
// Not specified
{
// Literal
assert.Equal(t, "INT8", RedshiftDialect{}.DataTypeForKind(typing.KindDetails{Kind: typing.Integer.Kind, OptionalIntegerKind: typing.ToPtr(typing.NotSpecifiedKind)}, false))
}
{
assert.Equal(t, "INT8", RedshiftDialect{}.DataTypeForKind(typing.Integer, false))
}
}
}
}

// TestRedshiftDialect_KindForDataType checks the kind details resolved from Redshift
// column types: integers, doubles, numerics, booleans, strings with precision and times.
func TestRedshiftDialect_KindForDataType(t *testing.T) {
	dialect := RedshiftDialect{}

	// Types whose resulting KindDetails can be compared as a whole.
	wholeValueCases := []struct {
		rawType         string
		stringPrecision string
		expected        typing.KindDetails
	}{
		// Integers (the regular integer is checked in both upper and lower case).
		{rawType: "smallint", expected: typing.KindDetails{Kind: typing.Integer.Kind, OptionalIntegerKind: typing.ToPtr(typing.SmallIntegerKind)}},
		{rawType: "INTEGER", expected: typing.KindDetails{Kind: typing.Integer.Kind, OptionalIntegerKind: typing.ToPtr(typing.IntegerKind)}},
		{rawType: "integer", expected: typing.KindDetails{Kind: typing.Integer.Kind, OptionalIntegerKind: typing.ToPtr(typing.IntegerKind)}},
		{rawType: "bigint", expected: typing.KindDetails{Kind: typing.Integer.Kind, OptionalIntegerKind: typing.ToPtr(typing.BigIntegerKind)}},
		// Double
		{rawType: "double precision", expected: typing.Float},
		{rawType: "DOUBLE precision", expected: typing.Float},
		// Boolean
		{rawType: "boolean", expected: typing.Boolean},
		// String with precision
		{rawType: "character varying", stringPrecision: "65535", expected: typing.KindDetails{Kind: typing.String.Kind, OptionalStringPrecision: typing.ToPtr(int32(65535))}},
	}

	for _, tc := range wholeValueCases {
		kd, err := dialect.KindForDataType(tc.rawType, tc.stringPrecision)
		assert.NoError(t, err, tc.rawType)
		assert.Equal(t, tc.expected, kd, tc.rawType)
	}

	// Numeric: verify the kind along with the parsed precision and scale.
	numericCases := []struct {
		rawType   string
		precision int32
		scale     int32
	}{
		{rawType: "numeric(5,2)", precision: 5, scale: 2},
		{rawType: "numeric(5,5)", precision: 5, scale: 5},
	}

	for _, tc := range numericCases {
		kd, err := dialect.KindForDataType(tc.rawType, "")
		assert.NoError(t, err, tc.rawType)
		assert.Equal(t, typing.EDecimal.Kind, kd.Kind, tc.rawType)
		assert.Equal(t, tc.precision, kd.ExtendedDecimalDetails.Precision(), tc.rawType)
		assert.Equal(t, tc.scale, kd.ExtendedDecimalDetails.Scale(), tc.rawType)
	}

	// Times: verify the kind along with the extended time type.
	assertTimeType := func(rawType string, expectedType interface{}) {
		kd, err := dialect.KindForDataType(rawType, "")
		assert.NoError(t, err, rawType)
		assert.Equal(t, typing.ETime.Kind, kd.Kind, rawType)
		assert.Equal(t, expectedType, kd.ExtendedTimeDetails.Type, rawType)
	}

	assertTimeType("timestamp with time zone", ext.TimestampTzKindType)
	assertTimeType("timestamp without time zone", ext.TimestampTzKindType)
	assertTimeType("time without time zone", ext.TimeKindType)
	assertTimeType("date", ext.DateKindType)
}

func TestRedshiftDialect_IsColumnAlreadyExistsErr(t *testing.T) {
{
// Irrelevant error
Expand Down
117 changes: 117 additions & 0 deletions clients/redshift/dialect/typing.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,117 @@
package dialect

import (
"fmt"
"strconv"
"strings"

"github.com/artie-labs/transfer/lib/sql"
"github.com/artie-labs/transfer/lib/typing"
"github.com/artie-labs/transfer/lib/typing/ext"
)

// DataTypeForKind returns the Redshift column type used to store a value of the given kind.
// The boolean argument is ignored by this dialect.
// Kinds without an explicit mapping below (including time kinds whose sub-type is not
// timestamp-tz, date or time) fall back to returning kd.Kind as-is.
func (RedshiftDialect) DataTypeForKind(kd typing.KindDetails, _ bool) string {
	switch kd.Kind {
	case typing.Integer.Kind:
		// Only the explicitly narrower widths get a smaller column. Everything else
		// (unset, not specified, big integer, unknown) uses INT8 to avoid integer overflow.
		if width := kd.OptionalIntegerKind; width != nil {
			switch *width {
			case typing.SmallIntegerKind:
				return "INT2"
			case typing.IntegerKind:
				return "INT4"
			}
		}

		return "INT8"
	case typing.Struct.Kind:
		return "SUPER"
	case typing.Array.Kind:
		// Arrays are stored as a JSON string in a VARCHAR(MAX) column, so consumers
		// need to extract the data out via SQL.
		// Rationale: https://github.com/artie-labs/transfer/pull/173
		return "VARCHAR(MAX)"
	case typing.String.Kind:
		// Without a known precision we use the widest VARCHAR.
		if precision := kd.OptionalStringPrecision; precision != nil {
			return fmt.Sprintf("VARCHAR(%d)", *precision)
		}

		return "VARCHAR(MAX)"
	case typing.Boolean.Kind:
		// The trailing `NULL` lets Redshift know that NULL is acceptable for this column.
		return "BOOLEAN NULL"
	case typing.ETime.Kind:
		timeType := kd.ExtendedTimeDetails.Type
		switch timeType {
		case ext.TimestampTzKindType:
			return "timestamp with time zone"
		case ext.DateKindType:
			return "date"
		case ext.TimeKindType:
			return "time"
		}
	case typing.EDecimal.Kind:
		return kd.ExtendedDecimalDetails.RedshiftKind()
	}

	return kd.Kind
}

// KindForDataType maps a Redshift column type to its typing.KindDetails.
// rawType is matched case-insensitively. stringPrecision is only consulted for
// "character varying" columns, where it must parse as a base-10 integer that fits in 32 bits.
// An error is returned when the numeric definition or the string precision cannot be
// parsed, or when the data type is not one of the supported types.
func (RedshiftDialect) KindForDataType(rawType string, stringPrecision string) (typing.KindDetails, error) {
	lowered := strings.ToLower(rawType)

	// Parameterized types are recognized by pattern rather than by exact name.
	switch {
	case strings.HasPrefix(lowered, "numeric"):
		_, params, err := sql.ParseDataTypeDefinition(lowered)
		if err != nil {
			return typing.Invalid, err
		}

		return typing.ParseNumeric(params)
	case strings.Contains(lowered, "character varying"):
		parsed, err := strconv.ParseInt(stringPrecision, 10, 32)
		if err != nil {
			return typing.Invalid, fmt.Errorf("failed to parse string precision: %q, err: %w", stringPrecision, err)
		}

		return typing.KindDetails{Kind: typing.String.Kind, OptionalStringPrecision: typing.ToPtr(int32(parsed))}, nil
	}

	// Everything else is matched on the exact (lower-cased) type name.
	switch lowered {
	case "super":
		return typing.Struct, nil
	case "smallint":
		return typing.KindDetails{Kind: typing.Integer.Kind, OptionalIntegerKind: typing.ToPtr(typing.SmallIntegerKind)}, nil
	case "integer":
		return typing.KindDetails{Kind: typing.Integer.Kind, OptionalIntegerKind: typing.ToPtr(typing.IntegerKind)}, nil
	case "bigint":
		return typing.KindDetails{Kind: typing.Integer.Kind, OptionalIntegerKind: typing.ToPtr(typing.BigIntegerKind)}, nil
	case "double precision":
		return typing.Float, nil
	case "timestamp with time zone", "timestamp without time zone":
		// Both timestamp flavours are mapped to the timestamp-tz kind.
		return typing.NewKindDetailsFromTemplate(typing.ETime, ext.TimestampTzKindType), nil
	case "time without time zone":
		return typing.NewKindDetailsFromTemplate(typing.ETime, ext.TimeKindType), nil
	case "date":
		return typing.NewKindDetailsFromTemplate(typing.ETime, ext.DateKindType), nil
	case "boolean":
		return typing.Boolean, nil
	}

	return typing.Invalid, fmt.Errorf("unsupported data type: %q", lowered)
}
Loading

0 comments on commit b7da0d7

Please sign in to comment.