diff --git a/clients/databricks/dialect/dialect.go b/clients/databricks/dialect/dialect.go
index fcddec707..d189bedc3 100644
--- a/clients/databricks/dialect/dialect.go
+++ b/clients/databricks/dialect/dialect.go
@@ -2,10 +2,6 @@ package dialect
 
 import (
 	"fmt"
-
-	"github.com/artie-labs/transfer/lib/typing/ext"
-
-	"github.com/artie-labs/transfer/lib/typing"
 )
 
 type DatabricksDialect struct{}
@@ -17,35 +13,3 @@ func (DatabricksDialect) QuoteIdentifier(identifier string) string {
 func (DatabricksDialect) EscapeStruct(value string) string {
 	panic("not implemented")
 }
-
-func (DatabricksDialect) DataTypeForKind(kindDetails typing.KindDetails, isPk bool) string {
-	switch kindDetails.Kind {
-	case typing.Float.Kind:
-		return "DOUBLE"
-	case typing.Integer.Kind:
-		return "INT"
-	case typing.Struct.Kind:
-		return "VARIANT"
-	case typing.Array.Kind:
-		// Databricks requires arrays to be typed. As such, we're going to use an array of strings.
-		return "ARRAY"
-	case typing.String.Kind:
-		return "STRING"
-	case typing.Boolean.Kind:
-		return "BOOLEAN"
-	case typing.ETime.Kind:
-		switch kindDetails.ExtendedTimeDetails.Type {
-		case ext.TimestampTzKindType:
-			// Using datetime2 because it's the recommendation, and it provides more precision: https://stackoverflow.com/a/1884088
-			return "TIMESTAMP"
-		case ext.DateKindType:
-			return "DATE"
-		case ext.TimeKindType:
-			return "STRING"
-		}
-	case typing.EDecimal.Kind:
-		return kindDetails.ExtendedDecimalDetails.DatabricksKind()
-	}
-
-	return kindDetails.Kind
-}
diff --git a/clients/databricks/dialect/typing.go b/clients/databricks/dialect/typing.go
new file mode 100644
index 000000000..dda7fd954
--- /dev/null
+++ b/clients/databricks/dialect/typing.go
@@ -0,0 +1,87 @@
+package dialect
+
+import (
+	"fmt"
+	"strings"
+
+	"github.com/artie-labs/transfer/lib/sql"
+	"github.com/artie-labs/transfer/lib/typing"
+	"github.com/artie-labs/transfer/lib/typing/ext"
+)
+
+// DataTypeForKind returns the Databricks column type used to store a value of the given kind.
+// Kinds without an explicit mapping (including an ETime whose type is not handled below) fall
+// back to the raw kind string. isPk is not consulted.
+func (DatabricksDialect) DataTypeForKind(kindDetails typing.KindDetails, isPk bool) string {
+	switch kindDetails.Kind {
+	case typing.Float.Kind:
+		return "DOUBLE"
+	case typing.Integer.Kind:
+		return "BIGINT"
+	case typing.Struct.Kind:
+		return "VARIANT"
+	case typing.Array.Kind:
+		// Databricks requires arrays to be typed. As such, we're going to use an array of strings.
+		return "ARRAY<STRING>"
+	case typing.String.Kind:
+		return "STRING"
+	case typing.Boolean.Kind:
+		return "BOOLEAN"
+	case typing.ETime.Kind:
+		switch kindDetails.ExtendedTimeDetails.Type {
+		case ext.TimestampTzKindType:
+			// Time-zone-aware timestamps are stored in a TIMESTAMP column.
+			return "TIMESTAMP"
+		case ext.DateKindType:
+			return "DATE"
+		case ext.TimeKindType:
+			return "STRING"
+		}
+	case typing.EDecimal.Kind:
+		return kindDetails.ExtendedDecimalDetails.DatabricksKind()
+	}
+
+	return kindDetails.Kind
+}
+
+// KindForDataType maps a Databricks column type (matched case-insensitively) to its KindDetails.
+// The second argument is ignored. It returns typing.Invalid and an error for unsupported types,
+// or when a decimal definition cannot be parsed.
+func (DatabricksDialect) KindForDataType(rawType string, _ string) (typing.KindDetails, error) {
+	rawType = strings.ToLower(rawType)
+	if strings.HasPrefix(rawType, "decimal") {
+		_, parameters, err := sql.ParseDataTypeDefinition(rawType)
+		if err != nil {
+			return typing.Invalid, err
+		}
+		return typing.ParseNumeric(parameters), nil
+	}
+
+	if strings.HasPrefix(rawType, "array") {
+		return typing.Array, nil
+	}
+
+	switch rawType {
+	case "string", "binary":
+		return typing.String, nil
+	case "bigint":
+		return typing.KindDetails{Kind: typing.Integer.Kind, OptionalIntegerKind: typing.ToPtr(typing.BigIntegerKind)}, nil
+	case "boolean":
+		return typing.Boolean, nil
+	case "date":
+		return typing.NewKindDetailsFromTemplate(typing.ETime, ext.DateKindType), nil
+	case "double", "float":
+		return typing.Float, nil
+	case "int":
+		return typing.KindDetails{Kind: typing.Integer.Kind, OptionalIntegerKind: typing.ToPtr(typing.IntegerKind)}, nil
+	case "smallint", "tinyint":
+		return typing.KindDetails{Kind: typing.Integer.Kind, OptionalIntegerKind: typing.ToPtr(typing.SmallIntegerKind)}, nil
+	case "timestamp", "timestamp_ntz":
+		// NOTE(review): timestamp_ntz is mapped to the time-zone-aware kind as well — confirm this is intended.
+		return typing.NewKindDetailsFromTemplate(typing.ETime, ext.TimestampTzKindType), nil
+	case "variant", "object":
+		return typing.Struct, nil
+	}
+
+	return typing.Invalid, fmt.Errorf("unsupported data type: %q", rawType)
+}