diff --git a/clients/databricks/dialect/dialect.go b/clients/databricks/dialect/dialect.go
new file mode 100644
index 000000000..fcddec707
--- /dev/null
+++ b/clients/databricks/dialect/dialect.go
@@ -0,0 +1,64 @@
+// Package dialect implements the SQL dialect used when writing to Databricks.
+package dialect
+
+import (
+	"fmt"
+	"strings"
+
+	"github.com/artie-labs/transfer/lib/typing"
+	"github.com/artie-labs/transfer/lib/typing/ext"
+)
+
+// DatabricksDialect groups the Databricks-specific SQL rendering rules.
+type DatabricksDialect struct{}
+
+// QuoteIdentifier wraps identifier in backticks. A backtick inside the name is
+// doubled, which is how Databricks escapes it within a delimited identifier, so
+// the name can never terminate the quoting early.
+func (DatabricksDialect) QuoteIdentifier(identifier string) string {
+	return fmt.Sprintf("`%s`", strings.ReplaceAll(identifier, "`", "``"))
+}
+
+// EscapeStruct is not implemented for Databricks yet; calling it panics.
+func (DatabricksDialect) EscapeStruct(value string) string {
+	panic("not implemented")
+}
+
+// DataTypeForKind returns the Databricks column type for kindDetails.
+//
+// isPk is currently ignored. Any kind without an explicit mapping, including an
+// extended time type other than the three handled below, is returned as
+// kindDetails.Kind unchanged.
+func (DatabricksDialect) DataTypeForKind(kindDetails typing.KindDetails, isPk bool) string {
+	switch kindDetails.Kind {
+	case typing.Float.Kind:
+		return "DOUBLE"
+	case typing.Integer.Kind:
+		// NOTE(review): INT is a 4-byte integer in Databricks; confirm BIGINT is not needed.
+		return "INT"
+	case typing.Struct.Kind:
+		return "VARIANT"
+	case typing.Array.Kind:
+		// Databricks requires arrays to be typed. As such, we're going to use an array of strings.
+		return "ARRAY<STRING>"
+	case typing.String.Kind:
+		return "STRING"
+	case typing.Boolean.Kind:
+		return "BOOLEAN"
+	case typing.ETime.Kind:
+		switch kindDetails.ExtendedTimeDetails.Type {
+		case ext.TimestampTzKindType:
+			// TIMESTAMP is the timezone-aware type; TIMESTAMP_NTZ is the variant without a timezone.
+			return "TIMESTAMP"
+		case ext.DateKindType:
+			return "DATE"
+		case ext.TimeKindType:
+			// Time-of-day values are stored as strings.
+			return "STRING"
+		}
+	case typing.EDecimal.Kind:
+		return kindDetails.ExtendedDecimalDetails.DatabricksKind()
+	}
+
+	return kindDetails.Kind
+}
diff --git a/lib/typing/decimal/details.go b/lib/typing/decimal/details.go
index f48ee485b..e6b7b8779 100644
--- a/lib/typing/decimal/details.go
+++ b/lib/typing/decimal/details.go
@@ -52,6 +52,12 @@ func (d Details) SnowflakeKind() string {
 	return d.toKind(MaxPrecisionBeforeString, "STRING")
 }
 
+// DatabricksKind reports whether this decimal should be stored as a STRING or as NUMERIC(p, s), delegating to toKind with MaxPrecisionBeforeString as the limit.
+// Ref: https://docs.databricks.com/en/sql/language-manual/data-types/decimal-type.html
+func (d Details) DatabricksKind() string {
+	return d.toKind(MaxPrecisionBeforeString, "STRING")
+}
+
 // MsSQLKind - Has the same limitation as Redshift
 // Spec: https://learn.microsoft.com/en-us/sql/t-sql/data-types/decimal-and-numeric-transact-sql?view=sql-server-ver16#arguments
 func (d Details) MsSQLKind() string {