diff --git a/clients/bigquery/dialect/dialect.go b/clients/bigquery/dialect/dialect.go index da3506bc3..3692759ce 100644 --- a/clients/bigquery/dialect/dialect.go +++ b/clients/bigquery/dialect/dialect.go @@ -47,6 +47,8 @@ func (BigQueryDialect) DataTypeForKind(kindDetails typing.KindDetails, _ bool) s case typing.Struct.Kind: // Struct is a tighter version of JSON that requires type casting like Struct return "json" + case typing.Date.Kind: + return "date" case typing.ETime.Kind: switch kindDetails.ExtendedTimeDetails.Type { case ext.TimestampTZKindType: @@ -55,8 +57,6 @@ func (BigQueryDialect) DataTypeForKind(kindDetails typing.KindDetails, _ bool) s return "timestamp" case ext.TimestampNTZKindType: return "datetime" - case ext.DateKindType: - return "date" case ext.TimeKindType: return "time" } @@ -112,7 +112,7 @@ func (BigQueryDialect) KindForDataType(rawBqType string, _ string) (typing.KindD case "time": return typing.NewExtendedTimeDetails(typing.ETime, ext.TimeKindType, "") case "date": - return typing.NewExtendedTimeDetails(typing.ETime, ext.DateKindType, "") + return typing.Date, nil default: return typing.Invalid, nil } diff --git a/clients/bigquery/dialect/dialect_test.go b/clients/bigquery/dialect/dialect_test.go index 1ab1a42e9..660d952cb 100644 --- a/clients/bigquery/dialect/dialect_test.go +++ b/clients/bigquery/dialect/dialect_test.go @@ -95,7 +95,7 @@ func TestBigQueryDialect_KindForDataType(t *testing.T) { "datetime": typing.MustNewExtendedTimeDetails(typing.ETime, ext.TimestampTZKindType, ""), "timestamp": typing.MustNewExtendedTimeDetails(typing.ETime, ext.TimestampTZKindType, ""), "time": typing.MustNewExtendedTimeDetails(typing.ETime, ext.TimeKindType, ""), - "date": typing.MustNewExtendedTimeDetails(typing.ETime, ext.DateKindType, ""), + "date": typing.Date, //Invalid "foo": typing.Invalid, "foofoo": typing.Invalid, @@ -132,7 +132,7 @@ func TestBigQueryDialect_KindForDataType_NoDataLoss(t *testing.T) { kindDetails := []typing.KindDetails{ 
typing.MustNewExtendedTimeDetails(typing.ETime, ext.TimestampTZKindType, ""), typing.MustNewExtendedTimeDetails(typing.ETime, ext.TimeKindType, ""), - typing.MustNewExtendedTimeDetails(typing.ETime, ext.DateKindType, ""), + typing.Date, typing.String, typing.Boolean, typing.Struct, diff --git a/clients/bigquery/storagewrite.go b/clients/bigquery/storagewrite.go index 44a72e3f3..386755894 100644 --- a/clients/bigquery/storagewrite.go +++ b/clients/bigquery/storagewrite.go @@ -38,12 +38,12 @@ func columnToTableFieldSchema(column columns.Column) (*storagepb.TableFieldSchem fieldType = storagepb.TableFieldSchema_STRING case typing.String.Kind: fieldType = storagepb.TableFieldSchema_STRING + case typing.Date.Kind: + fieldType = storagepb.TableFieldSchema_DATE case typing.ETime.Kind: switch column.KindDetails.ExtendedTimeDetails.Type { case ext.TimeKindType: fieldType = storagepb.TableFieldSchema_TIME - case ext.DateKindType: - fieldType = storagepb.TableFieldSchema_DATE case ext.TimestampTZKindType: fieldType = storagepb.TableFieldSchema_TIMESTAMP case ext.TimestampNTZKindType: @@ -201,6 +201,14 @@ func rowToMessage(row map[string]any, columns []columns.Column, messageDescripto } message.Set(field, protoreflect.ValueOfString(castedValue)) + case typing.Date.Kind: + _time, err := ext.ParseDateFromInterface(value) + if err != nil { + return nil, fmt.Errorf("failed to cast value as time.Time, value: '%v', err: %w", value, err) + } + + daysSinceEpoch := _time.Unix() / (60 * 60 * 24) + message.Set(field, protoreflect.ValueOfInt32(int32(daysSinceEpoch))) case typing.ETime.Kind: if err := column.KindDetails.EnsureExtendedTimeDetails(); err != nil { return nil, err @@ -214,9 +222,6 @@ func rowToMessage(row map[string]any, columns []columns.Column, messageDescripto switch column.KindDetails.ExtendedTimeDetails.Type { case ext.TimeKindType: message.Set(field, protoreflect.ValueOfInt64(encodePacked64TimeMicros(_time))) - case ext.DateKindType: - daysSinceEpoch := _time.Unix() / 
(60 * 60 * 24) - message.Set(field, protoreflect.ValueOfInt32(int32(daysSinceEpoch))) case ext.TimestampTZKindType: if err = timestamppb.New(_time).CheckValid(); err != nil { return nil, err diff --git a/clients/bigquery/storagewrite_test.go b/clients/bigquery/storagewrite_test.go index 3d5f45b80..1ec789b9e 100644 --- a/clients/bigquery/storagewrite_test.go +++ b/clients/bigquery/storagewrite_test.go @@ -49,8 +49,8 @@ func TestColumnToTableFieldSchema(t *testing.T) { assert.Equal(t, storagepb.TableFieldSchema_TIME, fieldSchema.Type) } { - // ETime - Date: - fieldSchema, err := columnToTableFieldSchema(columns.NewColumn("foo", typing.MustNewExtendedTimeDetails(typing.ETime, ext.DateKindType, ""))) + // Date + fieldSchema, err := columnToTableFieldSchema(columns.NewColumn("foo", typing.Date)) assert.NoError(t, err) assert.Equal(t, storagepb.TableFieldSchema_DATE, fieldSchema.Type) } @@ -168,7 +168,7 @@ func TestRowToMessage(t *testing.T) { columns.NewColumn("c_string", typing.String), columns.NewColumn("c_string_decimal", typing.String), columns.NewColumn("c_time", typing.MustNewExtendedTimeDetails(typing.ETime, ext.TimeKindType, "")), - columns.NewColumn("c_date", typing.MustNewExtendedTimeDetails(typing.ETime, ext.DateKindType, "")), + columns.NewColumn("c_date", typing.Date), columns.NewColumn("c_datetime", typing.MustNewExtendedTimeDetails(typing.ETime, ext.TimestampTZKindType, "")), columns.NewColumn("c_struct", typing.Struct), columns.NewColumn("c_array", typing.Array), @@ -188,7 +188,7 @@ func TestRowToMessage(t *testing.T) { "c_string": "foo bar", "c_string_decimal": decimal.NewDecimal(numbers.MustParseDecimal("1.61803")), "c_time": ext.NewExtendedTime(time.Date(0, 0, 0, 4, 5, 6, 7, time.UTC), ext.TimeKindType, ""), - "c_date": ext.NewExtendedTime(time.Date(2001, 2, 3, 0, 0, 0, 0, time.UTC), ext.DateKindType, ""), + "c_date": time.Date(2001, 2, 3, 0, 0, 0, 0, time.UTC), "c_datetime": ext.NewExtendedTime(time.Date(2001, 2, 3, 4, 5, 6, 7, time.UTC), 
ext.TimestampTZKindType, ""), "c_struct": map[string]any{"baz": []string{"foo", "bar"}}, "c_array": []string{"foo", "bar"}, diff --git a/clients/databricks/dialect/typing.go b/clients/databricks/dialect/typing.go index ebf2c51b8..65f0c989d 100644 --- a/clients/databricks/dialect/typing.go +++ b/clients/databricks/dialect/typing.go @@ -24,6 +24,8 @@ func (DatabricksDialect) DataTypeForKind(kindDetails typing.KindDetails, _ bool) return "STRING" case typing.Boolean.Kind: return "BOOLEAN" + case typing.Date.Kind: + return "DATE" case typing.ETime.Kind: switch kindDetails.ExtendedTimeDetails.Type { case ext.TimestampTZKindType: @@ -32,8 +34,6 @@ func (DatabricksDialect) DataTypeForKind(kindDetails typing.KindDetails, _ bool) // This is currently in public preview, to use this, the customer will need to enable [timestampNtz] in their delta tables. // Ref: https://docs.databricks.com/en/sql/language-manual/data-types/timestamp-ntz-type.html return "TIMESTAMP_NTZ" - case ext.DateKindType: - return "DATE" case ext.TimeKindType: return "STRING" } @@ -66,7 +66,7 @@ func (DatabricksDialect) KindForDataType(rawType string, _ string) (typing.KindD case "boolean": return typing.Boolean, nil case "date": - return typing.NewExtendedTimeDetails(typing.ETime, ext.DateKindType, "") + return typing.Date, nil case "double", "float": return typing.Float, nil case "int": diff --git a/clients/databricks/dialect/typing_test.go b/clients/databricks/dialect/typing_test.go index 3ec84773a..5b964f253 100644 --- a/clients/databricks/dialect/typing_test.go +++ b/clients/databricks/dialect/typing_test.go @@ -38,7 +38,7 @@ func TestDatabricksDialect_DataTypeForKind(t *testing.T) { // Times { // Date - assert.Equal(t, "DATE", DatabricksDialect{}.DataTypeForKind(typing.KindDetails{Kind: typing.ETime.Kind, ExtendedTimeDetails: &ext.NestedKind{Type: ext.DateKindType}}, false)) + assert.Equal(t, "DATE", DatabricksDialect{}.DataTypeForKind(typing.Date, false)) } { // Timestamp @@ -115,7 +115,7 @@ func 
TestDatabricksDialect_KindForDataType(t *testing.T) { // Date kd, err := DatabricksDialect{}.KindForDataType("DATE", "") assert.NoError(t, err) - assert.Equal(t, typing.MustNewExtendedTimeDetails(typing.ETime, ext.DateKindType, ""), kd) + assert.Equal(t, typing.Date, kd) } { // Double diff --git a/clients/mssql/dialect/dialect.go b/clients/mssql/dialect/dialect.go index ba5e3a5d7..8a3cafe26 100644 --- a/clients/mssql/dialect/dialect.go +++ b/clients/mssql/dialect/dialect.go @@ -53,6 +53,8 @@ func (MSSQLDialect) DataTypeForKind(kindDetails typing.KindDetails, isPk bool) s return "VARCHAR(MAX)" case typing.Boolean.Kind: return "BIT" + case typing.Date.Kind: + return "DATE" case typing.ETime.Kind: switch kindDetails.ExtendedTimeDetails.Type { case ext.TimestampTZKindType: @@ -60,8 +62,6 @@ func (MSSQLDialect) DataTypeForKind(kindDetails typing.KindDetails, isPk bool) s case ext.TimestampNTZKindType: // Using datetime2 because it's the recommendation, and it provides more precision: https://stackoverflow.com/a/1884088 return "datetime2" - case ext.DateKindType: - return "date" case ext.TimeKindType: return "time" } @@ -122,7 +122,7 @@ func (MSSQLDialect) KindForDataType(rawType string, stringPrecision string) (typ case "time": return typing.NewExtendedTimeDetails(typing.ETime, ext.TimeKindType, "") case "date": - return typing.NewExtendedTimeDetails(typing.ETime, ext.DateKindType, "") + return typing.Date, nil case "bit": return typing.Boolean, nil case "text": diff --git a/clients/mssql/dialect/dialect_test.go b/clients/mssql/dialect/dialect_test.go index a1c522004..bcb0dd1d8 100644 --- a/clients/mssql/dialect/dialect_test.go +++ b/clients/mssql/dialect/dialect_test.go @@ -62,7 +62,7 @@ func TestMSSQLDialect_KindForDataType(t *testing.T) { "float": typing.Float, "real": typing.Float, "bit": typing.Boolean, - "date": typing.MustNewExtendedTimeDetails(typing.ETime, ext.DateKindType, ""), + "date": typing.Date, "time": typing.MustNewExtendedTimeDetails(typing.ETime, 
ext.TimeKindType, ""), "datetime": typing.MustNewExtendedTimeDetails(typing.ETime, ext.TimestampTZKindType, ""), "datetime2": typing.MustNewExtendedTimeDetails(typing.ETime, ext.TimestampTZKindType, ""), diff --git a/clients/redshift/dialect/typing.go b/clients/redshift/dialect/typing.go index 7039c4357..be7a7dbba 100644 --- a/clients/redshift/dialect/typing.go +++ b/clients/redshift/dialect/typing.go @@ -45,14 +45,14 @@ func (RedshiftDialect) DataTypeForKind(kd typing.KindDetails, _ bool) string { case typing.Boolean.Kind: // We need to append `NULL` to let Redshift know that NULL is an acceptable data type. return "BOOLEAN NULL" + case typing.Date.Kind: + return "DATE" case typing.ETime.Kind: switch kd.ExtendedTimeDetails.Type { case ext.TimestampTZKindType: return "timestamp with time zone" case ext.TimestampNTZKindType: return "timestamp without time zone" - case ext.DateKindType: - return "date" case ext.TimeKindType: return "time" } @@ -112,7 +112,7 @@ func (RedshiftDialect) KindForDataType(rawType string, stringPrecision string) ( case "time without time zone": return typing.NewExtendedTimeDetails(typing.ETime, ext.TimeKindType, "") case "date": - return typing.NewExtendedTimeDetails(typing.ETime, ext.DateKindType, "") + return typing.Date, nil case "boolean": return typing.Boolean, nil } diff --git a/clients/redshift/dialect/typing_test.go b/clients/redshift/dialect/typing_test.go index eaa3d3fb0..9a883aaac 100644 --- a/clients/redshift/dialect/typing_test.go +++ b/clients/redshift/dialect/typing_test.go @@ -152,8 +152,7 @@ func TestRedshiftDialect_KindForDataType(t *testing.T) { { kd, err := dialect.KindForDataType("date", "") assert.NoError(t, err) - assert.Equal(t, typing.ETime.Kind, kd.Kind) - assert.Equal(t, ext.DateKindType, kd.ExtendedTimeDetails.Type) + assert.Equal(t, typing.Date, kd) } } } diff --git a/clients/shared/default_value_test.go b/clients/shared/default_value_test.go index cdecc0c70..824cf20d3 100644 --- 
a/clients/shared/default_value_test.go +++ b/clients/shared/default_value_test.go @@ -28,12 +28,6 @@ func TestColumn_DefaultValue(t *testing.T) { birthdayDateTime, err := ext.ParseDateTime(birthday.Format(ext.ISO8601), ext.TimestampTZKindType) assert.NoError(t, err) - // date - dateKind := typing.ETime - dateNestedKind, err := ext.NewNestedKind(ext.DateKindType, "") - assert.NoError(t, err) - dateKind.ExtendedTimeDetails = &dateNestedKind - // time timeKind := typing.ETime timeNestedKind, err := ext.NewNestedKind(ext.TimeKindType, "") @@ -85,7 +79,7 @@ func TestColumn_DefaultValue(t *testing.T) { }, { name: "date", - col: columns.NewColumnWithDefaultValue("", dateKind, birthdayDateTime), + col: columns.NewColumnWithDefaultValue("", typing.Date, birthdayDateTime), expectedValue: "'2022-09-06'", }, { diff --git a/clients/snowflake/dialect/dialect.go b/clients/snowflake/dialect/dialect.go index 06f0a98f2..d76b530c0 100644 --- a/clients/snowflake/dialect/dialect.go +++ b/clients/snowflake/dialect/dialect.go @@ -30,14 +30,14 @@ func (SnowflakeDialect) DataTypeForKind(kindDetails typing.KindDetails, _ bool) return "variant" case typing.Boolean.Kind: return "boolean" + case typing.Date.Kind: + return "date" case typing.ETime.Kind: switch kindDetails.ExtendedTimeDetails.Type { case ext.TimestampTZKindType: return "timestamp_tz" case ext.TimestampNTZKindType: return "timestamp_ntz" - case ext.DateKindType: - return "date" case ext.TimeKindType: return "time" } @@ -104,7 +104,7 @@ func (SnowflakeDialect) KindForDataType(snowflakeType string, _ string) (typing. 
case "time": return typing.NewExtendedTimeDetails(typing.ETime, ext.TimeKindType, "") case "date": - return typing.NewExtendedTimeDetails(typing.ETime, ext.DateKindType, "") + return typing.Date, nil default: return typing.Invalid, nil } diff --git a/clients/snowflake/dialect/dialect_test.go b/clients/snowflake/dialect/dialect_test.go index 13e5f52a6..7fe2e77b4 100644 --- a/clients/snowflake/dialect/dialect_test.go +++ b/clients/snowflake/dialect/dialect_test.go @@ -198,7 +198,7 @@ func TestSnowflakeDialect_KindForDataType_NoDataLoss(t *testing.T) { kindDetails := []typing.KindDetails{ typing.MustNewExtendedTimeDetails(typing.ETime, ext.TimestampTZKindType, ""), typing.MustNewExtendedTimeDetails(typing.ETime, ext.TimeKindType, ""), - typing.MustNewExtendedTimeDetails(typing.ETime, ext.DateKindType, ""), + typing.Date, typing.String, typing.Boolean, typing.Struct, diff --git a/lib/debezium/converters/date.go b/lib/debezium/converters/date.go index 1e32dc068..855c7b884 100644 --- a/lib/debezium/converters/date.go +++ b/lib/debezium/converters/date.go @@ -15,7 +15,7 @@ func (Date) layout() string { } func (d Date) ToKindDetails() (typing.KindDetails, error) { - return typing.NewExtendedTimeDetails(typing.ETime, ext.DateKindType, d.layout()) + return typing.Date, nil } func (d Date) Convert(value any) (any, error) { @@ -25,5 +25,5 @@ func (d Date) Convert(value any) (any, error) { } // Represents the number of days since the epoch. 
- return ext.NewExtendedTime(time.UnixMilli(0).In(time.UTC).AddDate(0, 0, int(valueInt64)), ext.DateKindType, d.layout()), nil + return time.UnixMilli(0).In(time.UTC).AddDate(0, 0, int(valueInt64)), nil } diff --git a/lib/optimization/table_data.go b/lib/optimization/table_data.go index ec34e6069..fb36a75ef 100644 --- a/lib/optimization/table_data.go +++ b/lib/optimization/table_data.go @@ -202,7 +202,7 @@ func (t *TableData) DistinctDates(colName string) ([]string, error) { return nil, fmt.Errorf("col: %v does not exist on row: %v", colName, row) } - _time, err := ext.ParseFromInterface(val, ext.DateKindType) + _time, err := ext.ParseDateFromInterface(val) if err != nil { return nil, fmt.Errorf("col: %v is not a time column, value: %v, err: %w", colName, val, err) } diff --git a/lib/parquetutil/parse_values.go b/lib/parquetutil/parse_values.go index 05af0d519..b88e44f60 100644 --- a/lib/parquetutil/parse_values.go +++ b/lib/parquetutil/parse_values.go @@ -20,6 +20,13 @@ func ParseValue(colVal any, colKind columns.Column) (any, error) { } switch colKind.KindDetails.Kind { + case typing.Date.Kind: + _time, err := ext.ParseDateFromInterface(colVal) + if err != nil { + return "", fmt.Errorf("failed to cast colVal as time.Time, colVal: %v, err: %w", colVal, err) + } + + return _time.Format(ext.PostgresDateFormat), nil case typing.ETime.Kind: if err := colKind.KindDetails.EnsureExtendedTimeDetails(); err != nil { return "", err @@ -30,7 +37,7 @@ func ParseValue(colVal any, colKind columns.Column) (any, error) { return "", fmt.Errorf("failed to cast colVal as time.Time, colVal: %v, err: %w", colVal, err) } - if colKind.KindDetails.ExtendedTimeDetails.Type == ext.DateKindType || colKind.KindDetails.ExtendedTimeDetails.Type == ext.TimeKindType { + if colKind.KindDetails.ExtendedTimeDetails.Type == ext.TimeKindType { return _time.Format(colKind.KindDetails.ExtendedTimeDetails.Format), nil } diff --git a/lib/typing/ext/parse.go b/lib/typing/ext/parse.go index 
e851a0888..794cb630d 100644 --- a/lib/typing/ext/parse.go +++ b/lib/typing/ext/parse.go @@ -20,6 +20,19 @@ func ParseTimeExactMatch(layout, value string) (time.Time, error) { return ts, nil } +func ParseDateFromInterface(val any) (time.Time, error) { + switch convertedVal := val.(type) { + case time.Time: + return convertedVal, nil + case *ExtendedTime: + return convertedVal.GetTime(), nil + case string: + return parseDate(convertedVal) + default: + return time.Time{}, fmt.Errorf("unsupported type: %T", convertedVal) + } +} + func ParseFromInterface(val any, kindType ExtendedTimeKindType) (time.Time, error) { switch convertedVal := val.(type) { case nil: @@ -46,16 +59,6 @@ func ParseDateTime(value string, kindType ExtendedTimeKindType) (time.Time, erro return parseTimestampNTZ(value) case TimestampTZKindType: return parseTimestampTZ(value) - case DateKindType: - // Try date first - if ts, err := parseDate(value); err == nil { - return ts, nil - } - - // If that doesn't work, try timestamp - if ts, err := parseTimestampTZ(value); err == nil { - return ts, nil - } case TimeKindType: // Try time first if ts, err := parseTime(value); err == nil { @@ -97,6 +100,11 @@ func parseDate(value string) (time.Time, error) { } } + // If that doesn't work, try timestamp + if ts, err := parseTimestampTZ(value); err == nil { + return ts, nil + } + return time.Time{}, fmt.Errorf("unsupported value: %q", value) } diff --git a/lib/typing/ext/time.go b/lib/typing/ext/time.go index bc10cb02f..5b350b994 100644 --- a/lib/typing/ext/time.go +++ b/lib/typing/ext/time.go @@ -21,8 +21,6 @@ func (e ExtendedTimeKindType) defaultLayout() (string, error) { return time.RFC3339Nano, nil case TimestampNTZKindType: return RFC3339NoTZ, nil - case DateKindType: - return PostgresDateFormat, nil case TimeKindType: return PostgresTimeFormat, nil default: diff --git a/lib/typing/parquet.go b/lib/typing/parquet.go index 5a51ac2ce..c753a3ae9 100644 --- a/lib/typing/parquet.go +++ b/lib/typing/parquet.go @@ 
-70,11 +70,14 @@ type Field struct { func (k *KindDetails) ParquetAnnotation(colName string) (*Field, error) { var stringKind bool - if k.ExtendedTimeDetails != nil { - // If it's a date or time, it should be a STRING annotation. - if k.ExtendedTimeDetails.Type == ext.DateKindType || k.ExtendedTimeDetails.Type == ext.TimeKindType { - stringKind = true - } + + // If it's a date or time, it should be a STRING annotation. + if k.Kind == Date.Kind { + stringKind = true + } + + if k.ExtendedTimeDetails != nil && k.ExtendedTimeDetails.Type == ext.TimeKindType { + stringKind = true } if k.Kind == String.Kind || k.Kind == Struct.Kind || stringKind { diff --git a/lib/typing/typing.go b/lib/typing/typing.go index e96f49b50..12c929ee0 100644 --- a/lib/typing/typing.go +++ b/lib/typing/typing.go @@ -74,6 +74,11 @@ var ( Kind: "string", } + // Time data types + Date = KindDetails{ + Kind: "date", + } + ETime = KindDetails{ Kind: "extended_time", }