From ae18c5c14ef15d482e2d92848a3f627ad5265d3c Mon Sep 17 00:00:00 2001 From: Robin Tang Date: Tue, 22 Oct 2024 11:41:17 -0700 Subject: [PATCH] [ExtendedTime] More clean up (#969) --- clients/shared/default_value_test.go | 14 +++- clients/snowflake/dialect/dialect_test.go | 4 +- lib/debezium/converters/time.go | 12 ++-- lib/debezium/converters/time_test.go | 10 ++- lib/debezium/schema_test.go | 17 +++-- .../table_data_merge_columns_test.go | 5 +- lib/optimization/table_data_test.go | 2 +- lib/parquetutil/parse_values_test.go | 15 ++++- lib/typing/ext/parse.go | 14 +++- lib/typing/ext/time.go | 66 +++++++++---------- lib/typing/ext/time_test.go | 4 +- lib/typing/mongo/bson_test.go | 4 +- 12 files changed, 107 insertions(+), 60 deletions(-) diff --git a/clients/shared/default_value_test.go b/clients/shared/default_value_test.go index 5119ae6b7..bde1d165f 100644 --- a/clients/shared/default_value_test.go +++ b/clients/shared/default_value_test.go @@ -30,13 +30,21 @@ func TestColumn_DefaultValue(t *testing.T) { // date dateKind := typing.ETime - dateKind.ExtendedTimeDetails = &ext.Date + dateNestedKind, err := ext.NewNestedKind(ext.DateKindType, "") + assert.NoError(t, err) + dateKind.ExtendedTimeDetails = &dateNestedKind + // time timeKind := typing.ETime - timeKind.ExtendedTimeDetails = &ext.Time + timeNestedKind, err := ext.NewNestedKind(ext.TimeKindType, "") + assert.NoError(t, err) + timeKind.ExtendedTimeDetails = &timeNestedKind + // date time dateTimeKind := typing.ETime - dateTimeKind.ExtendedTimeDetails = &ext.TimestampTZ + dateTimeNestedKind, err := ext.NewNestedKind(ext.TimestampTZKindType, "") + assert.NoError(t, err) + dateTimeKind.ExtendedTimeDetails = &dateTimeNestedKind testCases := []struct { name string diff --git a/clients/snowflake/dialect/dialect_test.go b/clients/snowflake/dialect/dialect_test.go index 87647a26d..d377aaf5a 100644 --- a/clients/snowflake/dialect/dialect_test.go +++ b/clients/snowflake/dialect/dialect_test.go @@ -180,7 +180,7 @@ func TestSnowflakeDialect_KindForDataType_DateTime(t *testing.T) { for _, expectedDateTime := range expectedDateTimes { kd, err := SnowflakeDialect{}.KindForDataType(expectedDateTime, "") assert.NoError(t, err) - assert.Equal(t, ext.TimestampTZ.Type, kd.ExtendedTimeDetails.Type, expectedDateTime) + assert.Equal(t, ext.TimestampTZKindType, kd.ExtendedTimeDetails.Type, expectedDateTime) } } { @@ -189,7 +189,7 @@ func TestSnowflakeDialect_KindForDataType_DateTime(t *testing.T) { for _, expectedDateTime := range expectedDateTimes { kd, err := SnowflakeDialect{}.KindForDataType(expectedDateTime, "") assert.NoError(t, err) - assert.Equal(t, ext.TimestampNTZ.Type, kd.ExtendedTimeDetails.Type, expectedDateTime) + assert.Equal(t, ext.TimestampNTZKindType, kd.ExtendedTimeDetails.Type, expectedDateTime) } } } diff --git a/lib/debezium/converters/time.go b/lib/debezium/converters/time.go index 0d24ffc0a..7bb4fe3cc 100644 --- a/lib/debezium/converters/time.go +++ b/lib/debezium/converters/time.go @@ -11,18 +11,22 @@ import ( type Time struct{} -func (Time) ToKindDetails() typing.KindDetails { - return typing.NewExtendedTimeDetails(typing.ETime, ext.TimeKindType, "") +func (Time) layout() string { + return "15:04:05.000" +} + +func (t Time) ToKindDetails() typing.KindDetails { + return typing.NewExtendedTimeDetails(typing.ETime, ext.TimeKindType, t.layout()) } -func (Time) Convert(val any) (any, error) { +func (t Time) Convert(val any) (any, error) { valInt64, isOk := val.(int64) if !isOk { return nil, fmt.Errorf("expected int64 got '%v' with type %T", val, val) } // Represents the number of milliseconds past midnight, and does not include timezone information. - return ext.NewExtendedTime(time.UnixMilli(valInt64).In(time.UTC), ext.TimeKindType, ""), nil + return ext.NewExtendedTime(time.UnixMilli(valInt64).In(time.UTC), ext.TimeKindType, t.layout()), nil } type NanoTime struct{} diff --git a/lib/debezium/converters/time_test.go b/lib/debezium/converters/time_test.go index 947844d57..67e68baa6 100644 --- a/lib/debezium/converters/time_test.go +++ b/lib/debezium/converters/time_test.go @@ -114,13 +114,21 @@ func TestZonedTimestamp_Convert(t *testing.T) { } func TestTime_Convert(t *testing.T) { + { + val, err := Time{}.Convert(int64(54720321)) + assert.NoError(t, err) + + extTime, isOk := val.(*ext.ExtendedTime) + assert.True(t, isOk) + assert.Equal(t, "15:12:00.321", extTime.String("")) + } { val, err := Time{}.Convert(int64(54720000)) assert.NoError(t, err) extTime, isOk := val.(*ext.ExtendedTime) assert.True(t, isOk) - assert.Equal(t, "15:12:00+00", extTime.String("")) + assert.Equal(t, "15:12:00.000", extTime.String("")) } } diff --git a/lib/debezium/schema_test.go b/lib/debezium/schema_test.go index e990c53c0..5c80d4e41 100644 --- a/lib/debezium/schema_test.go +++ b/lib/debezium/schema_test.go @@ -254,13 +254,20 @@ func TestField_ToKindDetails(t *testing.T) { } } { - // Time - for _, dbzType := range []SupportedDebeziumType{Time, TimeKafkaConnect, TimeWithTimezone} { - kd, err := Field{DebeziumType: dbzType}.ToKindDetails() + { + // Time with timezone + kd, err := Field{DebeziumType: TimeWithTimezone}.ToKindDetails() assert.NoError(t, err) - assert.Equal(t, typing.NewExtendedTimeDetails(typing.ETime, ext.TimeKindType, ""), kd, dbzType) + assert.Equal(t, typing.NewExtendedTimeDetails(typing.ETime, ext.TimeKindType, ""), kd) + } + { + // Time + for _, dbzType := range []SupportedDebeziumType{Time, TimeKafkaConnect} { + kd, err := Field{DebeziumType: dbzType}.ToKindDetails() + assert.NoError(t, err) + assert.Equal(t, typing.NewExtendedTimeDetails(typing.ETime, ext.TimeKindType, "15:04:05.000"), kd, dbzType) + } } - { // Micro time kd, err := Field{DebeziumType: MicroTime}.ToKindDetails() diff --git a/lib/optimization/table_data_merge_columns_test.go b/lib/optimization/table_data_merge_columns_test.go index 31c9cce08..390024362 100644 --- a/lib/optimization/table_data_merge_columns_test.go +++ b/lib/optimization/table_data_merge_columns_test.go @@ -55,7 +55,10 @@ func TestTableData_UpdateInMemoryColumnsFromDestination(t *testing.T) { tableData.AddInMemoryCol(columns.NewColumn("foo", typing.String)) extTime := typing.ETime - extTime.ExtendedTimeDetails = &ext.Date + nestedKind, err := ext.NewNestedKind(ext.DateKindType, "") + assert.NoError(t, err) + + extTime.ExtendedTimeDetails = &nestedKind tsCol := columns.NewColumn("foo", extTime) assert.NoError(t, tableData.MergeColumnsFromDestination(tsCol)) diff --git a/lib/optimization/table_data_test.go b/lib/optimization/table_data_test.go index 79eaa0243..c54cc1b9f 100644 --- a/lib/optimization/table_data_test.go +++ b/lib/optimization/table_data_test.go @@ -166,7 +166,7 @@ func TestTableData_UpdateInMemoryColumns(t *testing.T) { col, isOk := tableData.ReadOnlyInMemoryCols().GetColumn("CHANGE_me") assert.True(t, isOk) - assert.Equal(t, ext.TimestampTZ.Type, col.KindDetails.ExtendedTimeDetails.Type) + assert.Equal(t, ext.TimestampTZKindType, col.KindDetails.ExtendedTimeDetails.Type) // It went from invalid to boolean. col, isOk = tableData.ReadOnlyInMemoryCols().GetColumn("bar") diff --git a/lib/parquetutil/parse_values_test.go b/lib/parquetutil/parse_values_test.go index b4c30f67c..ddf2cce0e 100644 --- a/lib/parquetutil/parse_values_test.go +++ b/lib/parquetutil/parse_values_test.go @@ -58,7 +58,10 @@ func TestParseValue(t *testing.T) { { // Time eTime := typing.ETime - eTime.ExtendedTimeDetails = &ext.Time + nestedKind, err := ext.NewNestedKind(ext.TimeKindType, "") + assert.NoError(t, err) + + eTime.ExtendedTimeDetails = &nestedKind value, err := ParseValue("03:15:00", columns.NewColumn("", eTime)) assert.NoError(t, err) assert.Equal(t, "03:15:00+00", value) @@ -66,7 +69,10 @@ func TestParseValue(t *testing.T) { { // Date eDate := typing.ETime - eDate.ExtendedTimeDetails = &ext.Date + nestedKind, err := ext.NewNestedKind(ext.DateKindType, "") + assert.NoError(t, err) + + eDate.ExtendedTimeDetails = &nestedKind value, err := ParseValue("2022-12-25", columns.NewColumn("", eDate)) assert.NoError(t, err) assert.Equal(t, "2022-12-25", value) @@ -74,7 +80,10 @@ func TestParseValue(t *testing.T) { { // Timestamp TZ eDateTime := typing.ETime - eDateTime.ExtendedTimeDetails = &ext.TimestampTZ + nestedKind, err := ext.NewNestedKind(ext.TimestampTZKindType, "") + assert.NoError(t, err) + + eDateTime.ExtendedTimeDetails = &nestedKind value, err := ParseValue("2023-04-24T17:29:05.69944Z", columns.NewColumn("", eDateTime)) assert.NoError(t, err) assert.Equal(t, int64(1682357345699), value) diff --git a/lib/typing/ext/parse.go b/lib/typing/ext/parse.go index ee8dae50d..909bd929f 100644 --- a/lib/typing/ext/parse.go +++ b/lib/typing/ext/parse.go @@ -50,7 +50,12 @@ func ParseExtendedDateTime(value string, kindType ExtendedTimeKindType) (*Extend // If that doesn't work, try timestamp if et, err := parseDateTime(value); err == nil { - et.nestedKind = Date + nestedKind, err := NewNestedKind(DateKindType, "") + if err != nil { + return nil, err + } + + et.nestedKind = nestedKind return et, nil } case TimeKindType: @@ -61,7 +66,12 @@ func ParseExtendedDateTime(value string, kindType ExtendedTimeKindType) (*Extend // If that doesn't work, try timestamp if et, err := parseDateTime(value); err == nil { - et.nestedKind = Time + nestedKind, err := NewNestedKind(TimeKindType, "") + if err != nil { + return nil, err + } + + et.nestedKind = nestedKind return et, nil } } diff --git a/lib/typing/ext/time.go b/lib/typing/ext/time.go index ebe5a6a8a..d84c2ea5b 100644 --- a/lib/typing/ext/time.go +++ b/lib/typing/ext/time.go @@ -2,12 +2,10 @@ package ext import ( "cmp" - "encoding/json" + "fmt" "time" ) -// TODO: This package should have a concept of default formats for each type. - type ExtendedTimeKindType string const ( @@ -17,32 +15,34 @@ const ( TimeKindType ExtendedTimeKindType = "time" ) +func (e ExtendedTimeKindType) defaultLayout() (string, error) { + switch e { + case TimestampTZKindType: + return time.RFC3339Nano, nil + case TimestampNTZKindType: + return RFC3339NanosecondNoTZ, nil + case DateKindType: + return PostgresDateFormat, nil + case TimeKindType: + return PostgresTimeFormat, nil + default: + return "", fmt.Errorf("unknown kind type: %q", e) + } +} + type NestedKind struct { Type ExtendedTimeKindType Format string } -var ( - TimestampNTZ = NestedKind{ - Type: TimestampNTZKindType, - Format: RFC3339NanosecondNoTZ, - } - - TimestampTZ = NestedKind{ - Type: TimestampTZKindType, - Format: time.RFC3339Nano, +func NewNestedKind(kindType ExtendedTimeKindType, optionalFormat string) (NestedKind, error) { + defaultLayout, err := kindType.defaultLayout() + if err != nil { + return NestedKind{}, err } - Date = NestedKind{ - Type: DateKindType, - Format: PostgresDateFormat, - } - - Time = NestedKind{ - Type: TimeKindType, - Format: PostgresTimeFormat, - } -) + return NestedKind{Type: kindType, Format: cmp.Or(optionalFormat, defaultLayout)}, nil +} // ExtendedTime is created because Golang's time.Time does not allow us to explicitly cast values as a date, or time // and only allows timestamp expressions. @@ -51,29 +51,25 @@ type ExtendedTime struct { nestedKind NestedKind } +// MarshalJSON is a custom JSON marshaller for ExtendedTime. +// This is only used for nested MongoDB objects where there may be nested DateTime values. func (e ExtendedTime) MarshalJSON() ([]byte, error) { - return json.Marshal(e.String("")) + // This is consistent with how MongoDB's Go driver marshals time.Time + return e.ts.UTC().MarshalJSON() } +// TODO: Have this return an error instead of nil func NewExtendedTime(t time.Time, kindType ExtendedTimeKindType, originalFormat string) *ExtendedTime { - if originalFormat == "" { - switch kindType { - case TimestampTZKindType: - originalFormat = TimestampTZ.Format - case TimestampNTZKindType: - originalFormat = TimestampNTZ.Format - case DateKindType: - originalFormat = Date.Format - case TimeKindType: - originalFormat = Time.Format - } + defaultLayout, err := kindType.defaultLayout() + if err != nil { + return nil } return &ExtendedTime{ ts: t, nestedKind: NestedKind{ Type: kindType, - Format: originalFormat, + Format: cmp.Or(originalFormat, defaultLayout), }, } } diff --git a/lib/typing/ext/time_test.go b/lib/typing/ext/time_test.go index be574670c..4963ed24a 100644 --- a/lib/typing/ext/time_test.go +++ b/lib/typing/ext/time_test.go @@ -15,7 +15,7 @@ func TestExtendedTime_MarshalJSON(t *testing.T) { // Single value bytes, err := json.Marshal(extTime) assert.NoError(t, err) - assert.Equal(t, `"2025-09-13T00:00:00.123Z"`, string(bytes)) + assert.Equal(t, `"2025-09-13T00:00:00.123456Z"`, string(bytes)) } { // As a nested object @@ -30,6 +30,6 @@ func TestExtendedTime_MarshalJSON(t *testing.T) { bytes, err := json.Marshal(obj) assert.NoError(t, err) - assert.Equal(t, `{"extendedTime":"2025-09-13T00:00:00.123Z","foo":"bar"}`, string(bytes)) + assert.Equal(t, `{"extendedTime":"2025-09-13T00:00:00.123456Z","foo":"bar"}`, string(bytes)) } } diff --git a/lib/typing/mongo/bson_test.go b/lib/typing/mongo/bson_test.go index 82d440099..500ce2cc4 100644 --- a/lib/typing/mongo/bson_test.go +++ b/lib/typing/mongo/bson_test.go @@ -125,6 +125,7 @@ func TestJSONEToMap(t *testing.T) { extendedTime, isOk := result["test_timestamp"] assert.True(t, isOk) assert.Equal(t, ext.NewExtendedTime(time.Date(2023, time.March, 16, 1, 18, 37, 0, time.UTC), ext.TimestampTZKindType, ext.ISO8601), extendedTime) + assert.Equal(t, "2023-03-16T01:18:37+00:00", extendedTime.(*ext.ExtendedTime).String("")) } { // Boolean @@ -143,7 +144,7 @@ func TestJSONEToMap(t *testing.T) { // Nested object value, err := json.Marshal(result["test_nested_object"]) assert.NoError(t, err) - assert.Equal(t, `{"a":{"b":{"c":"hello"}},"super_nested":{"foo":"bar","test_timestamp":"2023-03-16T01:18:37+00:00"},"test_timestamp":"2023-03-16T01:18:37+00:00"}`, string(value)) + assert.Equal(t, `{"a":{"b":{"c":"hello"}},"super_nested":{"foo":"bar","test_timestamp":"2023-03-16T01:18:37Z"},"test_timestamp":"2023-03-16T01:18:37Z"}`, string(value)) } { // NaN @@ -222,6 +223,7 @@ func TestBsonValueToGoValue(t *testing.T) { extendedTime, isOk := result.(*ext.ExtendedTime) assert.True(t, isOk) assert.Equal(t, ext.NewExtendedTime(time.Date(2021, time.January, 1, 0, 0, 0, 0, time.UTC), ext.TimestampTZKindType, ext.ISO8601), extendedTime) + assert.Equal(t, "2021-01-01T00:00:00+00:00", extendedTime.String("")) } { // primitive.ObjectID