diff --git a/lib/typing/ext/parse_test.go b/lib/typing/ext/parse_test.go
index c481bf451..6042366c5 100644
--- a/lib/typing/ext/parse_test.go
+++ b/lib/typing/ext/parse_test.go
@@ -93,10 +93,18 @@ func TestParseExtendedDateTime_Timestamp(t *testing.T) {
 }
 
 func TestParseExtendedDateTime(t *testing.T) {
-	dateString := "27/12/82"
-	extTime, err := ParseExtendedDateTime(dateString, []string{"02/01/06"})
-	assert.NoError(t, err)
-	assert.Equal(t, "27/12/82", extTime.String(""))
+	{
+		dateString := "27/12/82"
+		extTime, err := ParseExtendedDateTime(dateString, []string{"02/01/06"})
+		assert.NoError(t, err)
+		assert.Equal(t, "27/12/82", extTime.String(""))
+	}
+	{
+		dtString := "Mon Jan 02 15:04:05.69944 -0700 2006"
+		ts, err := ParseExtendedDateTime(dtString, nil)
+		assert.NoError(t, err)
+		assert.NotEqual(t, ts.String(""), dtString)
+	}
 }
 
 func TestTimeLayout(t *testing.T) {
diff --git a/lib/typing/parse.go b/lib/typing/parse.go
new file mode 100644
index 000000000..fb5280153
--- /dev/null
+++ b/lib/typing/parse.go
@@ -0,0 +1,101 @@
+package typing
+
+import (
+	"reflect"
+	"strings"
+
+	"github.com/artie-labs/transfer/lib/typing/decimal"
+	"github.com/artie-labs/transfer/lib/typing/ext"
+)
+
+// ParseValue returns the KindDetails for the column `key` whose row-level value is `val`.
+//
+// When `optionalSchema` has an entry for `key`, that entry is the source of truth, except for
+// `ETime` and `EDecimal` columns with a non-nil value, which are still inferred from the value.
+// When there is no entry for `key`, the kind is inferred from the value alone.
+func ParseValue(settings Settings, key string, optionalSchema map[string]KindDetails, val any) KindDetails {
+	if val == nil && !settings.CreateAllColumnsIfAvailable {
+		// If the value is nil and `createAllColumnsIfAvailable` = false, then return `Invalid`.
+		return Invalid
+	}
+
+	// Indexing a nil or empty map is safe and reports a miss, so no length check is needed.
+	kindDetail, isOk := optionalSchema[key]
+	if !isOk {
+		return parseValue(settings, val)
+	}
+
+	if val != nil && (kindDetail.Kind == ETime.Kind || kindDetail.Kind == EDecimal.Kind) {
+		// If the data type is either `ETime` or `EDecimal` and the value exists, we will not early exit.
+		// We are not skipping so that we are able to get the exact layout specified at the row level to preserve:
+		// 1. Layout for time / date / timestamps
+		// 2. Precision and scale for numeric values
+		return parseValue(settings, val)
+	}
+
+	// The column exists in the schema, use it as the source of truth.
+	return kindDetail
+}
+
+// parseValue infers the KindDetails purely from the dynamic type (and, for strings, the content) of `val`.
+// It returns `Invalid` for nil, for a nil `*ext.ExtendedTime` and for any type that is not handled below.
+func parseValue(settings Settings, val any) KindDetails {
+	switch convertedVal := val.(type) {
+	case nil:
+		return Invalid
+	case uint, int, uint8, uint16, uint32, uint64, int8, int16, int32, int64:
+		return Integer
+	case float32, float64:
+		// Integers will be parsed as Floats if they come from JSON
+		// This is a limitation with JSON - https://github.com/golang/go/issues/56719
+		// UNLESS Transfer is provided with a schema object, and we deliberately typecast the value to an integer
+		// before calling ParseValue().
+		return Float
+	case bool:
+		return Boolean
+	case string:
+		// If it contains `:` or `-`, then we must check against date time.
+		// This way, we don't penalize every string into going through this loop
+		// In the future, we can have specific layout RFCs run depending on the char
+		if strings.ContainsAny(convertedVal, ":-") {
+			extendedKind, err := ext.ParseExtendedDateTime(convertedVal, settings.AdditionalDateFormats)
+			if err == nil {
+				return KindDetails{
+					Kind:                ETime.Kind,
+					ExtendedTimeDetails: &extendedKind.NestedKind,
+				}
+			}
+		}
+
+		if IsJSON(convertedVal) {
+			return Struct
+		}
+
+		return String
+	case *decimal.Decimal:
+		return KindDetails{
+			Kind:                   EDecimal.Kind,
+			ExtendedDecimalDetails: convertedVal,
+		}
+	case *ext.ExtendedTime:
+		if convertedVal == nil {
+			// A typed nil pointer carries no layout; taking `&convertedVal.NestedKind` would panic.
+			return Invalid
+		}
+
+		return KindDetails{
+			Kind:                ETime.Kind,
+			ExtendedTimeDetails: &convertedVal.NestedKind,
+		}
+	}
+
+	// Not one of the types above, fall back to reflection to detect slices and maps.
+	switch reflect.TypeOf(val).Kind() {
+	case reflect.Slice:
+		return Array
+	case reflect.Map:
+		return Struct
+	default:
+		return Invalid
+	}
+}
diff --git a/lib/typing/parse_test.go b/lib/typing/parse_test.go
new file mode 100644
index 000000000..756e138d7
--- /dev/null
+++ b/lib/typing/parse_test.go
@@
-0,0 +1,157 @@
+package typing
+
+import (
+	"errors"
+	"fmt"
+	"math"
+	"testing"
+
+	"github.com/artie-labs/transfer/lib/typing/ext"
+	"github.com/stretchr/testify/assert"
+)
+
+func Test_ParseValue(t *testing.T) {
+	{
+		// Invalid
+		assert.Equal(t, Invalid, ParseValue(Settings{}, "", nil, nil))
+		assert.Equal(t, Invalid, ParseValue(Settings{}, "", nil, errors.New("hello")))
+	}
+	{
+		// Strings that merely look empty or nil are still strings
+		assert.Equal(t, String, ParseValue(Settings{}, "", nil, ""))
+		assert.Equal(t, String, ParseValue(Settings{}, "", nil, "nil"))
+	}
+	{
+		// Floats
+		assert.Equal(t, Float, ParseValue(Settings{}, "", nil, 7.5))
+		assert.Equal(t, Float, ParseValue(Settings{}, "", nil, -7.4999999))
+		assert.Equal(t, Float, ParseValue(Settings{}, "", nil, 7.0))
+	}
+	{
+		// Integers
+		assert.Equal(t, Integer, ParseValue(Settings{}, "", nil, 9))
+		assert.Equal(t, Integer, ParseValue(Settings{}, "", nil, math.MaxInt))
+		assert.Equal(t, Integer, ParseValue(Settings{}, "", nil, -1*math.MaxInt))
+	}
+	{
+		// Boolean
+		assert.Equal(t, Boolean, ParseValue(Settings{}, "", nil, true))
+		assert.Equal(t, Boolean, ParseValue(Settings{}, "", nil, false))
+	}
+	{
+		// Strings
+		possibleStrings := []string{
+			"dusty",
+			"robin",
+			"abc",
+		}
+
+		for _, possibleString := range possibleStrings {
+			assert.Equal(t, String, ParseValue(Settings{}, "", nil, possibleString))
+		}
+	}
+	{
+		// Arrays
+		assert.Equal(t, Array, ParseValue(Settings{}, "", nil, []string{"a", "b", "c"}))
+		assert.Equal(t, Array, ParseValue(Settings{}, "", nil, []any{"a", 123, "c"}))
+		assert.Equal(t, Array, ParseValue(Settings{}, "", nil, []int64{1}))
+		assert.Equal(t, Array, ParseValue(Settings{}, "", nil, []bool{false}))
+		assert.Equal(t, Array, ParseValue(Settings{}, "", nil, []any{false, true}))
+	}
+	{
+		// Time
+		kindDetails := ParseValue(Settings{}, "", nil, "00:18:11.13116+00")
+		assert.Equal(t, ETime.Kind, kindDetails.Kind)
+		if assert.NotNil(t, kindDetails.ExtendedTimeDetails) {
+			assert.Equal(t, ext.TimeKindType, kindDetails.ExtendedTimeDetails.Type)
+		}
+	}
+	{
+		// Date layouts from Go's time.Time library
+		possibleDates := []any{
+			"01/02 03:04:05PM '06 -0700", // The reference time, in numerical order.
+			"Mon Jan 2 15:04:05 2006",
+			"Mon Jan 2 15:04:05 MST 2006",
+			"Mon Jan 02 15:04:05 -0700 2006",
+			"02 Jan 06 15:04 MST",
+			"02 Jan 06 15:04 -0700", // RFC822 with numeric zone
+			"Monday, 02-Jan-06 15:04:05 MST",
+			"Mon, 02 Jan 2006 15:04:05 MST",
+			"Mon, 02 Jan 2006 15:04:05 -0700", // RFC1123 with numeric zone
+			"2019-10-12T14:20:50.52+07:00",
+		}
+
+		for _, possibleDate := range possibleDates {
+			kindDetails := ParseValue(Settings{}, "", nil, possibleDate)
+			// Check for nil first so that a regression fails the assertion instead of panicking on a nil dereference.
+			if assert.NotNil(t, kindDetails.ExtendedTimeDetails, "Failed format, value is: %v", possibleDate) {
+				assert.Equal(t, ext.DateTime.Type, kindDetails.ExtendedTimeDetails.Type, "Failed format, value is: %v", possibleDate)
+			}
+
+			// Test the parseDT function as well.
+			ts, err := ext.ParseExtendedDateTime(fmt.Sprint(possibleDate), []string{})
+			if assert.NoError(t, err) {
+				assert.False(t, ts.IsZero(), ts)
+			}
+		}
+
+		ts, err := ext.ParseExtendedDateTime("random", []string{})
+		assert.ErrorContains(t, err, "dtString: random is not supported")
+		assert.Nil(t, ts)
+	}
+	{
+		// Maps
+		randomMaps := []any{
+			map[string]any{
+				"foo":   "bar",
+				"dog":   "dusty",
+				"breed": "australian shepherd",
+			},
+			map[string]bool{
+				"foo": true,
+				"bar": false,
+			},
+			map[int]int{
+				1: 1,
+				2: 2,
+				3: 3,
+			},
+			map[string]any{
+				"food": map[string]any{
+					"pizza": "slice",
+					"fruit": "apple",
+				},
+				"music": []string{"a", "b", "c"},
+			},
+		}
+
+		for _, randomMap := range randomMaps {
+			assert.Equal(t, Struct, ParseValue(Settings{}, "", nil, randomMap), fmt.Sprintf("Failed message is: %v", randomMap))
+		}
+	}
+}
+
+func TestOptionalSchema(t *testing.T) {
+	{
+		// Without a schema, the value alone determines the kind (with or without a key).
+		assert.Equal(t, Boolean, ParseValue(Settings{}, "", nil, true))
+		assert.Equal(t, Boolean, ParseValue(Settings{}, "key", nil, true))
+	}
+	{
+		optionalSchema := map[string]KindDetails{
+			"created_at": String,
+		}
+
+		// Parse it as a date since the key does not exist in the optional schema.
+		kd := ParseValue(Settings{}, "updated_at", optionalSchema, "2023-01-01")
+		if assert.NotNil(t, kd.ExtendedTimeDetails) {
+			assert.Equal(t, ext.Date.Type, kd.ExtendedTimeDetails.Type)
+		}
+
+		// Respect the schema if the value is not null.
+		assert.Equal(t, String, ParseValue(Settings{}, "created_at", optionalSchema, "2023-01-01"))
+		// Kind is invalid because `createAllColumnsIfAvailable` is not enabled.
+		assert.Equal(t, Invalid, ParseValue(Settings{}, "created_at", optionalSchema, nil))
+		assert.Equal(t, String, ParseValue(Settings{CreateAllColumnsIfAvailable: true}, "created_at", optionalSchema, nil))
+	}
+}
diff --git a/lib/typing/typing.go b/lib/typing/typing.go
index ac3b1f72c..c46a0a219 100644
--- a/lib/typing/typing.go
+++ b/lib/typing/typing.go
@@ -3,7 +3,6 @@ package typing
 import (
 	"encoding/json"
 	"fmt"
-	"reflect"
 	"strings"
 
 	"github.com/artie-labs/transfer/lib/config/constants"
@@ -100,83 +99,6 @@ func IsJSON(str string) bool {
 	return false
 }
 
-func ParseValue(settings Settings, key string, optionalSchema map[string]KindDetails, val any) KindDetails {
-	if val == nil && !settings.CreateAllColumnsIfAvailable {
-		// If the value is nil and `createAllColumnsIfAvailable` = false, then return `Invalid
-		return Invalid
-	}
-
-	if len(optionalSchema) > 0 {
-		// If the column exists in the schema, let's early exit.
-		if kindDetail, isOk := optionalSchema[key]; isOk {
-			// If the schema exists, use it as sot.
-			if val != nil && (kindDetail.Kind == ETime.Kind || kindDetail.Kind == EDecimal.Kind) {
-				// If the data type is either `ETime` or `EDecimal` and the value exists, we will not early exit
-				// We are not skipping so that we are able to get the exact layout specified at the row level to preserve:
-				// 1. Layout for time / date / timestamps
-				// 2. Precision and scale for numeric values
-				return ParseValue(settings, key, nil, val)
-			}
-
-			return kindDetail
-		}
-	}
-
-	switch convertedVal := val.(type) {
-	case nil:
-		return Invalid
-	case uint, int, uint8, uint16, uint32, uint64, int8, int16, int32, int64:
-		return Integer
-	case float32, float64:
-		// Integers will be parsed as Floats if they come from JSON
-		// This is a limitation with JSON - https://github.com/golang/go/issues/56719
-		// UNLESS Transfer is provided with a schema object, and we deliberately typecast the value to an integer
-		// before calling ParseValue().
-		return Float
-	case bool:
-		return Boolean
-	case string:
-		// If it contains space or -, then we must check against date time.
-		// This way, we don't penalize every string into going through this loop
-		// In the future, we can have specific layout RFCs run depending on the char
-		if strings.Contains(convertedVal, ":") || strings.Contains(convertedVal, "-") {
-			extendedKind, err := ext.ParseExtendedDateTime(convertedVal, settings.AdditionalDateFormats)
-			if err == nil {
-				return KindDetails{
-					Kind:                ETime.Kind,
-					ExtendedTimeDetails: &extendedKind.NestedKind,
-				}
-			}
-		}
-
-		if IsJSON(convertedVal) {
-			return Struct
-		}
-
-		return String
-
-	case *decimal.Decimal:
-		return KindDetails{
-			Kind:                   EDecimal.Kind,
-			ExtendedDecimalDetails: convertedVal,
-		}
-	case *ext.ExtendedTime:
-		return KindDetails{
-			Kind:                ETime.Kind,
-			ExtendedTimeDetails: &convertedVal.NestedKind,
-		}
-	default:
-		// Check if the val is one of our custom-types
-		if reflect.TypeOf(val).Kind() == reflect.Slice {
-			return Array
-		} else if reflect.TypeOf(val).Kind() == reflect.Map {
-			return Struct
-		}
-	}
-
-	return Invalid
-}
-
 func KindToDWHType(kd KindDetails, dwh constants.DestinationKind, isPk bool) string {
 	switch dwh {
 	case constants.Snowflake:
diff --git a/lib/typing/typing_test.go b/lib/typing/typing_test.go
index 810fafcbf..91303bcbc 100644
--- a/lib/typing/typing_test.go
+++ b/lib/typing/typing_test.go
@@ -1,38 +1,33 @@
 package typing
 
 import (
-	"errors"
-	"fmt"
-	"math"
 	"testing"
 
-	"github.com/artie-labs/transfer/lib/typing/ext"
 	"github.com/stretchr/testify/assert"
 )
 
-func TestNil(t *testing.T) {
-	assert.Equal(t, ParseValue(Settings{}, "", nil, ""), String)
-	assert.Equal(t, ParseValue(Settings{}, "", nil, "nil"), String)
-	assert.Equal(t, ParseValue(Settings{}, "", nil, nil), Invalid)
-}
-
-func TestJSONString(t *testing.T) {
-	type _testCase struct {
-		input    string
-		expected bool
-	}
-
-	testCases := []_testCase{
-		{
-			input:    "{}",
-			expected: true,
-		},
-		{
-			input:    `{"hello": "world"}`,
-			expected: true,
-		},
-		{
-			input: `{
+func Test_IsJSON(t *testing.T) {
+	{
+		// Truncated or malformed objects and arrays, and blank strings, must be rejected.
+		invalidValues := []string{
+			`{"hello": "world"`,
+			`{"hello": "world"}}`,
+			`{null}`,
+			`[1, 2, 3, 4`,
+			"",
+			" ",
+		}
+
+		for _, invalidValue := range invalidValues {
+			assert.False(t, IsJSON(invalidValue), invalidValue)
+		}
+	}
+	{
+		// Well-formed objects and arrays must be accepted.
+		validValues := []string{
+			"{}",
+			`{"hello": "world"}`,
+			`{
 	"hello": {
 		"world": {
 			"nested_value": true
@@ -42,171 +37,12 @@ func TestJSONString(t *testing.T) {
 	"number": 7.5,
 	"integerNum": 7
 }`,
-			expected: true,
-		},
-		{
-			input: `{"hello": "world"`,
-		},
-		{
-			input: `{"hello": "world"}}`,
-		},
-		{
-			input: `{null}`,
-		},
-		{
-			input:    `[]`,
-			expected: true,
-		},
-		{
-			input:    `[1, 2, 3, 4]`,
-			expected: true,
-		},
-		{
-			input: `[1, 2, 3, 4`,
-		},
-		{
-			input: ``,
-		},
-		{
-			input: ` `,
-		},
-	}
-
-	for _, tc := range testCases {
-		assert.Equal(t, tc.expected, IsJSON(tc.input), tc.input)
-	}
-}
-
-func TestParseValueBasic(t *testing.T) {
-	// Floats
-	assert.Equal(t, ParseValue(Settings{}, "", nil, 7.5), Float)
-	assert.Equal(t, ParseValue(Settings{}, "", nil, -7.4999999), Float)
-	assert.Equal(t, ParseValue(Settings{}, "", nil, 7.0), Float)
-
-	// Integers
-	assert.Equal(t, ParseValue(Settings{}, "", nil, 9), Integer)
-	assert.Equal(t, ParseValue(Settings{}, "", nil, math.MaxInt), Integer)
-	assert.Equal(t, ParseValue(Settings{}, "", nil, -1*math.MaxInt), Integer)
-
-	// Invalid
-	assert.Equal(t, ParseValue(Settings{}, "", nil, nil), Invalid)
-	assert.Equal(t, ParseValue(Settings{}, "", nil, errors.New("hello")), Invalid)
-
-	// Boolean
-	assert.Equal(t, ParseValue(Settings{}, "", nil, true), Boolean)
-	assert.Equal(t, ParseValue(Settings{}, "", nil, false), Boolean)
-}
-
-func TestParseValueArrays(t *testing.T) {
-	assert.Equal(t, ParseValue(Settings{}, "", nil, []string{"a", "b", "c"}), Array)
-	assert.Equal(t, ParseValue(Settings{}, "", nil, []any{"a", 123, "c"}), Array)
-	assert.Equal(t, ParseValue(Settings{}, "", nil, []int64{1}), Array)
-	assert.Equal(t, ParseValue(Settings{}, "", nil, []bool{false}), Array)
-}
-
-func TestParseValueMaps(t *testing.T) {
-	randomMaps := []any{
-		map[string]any{
-			"foo":   "bar",
-			"dog":   "dusty",
-			"breed": "australian shepherd",
-		},
-		map[string]bool{
-			"foo": true,
-			"bar": false,
-		},
-		map[int]int{
-			1: 1,
-			2: 2,
-			3: 3,
-		},
-		map[string]any{
-			"food": map[string]any{
-				"pizza": "slice",
-				"fruit": "apple",
-			},
-			"music": []string{"a", "b", "c"},
-		},
-	}
-
-	for _, randomMap := range randomMaps {
-		assert.Equal(t, ParseValue(Settings{}, "", nil, randomMap), Struct, fmt.Sprintf("Failed message is: %v", randomMap))
-	}
-}
+			"[]",
+			"[1, 2, 3, 4]",
+		}
 
-func TestDateTime(t *testing.T) {
-	// Took this list from the Go time library.
-	possibleDates := []any{
-		"01/02 03:04:05PM '06 -0700", // The reference time, in numerical order.
-		"Mon Jan 2 15:04:05 2006",
-		"Mon Jan 2 15:04:05 MST 2006",
-		"Mon Jan 02 15:04:05 -0700 2006",
-		"02 Jan 06 15:04 MST",
-		"02 Jan 06 15:04 -0700", // RFC822 with numeric zone
-		"Monday, 02-Jan-06 15:04:05 MST",
-		"Mon, 02 Jan 2006 15:04:05 MST",
-		"Mon, 02 Jan 2006 15:04:05 -0700", // RFC1123 with numeric zone
-		"2019-10-12T14:20:50.52+07:00",
+		for _, validValue := range validValues {
+			assert.True(t, IsJSON(validValue), validValue)
+		}
 	}
-
-	for _, possibleDate := range possibleDates {
-		assert.Equal(t, ParseValue(Settings{}, "", nil, possibleDate).ExtendedTimeDetails.Type, ext.DateTime.Type, fmt.Sprintf("Failed format, value is: %v", possibleDate))
-
-		// Test the parseDT function as well.
-		ts, err := ext.ParseExtendedDateTime(fmt.Sprint(possibleDate), []string{})
-		assert.NoError(t, err, err)
-		assert.False(t, ts.IsZero(), ts)
-	}
-
-	ts, err := ext.ParseExtendedDateTime("random", []string{})
-	assert.ErrorContains(t, err, "dtString: random is not supported")
-	assert.Nil(t, ts)
-}
-
-func TestDateTime_Fallback(t *testing.T) {
-	dtString := "Mon Jan 02 15:04:05.69944 -0700 2006"
-	ts, err := ext.ParseExtendedDateTime(dtString, nil)
-	assert.NoError(t, err)
-	assert.NotEqual(t, ts.String(""), dtString)
-}
-
-func TestTime(t *testing.T) {
-	kindDetails := ParseValue(Settings{}, "", nil, "00:18:11.13116+00")
-	// 00:42:26.693631Z
-	assert.Equal(t, ETime.Kind, kindDetails.Kind)
-	assert.Equal(t, ext.TimeKindType, kindDetails.ExtendedTimeDetails.Type)
-}
-
-func TestString(t *testing.T) {
-	possibleStrings := []string{
-		"dusty",
-		"robin",
-		"abc",
-	}
-
-	for _, possibleString := range possibleStrings {
-		assert.Equal(t, ParseValue(Settings{}, "", nil, possibleString), String)
-	}
-}
-
-func TestOptionalSchema(t *testing.T) {
-	kd := ParseValue(Settings{}, "", nil, true)
-	assert.Equal(t, kd, Boolean)
-
-	// Key in a nil-schema
-	kd = ParseValue(Settings{}, "key", nil, true)
-	assert.Equal(t, kd, Boolean)
-
-	// Non-existent key in the schema.
-	optionalSchema := map[string]KindDetails{
-		"created_at": String,
-	}
-
-	// Parse it as a date since it doesn't exist in the optional schema.
-	kd = ParseValue(Settings{}, "updated_at", optionalSchema, "2023-01-01")
-	assert.Equal(t, ext.Date.Type, kd.ExtendedTimeDetails.Type)
-
-	// Respecting the optional schema
-	kd = ParseValue(Settings{}, "created_at", optionalSchema, "2023-01-01")
-	assert.Equal(t, String, kd)
 }