Skip to content

Commit

Permalink
Minor refactor to ParseValue (#520)
Browse files Browse the repository at this point in the history
  • Loading branch information
Tang8330 authored May 1, 2024
1 parent 7233cdf commit f673f4c
Show file tree
Hide file tree
Showing 5 changed files with 268 additions and 274 deletions.
16 changes: 12 additions & 4 deletions lib/typing/ext/parse_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -93,10 +93,18 @@ func TestParseExtendedDateTime_Timestamp(t *testing.T) {
}

// TestParseExtendedDateTime exercises ParseExtendedDateTime both with a
// caller-supplied layout and without any additional layouts.
func TestParseExtendedDateTime(t *testing.T) {
	t.Run("additional layout", func(t *testing.T) {
		dateString := "27/12/82"
		extTime, err := ParseExtendedDateTime(dateString, []string{"02/01/06"})
		// Stop on error: the returned value is not meaningful (and may be nil),
		// so touching it would only mask the real failure with a panic.
		if !assert.NoError(t, err) {
			return
		}
		assert.Equal(t, "27/12/82", extTime.String(""))
	})

	t.Run("no additional layouts", func(t *testing.T) {
		dtString := "Mon Jan 02 15:04:05.69944 -0700 2006"
		ts, err := ParseExtendedDateTime(dtString, nil)
		if !assert.NoError(t, err) {
			return
		}
		// The rendered value is expected to differ from the raw input string.
		assert.NotEqual(t, dtString, ts.String(""))
	})
}

func TestTimeLayout(t *testing.T) {
Expand Down
90 changes: 90 additions & 0 deletions lib/typing/parse.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
package typing

import (
"reflect"
"strings"

"github.com/artie-labs/transfer/lib/typing/decimal"
"github.com/artie-labs/transfer/lib/typing/ext"
)

// ParseValue returns the KindDetails for val, optionally guided by a schema.
//
// A nil val yields Invalid unless settings.CreateAllColumnsIfAvailable is set.
// When optionalSchema has an entry for key, that entry is returned as the
// source of truth, except for non-nil ETime / EDecimal values: those are still
// inferred from the value itself so that row-level details are preserved
// (layout for time / date / timestamps, precision and scale for numerics).
// Keys without a schema entry are always inferred from val.
func ParseValue(settings Settings, key string, optionalSchema map[string]KindDetails, val any) KindDetails {
	if val == nil && !settings.CreateAllColumnsIfAvailable {
		return Invalid
	}

	// Looking up a key in a nil or empty map simply reports "not found".
	schemaKind, inSchema := optionalSchema[key]
	if !inSchema {
		return parseValue(settings, val)
	}

	if val == nil {
		return schemaKind
	}

	switch schemaKind.Kind {
	case ETime.Kind, EDecimal.Kind:
		// Do not short-circuit: the value carries detail the schema entry lacks.
		return parseValue(settings, val)
	}

	return schemaKind
}

// parseValue infers the KindDetails of val from its dynamic Go type.
//
// Untyped nil, typed-nil *ext.ExtendedTime and any type that is not handled
// below map to Invalid. Strings are additionally probed for date/time layouts
// (using settings.AdditionalDateFormats) and for JSON content.
func parseValue(settings Settings, val any) KindDetails {
	switch convertedVal := val.(type) {
	case nil:
		return Invalid
	case uint, int, uint8, uint16, uint32, uint64, int8, int16, int32, int64:
		return Integer
	case float32, float64:
		// Integers will be parsed as Floats if they come from JSON
		// This is a limitation with JSON - https://github.com/golang/go/issues/56719
		// UNLESS Transfer is provided with a schema object, and we deliberately typecast the value to an integer
		// before calling ParseValue().
		return Float
	case bool:
		return Boolean
	case string:
		// Only strings containing ":" or "-" are checked against the date time layouts.
		// This way, we don't penalize every string into going through that loop.
		// In the future, we can have specific layout RFCs run depending on the char.
		if strings.Contains(convertedVal, ":") || strings.Contains(convertedVal, "-") {
			extendedKind, err := ext.ParseExtendedDateTime(convertedVal, settings.AdditionalDateFormats)
			if err == nil {
				return KindDetails{
					Kind:                ETime.Kind,
					ExtendedTimeDetails: &extendedKind.NestedKind,
				}
			}
		}

		if IsJSON(convertedVal) {
			return Struct
		}

		return String
	case *decimal.Decimal:
		return KindDetails{
			Kind:                   EDecimal.Kind,
			ExtendedDecimalDetails: convertedVal,
		}
	case *ext.ExtendedTime:
		// A typed nil pointer does not match `case nil`; taking the address of
		// one of its fields would panic, so treat it like a missing value.
		if convertedVal == nil {
			return Invalid
		}

		return KindDetails{
			Kind:                ETime.Kind,
			ExtendedTimeDetails: &convertedVal.NestedKind,
		}
	default:
		// Fall back to reflection for slices and maps of any element type.
		switch reflect.TypeOf(val).Kind() {
		case reflect.Slice:
			return Array
		case reflect.Map:
			return Struct
		}
	}

	return Invalid
}
141 changes: 141 additions & 0 deletions lib/typing/parse_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,141 @@
package typing

import (
"errors"
"fmt"
"math"
"testing"

"github.com/artie-labs/transfer/lib/typing/ext"
"github.com/stretchr/testify/assert"
)

// Test_ParseValue checks the kind that ParseValue infers for each supported
// value type when no schema is supplied. Assertions follow testify's
// (expected, actual) argument order so failure output is labelled correctly.
func Test_ParseValue(t *testing.T) {
	{
		// Invalid
		assert.Equal(t, Invalid, ParseValue(Settings{}, "", nil, nil))
		assert.Equal(t, Invalid, ParseValue(Settings{}, "", nil, errors.New("hello")))
	}
	{
		// Nil: empty or "nil" strings are still strings, only a real nil is invalid.
		assert.Equal(t, String, ParseValue(Settings{}, "", nil, ""))
		assert.Equal(t, String, ParseValue(Settings{}, "", nil, "nil"))
		assert.Equal(t, Invalid, ParseValue(Settings{}, "", nil, nil))
	}
	{
		// Floats
		assert.Equal(t, Float, ParseValue(Settings{}, "", nil, 7.5))
		assert.Equal(t, Float, ParseValue(Settings{}, "", nil, -7.4999999))
		assert.Equal(t, Float, ParseValue(Settings{}, "", nil, 7.0))
	}
	{
		// Integers
		assert.Equal(t, Integer, ParseValue(Settings{}, "", nil, 9))
		assert.Equal(t, Integer, ParseValue(Settings{}, "", nil, math.MaxInt))
		assert.Equal(t, Integer, ParseValue(Settings{}, "", nil, -1*math.MaxInt))
	}
	{
		// Boolean
		assert.Equal(t, Boolean, ParseValue(Settings{}, "", nil, true))
		assert.Equal(t, Boolean, ParseValue(Settings{}, "", nil, false))
	}
	{
		// Strings
		possibleStrings := []string{
			"dusty",
			"robin",
			"abc",
		}

		for _, possibleString := range possibleStrings {
			assert.Equal(t, String, ParseValue(Settings{}, "", nil, possibleString))
		}
	}
	{
		// Arrays
		assert.Equal(t, Array, ParseValue(Settings{}, "", nil, []string{"a", "b", "c"}))
		assert.Equal(t, Array, ParseValue(Settings{}, "", nil, []any{"a", 123, "c"}))
		assert.Equal(t, Array, ParseValue(Settings{}, "", nil, []int64{1}))
		assert.Equal(t, Array, ParseValue(Settings{}, "", nil, []bool{false}))
		assert.Equal(t, Array, ParseValue(Settings{}, "", nil, []any{false, true}))
	}
	{
		// Time
		kindDetails := ParseValue(Settings{}, "", nil, "00:18:11.13116+00")
		assert.Equal(t, ETime.Kind, kindDetails.Kind)
		// Guard the dereference so a regression reports a failure instead of panicking.
		if assert.NotNil(t, kindDetails.ExtendedTimeDetails) {
			assert.Equal(t, ext.TimeKindType, kindDetails.ExtendedTimeDetails.Type)
		}
	}
	{
		// Date layouts from Go's time.Time library
		possibleDates := []string{
			"01/02 03:04:05PM '06 -0700", // The reference time, in numerical order.
			"Mon Jan 2 15:04:05 2006",
			"Mon Jan 2 15:04:05 MST 2006",
			"Mon Jan 02 15:04:05 -0700 2006",
			"02 Jan 06 15:04 MST",
			"02 Jan 06 15:04 -0700", // RFC822 with numeric zone
			"Monday, 02-Jan-06 15:04:05 MST",
			"Mon, 02 Jan 2006 15:04:05 MST",
			"Mon, 02 Jan 2006 15:04:05 -0700", // RFC1123 with numeric zone
			"2019-10-12T14:20:50.52+07:00",
		}

		for _, possibleDate := range possibleDates {
			failMsg := fmt.Sprintf("Failed format, value is: %v", possibleDate)

			kindDetails := ParseValue(Settings{}, "", nil, possibleDate)
			if assert.NotNil(t, kindDetails.ExtendedTimeDetails, failMsg) {
				assert.Equal(t, ext.DateTime.Type, kindDetails.ExtendedTimeDetails.Type, failMsg)
			}

			// Test the ParseExtendedDateTime function as well.
			ts, err := ext.ParseExtendedDateTime(possibleDate, nil)
			if assert.NoError(t, err, failMsg) {
				assert.False(t, ts.IsZero(), failMsg)
			}
		}

		ts, err := ext.ParseExtendedDateTime("random", nil)
		assert.ErrorContains(t, err, "dtString: random is not supported")
		assert.Nil(t, ts)
	}
	{
		// Maps
		randomMaps := []any{
			map[string]any{
				"foo":   "bar",
				"dog":   "dusty",
				"breed": "australian shepherd",
			},
			map[string]bool{
				"foo": true,
				"bar": false,
			},
			map[int]int{
				1: 1,
				2: 2,
				3: 3,
			},
			map[string]any{
				"food": map[string]any{
					"pizza": "slice",
					"fruit": "apple",
				},
				"music": []string{"a", "b", "c"},
			},
		}

		for _, randomMap := range randomMaps {
			assert.Equal(t, Struct, ParseValue(Settings{}, "", nil, randomMap), fmt.Sprintf("Failed message is: %v", randomMap))
		}
	}
}

// TestOptionalSchema verifies how ParseValue combines a schema entry with the
// CreateAllColumnsIfAvailable setting.
func TestOptionalSchema(t *testing.T) {
	schema := map[string]KindDetails{"created_at": String}
	const column = "created_at"

	// A non-nil value: the schema entry wins over what the value looks like.
	gotWithValue := ParseValue(Settings{}, column, schema, "2023-01-01")
	assert.Equal(t, String, gotWithValue)

	// A nil value without `createAllColumnsIfAvailable`: the kind is invalid.
	gotNilDisabled := ParseValue(Settings{}, column, schema, nil)
	assert.Equal(t, Invalid, gotNilDisabled)

	// A nil value with `createAllColumnsIfAvailable`: the schema entry is used.
	gotNilEnabled := ParseValue(Settings{CreateAllColumnsIfAvailable: true}, column, schema, nil)
	assert.Equal(t, String, gotNilEnabled)
}
78 changes: 0 additions & 78 deletions lib/typing/typing.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@ package typing
import (
"encoding/json"
"fmt"
"reflect"
"strings"

"github.com/artie-labs/transfer/lib/config/constants"
Expand Down Expand Up @@ -100,83 +99,6 @@ func IsJSON(str string) bool {
return false
}

// ParseValue returns the KindDetails for val, optionally guided by a schema.
//
// A nil val yields Invalid unless settings.CreateAllColumnsIfAvailable is set.
// When optionalSchema has an entry for key it is used as the source of truth,
// except for non-nil ETime / EDecimal values, which are re-parsed without the
// schema so that row-level layout, precision and scale are preserved.
// Otherwise the kind is inferred from the dynamic type of val.
func ParseValue(settings Settings, key string, optionalSchema map[string]KindDetails, val any) KindDetails {
	if val == nil && !settings.CreateAllColumnsIfAvailable {
		return Invalid
	}

	// Looking up a key in a nil or empty map simply reports "not found".
	if schemaKind, inSchema := optionalSchema[key]; inSchema {
		needsRowDetail := schemaKind.Kind == ETime.Kind || schemaKind.Kind == EDecimal.Kind
		if val == nil || !needsRowDetail {
			return schemaKind
		}

		// Re-run without the schema so the value itself decides the details.
		return ParseValue(settings, key, nil, val)
	}

	switch typedVal := val.(type) {
	case nil:
		return Invalid
	case uint, int, uint8, uint16, uint32, uint64, int8, int16, int32, int64:
		return Integer
	case float32, float64:
		// Numbers decoded from JSON arrive as floats even when they are whole
		// (https://github.com/golang/go/issues/56719), unless a schema was
		// supplied and the caller cast the value to an integer beforehand.
		return Float
	case bool:
		return Boolean
	case string:
		// Only strings containing ":" or "-" are tried against the date time
		// layouts, so ordinary strings skip that work.
		looksTemporal := strings.Contains(typedVal, ":") || strings.Contains(typedVal, "-")
		if looksTemporal {
			if parsed, err := ext.ParseExtendedDateTime(typedVal, settings.AdditionalDateFormats); err == nil {
				return KindDetails{
					Kind:                ETime.Kind,
					ExtendedTimeDetails: &parsed.NestedKind,
				}
			}
		}

		if IsJSON(typedVal) {
			return Struct
		}

		return String
	case *decimal.Decimal:
		return KindDetails{
			Kind:                   EDecimal.Kind,
			ExtendedDecimalDetails: typedVal,
		}
	case *ext.ExtendedTime:
		return KindDetails{
			Kind:                ETime.Kind,
			ExtendedTimeDetails: &typedVal.NestedKind,
		}
	default:
		// Slices and maps of any element type are recognised via reflection.
		switch reflect.TypeOf(val).Kind() {
		case reflect.Slice:
			return Array
		case reflect.Map:
			return Struct
		}
	}

	return Invalid
}

func KindToDWHType(kd KindDetails, dwh constants.DestinationKind, isPk bool) string {
switch dwh {
case constants.Snowflake:
Expand Down
Loading

0 comments on commit f673f4c

Please sign in to comment.