Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

More refactor around ExtendedTime #979

Merged
merged 11 commits into from
Oct 24, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 14 additions & 3 deletions clients/bigquery/converters/converters.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,13 +4,20 @@ import (
"fmt"
"strconv"

"github.com/artie-labs/transfer/lib/typing"
"github.com/artie-labs/transfer/lib/typing/decimal"
"github.com/artie-labs/transfer/lib/typing/ext"
)

type StringConverter struct{}
// StringConverter converts supported values into their string representation.
type StringConverter struct {
// kd holds the kind details supplied to NewStringConverter; Convert reads its
// extended time format when the value is an *ext.ExtendedTime.
kd typing.KindDetails
}

// NewStringConverter returns a StringConverter that carries the given kind
// details, so the converter can later consult them when formatting values.
func NewStringConverter(kd typing.KindDetails) StringConverter {
	var converter StringConverter
	converter.kd = kd
	return converter
}

func (StringConverter) Convert(value any) (any, error) {
func (s StringConverter) Convert(value any) (any, error) {
switch castedValue := value.(type) {
case string:
return castedValue, nil
Expand All @@ -19,7 +26,11 @@ func (StringConverter) Convert(value any) (any, error) {
case bool:
return fmt.Sprint(castedValue), nil
case *ext.ExtendedTime:
return castedValue.String(""), nil
if err := s.kd.EnsureExtendedTimeDetails(); err != nil {
return nil, err
}

return castedValue.GetTime().Format(s.kd.ExtendedTimeDetails.Format), nil
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Use the column's format instead of .String(), which relies on the value's format.

default:
return nil, fmt.Errorf("expected string/*decimal.Decimal/bool received %T with value %v", value, value)
}
Expand Down
18 changes: 12 additions & 6 deletions clients/bigquery/converters/converters_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,38 +7,44 @@ import (
"github.com/stretchr/testify/assert"

"github.com/artie-labs/transfer/lib/numbers"
"github.com/artie-labs/transfer/lib/typing"
"github.com/artie-labs/transfer/lib/typing/decimal"
"github.com/artie-labs/transfer/lib/typing/ext"
)

func TestStringConverter_Convert(t *testing.T) {
converter := StringConverter{}
{
// String
val, err := converter.Convert("foo")
val, err := NewStringConverter(typing.String).Convert("foo")
assert.NoError(t, err)
assert.Equal(t, "foo", val)
}
{
// Decimal
val, err := converter.Convert(decimal.NewDecimal(numbers.MustParseDecimal("123")))
val, err := NewStringConverter(typing.EDecimal).Convert(decimal.NewDecimal(numbers.MustParseDecimal("123")))
assert.NoError(t, err)
assert.Equal(t, "123", val)
}
{
// Boolean
val, err := converter.Convert(true)
val, err := NewStringConverter(typing.Boolean).Convert(true)
assert.NoError(t, err)
assert.Equal(t, "true", val)
}
{
// Invalid
_, err := converter.Convert(123)
_, err := NewStringConverter(typing.Integer).Convert(123)
assert.ErrorContains(t, err, "expected string/*decimal.Decimal/bool received int with value 123")
}
{
// Extended time
val, err := converter.Convert(ext.NewExtendedTime(time.Date(2021, 1, 1, 0, 0, 0, 0, time.UTC), ext.TimestampTZKindType, ""))
val, err := NewStringConverter(typing.MustNewExtendedTimeDetails(typing.String, ext.TimestampTZKindType, "")).Convert(
ext.NewExtendedTime(
time.Date(2021, 1, 1, 0, 0, 0, 0, time.UTC),
ext.TimestampTZKindType,
"",
),
)
assert.NoError(t, err)
assert.Equal(t, "2021-01-01T00:00:00Z", val)
}
Expand Down
2 changes: 1 addition & 1 deletion clients/bigquery/storagewrite.go
Original file line number Diff line number Diff line change
Expand Up @@ -158,7 +158,7 @@ func rowToMessage(row map[string]any, columns []columns.Column, messageDescripto

message.Set(field, protoreflect.ValueOfString(decimalValue.String()))
case typing.String.Kind:
val, err := converters.StringConverter{}.Convert(value)
val, err := converters.NewStringConverter(column.KindDetails).Convert(value)
if err != nil {
return nil, err
}
Expand Down
4 changes: 2 additions & 2 deletions clients/snowflake/snowflake_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -224,7 +224,7 @@ func (s *SnowflakeTestSuite) TestExecuteMergeDeletionFlagRemoval() {
constants.DeleteColumnMarker: typing.Boolean,
constants.OnlySetDeleteColumnMarker: typing.Boolean,
// Add kindDetails to created_at
"created_at": typing.MustParseValue("", nil, time.Now().Format(time.RFC3339Nano)),
"created_at": typing.MustNewExtendedTimeDetails(typing.ETime, ext.TimestampTZKindType, ""),
}

var cols columns.Columns
Expand Down Expand Up @@ -273,7 +273,7 @@ func (s *SnowflakeTestSuite) TestExecuteMergeDeletionFlagRemoval() {

inMemColumns := tableData.ReadOnlyInMemoryCols()
// Since sflkColumns overwrote the format, let's set it correctly again.
inMemColumns.UpdateColumn(columns.NewColumn("created_at", typing.MustParseValue("", nil, time.Now().Format(time.RFC3339Nano))))
inMemColumns.UpdateColumn(columns.NewColumn("created_at", typing.MustNewExtendedTimeDetails(typing.ETime, ext.TimestampTZKindType, time.RFC3339Nano)))
tableData.SetInMemoryColumns(inMemColumns)
break
}
Expand Down
12 changes: 6 additions & 6 deletions clients/snowflake/staging.go
Original file line number Diff line number Diff line change
Expand Up @@ -119,18 +119,18 @@ func (s *Store) writeTemporaryTableFile(tableData *optimization.TableData, newTa
writer.Comma = '\t'

columns := tableData.ReadOnlyInMemoryCols().ValidColumns()
for _, value := range tableData.Rows() {
var row []string
for _, row := range tableData.Rows() {
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Renamed the loop variable (`value` → `row`, and the output slice `row` → `csvRow`) so it's less confusing.

var csvRow []string
for _, col := range columns {
castedValue, castErr := castColValStaging(value[col.Name()], col.KindDetails)
castedValue, castErr := castColValStaging(row[col.Name()], col.KindDetails)
if castErr != nil {
return "", castErr
return "", fmt.Errorf("failed to cast value '%v': %w", row[col.Name()], castErr)
}

row = append(row, castedValue)
csvRow = append(csvRow, castedValue)
}

if err = writer.Write(row); err != nil {
if err = writer.Write(csvRow); err != nil {
return "", fmt.Errorf("failed to write to csv: %w", err)
}
}
Expand Down
7 changes: 5 additions & 2 deletions lib/optimization/table_data.go
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
package optimization

import (
"cmp"
"fmt"
"strings"
"time"
Expand Down Expand Up @@ -272,8 +273,8 @@ func (t *TableData) MergeColumnsFromDestination(destCols ...columns.Column) erro
}

if found {
// TODO: Move this whole block into a function and add unit-tests.
inMemoryCol.KindDetails.Kind = foundColumn.KindDetails.Kind

// Copy over backfilled
inMemoryCol.SetBackfilled(foundColumn.Backfilled())

Expand All @@ -300,8 +301,10 @@ func (t *TableData) MergeColumnsFromDestination(destCols ...columns.Column) erro
}
}

// Just copy over the type since the format wouldn't be present in the destination
// Copy over the type
inMemoryCol.KindDetails.ExtendedTimeDetails.Type = foundColumn.KindDetails.ExtendedTimeDetails.Type
// If the in-memory column has no format, we should use the format from the destination.
inMemoryCol.KindDetails.ExtendedTimeDetails.Format = cmp.Or(inMemoryCol.KindDetails.ExtendedTimeDetails.Format, foundColumn.KindDetails.ExtendedTimeDetails.Format)

}

Expand Down
4 changes: 2 additions & 2 deletions lib/optimization/table_data_merge_columns_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -66,8 +66,8 @@ func TestTableData_UpdateInMemoryColumnsFromDestination(t *testing.T) {
assert.True(t, isOk)
assert.Equal(t, typing.ETime.Kind, updatedColumn.KindDetails.Kind)
assert.Equal(t, ext.DateKindType, updatedColumn.KindDetails.ExtendedTimeDetails.Type)
// Format is not copied over.
assert.Equal(t, "", updatedColumn.KindDetails.ExtendedTimeDetails.Format)
// Format is copied over.
assert.Equal(t, ext.PostgresDateFormat, updatedColumn.KindDetails.ExtendedTimeDetails.Format)
}
{
// In-memory column is NUMERIC and destination column is an INTEGER
Expand Down
5 changes: 0 additions & 5 deletions lib/typing/ext/time.go
Original file line number Diff line number Diff line change
Expand Up @@ -81,8 +81,3 @@ func (e *ExtendedTime) GetTime() time.Time {
// GetNestedKind returns the nested kind stored on this ExtendedTime.
func (e *ExtendedTime) GetNestedKind() NestedKind {
	nestedKind := e.nestedKind
	return nestedKind
}

func (e *ExtendedTime) String(overrideFormat string) string {
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No longer used!

format := cmp.Or(overrideFormat, e.nestedKind.Format)
return e.ts.Format(format)
}
4 changes: 4 additions & 0 deletions lib/typing/typing.go
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,10 @@ func (k *KindDetails) EnsureExtendedTimeDetails() error {
return fmt.Errorf("extended time details is not set")
}

if k.ExtendedTimeDetails.Format == "" {
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Added a guardrail so that we never call time.Format("") with an empty layout.

return fmt.Errorf("extended time details format is not set")
}

return nil
}

Expand Down
3 changes: 1 addition & 2 deletions lib/typing/values/string_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -33,8 +33,7 @@ func TestToString(t *testing.T) {
assert.ErrorContains(t, err, "extended time details is not set")
}
{
eTimeCol := columns.NewColumn("time", typing.ETime)
eTimeCol.KindDetails.ExtendedTimeDetails = &ext.NestedKind{Type: ext.TimeKindType}
eTimeCol := columns.NewColumn("time", typing.MustNewExtendedTimeDetails(typing.ETime, ext.TimeKindType, ""))
{
// Using [string]
val, err := ToString("2021-01-01T03:52:00Z", eTimeCol.KindDetails)
Expand Down