Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Simplifying Merge Columns #917

Merged
merged 6 commits into from
Sep 19, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 11 additions & 19 deletions lib/optimization/table_data.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@ package optimization

import (
"fmt"
"log/slog"
"strings"
"time"

Expand Down Expand Up @@ -273,34 +272,27 @@ func (t *TableData) MergeColumnsFromDestination(destCols ...columns.Column) erro
}

if found {
// TODO: Simplify this whole logic
// If the inMemoryColumn is decimal and foundColumn is integer, don't copy it over.
// This is because parsing NUMERIC(...) will return an INTEGER if there's no decimal point.
// However, this will wipe the precision unit from the INTEGER which may cause integer overflow.
shouldSkip := inMemoryCol.KindDetails.Kind == typing.EDecimal.Kind && foundColumn.KindDetails.Kind == typing.Integer.Kind
if shouldSkip {
slog.Warn("Skipping column", slog.String("column", inMemoryCol.Name()), slog.String("inMemoryKind", inMemoryCol.KindDetails.Kind), slog.String("foundKind", foundColumn.KindDetails.Kind))
} else {
// We should take `kindDetails.kind` and `backfilled` from foundCol
// We are not taking primaryKey and defaultValue because DWH does not have this information.
// Note: If our in-memory column is `Invalid`, it would get skipped during merge. However, if the column exists in
// the destination, we'll copy the type over. This is to make sure we don't miss batch updates where the whole column in the batch is NULL.
inMemoryCol.KindDetails.Kind = foundColumn.KindDetails.Kind
if foundColumn.KindDetails.OptionalStringPrecision != nil {
inMemoryCol.KindDetails.OptionalStringPrecision = foundColumn.KindDetails.OptionalStringPrecision
}
}
inMemoryCol.KindDetails.Kind = foundColumn.KindDetails.Kind

// Copy over backfilled
inMemoryCol.SetBackfilled(foundColumn.Backfilled())

// Copy over string precision, if it exists
if foundColumn.KindDetails.OptionalStringPrecision != nil {
inMemoryCol.KindDetails.OptionalStringPrecision = foundColumn.KindDetails.OptionalStringPrecision
}

// Copy over the time details
if foundColumn.KindDetails.ExtendedTimeDetails != nil {
if inMemoryCol.KindDetails.ExtendedTimeDetails == nil {
inMemoryCol.KindDetails.ExtendedTimeDetails = &ext.NestedKind{}
}

// Don't have tcKind.ExtendedTimeDetails update the format since the DWH will not have that.
// Just copy over the type since the format wouldn't be present in the destination
inMemoryCol.KindDetails.ExtendedTimeDetails.Type = foundColumn.KindDetails.ExtendedTimeDetails.Type
}

// Copy over the decimal details
if foundColumn.KindDetails.ExtendedDecimalDetails != nil && inMemoryCol.KindDetails.ExtendedDecimalDetails == nil {
inMemoryCol.KindDetails.ExtendedDecimalDetails = foundColumn.KindDetails.ExtendedDecimalDetails
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,104 +10,91 @@ import (
"github.com/stretchr/testify/assert"
)

const strCol = "string"

func TestTableData_UpdateInMemoryColumnsFromDestination(t *testing.T) {
const strCol = "string"
tableDataCols := &columns.Columns{}
tableData := &TableData{inMemoryColumns: tableDataCols}
{
tableDataCols := &columns.Columns{}
tableData := &TableData{
inMemoryColumns: tableDataCols,
}

// Trying to merge an invalid destination column
tableData.AddInMemoryCol(columns.NewColumn("foo", typing.String))
invalidCol := columns.NewColumn("foo", typing.Invalid)
assert.ErrorContains(t, tableData.MergeColumnsFromDestination(invalidCol), `column "foo" is invalid`)
}
{
// If the in-memory column is a string and the destination column is Date
// We should mark the in-memory column as date and try to parse it accordingly.
tableDataCols := &columns.Columns{}
tableData := &TableData{
inMemoryColumns: tableDataCols,
}

// In-memory column is a string and the destination column is a Date
tableData.AddInMemoryCol(columns.NewColumn("foo", typing.String))

extTime := typing.ETime
extTime.ExtendedTimeDetails = &ext.NestedKind{
Type: ext.DateKindType,
}

extTime.ExtendedTimeDetails = &ext.Date
tsCol := columns.NewColumn("foo", extTime)
assert.NoError(t, tableData.MergeColumnsFromDestination(tsCol))

col, isOk := tableData.inMemoryColumns.GetColumn("foo")
updatedColumn, isOk := tableData.inMemoryColumns.GetColumn("foo")
assert.True(t, isOk)
assert.Equal(t, typing.ETime.Kind, col.KindDetails.Kind)
assert.Equal(t, ext.DateKindType, col.KindDetails.ExtendedTimeDetails.Type)
assert.Equal(t, extTime.ExtendedTimeDetails, col.KindDetails.ExtendedTimeDetails)
assert.Equal(t, typing.ETime.Kind, updatedColumn.KindDetails.Kind)
assert.Equal(t, ext.DateKindType, updatedColumn.KindDetails.ExtendedTimeDetails.Type)
// Format is not copied over.
assert.Equal(t, "", updatedColumn.KindDetails.ExtendedTimeDetails.Format)
}
{
tableDataCols := &columns.Columns{}
tableData := &TableData{
inMemoryColumns: tableDataCols,
}

tableDataCols.AddColumn(columns.NewColumn("name", typing.String))
tableDataCols.AddColumn(columns.NewColumn("bool_backfill", typing.Boolean))
tableDataCols.AddColumn(columns.NewColumn("prev_invalid", typing.Invalid))
// In-memory column is NUMERIC and destination column is an INTEGER
tableDataCols.AddColumn(columns.NewColumn("numeric_test", typing.EDecimal))
assert.NoError(t, tableData.MergeColumnsFromDestination(columns.NewColumn("numeric_test", typing.Integer)))

// Casting these as STRING so tableColumn via this f(x) will set it correctly.
tableDataCols.AddColumn(columns.NewColumn("ext_date", typing.String))
tableDataCols.AddColumn(columns.NewColumn("ext_time", typing.String))
tableDataCols.AddColumn(columns.NewColumn("ext_datetime", typing.String))
tableDataCols.AddColumn(columns.NewColumn("ext_dec", typing.String))

extDecimalType := typing.NewDecimalDetailsFromTemplate(typing.EDecimal, decimal.NewDetails(22, 2))
tableDataCols.AddColumn(columns.NewColumn("ext_dec_filled", extDecimalType))
numericCol, isOk := tableData.inMemoryColumns.GetColumn("numeric_test")
assert.True(t, isOk)
assert.Equal(t, typing.Integer.Kind, numericCol.KindDetails.Kind)
}
{
// Boolean column that has been backfilled
tableDataCols.AddColumn(columns.NewColumn("bool_backfill", typing.Boolean))
backfilledCol := columns.NewColumn("bool_backfill", typing.Boolean)
backfilledCol.SetBackfilled(true)

tableDataCols.AddColumn(columns.NewColumn(strCol, typing.String))
// Backfill was not set
column, isOk := tableData.inMemoryColumns.GetColumn("bool_backfill")
assert.True(t, isOk)
assert.False(t, column.Backfilled())

assert.NoError(t, tableData.MergeColumnsFromDestination(backfilledCol))
// Backfill is set after merge.
column, isOk = tableData.inMemoryColumns.GetColumn("bool_backfill")
assert.True(t, isOk)
assert.True(t, column.Backfilled())
}
{
// Non-existent columns should not be copied over.
nonExistentTableCols := []string{"dusty", "the", "mini", "aussie"}
var nonExistentCols []columns.Column
for _, nonExistentTableCol := range nonExistentTableCols {
nonExistentCols = append(nonExistentCols, columns.NewColumn(nonExistentTableCol, typing.String))
}

// Testing to make sure we don't copy over non-existent columns
assert.NoError(t, tableData.MergeColumnsFromDestination(nonExistentCols...))
for _, nonExistentTableCol := range nonExistentTableCols {
_, isOk := tableData.inMemoryColumns.GetColumn(nonExistentTableCol)
assert.False(t, isOk, nonExistentTableCol)
}
}
{
// In-memory column was invalid, but the destination column is valid
tableDataCols.AddColumn(columns.NewColumn("invalid_test", typing.Invalid))
assert.NoError(t, tableData.MergeColumnsFromDestination(columns.NewColumn("invalid_test", typing.String)))

// Making sure it's still numeric
assert.NoError(t, tableData.MergeColumnsFromDestination(columns.NewColumn("numeric_test", typing.Integer)))
numericCol, isOk := tableData.inMemoryColumns.GetColumn("numeric_test")
assert.True(t, isOk)
assert.Equal(t, typing.EDecimal.Kind, numericCol.KindDetails.Kind, "numeric_test")

// Testing to make sure we're copying the kindDetails over.
assert.NoError(t, tableData.MergeColumnsFromDestination(columns.NewColumn("prev_invalid", typing.String)))
prevInvalidCol, isOk := tableData.inMemoryColumns.GetColumn("prev_invalid")
invalidCol, isOk := tableData.inMemoryColumns.GetColumn("invalid_test")
assert.True(t, isOk)
assert.Equal(t, typing.String, prevInvalidCol.KindDetails)
assert.Equal(t, typing.String.Kind, invalidCol.KindDetails.Kind)
}
{
// Casting these as STRING so tableColumn via this f(x) will set it correctly.
tableDataCols.AddColumn(columns.NewColumn("ext_date", typing.String))
tableDataCols.AddColumn(columns.NewColumn("ext_time", typing.String))
tableDataCols.AddColumn(columns.NewColumn("ext_datetime", typing.String))
tableDataCols.AddColumn(columns.NewColumn("ext_dec", typing.String))

// Testing backfill
for _, inMemoryCol := range tableData.inMemoryColumns.GetColumns() {
assert.False(t, inMemoryCol.Backfilled(), inMemoryCol.Name())
}
backfilledCol := columns.NewColumn("bool_backfill", typing.Boolean)
backfilledCol.SetBackfilled(true)
assert.NoError(t, tableData.MergeColumnsFromDestination(backfilledCol))
for _, inMemoryCol := range tableData.inMemoryColumns.GetColumns() {
if inMemoryCol.Name() == backfilledCol.Name() {
assert.True(t, inMemoryCol.Backfilled(), inMemoryCol.Name())
} else {
assert.False(t, inMemoryCol.Backfilled(), inMemoryCol.Name())
}
}
extDecimalType := typing.NewDecimalDetailsFromTemplate(typing.EDecimal, decimal.NewDetails(22, 2))
tableDataCols.AddColumn(columns.NewColumn("ext_dec_filled", extDecimalType))
tableDataCols.AddColumn(columns.NewColumn(strCol, typing.String))

// Testing extTimeDetails
for _, extTimeDetailsCol := range []string{"ext_date", "ext_time", "ext_datetime"} {
Expand Down Expand Up @@ -169,20 +156,15 @@ func TestTableData_UpdateInMemoryColumnsFromDestination(t *testing.T) {
assert.Equal(t, int32(2), extDecColFilled.KindDetails.ExtendedDecimalDetails.Scale())
}
{
tableDataCols := &columns.Columns{}
tableData := &TableData{
inMemoryColumns: tableDataCols,
}

// String (precision being copied over)
tableDataCols.AddColumn(columns.NewColumn(strCol, typing.String))
assert.NoError(t, tableData.MergeColumnsFromDestination(columns.NewColumn(strCol,
typing.KindDetails{
Kind: typing.String.Kind,
OptionalStringPrecision: typing.ToPtr(int32(123)),
}),
))

// Testing string precision
stringKindWithPrecision := typing.KindDetails{
Kind: typing.String.Kind,
OptionalStringPrecision: typing.ToPtr(int32(123)),
}

assert.NoError(t, tableData.MergeColumnsFromDestination(columns.NewColumn(strCol, stringKindWithPrecision)))
foundStrCol, isOk := tableData.inMemoryColumns.GetColumn(strCol)
assert.True(t, isOk)
assert.Equal(t, typing.String.Kind, foundStrCol.KindDetails.Kind)
Expand Down
2 changes: 2 additions & 0 deletions lib/typing/ext/time.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@ import (
"time"
)

// TODO: This package should have a concept of default formats for each type.

type ExtendedTimeKindType string

const (
Expand Down