From 0b260fe0440cf9fc3c3bdbf4b741388c0a6e5f81 Mon Sep 17 00:00:00 2001 From: Robin Tang Date: Thu, 27 Jun 2024 13:56:58 -0700 Subject: [PATCH] Decimal clean up. (#776) --- lib/cdc/util/optional_schema_test.go | 40 ++++++--------------------- lib/debezium/schema.go | 8 ++---- lib/debezium/schema_test.go | 8 ++---- lib/optimization/event_update_test.go | 6 ++-- lib/parquetutil/parse_values_test.go | 3 +- lib/typing/decimal/base.go | 6 ++-- lib/typing/decimal/decimal.go | 4 +-- lib/typing/decimal/decimal_test.go | 28 +++++++++---------- lib/typing/decimal/details.go | 18 ++++++------ lib/typing/decimal/details_test.go | 2 +- lib/typing/numeric.go | 4 +-- lib/typing/typing.go | 10 ++++++- 12 files changed, 54 insertions(+), 83 deletions(-) diff --git a/lib/cdc/util/optional_schema_test.go b/lib/cdc/util/optional_schema_test.go index d4d045b53..c126ba17e 100644 --- a/lib/cdc/util/optional_schema_test.go +++ b/lib/cdc/util/optional_schema_test.go @@ -64,38 +64,14 @@ func TestGetOptionalSchema(t *testing.T) { "boolean_test": typing.Boolean, "bool_test": typing.Boolean, "bit_test": typing.Boolean, - "numeric_test": { - Kind: typing.EDecimal.Kind, - ExtendedDecimalDetails: decimal.NewDecimalDetails(decimal.PrecisionNotSpecified, decimal.DefaultScale), - }, - "numeric_5": { - Kind: typing.EDecimal.Kind, - ExtendedDecimalDetails: decimal.NewDecimalDetails(5, 0), - }, - "numeric_5_2": { - Kind: typing.EDecimal.Kind, - ExtendedDecimalDetails: decimal.NewDecimalDetails(5, 2), - }, - "numeric_5_6": { - Kind: typing.EDecimal.Kind, - ExtendedDecimalDetails: decimal.NewDecimalDetails(5, 6), - }, - "numeric_5_0": { - Kind: typing.EDecimal.Kind, - ExtendedDecimalDetails: decimal.NewDecimalDetails(5, 0), - }, - "numeric_39_0": { - Kind: typing.EDecimal.Kind, - ExtendedDecimalDetails: decimal.NewDecimalDetails(39, 0), - }, - "numeric_39_2": { - Kind: typing.EDecimal.Kind, - ExtendedDecimalDetails: decimal.NewDecimalDetails(39, 2), - }, - "numeric_39_6": { - Kind: typing.EDecimal.Kind, - ExtendedDecimalDetails: decimal.NewDecimalDetails(39, 6), - }, + "numeric_test": typing.NewDecimalDetailsFromTemplate(typing.EDecimal, decimal.NewDetails(decimal.PrecisionNotSpecified, decimal.DefaultScale)), + "numeric_5": typing.NewDecimalDetailsFromTemplate(typing.EDecimal, decimal.NewDetails(5, 0)), + "numeric_5_0": typing.NewDecimalDetailsFromTemplate(typing.EDecimal, decimal.NewDetails(5, 0)), + "numeric_5_2": typing.NewDecimalDetailsFromTemplate(typing.EDecimal, decimal.NewDetails(5, 2)), + "numeric_5_6": typing.NewDecimalDetailsFromTemplate(typing.EDecimal, decimal.NewDetails(5, 6)), + "numeric_39_0": typing.NewDecimalDetailsFromTemplate(typing.EDecimal, decimal.NewDetails(39, 0)), + "numeric_39_2": typing.NewDecimalDetailsFromTemplate(typing.EDecimal, decimal.NewDetails(39, 2)), + "numeric_39_6": typing.NewDecimalDetailsFromTemplate(typing.EDecimal, decimal.NewDetails(39, 6)), }, }, } diff --git a/lib/debezium/schema.go b/lib/debezium/schema.go index 7a2f43c91..db9bcbb52 100644 --- a/lib/debezium/schema.go +++ b/lib/debezium/schema.go @@ -103,16 +103,12 @@ func (f Field) ToKindDetails() typing.KindDetails { precision = *precisionPtr } - eDecimal := typing.EDecimal - eDecimal.ExtendedDecimalDetails = decimal.NewDecimalDetails(precision, scale) - return eDecimal + return typing.NewDecimalDetailsFromTemplate(typing.EDecimal, decimal.NewDetails(precision, scale)) case KafkaVariableNumericType: // For variable numeric types, we are defaulting to a scale of 5 // This is because scale is not specified at the column level, rather at the row level // It shouldn't matter much anyway since the column type we are creating is `TEXT` to avoid boundary errors. - eDecimal := typing.EDecimal - eDecimal.ExtendedDecimalDetails = decimal.NewDecimalDetails(decimal.PrecisionNotSpecified, decimal.DefaultScale) - return eDecimal + return typing.NewDecimalDetailsFromTemplate(typing.EDecimal, decimal.NewDetails(decimal.PrecisionNotSpecified, decimal.DefaultScale)) } switch f.Type { diff --git a/lib/debezium/schema_test.go b/lib/debezium/schema_test.go index 5499c5af9..e7c74f462 100644 --- a/lib/debezium/schema_test.go +++ b/lib/debezium/schema_test.go @@ -88,12 +88,8 @@ func TestField_ToKindDetails(t *testing.T) { expectedKindDetails typing.KindDetails } - eDecimal := typing.EDecimal - eDecimal.ExtendedDecimalDetails = decimal.NewDecimalDetails(decimal.PrecisionNotSpecified, decimal.DefaultScale) - - kafkaDecimalType := typing.EDecimal - kafkaDecimalType.ExtendedDecimalDetails = decimal.NewDecimalDetails(10, 5) - + eDecimal := typing.NewDecimalDetailsFromTemplate(typing.EDecimal, decimal.NewDetails(decimal.PrecisionNotSpecified, decimal.DefaultScale)) + kafkaDecimalType := typing.NewDecimalDetailsFromTemplate(typing.EDecimal, decimal.NewDetails(10, 5)) tcs := []_tc{ { name: "int16", diff --git a/lib/optimization/event_update_test.go b/lib/optimization/event_update_test.go index 50d30a556..9c55a2857 100644 --- a/lib/optimization/event_update_test.go +++ b/lib/optimization/event_update_test.go @@ -41,8 +41,7 @@ func TestTableData_UpdateInMemoryColumnsFromDestination(t *testing.T) { tableDataCols.AddColumn(columns.NewColumn("ext_datetime", typing.String)) tableDataCols.AddColumn(columns.NewColumn("ext_dec", typing.String)) - extDecimalType := typing.EDecimal - extDecimalType.ExtendedDecimalDetails = decimal.NewDecimalDetails(22, 2) + extDecimalType := typing.NewDecimalDetailsFromTemplate(typing.EDecimal, decimal.NewDetails(22, 2)) tableDataCols.AddColumn(columns.NewColumn("ext_dec_filled", extDecimalType)) tableDataCols.AddColumn(columns.NewColumn(strCol, typing.String)) @@ -120,8 +119,7 @@ func TestTableData_UpdateInMemoryColumnsFromDestination(t *testing.T) { assert.True(t, isOk) assert.Equal(t, typing.String, extDecCol.KindDetails) - extDecimal := typing.EDecimal - extDecimal.ExtendedDecimalDetails = decimal.NewDecimalDetails(30, 2) + extDecimal := typing.NewDecimalDetailsFromTemplate(typing.EDecimal, decimal.NewDetails(30, 2)) assert.NoError(t, tableData.MergeColumnsFromDestination(columns.NewColumn("ext_dec", extDecimal))) // Now it should be ext decimal type extDecCol, isOk = tableData.inMemoryColumns.GetColumn("ext_dec") diff --git a/lib/parquetutil/parse_values_test.go b/lib/parquetutil/parse_values_test.go index ac3cb7183..70fc01bf4 100644 --- a/lib/parquetutil/parse_values_test.go +++ b/lib/parquetutil/parse_values_test.go @@ -13,8 +13,7 @@ import ( ) func TestParseValue(t *testing.T) { - eDecimal := typing.EDecimal - eDecimal.ExtendedDecimalDetails = decimal.NewDecimalDetails(30, 5) + eDecimal := typing.NewDecimalDetailsFromTemplate(typing.EDecimal, decimal.NewDetails(30, 5)) eTime := typing.ETime eTime.ExtendedTimeDetails = &ext.Time diff --git a/lib/typing/decimal/base.go b/lib/typing/decimal/base.go index c2f7402fe..a6ac4f3eb 100644 --- a/lib/typing/decimal/base.go +++ b/lib/typing/decimal/base.go @@ -6,7 +6,7 @@ import ( "github.com/artie-labs/transfer/lib/numbers" ) -func (d *DecimalDetails) isNumeric() bool { +func (d Details) isNumeric() bool { if d.precision == PrecisionNotSpecified { return false } @@ -20,7 +20,7 @@ func (d *DecimalDetails) isNumeric() bool { return numbers.BetweenEq(max(1, d.scale), d.scale+29, d.precision) } -func (d *DecimalDetails) isBigNumeric() bool { +func (d Details) isBigNumeric() bool { if d.precision == PrecisionNotSpecified { return false } @@ -34,7 +34,7 @@ func (d *DecimalDetails) isBigNumeric() bool { return numbers.BetweenEq(max(1, d.scale), d.scale+38, d.precision) } -func (d *DecimalDetails) toKind(maxPrecision int32, exceededKind string) string { +func (d Details) toKind(maxPrecision int32, exceededKind string) string { if d.precision > maxPrecision || d.precision == PrecisionNotSpecified { return exceededKind } diff --git a/lib/typing/decimal/decimal.go b/lib/typing/decimal/decimal.go index 8e3af1b5b..c220c353b 100644 --- a/lib/typing/decimal/decimal.go +++ b/lib/typing/decimal/decimal.go @@ -57,6 +57,6 @@ func (d *Decimal) String() string { return d.value.Text('f') } -func (d *Decimal) Details() DecimalDetails { - return DecimalDetails{scale: d.Scale(), precision: d.precision} +func (d *Decimal) Details() Details { + return Details{scale: d.Scale(), precision: d.precision} } diff --git a/lib/typing/decimal/decimal_test.go b/lib/typing/decimal/decimal_test.go index 4aba71760..2fc86aa60 100644 --- a/lib/typing/decimal/decimal_test.go +++ b/lib/typing/decimal/decimal_test.go @@ -15,13 +15,13 @@ func TestNewDecimal(t *testing.T) { func TestNewDecimalWithPrecision(t *testing.T) { // Precision = -1 (PrecisionNotSpecified): - assert.Equal(t, DecimalDetails{scale: 2, precision: -1}, NewDecimalWithPrecision(numbers.MustParseDecimal("12.34"), PrecisionNotSpecified).Details()) + assert.Equal(t, Details{scale: 2, precision: -1}, NewDecimalWithPrecision(numbers.MustParseDecimal("12.34"), PrecisionNotSpecified).Details()) // Precision = scale: - assert.Equal(t, DecimalDetails{scale: 2, precision: 2}, NewDecimalWithPrecision(numbers.MustParseDecimal("12.34"), 2).Details()) + assert.Equal(t, Details{scale: 2, precision: 2}, NewDecimalWithPrecision(numbers.MustParseDecimal("12.34"), 2).Details()) // Precision < scale: - assert.Equal(t, DecimalDetails{scale: 2, precision: 3}, NewDecimalWithPrecision(numbers.MustParseDecimal("12.34"), 1).Details()) + assert.Equal(t, Details{scale: 2, precision: 3}, NewDecimalWithPrecision(numbers.MustParseDecimal("12.34"), 1).Details()) // Precision > scale: - assert.Equal(t, DecimalDetails{scale: 2, precision: 4}, NewDecimalWithPrecision(numbers.MustParseDecimal("12.34"), 4).Details()) + assert.Equal(t, Details{scale: 2, precision: 4}, NewDecimalWithPrecision(numbers.MustParseDecimal("12.34"), 4).Details()) } func TestDecimal_Scale(t *testing.T) { @@ -36,16 +36,16 @@ func TestDecimal_Scale(t *testing.T) { func TestDecimal_Details(t *testing.T) { // -1 precision (PrecisionNotSpecified): - assert.Equal(t, DecimalDetails{scale: 0, precision: -1}, NewDecimal(numbers.MustParseDecimal("0")).Details()) - assert.Equal(t, DecimalDetails{scale: 0, precision: -1}, NewDecimal(numbers.MustParseDecimal("12345")).Details()) - assert.Equal(t, DecimalDetails{scale: 0, precision: -1}, NewDecimal(numbers.MustParseDecimal("-12")).Details()) - assert.Equal(t, DecimalDetails{scale: 2, precision: -1}, NewDecimal(numbers.MustParseDecimal("12345.12")).Details()) - assert.Equal(t, DecimalDetails{scale: 3, precision: -1}, NewDecimal(numbers.MustParseDecimal("-12345.123")).Details()) + assert.Equal(t, Details{scale: 0, precision: -1}, NewDecimal(numbers.MustParseDecimal("0")).Details()) + assert.Equal(t, Details{scale: 0, precision: -1}, NewDecimal(numbers.MustParseDecimal("12345")).Details()) + assert.Equal(t, Details{scale: 0, precision: -1}, NewDecimal(numbers.MustParseDecimal("-12")).Details()) + assert.Equal(t, Details{scale: 2, precision: -1}, NewDecimal(numbers.MustParseDecimal("12345.12")).Details()) + assert.Equal(t, Details{scale: 3, precision: -1}, NewDecimal(numbers.MustParseDecimal("-12345.123")).Details()) // 10 precision: - assert.Equal(t, DecimalDetails{scale: 0, precision: 10}, NewDecimalWithPrecision(numbers.MustParseDecimal("0"), 10).Details()) - assert.Equal(t, DecimalDetails{scale: 0, precision: 10}, NewDecimalWithPrecision(numbers.MustParseDecimal("12345"), 10).Details()) - assert.Equal(t, DecimalDetails{scale: 0, precision: 10}, NewDecimalWithPrecision(numbers.MustParseDecimal("-12"), 10).Details()) - assert.Equal(t, DecimalDetails{scale: 2, precision: 10}, NewDecimalWithPrecision(numbers.MustParseDecimal("12345.12"), 10).Details()) - assert.Equal(t, DecimalDetails{scale: 3, precision: 10}, NewDecimalWithPrecision(numbers.MustParseDecimal("-12345.123"), 10).Details()) + assert.Equal(t, Details{scale: 0, precision: 10}, NewDecimalWithPrecision(numbers.MustParseDecimal("0"), 10).Details()) + assert.Equal(t, Details{scale: 0, precision: 10}, NewDecimalWithPrecision(numbers.MustParseDecimal("12345"), 10).Details()) + assert.Equal(t, Details{scale: 0, precision: 10}, NewDecimalWithPrecision(numbers.MustParseDecimal("-12"), 10).Details()) + assert.Equal(t, Details{scale: 2, precision: 10}, NewDecimalWithPrecision(numbers.MustParseDecimal("12345.12"), 10).Details()) + assert.Equal(t, Details{scale: 3, precision: 10}, NewDecimalWithPrecision(numbers.MustParseDecimal("-12345.123"), 10).Details()) } diff --git a/lib/typing/decimal/details.go b/lib/typing/decimal/details.go index 7c7a0fe2b..688046866 100644 --- a/lib/typing/decimal/details.go +++ b/lib/typing/decimal/details.go @@ -4,12 +4,12 @@ import ( "fmt" ) -type DecimalDetails struct { +type Details struct { scale int32 precision int32 } -func NewDecimalDetails(precision int32, scale int32) *DecimalDetails { +func NewDetails(precision int32, scale int32) Details { if scale > precision && precision != PrecisionNotSpecified { // Note: -1 precision means it's not specified. @@ -18,38 +18,38 @@ func NewDecimalDetails(precision int32, scale int32) *DecimalDetails { precision = scale + 1 } - return &DecimalDetails{ + return Details{ scale: scale, precision: precision, } } -func (d DecimalDetails) Scale() int32 { +func (d Details) Scale() int32 { return d.scale } -func (d DecimalDetails) Precision() int32 { +func (d Details) Precision() int32 { return d.precision } // SnowflakeKind - is used to determine whether a NUMERIC data type should be a STRING or NUMERIC(p, s). -func (d *DecimalDetails) SnowflakeKind() string { +func (d Details) SnowflakeKind() string { return d.toKind(MaxPrecisionBeforeString, "STRING") } // MsSQLKind - Has the same limitation as Redshift // Spec: https://learn.microsoft.com/en-us/sql/t-sql/data-types/decimal-and-numeric-transact-sql?view=sql-server-ver16#arguments -func (d *DecimalDetails) MsSQLKind() string { +func (d Details) MsSQLKind() string { return d.toKind(MaxPrecisionBeforeString, "TEXT") } // RedshiftKind - is used to determine whether a NUMERIC data type should be a TEXT or NUMERIC(p, s). -func (d *DecimalDetails) RedshiftKind() string { +func (d Details) RedshiftKind() string { return d.toKind(MaxPrecisionBeforeString, "TEXT") } // BigQueryKind - is inferring logic from: https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#decimal_types -func (d *DecimalDetails) BigQueryKind() string { +func (d Details) BigQueryKind() string { if d.isNumeric() { return fmt.Sprintf("NUMERIC(%v, %v)", d.precision, d.scale) } else if d.isBigNumeric() { diff --git a/lib/typing/decimal/details_test.go b/lib/typing/decimal/details_test.go index 37b9bcc21..bacaa2862 100644 --- a/lib/typing/decimal/details_test.go +++ b/lib/typing/decimal/details_test.go @@ -68,7 +68,7 @@ func TestDecimalDetailsKind(t *testing.T) { } for _, testCase := range testCases { - d := NewDecimalDetails(testCase.Precision, testCase.Scale) + d := NewDetails(testCase.Precision, testCase.Scale) assert.Equal(t, testCase.ExpectedSnowflakeKind, d.SnowflakeKind(), testCase.Name) assert.Equal(t, testCase.ExpectedRedshiftKind, d.RedshiftKind(), testCase.Name) assert.Equal(t, testCase.ExpectedBigQueryKind, d.BigQueryKind(), testCase.Name) diff --git a/lib/typing/numeric.go b/lib/typing/numeric.go index 7651c49f3..204119ebf 100644 --- a/lib/typing/numeric.go +++ b/lib/typing/numeric.go @@ -27,7 +27,5 @@ func ParseNumeric(parts []string) KindDetails { return Integer } - eDec := EDecimal - eDec.ExtendedDecimalDetails = decimal.NewDecimalDetails(parsedNumbers[0], parsedNumbers[1]) - return eDec + return NewDecimalDetailsFromTemplate(EDecimal, decimal.NewDetails(parsedNumbers[0], parsedNumbers[1])) } diff --git a/lib/typing/typing.go b/lib/typing/typing.go index 1b9556188..e55c9d964 100644 --- a/lib/typing/typing.go +++ b/lib/typing/typing.go @@ -20,7 +20,7 @@ type Settings struct { type KindDetails struct { Kind string ExtendedTimeDetails *ext.NestedKind - ExtendedDecimalDetails *decimal.DecimalDetails + ExtendedDecimalDetails *decimal.Details // Optional kind details metadata OptionalStringPrecision *int @@ -66,6 +66,14 @@ var ( } ) +func NewDecimalDetailsFromTemplate(details KindDetails, decimalDetails decimal.Details) KindDetails { + if details.ExtendedDecimalDetails == nil { + details.ExtendedDecimalDetails = &decimalDetails + } + + return details +} + func NewKindDetailsFromTemplate(details KindDetails, extendedType ext.ExtendedTimeKindType) KindDetails { if details.ExtendedTimeDetails == nil { details.ExtendedTimeDetails = &ext.NestedKind{}