From e133873160da86f6a2f2811401e05673344f6634 Mon Sep 17 00:00:00 2001 From: Nathan <148575555+nathan-artie@users.noreply.github.com> Date: Tue, 25 Jun 2024 18:23:00 -0700 Subject: [PATCH 01/26] [debezium] Use `apd.Decimal` for `DecodeDecimal` --- clients/bigquery/storagewrite_test.go | 14 +++++-- lib/debezium/decimal.go | 49 +++--------------------- lib/debezium/decimal_test.go | 20 ---------- lib/parquetutil/parse_values_test.go | 12 +++++- lib/typing/decimal/decimal.go | 54 ++++++++++++++++++++++++--- lib/typing/decimal/decimal_test.go | 29 ++++++++++++++ lib/typing/values/string_test.go | 12 +++++- 7 files changed, 114 insertions(+), 76 deletions(-) diff --git a/clients/bigquery/storagewrite_test.go b/clients/bigquery/storagewrite_test.go index 7101e7718..36a8a0c38 100644 --- a/clients/bigquery/storagewrite_test.go +++ b/clients/bigquery/storagewrite_test.go @@ -2,7 +2,6 @@ package bigquery import ( "encoding/json" - "math/big" "testing" "time" @@ -11,6 +10,7 @@ import ( "github.com/artie-labs/transfer/lib/typing/columns" "github.com/artie-labs/transfer/lib/typing/decimal" "github.com/artie-labs/transfer/lib/typing/ext" + "github.com/cockroachdb/apd/v3" "github.com/stretchr/testify/assert" "google.golang.org/protobuf/encoding/protojson" ) @@ -98,6 +98,14 @@ func TestEncodePacked64TimeMicros(t *testing.T) { assert.Equal(t, int64(1<<32+1000), encodePacked64TimeMicros(epoch.Add(time.Duration(1)*time.Hour+time.Duration(1)*time.Millisecond))) } +func mustParseDecimal(value string) *apd.Decimal { + decimal, _, err := apd.NewFromString(value) + if err != nil { + panic(err) + } + return decimal +} + func TestRowToMessage(t *testing.T) { columns := []columns.Column{ columns.NewColumn("c_bool", typing.Boolean), @@ -129,9 +137,9 @@ func TestRowToMessage(t *testing.T) { "c_float_int32": int32(1234), "c_float_int64": int64(1234), "c_float_string": "4444.55555", - "c_numeric": decimal.NewDecimal(nil, 5, big.NewFloat(3.1415926)), + "c_numeric": decimal.NewDecimal(nil, 5, mustParseDecimal("3.1415926")), "c_string": "foo bar", - "c_string_decimal": decimal.NewDecimal(nil, 5, big.NewFloat(1.618033)), + "c_string_decimal": decimal.NewDecimal(nil, 5, mustParseDecimal("1.618033")), "c_time": ext.NewExtendedTime(time.Date(0, 0, 0, 4, 5, 6, 7, time.UTC), ext.TimeKindType, ""), "c_date": ext.NewExtendedTime(time.Date(2001, 2, 3, 0, 0, 0, 0, time.UTC), ext.DateKindType, ""), "c_datetime": ext.NewExtendedTime(time.Date(2001, 2, 3, 4, 5, 6, 7, time.UTC), ext.DateTimeKindType, ""), diff --git a/lib/debezium/decimal.go b/lib/debezium/decimal.go index b7d531518..e87f48428 100644 --- a/lib/debezium/decimal.go +++ b/lib/debezium/decimal.go @@ -62,33 +62,6 @@ func decodeBigInt(data []byte) *big.Int { return bigInt } -// decimalWithNewExponent takes a [apd.Decimal] and returns a new [apd.Decimal] with a the given exponent. -// If the new exponent is less precise then the extra digits will be truncated. -func decimalWithNewExponent(decimal *apd.Decimal, newExponent int32) *apd.Decimal { - exponentDelta := newExponent - decimal.Exponent // Exponent is negative. - - if exponentDelta == 0 { - return new(apd.Decimal).Set(decimal) - } - - coefficient := new(apd.BigInt).Set(&decimal.Coeff) - - if exponentDelta < 0 { - multiplier := new(apd.BigInt).Exp(apd.NewBigInt(10), apd.NewBigInt(int64(-exponentDelta)), nil) - coefficient.Mul(coefficient, multiplier) - } else if exponentDelta > 0 { - divisor := new(apd.BigInt).Exp(apd.NewBigInt(10), apd.NewBigInt(int64(exponentDelta)), nil) - coefficient.Div(coefficient, divisor) - } - - return &apd.Decimal{ - Form: decimal.Form, - Negative: decimal.Negative, - Exponent: newExponent, - Coeff: *coefficient, - } -} - // EncodeDecimal is used to encode a [apd.Decimal] to [org.apache.kafka.connect.data.Decimal]. // The scale of the value (which is the negated exponent of the decimal) is returned as the second argument. func EncodeDecimal(decimal *apd.Decimal) ([]byte, int32) { @@ -102,27 +75,17 @@ func EncodeDecimal(decimal *apd.Decimal) ([]byte, int32) { // EncodeDecimalWithScale is used to encode a [apd.Decimal] to [org.apache.kafka.connect.data.Decimal] // using a specific scale. -func EncodeDecimalWithScale(decimal *apd.Decimal, scale int32) []byte { +func EncodeDecimalWithScale(_decimal *apd.Decimal, scale int32) []byte { targetExponent := -scale // Negate scale since [Decimal.Exponent] is negative. - if decimal.Exponent != targetExponent { - decimal = decimalWithNewExponent(decimal, targetExponent) + if _decimal.Exponent != targetExponent { + _decimal = decimal.DecimalWithNewExponent(_decimal, targetExponent) } - bytes, _ := EncodeDecimal(decimal) + bytes, _ := EncodeDecimal(_decimal) return bytes } // DecodeDecimal is used to decode `org.apache.kafka.connect.data.Decimal`. func DecodeDecimal(data []byte, precision *int, scale int) *decimal.Decimal { - // Convert the big integer to a big float - bigFloat := new(big.Float).SetInt(decodeBigInt(data)) - - // Compute divisor as 10^scale with big.Int's Exp, then convert to big.Float - scaleInt := big.NewInt(int64(scale)) - ten := big.NewInt(10) - divisorInt := new(big.Int).Exp(ten, scaleInt, nil) - divisorFloat := new(big.Float).SetInt(divisorInt) - - // Perform the division - bigFloat.Quo(bigFloat, divisorFloat) - return decimal.NewDecimal(precision, scale, bigFloat) + _decimal := apd.NewWithBigInt(new(apd.BigInt).SetMathBigInt(decodeBigInt(data)), -int32(scale)) + return decimal.NewDecimal(precision, scale, _decimal) } diff --git a/lib/debezium/decimal_test.go b/lib/debezium/decimal_test.go index e3642dbed..55b1d9ead 100644 --- a/lib/debezium/decimal_test.go +++ b/lib/debezium/decimal_test.go @@ -49,26 +49,6 @@ func mustParseDecimal(value string) *apd.Decimal { return decimal } -func TestDecimalWithNewExponent(t *testing.T) { - assert.Equal(t, "0", decimalWithNewExponent(apd.New(0, 0), 0).Text('f')) - assert.Equal(t, "00", decimalWithNewExponent(apd.New(0, 1), 1).Text('f')) - assert.Equal(t, "0", decimalWithNewExponent(apd.New(0, 100), 0).Text('f')) - assert.Equal(t, "00", decimalWithNewExponent(apd.New(0, 0), 1).Text('f')) - assert.Equal(t, "0.0", decimalWithNewExponent(apd.New(0, 0), -1).Text('f')) - - // Same exponent: - assert.Equal(t, "12.349", decimalWithNewExponent(mustParseDecimal("12.349"), -3).Text('f')) - // More precise exponent: - assert.Equal(t, "12.3490", decimalWithNewExponent(mustParseDecimal("12.349"), -4).Text('f')) - assert.Equal(t, "12.34900", decimalWithNewExponent(mustParseDecimal("12.349"), -5).Text('f')) - // Lest precise exponent: - // Extra digits should be truncated rather than rounded. - assert.Equal(t, "12.34", decimalWithNewExponent(mustParseDecimal("12.349"), -2).Text('f')) - assert.Equal(t, "12.3", decimalWithNewExponent(mustParseDecimal("12.349"), -1).Text('f')) - assert.Equal(t, "12", decimalWithNewExponent(mustParseDecimal("12.349"), 0).Text('f')) - assert.Equal(t, "10", decimalWithNewExponent(mustParseDecimal("12.349"), 1).Text('f')) -} - func TestEncodeDecimal(t *testing.T) { testEncodeDecimal := func(value string, expectedScale int32) { bytes, scale := EncodeDecimal(mustParseDecimal(value)) diff --git a/lib/parquetutil/parse_values_test.go b/lib/parquetutil/parse_values_test.go index f1c133b49..9de756b55 100644 --- a/lib/parquetutil/parse_values_test.go +++ b/lib/parquetutil/parse_values_test.go @@ -1,11 +1,11 @@ package parquetutil import ( - "math/big" "testing" "github.com/artie-labs/transfer/lib/ptr" "github.com/artie-labs/transfer/lib/typing/ext" + "github.com/cockroachdb/apd/v3" "github.com/artie-labs/transfer/lib/typing" "github.com/artie-labs/transfer/lib/typing/columns" @@ -13,6 +13,14 @@ import ( "github.com/stretchr/testify/assert" ) +func mustParseDecimal(value string) *apd.Decimal { + decimal, _, err := apd.NewFromString(value) + if err != nil { + panic(err) + } + return decimal +} + func TestParseValue(t *testing.T) { eDecimal := typing.EDecimal eDecimal.ExtendedDecimalDetails = decimal.NewDecimal(ptr.ToInt(30), 5, nil) @@ -66,7 +74,7 @@ func TestParseValue(t *testing.T) { }, { name: "decimal", - colVal: decimal.NewDecimal(ptr.ToInt(30), 5, big.NewFloat(5000.2232)), + colVal: decimal.NewDecimal(ptr.ToInt(30), 5, mustParseDecimal("5000.2232")), colKind: columns.NewColumn("", eDecimal), expectedValue: "5000.22320", }, diff --git a/lib/typing/decimal/decimal.go b/lib/typing/decimal/decimal.go index dda4b2851..e243aa619 100644 --- a/lib/typing/decimal/decimal.go +++ b/lib/typing/decimal/decimal.go @@ -2,16 +2,18 @@ package decimal import ( "fmt" + "log/slog" "math/big" "github.com/artie-labs/transfer/lib/ptr" + "github.com/cockroachdb/apd/v3" ) -// Decimal is Artie's wrapper around *big.Float which can store large numbers w/ no precision loss. +// Decimal is Artie's wrapper around *apd.Decimal) which can store large numbers w/ no precision loss. type Decimal struct { scale int precision *int - value *big.Float + value *apd.Decimal } const ( @@ -22,7 +24,7 @@ const ( MaxPrecisionBeforeString = 38 ) -func NewDecimal(precision *int, scale int, value *big.Float) *Decimal { +func NewDecimal(precision *int, scale int, value *apd.Decimal) *Decimal { if precision != nil { if scale > *precision && *precision != -1 { // Note: -1 precision means it's not specified. @@ -52,18 +54,31 @@ func (d *Decimal) Precision() *int { // This is particularly useful for Snowflake because we're writing all the values as STRINGS into TSV format. // This function guarantees backwards compatibility. func (d *Decimal) String() string { - return d.value.Text('f', d.scale) + targetExponent := -int32(d.scale) + value := d.value + if value.Exponent != targetExponent { + value = DecimalWithNewExponent(value, targetExponent) + } + return value.Text('f') } func (d *Decimal) Value() any { + stringValue := d.String() + // -1 precision is used for variable scaled decimal // We are opting to emit this as a STRING because the value is technically unbounded (can get to ~1 GB). if d.precision != nil && (*d.precision > MaxPrecisionBeforeString || *d.precision == -1) { - return d.String() + return stringValue } // Depending on the precision, we will want to convert value to STRING or keep as a FLOAT. - return d.value + bigFloat, ok := new(big.Float).SetString(stringValue) + if !ok { + slog.Error("Unable to parse value to a big.Float", slog.String("value", stringValue)) + return stringValue + } + + return bigFloat } // SnowflakeKind - is used to determine whether a NUMERIC data type should be a STRING or NUMERIC(p, s). @@ -92,3 +107,30 @@ func (d *Decimal) BigQueryKind() string { return "STRING" } + +// decimalWithNewExponent takes a [apd.Decimal] and returns a new [apd.Decimal] with a the given exponent. +// If the new exponent is less precise then the extra digits will be truncated. +func DecimalWithNewExponent(decimal *apd.Decimal, newExponent int32) *apd.Decimal { + exponentDelta := newExponent - decimal.Exponent // Exponent is negative. + + if exponentDelta == 0 { + return new(apd.Decimal).Set(decimal) + } + + coefficient := new(apd.BigInt).Set(&decimal.Coeff) + + if exponentDelta < 0 { + multiplier := new(apd.BigInt).Exp(apd.NewBigInt(10), apd.NewBigInt(int64(-exponentDelta)), nil) + coefficient.Mul(coefficient, multiplier) + } else if exponentDelta > 0 { + divisor := new(apd.BigInt).Exp(apd.NewBigInt(10), apd.NewBigInt(int64(exponentDelta)), nil) + coefficient.Div(coefficient, divisor) + } + + return &apd.Decimal{ + Form: decimal.Form, + Negative: decimal.Negative, + Exponent: newExponent, + Coeff: *coefficient, + } +} diff --git a/lib/typing/decimal/decimal_test.go b/lib/typing/decimal/decimal_test.go index a11b90acb..e0841b2b8 100644 --- a/lib/typing/decimal/decimal_test.go +++ b/lib/typing/decimal/decimal_test.go @@ -3,6 +3,7 @@ package decimal import ( "testing" + "github.com/cockroachdb/apd/v3" "github.com/stretchr/testify/assert" "github.com/artie-labs/transfer/lib/ptr" @@ -76,3 +77,31 @@ func TestDecimalKind(t *testing.T) { assert.Equal(t, testCase.ExpectedBigQueryKind, d.BigQueryKind(), testCase.Name) } } + +func mustParseDecimal(value string) *apd.Decimal { + decimal, _, err := apd.NewFromString(value) + if err != nil { + panic(err) + } + return decimal +} + +func TestDecimalWithNewExponent(t *testing.T) { + assert.Equal(t, "0", DecimalWithNewExponent(apd.New(0, 0), 0).Text('f')) + assert.Equal(t, "00", DecimalWithNewExponent(apd.New(0, 1), 1).Text('f')) + assert.Equal(t, "0", DecimalWithNewExponent(apd.New(0, 100), 0).Text('f')) + assert.Equal(t, "00", DecimalWithNewExponent(apd.New(0, 0), 1).Text('f')) + assert.Equal(t, "0.0", DecimalWithNewExponent(apd.New(0, 0), -1).Text('f')) + + // Same exponent: + assert.Equal(t, "12.349", DecimalWithNewExponent(mustParseDecimal("12.349"), -3).Text('f')) + // More precise exponent: + assert.Equal(t, "12.3490", DecimalWithNewExponent(mustParseDecimal("12.349"), -4).Text('f')) + assert.Equal(t, "12.34900", DecimalWithNewExponent(mustParseDecimal("12.349"), -5).Text('f')) + // Lest precise exponent: + // Extra digits should be truncated rather than rounded. + assert.Equal(t, "12.34", DecimalWithNewExponent(mustParseDecimal("12.349"), -2).Text('f')) + assert.Equal(t, "12.3", DecimalWithNewExponent(mustParseDecimal("12.349"), -1).Text('f')) + assert.Equal(t, "12", DecimalWithNewExponent(mustParseDecimal("12.349"), 0).Text('f')) + assert.Equal(t, "10", DecimalWithNewExponent(mustParseDecimal("12.349"), 1).Text('f')) +} diff --git a/lib/typing/values/string_test.go b/lib/typing/values/string_test.go index 46dff4e5f..3e7e49564 100644 --- a/lib/typing/values/string_test.go +++ b/lib/typing/values/string_test.go @@ -1,10 +1,10 @@ package values import ( - "math/big" "testing" "time" + "github.com/cockroachdb/apd/v3" "github.com/stretchr/testify/assert" "github.com/artie-labs/transfer/lib/config/constants" @@ -20,6 +20,14 @@ func TestBooleanToBit(t *testing.T) { assert.Equal(t, 0, BooleanToBit(false)) } +func mustParseDecimal(value string) *apd.Decimal { + decimal, _, err := apd.NewFromString(value) + if err != nil { + panic(err) + } + return decimal +} + func TestToString(t *testing.T) { { // Nil value @@ -123,7 +131,7 @@ func TestToString(t *testing.T) { assert.Equal(t, "123.45", val) // Decimals - value := decimal.NewDecimal(ptr.ToInt(38), 2, big.NewFloat(585692791691858.25)) + value := decimal.NewDecimal(ptr.ToInt(38), 2, mustParseDecimal("585692791691858.25")) val, err = ToString(value, columns.Column{KindDetails: typing.EDecimal}, nil) assert.NoError(t, err) assert.Equal(t, "585692791691858.25", val) From 64f6bca0bc78f80b4cf6ac3a94df96a06d8c540c Mon Sep 17 00:00:00 2001 From: Nathan <148575555+nathan-artie@users.noreply.github.com> Date: Tue, 25 Jun 2024 18:24:29 -0700 Subject: [PATCH 02/26] Caps --- lib/typing/decimal/decimal.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/typing/decimal/decimal.go b/lib/typing/decimal/decimal.go index e243aa619..719db9909 100644 --- a/lib/typing/decimal/decimal.go +++ b/lib/typing/decimal/decimal.go @@ -108,7 +108,7 @@ func (d *Decimal) BigQueryKind() string { return "STRING" } -// decimalWithNewExponent takes a [apd.Decimal] and returns a new [apd.Decimal] with a the given exponent. +// DecimalWithNewExponent takes a [apd.Decimal] and returns a new [apd.Decimal] with a the given exponent. // If the new exponent is less precise then the extra digits will be truncated. func DecimalWithNewExponent(decimal *apd.Decimal, newExponent int32) *apd.Decimal { exponentDelta := newExponent - decimal.Exponent // Exponent is negative. From 97d4da3779b8f661059aa5793c688263cff0a24a Mon Sep 17 00:00:00 2001 From: Nathan <148575555+nathan-artie@users.noreply.github.com> Date: Tue, 25 Jun 2024 18:29:17 -0700 Subject: [PATCH 03/26] Comment --- lib/debezium/decimal.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/debezium/decimal.go b/lib/debezium/decimal.go index e87f48428..03d5ba065 100644 --- a/lib/debezium/decimal.go +++ b/lib/debezium/decimal.go @@ -73,7 +73,7 @@ func EncodeDecimal(decimal *apd.Decimal) ([]byte, int32) { return encodeBigInt(bigIntValue), -decimal.Exponent } -// EncodeDecimalWithScale is used to encode a [apd.Decimal] to [org.apache.kafka.connect.data.Decimal] +// EncodeDecimalWithScale is used to encode a [apd.Decimal] to [org.apache.kafka.connect.data.Decimal]. // using a specific scale. func EncodeDecimalWithScale(_decimal *apd.Decimal, scale int32) []byte { targetExponent := -scale // Negate scale since [Decimal.Exponent] is negative. @@ -84,7 +84,7 @@ func EncodeDecimalWithScale(_decimal *apd.Decimal, scale int32) []byte { return bytes } -// DecodeDecimal is used to decode `org.apache.kafka.connect.data.Decimal`. +// DecodeDecimal is used to decode [org.apache.kafka.connect.data.Decimal]. func DecodeDecimal(data []byte, precision *int, scale int) *decimal.Decimal { _decimal := apd.NewWithBigInt(new(apd.BigInt).SetMathBigInt(decodeBigInt(data)), -int32(scale)) return decimal.NewDecimal(precision, scale, _decimal) From f6bbb1631639cc68c01aff6f753f0cbab22ac78f Mon Sep 17 00:00:00 2001 From: Nathan <148575555+nathan-artie@users.noreply.github.com> Date: Tue, 25 Jun 2024 18:34:20 -0700 Subject: [PATCH 04/26] More --- lib/debezium/decimal.go | 5 ++--- lib/debezium/decimal_test.go | 4 ++-- lib/debezium/types.go | 8 ++++++-- 3 files changed, 10 insertions(+), 7 deletions(-) diff --git a/lib/debezium/decimal.go b/lib/debezium/decimal.go index 03d5ba065..ec76333a0 100644 --- a/lib/debezium/decimal.go +++ b/lib/debezium/decimal.go @@ -85,7 +85,6 @@ func EncodeDecimalWithScale(_decimal *apd.Decimal, scale int32) []byte { } // DecodeDecimal is used to decode [org.apache.kafka.connect.data.Decimal]. -func DecodeDecimal(data []byte, precision *int, scale int) *decimal.Decimal { - _decimal := apd.NewWithBigInt(new(apd.BigInt).SetMathBigInt(decodeBigInt(data)), -int32(scale)) - return decimal.NewDecimal(precision, scale, _decimal) +func DecodeDecimal(data []byte, scale int32) *apd.Decimal { + return apd.NewWithBigInt(new(apd.BigInt).SetMathBigInt(decodeBigInt(data)), -scale) } diff --git a/lib/debezium/decimal_test.go b/lib/debezium/decimal_test.go index 55b1d9ead..8ab7119ed 100644 --- a/lib/debezium/decimal_test.go +++ b/lib/debezium/decimal_test.go @@ -52,7 +52,7 @@ func mustParseDecimal(value string) *apd.Decimal { func TestEncodeDecimal(t *testing.T) { testEncodeDecimal := func(value string, expectedScale int32) { bytes, scale := EncodeDecimal(mustParseDecimal(value)) - actual := DecodeDecimal(bytes, nil, int(scale)).String() + actual := DecodeDecimal(bytes, scale).String() assert.Equal(t, value, actual, value) assert.Equal(t, expectedScale, scale, value) } @@ -72,7 +72,7 @@ func TestEncodeDecimal(t *testing.T) { func TestEncodeDecimalWithScale(t *testing.T) { mustEncodeAndDecodeDecimal := func(value string, scale int32) string { bytes := EncodeDecimalWithScale(mustParseDecimal(value), scale) - return DecodeDecimal(bytes, nil, int(scale)).String() + return DecodeDecimal(bytes, scale).String() } // Whole numbers: diff --git a/lib/debezium/types.go b/lib/debezium/types.go index 655156815..1c04e24c3 100644 --- a/lib/debezium/types.go +++ b/lib/debezium/types.go @@ -236,7 +236,9 @@ func (f Field) DecodeDecimal(encoded []byte) (*decimal.Decimal, error) { if err != nil { return nil, fmt.Errorf("failed to get scale and/or precision: %w", err) } - return DecodeDecimal(encoded, precision, scale), nil + + _decimal := DecodeDecimal(encoded, int32(scale)) + return decimal.NewDecimal(precision, scale, _decimal), nil } func (f Field) DecodeDebeziumVariableDecimal(value any) (*decimal.Decimal, error) { @@ -259,5 +261,7 @@ func (f Field) DecodeDebeziumVariableDecimal(value any) (*decimal.Decimal, error if err != nil { return nil, err } - return DecodeDecimal(bytes, ptr.ToInt(decimal.PrecisionNotSpecified), scale), nil + + _decimal := DecodeDecimal(bytes, int32(scale)) + return decimal.NewDecimal(ptr.ToInt(decimal.PrecisionNotSpecified), scale, _decimal), nil } From b9baad6aced12c757de4ae71f2114fd80b379a8f Mon Sep 17 00:00:00 2001 From: Nathan <148575555+nathan-artie@users.noreply.github.com> Date: Tue, 25 Jun 2024 18:39:09 -0700 Subject: [PATCH 05/26] Update --- lib/typing/decimal/decimal.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/typing/decimal/decimal.go b/lib/typing/decimal/decimal.go index 719db9909..70a54104f 100644 --- a/lib/typing/decimal/decimal.go +++ b/lib/typing/decimal/decimal.go @@ -9,7 +9,7 @@ import ( "github.com/cockroachdb/apd/v3" ) -// Decimal is Artie's wrapper around *apd.Decimal) which can store large numbers w/ no precision loss. +// Decimal is Artie's wrapper around [apd.Decimal] which can store large numbers w/ no precision loss. type Decimal struct { scale int precision *int From 19a13f61ce00c986045cba17d58f02f46289b995 Mon Sep 17 00:00:00 2001 From: Nathan <148575555+nathan-artie@users.noreply.github.com> Date: Tue, 25 Jun 2024 18:43:14 -0700 Subject: [PATCH 06/26] Add test --- lib/debezium/decimal_test.go | 1 + 1 file changed, 1 insertion(+) diff --git a/lib/debezium/decimal_test.go b/lib/debezium/decimal_test.go index 8ab7119ed..9916aa117 100644 --- a/lib/debezium/decimal_test.go +++ b/lib/debezium/decimal_test.go @@ -112,6 +112,7 @@ func TestEncodeDecimalWithScale(t *testing.T) { assert.Equal(t, "-145.1830000000000090", mustEncodeAndDecodeDecimal("-145.183000000000009", 16)) assert.Equal(t, "-9063701308.217222135", mustEncodeAndDecodeDecimal("-9063701308.217222135", 9)) + assert.Equal(t, "-74961544796695.89960242", mustEncodeAndDecodeDecimal("-74961544796695.89960242", 8)) testCases := []struct { name string From 14e5e0488ec902bb625afc7fbefeba16302852a0 Mon Sep 17 00:00:00 2001 From: Nathan <148575555+nathan-artie@users.noreply.github.com> Date: Tue, 25 Jun 2024 18:59:27 -0700 Subject: [PATCH 07/26] More tests --- lib/debezium/decimal_test.go | 5 ++- lib/test/main.go | 76 ++++++++++++++++++++++++++++++++++++ 2 files changed, 79 insertions(+), 2 deletions(-) create mode 100644 lib/test/main.go diff --git a/lib/debezium/decimal_test.go b/lib/debezium/decimal_test.go index 9916aa117..204535882 100644 --- a/lib/debezium/decimal_test.go +++ b/lib/debezium/decimal_test.go @@ -52,8 +52,9 @@ func mustParseDecimal(value string) *apd.Decimal { func TestEncodeDecimal(t *testing.T) { testEncodeDecimal := func(value string, expectedScale int32) { bytes, scale := EncodeDecimal(mustParseDecimal(value)) - actual := DecodeDecimal(bytes, scale).String() - assert.Equal(t, value, actual, value) + actual := DecodeDecimal(bytes, scale) + assert.Equal(t, value, actual.String(), value) + assert.Equal(t, expectedScale, -actual.Exponent, value) assert.Equal(t, expectedScale, scale, value) } diff --git a/lib/test/main.go b/lib/test/main.go new file mode 100644 index 000000000..4e060b68b --- /dev/null +++ b/lib/test/main.go @@ -0,0 +1,76 @@ +package main + +import ( + "fmt" + "math/rand" + "strings" + + "github.com/artie-labs/transfer/lib/debezium" + "github.com/cockroachdb/apd/v3" +) + +func mustEncodeAndDecodeDecimal(decimal *apd.Decimal, scale int32) string { + bytes := debezium.EncodeDecimalWithScale(decimal, scale) + return debezium.DecodeDecimal(bytes, scale).Text('f') +} + +func randDigit() (byte, bool) { + offset := rand.Intn(10) + return byte(48 + offset), offset == 0 +} + +func generateNumberWithScale(maxDigitsBefore int, maxDigitsAfter int) (*apd.Decimal, int32) { + out := strings.Builder{} + + var wroteNonZero bool + for range rand.Intn(maxDigitsBefore + 1) { + digit, isZero := randDigit() + if isZero && !wroteNonZero { + continue + } + wroteNonZero = true + out.WriteByte(digit) + } + + if !wroteNonZero { + out.WriteRune('0') + } + + scale := rand.Intn(maxDigitsAfter + 1) + if scale > 0 { + out.WriteRune('.') + + for range scale { + digit, isZero := randDigit() + if !isZero { + wroteNonZero = true + } + out.WriteByte(digit) + } + } + + stringValue := out.String() + + if wroteNonZero && rand.Intn(2) == 1 { + stringValue = "-" + stringValue + } + + decimal, _, err := apd.NewFromString(stringValue) + if err != nil { + panic(err) + } + return decimal, -decimal.Exponent +} + +func main() { + for i := range 1000 { + fmt.Printf("Checking batch %d...\n", i) + for range 1_000_000 { + in, scale := generateNumberWithScale(30, 30) + out := mustEncodeAndDecodeDecimal(in, scale) + if in.Text('f') != out { + panic(fmt.Sprintf("Failed for %s -> %s", in.Text('f'), out)) + } + } + } +} From 77f24518b077f4290270d3e2586616f103dd4c86 Mon Sep 17 00:00:00 2001 From: Nathan <148575555+nathan-artie@users.noreply.github.com> Date: Tue, 25 Jun 2024 18:59:54 -0700 Subject: [PATCH 08/26] Move --- lib/debezium/decimal_test.go | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/lib/debezium/decimal_test.go b/lib/debezium/decimal_test.go index 204535882..3149b25fa 100644 --- a/lib/debezium/decimal_test.go +++ b/lib/debezium/decimal_test.go @@ -52,10 +52,11 @@ func mustParseDecimal(value string) *apd.Decimal { func TestEncodeDecimal(t *testing.T) { testEncodeDecimal := func(value string, expectedScale int32) { bytes, scale := EncodeDecimal(mustParseDecimal(value)) + assert.Equal(t, expectedScale, scale, value) + actual := DecodeDecimal(bytes, scale) assert.Equal(t, value, actual.String(), value) assert.Equal(t, expectedScale, -actual.Exponent, value) - assert.Equal(t, expectedScale, scale, value) } testEncodeDecimal("0", 0) From 1d5d48f0d8f46a0664abadad35fb5b9287a28434 Mon Sep 17 00:00:00 2001 From: Nathan <148575555+nathan-artie@users.noreply.github.com> Date: Tue, 25 Jun 2024 19:00:07 -0700 Subject: [PATCH 09/26] Better --- lib/debezium/decimal_test.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/debezium/decimal_test.go b/lib/debezium/decimal_test.go index 3149b25fa..236b665b7 100644 --- a/lib/debezium/decimal_test.go +++ b/lib/debezium/decimal_test.go @@ -55,7 +55,7 @@ func TestEncodeDecimal(t *testing.T) { assert.Equal(t, expectedScale, scale, value) actual := DecodeDecimal(bytes, scale) - assert.Equal(t, value, actual.String(), value) + assert.Equal(t, value, actual.Text('f'), value) assert.Equal(t, expectedScale, -actual.Exponent, value) } From 9162e2fc8eb12aa9c0c03f3553855238ab5a2bc7 Mon Sep 17 00:00:00 2001 From: Nathan <148575555+nathan-artie@users.noreply.github.com> Date: Tue, 25 Jun 2024 19:10:49 -0700 Subject: [PATCH 10/26] More --- clients/shared/utils.go | 2 +- lib/cdc/util/relational_event_decimal_test.go | 31 ++++++------------- lib/debezium/types_bench_test.go | 2 +- lib/debezium/types_test.go | 13 +------- lib/typing/decimal/decimal.go | 21 ++----------- 5 files changed, 14 insertions(+), 55 deletions(-) diff --git a/clients/shared/utils.go b/clients/shared/utils.go index 5f479fe79..e80e17a76 100644 --- a/clients/shared/utils.go +++ b/clients/shared/utils.go @@ -44,7 +44,7 @@ func DefaultValue(column columns.Column, dialect sql.Dialect, additionalDateFmts return nil, fmt.Errorf("colVal is not type *decimal.Decimal") } - return val.Value(), nil + return val.String(), nil case typing.String.Kind: return sql.QuoteLiteral(fmt.Sprint(column.DefaultValue())), nil } diff --git a/lib/cdc/util/relational_event_decimal_test.go b/lib/cdc/util/relational_event_decimal_test.go index ad287190e..590050c79 100644 --- a/lib/cdc/util/relational_event_decimal_test.go +++ b/lib/cdc/util/relational_event_decimal_test.go @@ -3,7 +3,6 @@ package util import ( "encoding/json" "io" - "math/big" "os" "testing" @@ -47,8 +46,8 @@ func TestSchemaEventPayload_Numeric_GetData(t *testing.T) { retMap, err := schemaEventPayload.GetData(nil, &kafkalib.TopicConfig{}) assert.NoError(t, err) - assert.Equal(t, "123456.789", retMap["numeric_test"].(*decimal.Decimal).Value()) - assert.Equal(t, 0, big.NewFloat(1234).Cmp(retMap["numeric_5"].(*decimal.Decimal).Value().(*big.Float))) + assert.Equal(t, "123456.789", retMap["numeric_test"].(*decimal.Decimal).String()) + assert.Equal(t, "1234", retMap["numeric_5"].(*decimal.Decimal).String()) numericWithScaleMap := map[string]string{ "numeric_5_2": "568.01", "numeric_5_6": "0.023456", @@ -56,16 +55,12 @@ func TestSchemaEventPayload_Numeric_GetData(t *testing.T) { } for key, expectedValue := range numericWithScaleMap { - // Numeric data types that actually have scale fails when comparing *big.Float using `.Cmp`, so we are using STRING() instead. - _, isOk := retMap[key].(*decimal.Decimal).Value().(*big.Float) - assert.True(t, isOk) - // Now, we know the data type is *big.Float, let's check the .String() value. assert.Equal(t, expectedValue, retMap[key].(*decimal.Decimal).String()) } - assert.Equal(t, "58569102859845154622791691858438258688", retMap["numeric_39_0"].(*decimal.Decimal).Value()) - assert.Equal(t, "5856910285984515462279169185843825868.22", retMap["numeric_39_2"].(*decimal.Decimal).Value()) - assert.Equal(t, "585691028598451546227958438258688.123456", retMap["numeric_39_6"].(*decimal.Decimal).Value()) + assert.Equal(t, "58569102859845154622791691858438258688", retMap["numeric_39_0"].(*decimal.Decimal).String()) + assert.Equal(t, "5856910285984515462279169185843825868.22", retMap["numeric_39_2"].(*decimal.Decimal).String()) + assert.Equal(t, "585691028598451546227958438258688.123456", retMap["numeric_39_6"].(*decimal.Decimal).String()) } func TestSchemaEventPayload_Decimal_GetData(t *testing.T) { @@ -79,23 +74,19 @@ func TestSchemaEventPayload_Decimal_GetData(t *testing.T) { assert.NoError(t, err) retMap, err := schemaEventPayload.GetData(nil, &kafkalib.TopicConfig{}) assert.NoError(t, err) - assert.Equal(t, "123.45", retMap["decimal_test"].(*decimal.Decimal).Value()) + assert.Equal(t, "123.45", retMap["decimal_test"].(*decimal.Decimal).String()) decimalWithScaleMap := map[string]string{ "decimal_test_5": "123", "decimal_test_5_2": "123.45", } for key, expectedValue := range decimalWithScaleMap { - // Numeric data types that actually have scale fails when comparing *big.Float using `.Cmp`, so we are using STRING() instead. - _, isOk := retMap[key].(*decimal.Decimal).Value().(*big.Float) - assert.True(t, isOk) - // Now, we know the data type is *big.Float, let's check the .String() value. assert.Equal(t, expectedValue, retMap[key].(*decimal.Decimal).String(), key) } - assert.Equal(t, "58569102859845154622791691858438258688", retMap["decimal_test_39"].(*decimal.Decimal).Value(), "decimal_test_39") - assert.Equal(t, "585691028598451546227916918584382586.22", retMap["decimal_test_39_2"].(*decimal.Decimal).Value(), "decimal_test_39_2") - assert.Equal(t, "585691028598451546227916918584388.123456", retMap["decimal_test_39_6"].(*decimal.Decimal).Value(), "decimal_test_39_6") + assert.Equal(t, "58569102859845154622791691858438258688", retMap["decimal_test_39"].(*decimal.Decimal).String(), "decimal_test_39") + assert.Equal(t, "585691028598451546227916918584382586.22", retMap["decimal_test_39_2"].(*decimal.Decimal).String(), "decimal_test_39_2") + assert.Equal(t, "585691028598451546227916918584388.123456", retMap["decimal_test_39_6"].(*decimal.Decimal).String(), "decimal_test_39_6") } func TestSchemaEventPayload_Money_GetData(t *testing.T) { @@ -115,10 +106,6 @@ func TestSchemaEventPayload_Money_GetData(t *testing.T) { } for key, expectedValue := range decimalWithScaleMap { - // Numeric data types that actually have scale fails when comparing *big.Float using `.Cmp`, so we are using STRING() instead. - _, isOk := retMap[key].(*decimal.Decimal).Value().(*big.Float) - assert.True(t, isOk) - // Now, we know the data type is *big.Float, let's check the .String() value. assert.Equal(t, expectedValue, retMap[key].(*decimal.Decimal).String(), key) } } diff --git a/lib/debezium/types_bench_test.go b/lib/debezium/types_bench_test.go index 00e763e9d..c014c9fb7 100644 --- a/lib/debezium/types_bench_test.go +++ b/lib/debezium/types_bench_test.go @@ -19,7 +19,7 @@ func BenchmarkDecodeDecimal_P64_S10(b *testing.B) { assert.NoError(b, err) dec, err := field.DecodeDecimal(bytes) assert.NoError(b, err) - assert.Equal(b, "123456789012345678901234567890123456789012345678901234.1234567889", dec.Value()) + assert.Equal(b, "123456789012345678901234567890123456789012345678901234.1234567890", dec.String()) require.NoError(b, err) } } diff --git a/lib/debezium/types_test.go b/lib/debezium/types_test.go index 2cbcaef10..27948d4c3 100644 --- a/lib/debezium/types_test.go +++ b/lib/debezium/types_test.go @@ -1,7 +1,6 @@ package debezium import ( - "math/big" "testing" "time" @@ -618,9 +617,6 @@ func TestField_DecodeDecimal(t *testing.T) { } assert.NoError(t, err) - decVal := dec.Value() - _, isOk := decVal.(*big.Float) - assert.Equal(t, testCase.expectBigFloat, isOk, testCase.name) assert.Equal(t, testCase.expectedValue, dec.String(), testCase.name) if testCase.expectNilPtrPrecision { @@ -713,16 +709,9 @@ func TestField_DecodeDebeziumVariableDecimal(t *testing.T) { continue } - // It should never be a *big.Float - _, isOk := dec.Value().(*big.Float) - assert.False(t, isOk, testCase.name) - - // It should be a string instead. - _, isOk = dec.Value().(string) - assert.True(t, isOk, testCase.name) assert.Equal(t, -1, *dec.Precision(), testCase.name) assert.Equal(t, testCase.expectedScale, dec.Scale(), testCase.name) - assert.Equal(t, testCase.expectedValue, dec.Value(), testCase.name) + assert.Equal(t, testCase.expectedValue, dec.String(), testCase.name) } } diff --git a/lib/typing/decimal/decimal.go b/lib/typing/decimal/decimal.go index 70a54104f..83111cf15 100644 --- a/lib/typing/decimal/decimal.go +++ b/lib/typing/decimal/decimal.go @@ -2,8 +2,6 @@ package decimal import ( "fmt" - "log/slog" - "math/big" "github.com/artie-labs/transfer/lib/ptr" "github.com/cockroachdb/apd/v3" @@ -62,23 +60,8 @@ func (d *Decimal) String() string { return value.Text('f') } -func (d *Decimal) Value() any { - stringValue := d.String() - - // -1 precision is used for variable scaled decimal - // We are opting to emit this as a STRING because the value is technically unbounded (can get to ~1 GB). - if d.precision != nil && (*d.precision > MaxPrecisionBeforeString || *d.precision == -1) { - return stringValue - } - - // Depending on the precision, we will want to convert value to STRING or keep as a FLOAT. - bigFloat, ok := new(big.Float).SetString(stringValue) - if !ok { - slog.Error("Unable to parse value to a big.Float", slog.String("value", stringValue)) - return stringValue - } - - return bigFloat +func (d *Decimal) Value() *apd.Decimal { + return d.value } // SnowflakeKind - is used to determine whether a NUMERIC data type should be a STRING or NUMERIC(p, s). From c1bd606ab8b37db406f26e54aae0443476f67647 Mon Sep 17 00:00:00 2001 From: Nathan <148575555+nathan-artie@users.noreply.github.com> Date: Tue, 25 Jun 2024 19:12:02 -0700 Subject: [PATCH 11/26] Remove --- lib/test/main.go | 76 ------------------------------------------------ 1 file changed, 76 deletions(-) delete mode 100644 lib/test/main.go diff --git a/lib/test/main.go b/lib/test/main.go deleted file mode 100644 index 4e060b68b..000000000 --- a/lib/test/main.go +++ /dev/null @@ -1,76 +0,0 @@ -package main - -import ( - "fmt" - "math/rand" - "strings" - - "github.com/artie-labs/transfer/lib/debezium" - "github.com/cockroachdb/apd/v3" -) - -func mustEncodeAndDecodeDecimal(decimal *apd.Decimal, scale int32) string { - bytes := debezium.EncodeDecimalWithScale(decimal, scale) - return debezium.DecodeDecimal(bytes, scale).Text('f') -} - -func randDigit() (byte, bool) { - offset := rand.Intn(10) - return byte(48 + offset), offset == 0 -} - -func generateNumberWithScale(maxDigitsBefore int, maxDigitsAfter int) (*apd.Decimal, int32) { - out := strings.Builder{} - - var wroteNonZero bool - for range rand.Intn(maxDigitsBefore + 1) { - digit, isZero := randDigit() - if isZero && !wroteNonZero { - continue - } - wroteNonZero = true - out.WriteByte(digit) - } - - if !wroteNonZero { - out.WriteRune('0') - } - - scale := rand.Intn(maxDigitsAfter + 1) - if scale > 0 { - out.WriteRune('.') - - for range scale { - digit, isZero := randDigit() - if !isZero { - wroteNonZero = true - } - out.WriteByte(digit) - } - } - - stringValue := out.String() - - if wroteNonZero && rand.Intn(2) == 1 { - stringValue = "-" + stringValue - } - - decimal, _, err := apd.NewFromString(stringValue) - if err != nil { - panic(err) - } - return decimal, -decimal.Exponent -} - -func main() { - for i := range 1000 { - fmt.Printf("Checking batch %d...\n", i) - for range 1_000_000 { - in, scale := generateNumberWithScale(30, 30) - out := mustEncodeAndDecodeDecimal(in, scale) - if in.Text('f') != out { - panic(fmt.Sprintf("Failed for %s -> %s", in.Text('f'), out)) - } - } - } -} From 55c4ace9989866c1c70bb11dd21025f713c84f58 Mon Sep 17 00:00:00 2001 From: Nathan <148575555+nathan-artie@users.noreply.github.com> Date: Tue, 25 Jun 2024 23:57:21 -0700 Subject: [PATCH 12/26] Little cleanups --- lib/debezium/decimal.go | 10 ++-- lib/numbers/decimal.go | 39 ++++++++++++++ lib/numbers/decimal_test.go | 28 ++++++++++ lib/parquetutil/parse_values_test.go | 12 +---- lib/test/main.go | 76 ++++++++++++++++++++++++++++ lib/typing/decimal/decimal.go | 34 +------------ lib/typing/decimal/decimal_test.go | 29 ----------- lib/typing/values/string_test.go | 12 ++--- 8 files changed, 155 insertions(+), 85 deletions(-) create mode 100644 lib/numbers/decimal.go create mode 100644 lib/numbers/decimal_test.go create mode 100644 lib/test/main.go diff --git a/lib/debezium/decimal.go b/lib/debezium/decimal.go index ec76333a0..c1976a929 100644 --- a/lib/debezium/decimal.go +++ b/lib/debezium/decimal.go @@ -4,7 +4,7 @@ import ( "math/big" "slices" - "github.com/artie-labs/transfer/lib/typing/decimal" + "github.com/artie-labs/transfer/lib/numbers" "github.com/cockroachdb/apd/v3" ) @@ -75,12 +75,12 @@ func EncodeDecimal(decimal *apd.Decimal) ([]byte, int32) { // EncodeDecimalWithScale is used to encode a [apd.Decimal] to [org.apache.kafka.connect.data.Decimal]. // using a specific scale. -func EncodeDecimalWithScale(_decimal *apd.Decimal, scale int32) []byte { +func EncodeDecimalWithScale(decimal *apd.Decimal, scale int32) []byte { targetExponent := -scale // Negate scale since [Decimal.Exponent] is negative. - if _decimal.Exponent != targetExponent { - _decimal = decimal.DecimalWithNewExponent(_decimal, targetExponent) + if decimal.Exponent != targetExponent { + decimal = numbers.DecimalWithNewExponent(decimal, targetExponent) } - bytes, _ := EncodeDecimal(_decimal) + bytes, _ := EncodeDecimal(decimal) return bytes } diff --git a/lib/numbers/decimal.go b/lib/numbers/decimal.go new file mode 100644 index 000000000..e042fe447 --- /dev/null +++ b/lib/numbers/decimal.go @@ -0,0 +1,39 @@ +package numbers + +import "github.com/cockroachdb/apd/v3" + +// MustParseDecimal parses a string to an [apd.Decimal] or panics -- used for tests. +func MustParseDecimal(value string) *apd.Decimal { + decimal, _, err := apd.NewFromString(value) + if err != nil { + panic(err) + } + return decimal +} + +// DecimalWithNewExponent takes a [apd.Decimal] and returns a new [apd.Decimal] with a the given exponent. +// If the new exponent is less precise then the extra digits will be truncated. +func DecimalWithNewExponent(decimal *apd.Decimal, newExponent int32) *apd.Decimal { + exponentDelta := newExponent - decimal.Exponent // Exponent is negative. + + if exponentDelta == 0 { + return new(apd.Decimal).Set(decimal) + } + + coefficient := new(apd.BigInt).Set(&decimal.Coeff) + + if exponentDelta < 0 { + multiplier := new(apd.BigInt).Exp(apd.NewBigInt(10), apd.NewBigInt(int64(-exponentDelta)), nil) + coefficient.Mul(coefficient, multiplier) + } else if exponentDelta > 0 { + divisor := new(apd.BigInt).Exp(apd.NewBigInt(10), apd.NewBigInt(int64(exponentDelta)), nil) + coefficient.Div(coefficient, divisor) + } + + return &apd.Decimal{ + Form: decimal.Form, + Negative: decimal.Negative, + Exponent: newExponent, + Coeff: *coefficient, + } +} diff --git a/lib/numbers/decimal_test.go b/lib/numbers/decimal_test.go new file mode 100644 index 000000000..a7ec21868 --- /dev/null +++ b/lib/numbers/decimal_test.go @@ -0,0 +1,28 @@ +package numbers + +import ( + "testing" + + "github.com/cockroachdb/apd/v3" + "github.com/stretchr/testify/assert" +) + +func TestDecimalWithNewExponent(t *testing.T) { + assert.Equal(t, "0", DecimalWithNewExponent(apd.New(0, 0), 0).Text('f')) + assert.Equal(t, "00", DecimalWithNewExponent(apd.New(0, 1), 1).Text('f')) + assert.Equal(t, "0", DecimalWithNewExponent(apd.New(0, 100), 0).Text('f')) + assert.Equal(t, "00", DecimalWithNewExponent(apd.New(0, 0), 1).Text('f')) + assert.Equal(t, "0.0", DecimalWithNewExponent(apd.New(0, 0), -1).Text('f')) + + // Same exponent: + assert.Equal(t, "12.349", DecimalWithNewExponent(MustParseDecimal("12.349"), -3).Text('f')) + // More precise exponent: + assert.Equal(t, "12.3490", DecimalWithNewExponent(MustParseDecimal("12.349"), -4).Text('f')) + assert.Equal(t, "12.34900", DecimalWithNewExponent(MustParseDecimal("12.349"), -5).Text('f')) + // Lest precise exponent: + // Extra digits should be truncated rather than rounded. + assert.Equal(t, "12.34", DecimalWithNewExponent(MustParseDecimal("12.349"), -2).Text('f')) + assert.Equal(t, "12.3", DecimalWithNewExponent(MustParseDecimal("12.349"), -1).Text('f')) + assert.Equal(t, "12", DecimalWithNewExponent(MustParseDecimal("12.349"), 0).Text('f')) + assert.Equal(t, "10", DecimalWithNewExponent(MustParseDecimal("12.349"), 1).Text('f')) +} diff --git a/lib/parquetutil/parse_values_test.go b/lib/parquetutil/parse_values_test.go index 9de756b55..457c99834 100644 --- a/lib/parquetutil/parse_values_test.go +++ b/lib/parquetutil/parse_values_test.go @@ -3,9 +3,9 @@ package parquetutil import ( "testing" + "github.com/artie-labs/transfer/lib/numbers" "github.com/artie-labs/transfer/lib/ptr" "github.com/artie-labs/transfer/lib/typing/ext" - "github.com/cockroachdb/apd/v3" "github.com/artie-labs/transfer/lib/typing" "github.com/artie-labs/transfer/lib/typing/columns" @@ -13,14 +13,6 @@ import ( "github.com/stretchr/testify/assert" ) -func mustParseDecimal(value string) *apd.Decimal { - decimal, _, err := apd.NewFromString(value) - if err != nil { - panic(err) - } - return decimal -} - func TestParseValue(t *testing.T) { eDecimal := typing.EDecimal eDecimal.ExtendedDecimalDetails = decimal.NewDecimal(ptr.ToInt(30), 5, nil) @@ -74,7 +66,7 @@ func TestParseValue(t *testing.T) { }, { name: "decimal", - colVal: decimal.NewDecimal(ptr.ToInt(30), 5, mustParseDecimal("5000.2232")), + colVal: decimal.NewDecimal(ptr.ToInt(30), 5, numbers.MustParseDecimal("5000.2232")), colKind: columns.NewColumn("", eDecimal), expectedValue: "5000.22320", }, diff --git a/lib/test/main.go b/lib/test/main.go new file mode 100644 index 000000000..4e060b68b --- /dev/null +++ b/lib/test/main.go @@ -0,0 +1,76 @@ +package main + +import ( + "fmt" + "math/rand" + "strings" + + "github.com/artie-labs/transfer/lib/debezium" + "github.com/cockroachdb/apd/v3" +) + +func mustEncodeAndDecodeDecimal(decimal *apd.Decimal, scale int32) string { + bytes := debezium.EncodeDecimalWithScale(decimal, scale) + return debezium.DecodeDecimal(bytes, scale).Text('f') +} + +func randDigit() (byte, bool) { + offset := rand.Intn(10) + return byte(48 + offset), offset == 0 +} + +func generateNumberWithScale(maxDigitsBefore int, maxDigitsAfter int) (*apd.Decimal, int32) { + out := strings.Builder{} + + var wroteNonZero bool + for range rand.Intn(maxDigitsBefore + 1) { + digit, isZero := randDigit() + if isZero && !wroteNonZero { + continue + } + wroteNonZero = true + out.WriteByte(digit) + } + + if !wroteNonZero { + out.WriteRune('0') + } + + scale := rand.Intn(maxDigitsAfter + 1) + if scale > 0 { + out.WriteRune('.') + + for range scale { + digit, isZero := randDigit() + if !isZero { + wroteNonZero = true + } + out.WriteByte(digit) + } + } + + stringValue := out.String() + + if wroteNonZero && rand.Intn(2) == 1 { + stringValue = "-" + stringValue + } + + decimal, _, err := apd.NewFromString(stringValue) + if err != nil { + panic(err) + } + return decimal, -decimal.Exponent +} + +func main() { + for i := range 1000 { + fmt.Printf("Checking batch %d...\n", i) + for range 1_000_000 { + in, scale := generateNumberWithScale(30, 30) + out := mustEncodeAndDecodeDecimal(in, scale) + if in.Text('f') != out { + panic(fmt.Sprintf("Failed for %s -> %s", in.Text('f'), out)) + } + } + } +} diff --git a/lib/typing/decimal/decimal.go b/lib/typing/decimal/decimal.go index 83111cf15..68b903a90 100644 --- a/lib/typing/decimal/decimal.go +++ b/lib/typing/decimal/decimal.go @@ -3,6 +3,7 @@ package decimal import ( "fmt" + "github.com/artie-labs/transfer/lib/numbers" "github.com/artie-labs/transfer/lib/ptr" "github.com/cockroachdb/apd/v3" ) @@ -55,15 +56,11 @@ func (d *Decimal) String() string { targetExponent := -int32(d.scale) value := d.value if value.Exponent != targetExponent { - value = DecimalWithNewExponent(value, targetExponent) + value = numbers.DecimalWithNewExponent(value, targetExponent) } return value.Text('f') } -func (d *Decimal) Value() *apd.Decimal { - return d.value -} - // SnowflakeKind - is used to determine whether a NUMERIC data type should be a STRING or NUMERIC(p, s). func (d *Decimal) SnowflakeKind() string { return d.toKind(MaxPrecisionBeforeString, "STRING") @@ -90,30 +87,3 @@ func (d *Decimal) BigQueryKind() string { return "STRING" } - -// DecimalWithNewExponent takes a [apd.Decimal] and returns a new [apd.Decimal] with a the given exponent. -// If the new exponent is less precise then the extra digits will be truncated. -func DecimalWithNewExponent(decimal *apd.Decimal, newExponent int32) *apd.Decimal { - exponentDelta := newExponent - decimal.Exponent // Exponent is negative. - - if exponentDelta == 0 { - return new(apd.Decimal).Set(decimal) - } - - coefficient := new(apd.BigInt).Set(&decimal.Coeff) - - if exponentDelta < 0 { - multiplier := new(apd.BigInt).Exp(apd.NewBigInt(10), apd.NewBigInt(int64(-exponentDelta)), nil) - coefficient.Mul(coefficient, multiplier) - } else if exponentDelta > 0 { - divisor := new(apd.BigInt).Exp(apd.NewBigInt(10), apd.NewBigInt(int64(exponentDelta)), nil) - coefficient.Div(coefficient, divisor) - } - - return &apd.Decimal{ - Form: decimal.Form, - Negative: decimal.Negative, - Exponent: newExponent, - Coeff: *coefficient, - } -} diff --git a/lib/typing/decimal/decimal_test.go b/lib/typing/decimal/decimal_test.go index e0841b2b8..a11b90acb 100644 --- a/lib/typing/decimal/decimal_test.go +++ b/lib/typing/decimal/decimal_test.go @@ -3,7 +3,6 @@ package decimal import ( "testing" - "github.com/cockroachdb/apd/v3" "github.com/stretchr/testify/assert" "github.com/artie-labs/transfer/lib/ptr" @@ -77,31 +76,3 @@ func TestDecimalKind(t *testing.T) { assert.Equal(t, testCase.ExpectedBigQueryKind, d.BigQueryKind(), testCase.Name) } } - -func mustParseDecimal(value string) *apd.Decimal { - decimal, _, err := apd.NewFromString(value) - if err != nil { - panic(err) - } - return decimal -} - -func TestDecimalWithNewExponent(t *testing.T) { - assert.Equal(t, "0", DecimalWithNewExponent(apd.New(0, 0), 0).Text('f')) - assert.Equal(t, "00", DecimalWithNewExponent(apd.New(0, 1), 1).Text('f')) - assert.Equal(t, "0", DecimalWithNewExponent(apd.New(0, 100), 0).Text('f')) - assert.Equal(t, "00", DecimalWithNewExponent(apd.New(0, 0), 1).Text('f')) - assert.Equal(t, "0.0", DecimalWithNewExponent(apd.New(0, 0), -1).Text('f')) - - // Same exponent: - assert.Equal(t, "12.349", DecimalWithNewExponent(mustParseDecimal("12.349"), -3).Text('f')) - // More precise exponent: - assert.Equal(t, "12.3490", DecimalWithNewExponent(mustParseDecimal("12.349"), -4).Text('f')) - assert.Equal(t, "12.34900", DecimalWithNewExponent(mustParseDecimal("12.349"), -5).Text('f')) - // Lest precise exponent: - // Extra digits should be truncated rather than rounded. - assert.Equal(t, "12.34", DecimalWithNewExponent(mustParseDecimal("12.349"), -2).Text('f')) - assert.Equal(t, "12.3", DecimalWithNewExponent(mustParseDecimal("12.349"), -1).Text('f')) - assert.Equal(t, "12", DecimalWithNewExponent(mustParseDecimal("12.349"), 0).Text('f')) - assert.Equal(t, "10", DecimalWithNewExponent(mustParseDecimal("12.349"), 1).Text('f')) -} diff --git a/lib/typing/values/string_test.go b/lib/typing/values/string_test.go index 3e7e49564..62a7ac9cc 100644 --- a/lib/typing/values/string_test.go +++ b/lib/typing/values/string_test.go @@ -20,14 +20,6 @@ func TestBooleanToBit(t *testing.T) { assert.Equal(t, 0, BooleanToBit(false)) } -func mustParseDecimal(value string) *apd.Decimal { - decimal, _, err := apd.NewFromString(value) - if err != nil { - panic(err) - } - return decimal -} - func TestToString(t *testing.T) { { // Nil value @@ -131,7 +123,9 @@ func TestToString(t *testing.T) { assert.Equal(t, "123.45", val) // Decimals - value := decimal.NewDecimal(ptr.ToInt(38), 2, mustParseDecimal("585692791691858.25")) + _decimal, _, err := apd.NewFromString("585692791691858.25") + assert.NoError(t, err) + value := decimal.NewDecimal(ptr.ToInt(38), 2, _decimal) val, err = ToString(value, columns.Column{KindDetails: typing.EDecimal}, nil) assert.NoError(t, err) assert.Equal(t, "585692791691858.25", val) From db1a91b2d33189fe5ead149e129515537f5f0f5d Mon Sep 17 00:00:00 2001 From: Nathan <148575555+nathan-artie@users.noreply.github.com> Date: Tue, 25 Jun 2024 23:58:02 -0700 Subject: [PATCH 13/26] Revert --- lib/test/main.go | 76 ------------------------------------------------ 1 file changed, 76 deletions(-) delete mode 100644 lib/test/main.go diff --git a/lib/test/main.go b/lib/test/main.go deleted file mode 100644 index 4e060b68b..000000000 --- a/lib/test/main.go +++ /dev/null @@ -1,76 +0,0 @@ -package main - -import ( - "fmt" - "math/rand" - "strings" - - "github.com/artie-labs/transfer/lib/debezium" - "github.com/cockroachdb/apd/v3" -) - -func mustEncodeAndDecodeDecimal(decimal *apd.Decimal, scale int32) string { - bytes := debezium.EncodeDecimalWithScale(decimal, scale) - return debezium.DecodeDecimal(bytes, scale).Text('f') -} - -func randDigit() (byte, bool) { - offset := rand.Intn(10) - return byte(48 + offset), offset == 0 -} - -func generateNumberWithScale(maxDigitsBefore int, maxDigitsAfter int) (*apd.Decimal, int32) { - out := strings.Builder{} - - var wroteNonZero bool - for range rand.Intn(maxDigitsBefore + 1) { - digit, isZero := randDigit() - if isZero && !wroteNonZero { - continue - } - wroteNonZero = true - out.WriteByte(digit) - } - - if !wroteNonZero { - out.WriteRune('0') - } - - scale := rand.Intn(maxDigitsAfter + 1) - if scale > 0 { - out.WriteRune('.') - - for range scale { - digit, isZero := randDigit() - if !isZero { - wroteNonZero = true - } - out.WriteByte(digit) - } - } - - stringValue := out.String() - - if wroteNonZero && rand.Intn(2) == 1 { - stringValue = "-" + stringValue - } - - decimal, _, err := apd.NewFromString(stringValue) - if err != nil { - panic(err) - } - return decimal, -decimal.Exponent -} - -func main() { - for i := range 1000 { - fmt.Printf("Checking batch %d...\n", i) - for range 1_000_000 { - in, scale := generateNumberWithScale(30, 30) - out := mustEncodeAndDecodeDecimal(in, scale) - if in.Text('f') != out { - panic(fmt.Sprintf("Failed for %s -> %s", in.Text('f'), out)) - } - } - } -} From 47489e7b944be4260d4beebf85db27675221b64b Mon Sep 17 00:00:00 2001 From: Nathan <148575555+nathan-artie@users.noreply.github.com> Date: Tue, 25 Jun 2024 23:58:37 -0700 Subject: [PATCH 14/26] Space --- lib/debezium/types.go | 2 -- 1 file changed, 2 deletions(-) diff --git a/lib/debezium/types.go b/lib/debezium/types.go index 1c04e24c3..13917104a 100644 --- a/lib/debezium/types.go +++ b/lib/debezium/types.go @@ -236,7 +236,6 @@ func (f Field) DecodeDecimal(encoded []byte) (*decimal.Decimal, error) { if err != nil { return nil, fmt.Errorf("failed to get scale and/or precision: %w", err) } - _decimal := DecodeDecimal(encoded, int32(scale)) return decimal.NewDecimal(precision, scale, _decimal), nil } @@ -261,7 +260,6 @@ func (f Field) DecodeDebeziumVariableDecimal(value any) (*decimal.Decimal, error if err != nil { return nil, err } - _decimal := DecodeDecimal(bytes, int32(scale)) return decimal.NewDecimal(ptr.ToInt(decimal.PrecisionNotSpecified), scale, _decimal), nil } From aee754d7d05579cfdad35d3d2db9e679cd6b49cf Mon Sep 17 00:00:00 2001 From: Nathan <148575555+nathan-artie@users.noreply.github.com> Date: Wed, 26 Jun 2024 11:05:50 -0700 Subject: [PATCH 15/26] Kill scale --- clients/bigquery/storagewrite_test.go | 4 ++-- lib/debezium/types.go | 4 ++-- lib/parquetutil/parse_values_test.go | 2 +- lib/typing/decimal/decimal.go | 18 ++++++------------ lib/typing/values/string_test.go | 2 +- 5 files changed, 12 insertions(+), 18 deletions(-) diff --git a/clients/bigquery/storagewrite_test.go b/clients/bigquery/storagewrite_test.go index 36a8a0c38..7a0c5c073 100644 --- a/clients/bigquery/storagewrite_test.go +++ b/clients/bigquery/storagewrite_test.go @@ -137,9 +137,9 @@ func TestRowToMessage(t *testing.T) { "c_float_int32": int32(1234), "c_float_int64": int64(1234), "c_float_string": "4444.55555", - "c_numeric": decimal.NewDecimal(nil, 5, mustParseDecimal("3.1415926")), + "c_numeric": decimal.NewDecimal(nil, mustParseDecimal("3.14159")), "c_string": "foo bar", - "c_string_decimal": decimal.NewDecimal(nil, 5, mustParseDecimal("1.618033")), + "c_string_decimal": decimal.NewDecimal(nil, mustParseDecimal("1.61803")), "c_time": ext.NewExtendedTime(time.Date(0, 0, 0, 4, 5, 6, 7, time.UTC), ext.TimeKindType, ""), "c_date": ext.NewExtendedTime(time.Date(2001, 2, 3, 0, 0, 0, 0, time.UTC), ext.DateKindType, ""), "c_datetime": ext.NewExtendedTime(time.Date(2001, 2, 3, 4, 5, 6, 7, time.UTC), ext.DateTimeKindType, ""), diff --git a/lib/debezium/types.go b/lib/debezium/types.go index 13917104a..c7af11e9c 100644 --- a/lib/debezium/types.go +++ b/lib/debezium/types.go @@ -237,7 +237,7 @@ func (f Field) DecodeDecimal(encoded []byte) (*decimal.Decimal, error) { return nil, fmt.Errorf("failed to get scale and/or precision: %w", err) } _decimal := DecodeDecimal(encoded, int32(scale)) - return decimal.NewDecimal(precision, scale, _decimal), nil + return decimal.NewDecimal(precision, _decimal), nil } func (f Field) DecodeDebeziumVariableDecimal(value any) (*decimal.Decimal, error) { @@ -261,5 +261,5 @@ func (f Field) DecodeDebeziumVariableDecimal(value any) (*decimal.Decimal, error return nil, err } _decimal := DecodeDecimal(bytes, int32(scale)) - return decimal.NewDecimal(ptr.ToInt(decimal.PrecisionNotSpecified), scale, _decimal), nil + return decimal.NewDecimal(ptr.ToInt(decimal.PrecisionNotSpecified), _decimal), nil } diff --git a/lib/parquetutil/parse_values_test.go b/lib/parquetutil/parse_values_test.go index 7953368b0..5174ddb7d 100644 --- a/lib/parquetutil/parse_values_test.go +++ b/lib/parquetutil/parse_values_test.go @@ -66,7 +66,7 @@ func TestParseValue(t *testing.T) { }, { name: "decimal", - colVal: decimal.NewDecimal(ptr.ToInt(30), 5, numbers.MustParseDecimal("5000.2232")), + colVal: decimal.NewDecimal(ptr.ToInt(30), numbers.MustParseDecimal("5000.22320")), colKind: columns.NewColumn("", eDecimal), expectedValue: "5000.22320", }, diff --git a/lib/typing/decimal/decimal.go b/lib/typing/decimal/decimal.go index 281546a25..06c83b2fa 100644 --- a/lib/typing/decimal/decimal.go +++ b/lib/typing/decimal/decimal.go @@ -1,14 +1,12 @@ package decimal import ( - "github.com/artie-labs/transfer/lib/numbers" "github.com/artie-labs/transfer/lib/ptr" "github.com/cockroachdb/apd/v3" ) // Decimal is Artie's wrapper around [apd.Decimal] which can store large numbers w/ no precision loss. type Decimal struct { - scale int precision *int value *apd.Decimal } @@ -21,7 +19,9 @@ const ( MaxPrecisionBeforeString = 38 ) -func NewDecimal(precision *int, scale int, value *apd.Decimal) *Decimal { +func NewDecimal(precision *int, value *apd.Decimal) *Decimal { + scale := int(-value.Exponent) + if precision != nil { if scale > *precision && *precision != -1 { // Note: -1 precision means it's not specified. @@ -33,14 +33,13 @@ func NewDecimal(precision *int, scale int, value *apd.Decimal) *Decimal { } return &Decimal{ - scale: scale, precision: precision, value: value, } } func (d *Decimal) Scale() int { - return d.scale + return int(-d.value.Exponent) } func (d *Decimal) Precision() *int { @@ -51,14 +50,9 @@ func (d *Decimal) Precision() *int { // This is particularly useful for Snowflake because we're writing all the values as STRINGS into TSV format. // This function guarantees backwards compatibility. func (d *Decimal) String() string { - targetExponent := -int32(d.scale) - value := d.value - if value.Exponent != targetExponent { - value = numbers.DecimalWithNewExponent(value, targetExponent) - } - return value.Text('f') + return d.value.Text('f') } func (d *Decimal) Details() DecimalDetails { - return DecimalDetails{scale: d.scale, precision: d.precision} + return DecimalDetails{scale: d.Scale(), precision: d.precision} } diff --git a/lib/typing/values/string_test.go b/lib/typing/values/string_test.go index 62a7ac9cc..af33186d3 100644 --- a/lib/typing/values/string_test.go +++ b/lib/typing/values/string_test.go @@ -125,7 +125,7 @@ func TestToString(t *testing.T) { // Decimals _decimal, _, err := apd.NewFromString("585692791691858.25") assert.NoError(t, err) - value := decimal.NewDecimal(ptr.ToInt(38), 2, _decimal) + value := decimal.NewDecimal(ptr.ToInt(38), _decimal) val, err = ToString(value, columns.Column{KindDetails: typing.EDecimal}, nil) assert.NoError(t, err) assert.Equal(t, "585692791691858.25", val) From f6f1feec92f40f33f97689086f9a8da695a94e2f Mon Sep 17 00:00:00 2001 From: Nathan <148575555+nathan-artie@users.noreply.github.com> Date: Wed, 26 Jun 2024 11:08:35 -0700 Subject: [PATCH 16/26] Revert --- lib/debezium/decimal.go | 30 +++++++++++++++++++++++++-- lib/debezium/decimal_test.go | 20 ++++++++++++++++++ lib/numbers/decimal.go | 39 ------------------------------------ lib/numbers/decimal_test.go | 28 -------------------------- 4 files changed, 48 insertions(+), 69 deletions(-) delete mode 100644 lib/numbers/decimal.go delete mode 100644 lib/numbers/decimal_test.go diff --git a/lib/debezium/decimal.go b/lib/debezium/decimal.go index c1976a929..b44dc3f63 100644 --- a/lib/debezium/decimal.go +++ b/lib/debezium/decimal.go @@ -4,7 +4,6 @@ import ( "math/big" "slices" - "github.com/artie-labs/transfer/lib/numbers" "github.com/cockroachdb/apd/v3" ) @@ -62,6 +61,33 @@ func decodeBigInt(data []byte) *big.Int { return bigInt } +// decimalWithNewExponent takes a [apd.Decimal] and returns a new [apd.Decimal] with a the given exponent. +// If the new exponent is less precise then the extra digits will be truncated. +func decimalWithNewExponent(decimal *apd.Decimal, newExponent int32) *apd.Decimal { + exponentDelta := newExponent - decimal.Exponent // Exponent is negative. + + if exponentDelta == 0 { + return new(apd.Decimal).Set(decimal) + } + + coefficient := new(apd.BigInt).Set(&decimal.Coeff) + + if exponentDelta < 0 { + multiplier := new(apd.BigInt).Exp(apd.NewBigInt(10), apd.NewBigInt(int64(-exponentDelta)), nil) + coefficient.Mul(coefficient, multiplier) + } else if exponentDelta > 0 { + divisor := new(apd.BigInt).Exp(apd.NewBigInt(10), apd.NewBigInt(int64(exponentDelta)), nil) + coefficient.Div(coefficient, divisor) + } + + return &apd.Decimal{ + Form: decimal.Form, + Negative: decimal.Negative, + Exponent: newExponent, + Coeff: *coefficient, + } +} + // EncodeDecimal is used to encode a [apd.Decimal] to [org.apache.kafka.connect.data.Decimal]. // The scale of the value (which is the negated exponent of the decimal) is returned as the second argument. func EncodeDecimal(decimal *apd.Decimal) ([]byte, int32) { @@ -78,7 +104,7 @@ func EncodeDecimal(decimal *apd.Decimal) ([]byte, int32) { func EncodeDecimalWithScale(decimal *apd.Decimal, scale int32) []byte { targetExponent := -scale // Negate scale since [Decimal.Exponent] is negative. if decimal.Exponent != targetExponent { - decimal = numbers.DecimalWithNewExponent(decimal, targetExponent) + decimal = decimalWithNewExponent(decimal, targetExponent) } bytes, _ := EncodeDecimal(decimal) return bytes diff --git a/lib/debezium/decimal_test.go b/lib/debezium/decimal_test.go index 236b665b7..ea79bd829 100644 --- a/lib/debezium/decimal_test.go +++ b/lib/debezium/decimal_test.go @@ -49,6 +49,26 @@ func mustParseDecimal(value string) *apd.Decimal { return decimal } +func TestDecimalWithNewExponent(t *testing.T) { + assert.Equal(t, "0", decimalWithNewExponent(apd.New(0, 0), 0).Text('f')) + assert.Equal(t, "00", decimalWithNewExponent(apd.New(0, 1), 1).Text('f')) + assert.Equal(t, "0", decimalWithNewExponent(apd.New(0, 100), 0).Text('f')) + assert.Equal(t, "00", decimalWithNewExponent(apd.New(0, 0), 1).Text('f')) + assert.Equal(t, "0.0", decimalWithNewExponent(apd.New(0, 0), -1).Text('f')) + + // Same exponent: + assert.Equal(t, "12.349", decimalWithNewExponent(mustParseDecimal("12.349"), -3).Text('f')) + // More precise exponent: + assert.Equal(t, "12.3490", decimalWithNewExponent(mustParseDecimal("12.349"), -4).Text('f')) + assert.Equal(t, "12.34900", decimalWithNewExponent(mustParseDecimal("12.349"), -5).Text('f')) + // Lest precise exponent: + // Extra digits should be truncated rather than rounded. + assert.Equal(t, "12.34", decimalWithNewExponent(mustParseDecimal("12.349"), -2).Text('f')) + assert.Equal(t, "12.3", decimalWithNewExponent(mustParseDecimal("12.349"), -1).Text('f')) + assert.Equal(t, "12", decimalWithNewExponent(mustParseDecimal("12.349"), 0).Text('f')) + assert.Equal(t, "10", decimalWithNewExponent(mustParseDecimal("12.349"), 1).Text('f')) +} + func TestEncodeDecimal(t *testing.T) { testEncodeDecimal := func(value string, expectedScale int32) { bytes, scale := EncodeDecimal(mustParseDecimal(value)) diff --git a/lib/numbers/decimal.go b/lib/numbers/decimal.go deleted file mode 100644 index e042fe447..000000000 --- a/lib/numbers/decimal.go +++ /dev/null @@ -1,39 +0,0 @@ -package numbers - -import "github.com/cockroachdb/apd/v3" - -// MustParseDecimal parses a string to an [apd.Decimal] or panics -- used for tests. -func MustParseDecimal(value string) *apd.Decimal { - decimal, _, err := apd.NewFromString(value) - if err != nil { - panic(err) - } - return decimal -} - -// DecimalWithNewExponent takes a [apd.Decimal] and returns a new [apd.Decimal] with a the given exponent. -// If the new exponent is less precise then the extra digits will be truncated. -func DecimalWithNewExponent(decimal *apd.Decimal, newExponent int32) *apd.Decimal { - exponentDelta := newExponent - decimal.Exponent // Exponent is negative. - - if exponentDelta == 0 { - return new(apd.Decimal).Set(decimal) - } - - coefficient := new(apd.BigInt).Set(&decimal.Coeff) - - if exponentDelta < 0 { - multiplier := new(apd.BigInt).Exp(apd.NewBigInt(10), apd.NewBigInt(int64(-exponentDelta)), nil) - coefficient.Mul(coefficient, multiplier) - } else if exponentDelta > 0 { - divisor := new(apd.BigInt).Exp(apd.NewBigInt(10), apd.NewBigInt(int64(exponentDelta)), nil) - coefficient.Div(coefficient, divisor) - } - - return &apd.Decimal{ - Form: decimal.Form, - Negative: decimal.Negative, - Exponent: newExponent, - Coeff: *coefficient, - } -} diff --git a/lib/numbers/decimal_test.go b/lib/numbers/decimal_test.go deleted file mode 100644 index a7ec21868..000000000 --- a/lib/numbers/decimal_test.go +++ /dev/null @@ -1,28 +0,0 @@ -package numbers - -import ( - "testing" - - "github.com/cockroachdb/apd/v3" - "github.com/stretchr/testify/assert" -) - -func TestDecimalWithNewExponent(t *testing.T) { - assert.Equal(t, "0", DecimalWithNewExponent(apd.New(0, 0), 0).Text('f')) - assert.Equal(t, "00", DecimalWithNewExponent(apd.New(0, 1), 1).Text('f')) - assert.Equal(t, "0", DecimalWithNewExponent(apd.New(0, 100), 0).Text('f')) - assert.Equal(t, "00", DecimalWithNewExponent(apd.New(0, 0), 1).Text('f')) - assert.Equal(t, "0.0", DecimalWithNewExponent(apd.New(0, 0), -1).Text('f')) - - // Same exponent: - assert.Equal(t, "12.349", DecimalWithNewExponent(MustParseDecimal("12.349"), -3).Text('f')) - // More precise exponent: - assert.Equal(t, "12.3490", DecimalWithNewExponent(MustParseDecimal("12.349"), -4).Text('f')) - assert.Equal(t, "12.34900", DecimalWithNewExponent(MustParseDecimal("12.349"), -5).Text('f')) - // Lest precise exponent: - // Extra digits should be truncated rather than rounded. - assert.Equal(t, "12.34", DecimalWithNewExponent(MustParseDecimal("12.349"), -2).Text('f')) - assert.Equal(t, "12.3", DecimalWithNewExponent(MustParseDecimal("12.349"), -1).Text('f')) - assert.Equal(t, "12", DecimalWithNewExponent(MustParseDecimal("12.349"), 0).Text('f')) - assert.Equal(t, "10", DecimalWithNewExponent(MustParseDecimal("12.349"), 1).Text('f')) -} From 48d86484177eaf9765411dbed9bcccac0792e780 Mon Sep 17 00:00:00 2001 From: Nathan <148575555+nathan-artie@users.noreply.github.com> Date: Wed, 26 Jun 2024 11:14:13 -0700 Subject: [PATCH 17/26] Revert --- lib/debezium/types_test.go | 12 ++++++++++++ lib/typing/decimal/decimal.go | 19 +++++++++++++++++++ 2 files changed, 31 insertions(+) diff --git a/lib/debezium/types_test.go b/lib/debezium/types_test.go index 27948d4c3..cd0a3037a 100644 --- a/lib/debezium/types_test.go +++ b/lib/debezium/types_test.go @@ -1,6 +1,7 @@ package debezium import ( + "math/big" "testing" "time" @@ -617,6 +618,9 @@ func TestField_DecodeDecimal(t *testing.T) { } assert.NoError(t, err) + decVal := dec.Value() + _, isOk := decVal.(*big.Float) + assert.Equal(t, testCase.expectBigFloat, isOk, testCase.name) assert.Equal(t, testCase.expectedValue, dec.String(), testCase.name) if testCase.expectNilPtrPrecision { @@ -709,8 +713,16 @@ func TestField_DecodeDebeziumVariableDecimal(t *testing.T) { continue } + // It should never be a *big.Float + _, isOk := dec.Value().(*big.Float) + assert.False(t, isOk, testCase.name) + + // It should be a string instead. + _, isOk = dec.Value().(string) + assert.True(t, isOk, testCase.name) assert.Equal(t, -1, *dec.Precision(), testCase.name) assert.Equal(t, testCase.expectedScale, dec.Scale(), testCase.name) + assert.Equal(t, testCase.expectedValue, dec.Value(), testCase.name) assert.Equal(t, testCase.expectedValue, dec.String(), testCase.name) } diff --git a/lib/typing/decimal/decimal.go b/lib/typing/decimal/decimal.go index 06c83b2fa..d68047ad8 100644 --- a/lib/typing/decimal/decimal.go +++ b/lib/typing/decimal/decimal.go @@ -1,6 +1,9 @@ package decimal import ( + "log/slog" + "math/big" + "github.com/artie-labs/transfer/lib/ptr" "github.com/cockroachdb/apd/v3" ) @@ -53,6 +56,22 @@ func (d *Decimal) String() string { return d.value.Text('f') } +func (d *Decimal) Value() any { + // -1 precision is used for variable scaled decimal + // We are opting to emit this as a STRING because the value is technically unbounded (can get to ~1 GB). + if d.precision != nil && (*d.precision > MaxPrecisionBeforeString || *d.precision == -1) { + return d.String() + } + + // Depending on the precision, we will want to convert value to STRING or keep as a FLOAT. + // TODO: [Value] is only called in one place, look into callining [String] instead. + if out, ok := new(big.Float).SetString(d.String()); ok { + return out + } + slog.Error("Failed to convert apd.Decimal to big.Float", slog.String("value", d.String())) + return d.String() +} + func (d *Decimal) Details() DecimalDetails { return DecimalDetails{scale: d.Scale(), precision: d.precision} } From a487ebabc74f76de816ebb45ba331b9712e6cf42 Mon Sep 17 00:00:00 2001 From: Nathan <148575555+nathan-artie@users.noreply.github.com> Date: Wed, 26 Jun 2024 11:15:02 -0700 Subject: [PATCH 18/26] Revert --- clients/shared/utils.go | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/clients/shared/utils.go b/clients/shared/utils.go index e80e17a76..1b7022053 100644 --- a/clients/shared/utils.go +++ b/clients/shared/utils.go @@ -44,7 +44,8 @@ func DefaultValue(column columns.Column, dialect sql.Dialect, additionalDateFmts return nil, fmt.Errorf("colVal is not type *decimal.Decimal") } - return val.String(), nil + // TODO: Call [String] instead. + return val.Value(), nil case typing.String.Kind: return sql.QuoteLiteral(fmt.Sprint(column.DefaultValue())), nil } From e9413f40263cc67f412b7d4a4dc50f0de118894c Mon Sep 17 00:00:00 2001 From: Nathan <148575555+nathan-artie@users.noreply.github.com> Date: Wed, 26 Jun 2024 11:17:15 -0700 Subject: [PATCH 19/26] Add helper method --- clients/bigquery/storagewrite_test.go | 14 +++----------- lib/debezium/decimal_test.go | 25 +++++++++---------------- lib/numbers/numbers.go | 11 +++++++++++ 3 files changed, 23 insertions(+), 27 deletions(-) diff --git a/clients/bigquery/storagewrite_test.go b/clients/bigquery/storagewrite_test.go index 7a0c5c073..23f66115f 100644 --- a/clients/bigquery/storagewrite_test.go +++ b/clients/bigquery/storagewrite_test.go @@ -6,11 +6,11 @@ import ( "time" "cloud.google.com/go/bigquery/storage/apiv1/storagepb" + "github.com/artie-labs/transfer/lib/numbers" "github.com/artie-labs/transfer/lib/typing" "github.com/artie-labs/transfer/lib/typing/columns" "github.com/artie-labs/transfer/lib/typing/decimal" "github.com/artie-labs/transfer/lib/typing/ext" - "github.com/cockroachdb/apd/v3" "github.com/stretchr/testify/assert" "google.golang.org/protobuf/encoding/protojson" ) @@ -98,14 +98,6 @@ func TestEncodePacked64TimeMicros(t *testing.T) { assert.Equal(t, int64(1<<32+1000), encodePacked64TimeMicros(epoch.Add(time.Duration(1)*time.Hour+time.Duration(1)*time.Millisecond))) } -func mustParseDecimal(value string) *apd.Decimal { - decimal, _, err := apd.NewFromString(value) - if err != nil { - panic(err) - } - return decimal -} - func TestRowToMessage(t *testing.T) { columns := []columns.Column{ columns.NewColumn("c_bool", typing.Boolean), @@ -137,9 +129,9 @@ func TestRowToMessage(t *testing.T) { "c_float_int32": int32(1234), "c_float_int64": int64(1234), "c_float_string": "4444.55555", - "c_numeric": decimal.NewDecimal(nil, mustParseDecimal("3.14159")), + "c_numeric": decimal.NewDecimal(nil, numbers.MustParseDecimal("3.14159")), "c_string": "foo bar", - "c_string_decimal": decimal.NewDecimal(nil, mustParseDecimal("1.61803")), + "c_string_decimal": decimal.NewDecimal(nil, numbers.MustParseDecimal("1.61803")), "c_time": ext.NewExtendedTime(time.Date(0, 0, 0, 4, 5, 6, 7, time.UTC), ext.TimeKindType, ""), "c_date": ext.NewExtendedTime(time.Date(2001, 2, 3, 0, 0, 0, 0, time.UTC), ext.DateKindType, ""), "c_datetime": ext.NewExtendedTime(time.Date(2001, 2, 3, 4, 5, 6, 7, time.UTC), ext.DateTimeKindType, ""), diff --git a/lib/debezium/decimal_test.go b/lib/debezium/decimal_test.go index ea79bd829..a3d0f6f42 100644 --- a/lib/debezium/decimal_test.go +++ b/lib/debezium/decimal_test.go @@ -6,6 +6,7 @@ import ( "math/big" "testing" + "github.com/artie-labs/transfer/lib/numbers" "github.com/cockroachdb/apd/v3" "github.com/stretchr/testify/assert" ) @@ -41,14 +42,6 @@ func TestDecodeBigInt(t *testing.T) { } } -func mustParseDecimal(value string) *apd.Decimal { - decimal, _, err := apd.NewFromString(value) - if err != nil { - panic(err) - } - return decimal -} - func TestDecimalWithNewExponent(t *testing.T) { assert.Equal(t, "0", decimalWithNewExponent(apd.New(0, 0), 0).Text('f')) assert.Equal(t, "00", decimalWithNewExponent(apd.New(0, 1), 1).Text('f')) @@ -57,21 +50,21 @@ func TestDecimalWithNewExponent(t *testing.T) { assert.Equal(t, "0.0", decimalWithNewExponent(apd.New(0, 0), -1).Text('f')) // Same exponent: - assert.Equal(t, "12.349", decimalWithNewExponent(mustParseDecimal("12.349"), -3).Text('f')) + assert.Equal(t, "12.349", decimalWithNewExponent(numbers.MustParseDecimal("12.349"), -3).Text('f')) // More precise exponent: - assert.Equal(t, "12.3490", decimalWithNewExponent(mustParseDecimal("12.349"), -4).Text('f')) - assert.Equal(t, "12.34900", decimalWithNewExponent(mustParseDecimal("12.349"), -5).Text('f')) + assert.Equal(t, "12.3490", decimalWithNewExponent(numbers.MustParseDecimal("12.349"), -4).Text('f')) + assert.Equal(t, "12.34900", decimalWithNewExponent(numbers.MustParseDecimal("12.349"), -5).Text('f')) // Lest precise exponent: // Extra digits should be truncated rather than rounded. - assert.Equal(t, "12.34", decimalWithNewExponent(mustParseDecimal("12.349"), -2).Text('f')) - assert.Equal(t, "12.3", decimalWithNewExponent(mustParseDecimal("12.349"), -1).Text('f')) - assert.Equal(t, "12", decimalWithNewExponent(mustParseDecimal("12.349"), 0).Text('f')) - assert.Equal(t, "10", decimalWithNewExponent(mustParseDecimal("12.349"), 1).Text('f')) + assert.Equal(t, "12.34", decimalWithNewExponent(numbers.MustParseDecimal("12.349"), -2).Text('f')) + assert.Equal(t, "12.3", decimalWithNewExponent(numbers.MustParseDecimal("12.349"), -1).Text('f')) + assert.Equal(t, "12", decimalWithNewExponent(numbers.MustParseDecimal("12.349"), 0).Text('f')) + assert.Equal(t, "10", decimalWithNewExponent(numbers.MustParseDecimal("12.349"), 1).Text('f')) } func TestEncodeDecimal(t *testing.T) { testEncodeDecimal := func(value string, expectedScale int32) { - bytes, scale := EncodeDecimal(mustParseDecimal(value)) + bytes, scale := EncodeDecimal(numbers.MustParseDecimal(value)) assert.Equal(t, expectedScale, scale, value) actual := DecodeDecimal(bytes, scale) diff --git a/lib/numbers/numbers.go b/lib/numbers/numbers.go index f414bbd83..8954fc1c3 100644 --- a/lib/numbers/numbers.go +++ b/lib/numbers/numbers.go @@ -1,6 +1,17 @@ package numbers +import "github.com/cockroachdb/apd/v3" + // BetweenEq - Looks something like this. start <= number <= end func BetweenEq[T int | int32 | int64](start, end, number T) bool { return number >= start && number <= end } + +// MustParseDecimal parses a string to an [apd.Decimal] or panics -- used for tests. +func MustParseDecimal(value string) *apd.Decimal { + decimal, _, err := apd.NewFromString(value) + if err != nil { + panic(err) + } + return decimal +} From 7b9ef83a699902b5118967457100fbf2ab6b7d82 Mon Sep 17 00:00:00 2001 From: Nathan <148575555+nathan-artie@users.noreply.github.com> Date: Wed, 26 Jun 2024 11:19:54 -0700 Subject: [PATCH 20/26] Revert --- lib/cdc/util/relational_event_decimal_test.go | 31 +++++++++++++------ lib/debezium/decimal_test.go | 2 +- 2 files changed, 23 insertions(+), 10 deletions(-) diff --git a/lib/cdc/util/relational_event_decimal_test.go b/lib/cdc/util/relational_event_decimal_test.go index 590050c79..ad287190e 100644 --- a/lib/cdc/util/relational_event_decimal_test.go +++ b/lib/cdc/util/relational_event_decimal_test.go @@ -3,6 +3,7 @@ package util import ( "encoding/json" "io" + "math/big" "os" "testing" @@ -46,8 +47,8 @@ func TestSchemaEventPayload_Numeric_GetData(t *testing.T) { retMap, err := schemaEventPayload.GetData(nil, &kafkalib.TopicConfig{}) assert.NoError(t, err) - assert.Equal(t, "123456.789", retMap["numeric_test"].(*decimal.Decimal).String()) - assert.Equal(t, "1234", retMap["numeric_5"].(*decimal.Decimal).String()) + assert.Equal(t, "123456.789", retMap["numeric_test"].(*decimal.Decimal).Value()) + assert.Equal(t, 0, big.NewFloat(1234).Cmp(retMap["numeric_5"].(*decimal.Decimal).Value().(*big.Float))) numericWithScaleMap := map[string]string{ "numeric_5_2": "568.01", "numeric_5_6": "0.023456", @@ -55,12 +56,16 @@ func TestSchemaEventPayload_Numeric_GetData(t *testing.T) { } for key, expectedValue := range numericWithScaleMap { + // Numeric data types that actually have scale fails when comparing *big.Float using `.Cmp`, so we are using STRING() instead. + _, isOk := retMap[key].(*decimal.Decimal).Value().(*big.Float) + assert.True(t, isOk) + // Now, we know the data type is *big.Float, let's check the .String() value. assert.Equal(t, expectedValue, retMap[key].(*decimal.Decimal).String()) } - assert.Equal(t, "58569102859845154622791691858438258688", retMap["numeric_39_0"].(*decimal.Decimal).String()) - assert.Equal(t, "5856910285984515462279169185843825868.22", retMap["numeric_39_2"].(*decimal.Decimal).String()) - assert.Equal(t, "585691028598451546227958438258688.123456", retMap["numeric_39_6"].(*decimal.Decimal).String()) + assert.Equal(t, "58569102859845154622791691858438258688", retMap["numeric_39_0"].(*decimal.Decimal).Value()) + assert.Equal(t, "5856910285984515462279169185843825868.22", retMap["numeric_39_2"].(*decimal.Decimal).Value()) + assert.Equal(t, "585691028598451546227958438258688.123456", retMap["numeric_39_6"].(*decimal.Decimal).Value()) } func TestSchemaEventPayload_Decimal_GetData(t *testing.T) { @@ -74,19 +79,23 @@ func TestSchemaEventPayload_Decimal_GetData(t *testing.T) { assert.NoError(t, err) retMap, err := schemaEventPayload.GetData(nil, &kafkalib.TopicConfig{}) assert.NoError(t, err) - assert.Equal(t, "123.45", retMap["decimal_test"].(*decimal.Decimal).String()) + assert.Equal(t, "123.45", retMap["decimal_test"].(*decimal.Decimal).Value()) decimalWithScaleMap := map[string]string{ "decimal_test_5": "123", "decimal_test_5_2": "123.45", } for key, expectedValue := range decimalWithScaleMap { + // Numeric data types that actually have scale fails when comparing *big.Float using `.Cmp`, so we are using STRING() instead. + _, isOk := retMap[key].(*decimal.Decimal).Value().(*big.Float) + assert.True(t, isOk) + // Now, we know the data type is *big.Float, let's check the .String() value. assert.Equal(t, expectedValue, retMap[key].(*decimal.Decimal).String(), key) } - assert.Equal(t, "58569102859845154622791691858438258688", retMap["decimal_test_39"].(*decimal.Decimal).String(), "decimal_test_39") - assert.Equal(t, "585691028598451546227916918584382586.22", retMap["decimal_test_39_2"].(*decimal.Decimal).String(), "decimal_test_39_2") - assert.Equal(t, "585691028598451546227916918584388.123456", retMap["decimal_test_39_6"].(*decimal.Decimal).String(), "decimal_test_39_6") + assert.Equal(t, "58569102859845154622791691858438258688", retMap["decimal_test_39"].(*decimal.Decimal).Value(), "decimal_test_39") + assert.Equal(t, "585691028598451546227916918584382586.22", retMap["decimal_test_39_2"].(*decimal.Decimal).Value(), "decimal_test_39_2") + assert.Equal(t, "585691028598451546227916918584388.123456", retMap["decimal_test_39_6"].(*decimal.Decimal).Value(), "decimal_test_39_6") } func TestSchemaEventPayload_Money_GetData(t *testing.T) { @@ -106,6 +115,10 @@ func TestSchemaEventPayload_Money_GetData(t *testing.T) { } for key, expectedValue := range decimalWithScaleMap { + // Numeric data types that actually have scale fails when comparing *big.Float using `.Cmp`, so we are using STRING() instead. + _, isOk := retMap[key].(*decimal.Decimal).Value().(*big.Float) + assert.True(t, isOk) + // Now, we know the data type is *big.Float, let's check the .String() value. assert.Equal(t, expectedValue, retMap[key].(*decimal.Decimal).String(), key) } } diff --git a/lib/debezium/decimal_test.go b/lib/debezium/decimal_test.go index a3d0f6f42..931468710 100644 --- a/lib/debezium/decimal_test.go +++ b/lib/debezium/decimal_test.go @@ -86,7 +86,7 @@ func TestEncodeDecimal(t *testing.T) { func TestEncodeDecimalWithScale(t *testing.T) { mustEncodeAndDecodeDecimal := func(value string, scale int32) string { - bytes := EncodeDecimalWithScale(mustParseDecimal(value), scale) + bytes := EncodeDecimalWithScale(numbers.MustParseDecimal(value), scale) return DecodeDecimal(bytes, scale).String() } From fab0b1b7e0ff0321be67ea22e119d760551a2588 Mon Sep 17 00:00:00 2001 From: Nathan <148575555+nathan-artie@users.noreply.github.com> Date: Wed, 26 Jun 2024 11:33:01 -0700 Subject: [PATCH 21/26] Revert --- lib/typing/values/string_test.go | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/lib/typing/values/string_test.go b/lib/typing/values/string_test.go index af33186d3..945a8f756 100644 --- a/lib/typing/values/string_test.go +++ b/lib/typing/values/string_test.go @@ -4,10 +4,10 @@ import ( "testing" "time" - "github.com/cockroachdb/apd/v3" "github.com/stretchr/testify/assert" "github.com/artie-labs/transfer/lib/config/constants" + "github.com/artie-labs/transfer/lib/numbers" "github.com/artie-labs/transfer/lib/ptr" "github.com/artie-labs/transfer/lib/typing" "github.com/artie-labs/transfer/lib/typing/columns" @@ -123,9 +123,7 @@ func TestToString(t *testing.T) { assert.Equal(t, "123.45", val) // Decimals - _decimal, _, err := apd.NewFromString("585692791691858.25") - assert.NoError(t, err) - value := decimal.NewDecimal(ptr.ToInt(38), _decimal) + value := decimal.NewDecimal(ptr.ToInt(38), numbers.MustParseDecimal("585692791691858.25")) val, err = ToString(value, columns.Column{KindDetails: typing.EDecimal}, nil) assert.NoError(t, err) assert.Equal(t, "585692791691858.25", val) From 4b3327c83093a6ecee9e505071903f7c067022c9 Mon Sep 17 00:00:00 2001 From: Nathan <148575555+nathan-artie@users.noreply.github.com> Date: Wed, 26 Jun 2024 11:35:37 -0700 Subject: [PATCH 22/26] Revert things --- lib/debezium/types_test.go | 1 - lib/typing/decimal/decimal.go | 7 +++---- 2 files changed, 3 insertions(+), 5 deletions(-) diff --git a/lib/debezium/types_test.go b/lib/debezium/types_test.go index cd0a3037a..2cbcaef10 100644 --- a/lib/debezium/types_test.go +++ b/lib/debezium/types_test.go @@ -723,7 +723,6 @@ func TestField_DecodeDebeziumVariableDecimal(t *testing.T) { assert.Equal(t, -1, *dec.Precision(), testCase.name) assert.Equal(t, testCase.expectedScale, dec.Scale(), testCase.name) assert.Equal(t, testCase.expectedValue, dec.Value(), testCase.name) - assert.Equal(t, testCase.expectedValue, dec.String(), testCase.name) } } diff --git a/lib/typing/decimal/decimal.go b/lib/typing/decimal/decimal.go index d68047ad8..00ba25949 100644 --- a/lib/typing/decimal/decimal.go +++ b/lib/typing/decimal/decimal.go @@ -8,7 +8,7 @@ import ( "github.com/cockroachdb/apd/v3" ) -// Decimal is Artie's wrapper around [apd.Decimal] which can store large numbers w/ no precision loss. +// Decimal is Artie's wrapper around [*apd.Decimal] which can store large numbers w/ no precision loss. type Decimal struct { precision *int value *apd.Decimal @@ -23,9 +23,8 @@ const ( ) func NewDecimal(precision *int, value *apd.Decimal) *Decimal { - scale := int(-value.Exponent) - if precision != nil { + scale := int(-value.Exponent) if scale > *precision && *precision != -1 { // Note: -1 precision means it's not specified. @@ -64,7 +63,7 @@ func (d *Decimal) Value() any { } // Depending on the precision, we will want to convert value to STRING or keep as a FLOAT. - // TODO: [Value] is only called in one place, look into callining [String] instead. + // TODO: [Value] is only called in one place, look into calling [String] instead. if out, ok := new(big.Float).SetString(d.String()); ok { return out } From 8ab7bf0ce008fb74d76255715ea727e734b74593 Mon Sep 17 00:00:00 2001 From: Nathan <148575555+nathan-artie@users.noreply.github.com> Date: Wed, 26 Jun 2024 11:37:36 -0700 Subject: [PATCH 23/26] Revert --- lib/debezium/decimal.go | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/lib/debezium/decimal.go b/lib/debezium/decimal.go index c7c4aecad..0b3c9a931 100644 --- a/lib/debezium/decimal.go +++ b/lib/debezium/decimal.go @@ -88,7 +88,7 @@ func decimalWithNewExponent(decimal *apd.Decimal, newExponent int32) *apd.Decima } } -// EncodeDecimal is used to encode a [*apd.Decimal] to [org.apache.kafka.connect.data.Decimal]. +// EncodeDecimal is used to encode a [*apd.Decimal] to `org.apache.kafka.connect.data.Decimal`. // The scale of the value (which is the negated exponent of the decimal) is returned as the second argument. func EncodeDecimal(decimal *apd.Decimal) ([]byte, int32) { bigIntValue := decimal.Coeff.MathBigInt() @@ -99,7 +99,7 @@ func EncodeDecimal(decimal *apd.Decimal) ([]byte, int32) { return encodeBigInt(bigIntValue), -decimal.Exponent } -// EncodeDecimalWithScale is used to encode a [*apd.Decimal] to [org.apache.kafka.connect.data.Decimal] +// EncodeDecimalWithScale is used to encode a [*apd.Decimal] to `org.apache.kafka.connect.data.Decimal` // using a specific scale. func EncodeDecimalWithScale(decimal *apd.Decimal, scale int32) []byte { targetExponent := -scale // Negate scale since [Decimal.Exponent] is negative. @@ -110,7 +110,7 @@ func EncodeDecimalWithScale(decimal *apd.Decimal, scale int32) []byte { return bytes } -// DecodeDecimal is used to decode [org.apache.kafka.connect.data.Decimal]. +// DecodeDecimal is used to decode `org.apache.kafka.connect.data.Decimal`. func DecodeDecimal(data []byte, scale int32) *apd.Decimal { return apd.NewWithBigInt(new(apd.BigInt).SetMathBigInt(decodeBigInt(data)), -scale) } From 8ca4bccd825e4bb4b6226494f7172e6f22ee3aab Mon Sep 17 00:00:00 2001 From: Nathan <148575555+nathan-artie@users.noreply.github.com> Date: Wed, 26 Jun 2024 11:38:17 -0700 Subject: [PATCH 24/26] Split --- lib/debezium/decimal.go | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/lib/debezium/decimal.go b/lib/debezium/decimal.go index 0b3c9a931..7e46c997c 100644 --- a/lib/debezium/decimal.go +++ b/lib/debezium/decimal.go @@ -112,5 +112,6 @@ func EncodeDecimalWithScale(decimal *apd.Decimal, scale int32) []byte { // DecodeDecimal is used to decode `org.apache.kafka.connect.data.Decimal`. func DecodeDecimal(data []byte, scale int32) *apd.Decimal { - return apd.NewWithBigInt(new(apd.BigInt).SetMathBigInt(decodeBigInt(data)), -scale) + bigInt := new(apd.BigInt).SetMathBigInt(decodeBigInt(data)) + return apd.NewWithBigInt(bigInt, -scale) } From b41c61ecb9a2e17c9fb16335748fba0f1749f81f Mon Sep 17 00:00:00 2001 From: Nathan <148575555+nathan-artie@users.noreply.github.com> Date: Wed, 26 Jun 2024 11:57:38 -0700 Subject: [PATCH 25/26] Tests --- lib/typing/decimal/decimal_test.go | 48 ++++++++++++++++++++++++++++++ 1 file changed, 48 insertions(+) create mode 100644 lib/typing/decimal/decimal_test.go diff --git a/lib/typing/decimal/decimal_test.go b/lib/typing/decimal/decimal_test.go new file mode 100644 index 000000000..2303788a9 --- /dev/null +++ b/lib/typing/decimal/decimal_test.go @@ -0,0 +1,48 @@ +package decimal + +import ( + "testing" + + "github.com/artie-labs/transfer/lib/numbers" + "github.com/artie-labs/transfer/lib/ptr" + "github.com/stretchr/testify/assert" +) + +func TestNewDecimal(t *testing.T) { + // Nil precision: + assert.Equal(t, "0", NewDecimal(nil, numbers.MustParseDecimal("0")).String()) + // Precision = -1 (PrecisionNotSpecified): + assert.Equal(t, DecimalDetails{scale: 2, precision: ptr.ToInt(-1)}, NewDecimal(ptr.ToInt(-1), numbers.MustParseDecimal("12.34")).Details()) + // Precision = scale: + assert.Equal(t, DecimalDetails{scale: 2, precision: ptr.ToInt(2)}, NewDecimal(ptr.ToInt(2), numbers.MustParseDecimal("12.34")).Details()) + // Precision < scale: + assert.Equal(t, DecimalDetails{scale: 2, precision: ptr.ToInt(3)}, NewDecimal(ptr.ToInt(1), numbers.MustParseDecimal("12.34")).Details()) + // Precision > scale: + assert.Equal(t, DecimalDetails{scale: 2, precision: ptr.ToInt(4)}, NewDecimal(ptr.ToInt(4), numbers.MustParseDecimal("12.34")).Details()) +} + +func TestDecimal_Scale(t *testing.T) { + assert.Equal(t, 0, NewDecimal(nil, numbers.MustParseDecimal("0")).Scale()) + assert.Equal(t, 0, NewDecimal(nil, numbers.MustParseDecimal("12345")).Scale()) + assert.Equal(t, 0, NewDecimal(nil, numbers.MustParseDecimal("12300")).Scale()) + assert.Equal(t, 1, NewDecimal(nil, numbers.MustParseDecimal("12300.0")).Scale()) + assert.Equal(t, 2, NewDecimal(nil, numbers.MustParseDecimal("12300.00")).Scale()) + assert.Equal(t, 2, NewDecimal(nil, numbers.MustParseDecimal("12345.12")).Scale()) + assert.Equal(t, 3, NewDecimal(nil, numbers.MustParseDecimal("-12345.123")).Scale()) +} + +func TestDecimal_Details(t *testing.T) { + // Nil precision: + assert.Equal(t, DecimalDetails{scale: 0}, NewDecimal(nil, numbers.MustParseDecimal("0")).Details()) + assert.Equal(t, DecimalDetails{scale: 0}, NewDecimal(nil, numbers.MustParseDecimal("12345")).Details()) + assert.Equal(t, DecimalDetails{scale: 0}, NewDecimal(nil, numbers.MustParseDecimal("-12")).Details()) + assert.Equal(t, DecimalDetails{scale: 2}, NewDecimal(nil, numbers.MustParseDecimal("12345.12")).Details()) + assert.Equal(t, DecimalDetails{scale: 3}, NewDecimal(nil, numbers.MustParseDecimal("-12345.123")).Details()) + + // 10 precision: + assert.Equal(t, DecimalDetails{scale: 0, precision: ptr.ToInt(10)}, NewDecimal(ptr.ToInt(10), numbers.MustParseDecimal("0")).Details()) + assert.Equal(t, DecimalDetails{scale: 0, precision: ptr.ToInt(10)}, NewDecimal(ptr.ToInt(10), numbers.MustParseDecimal("12345")).Details()) + assert.Equal(t, DecimalDetails{scale: 0, precision: ptr.ToInt(10)}, NewDecimal(ptr.ToInt(10), numbers.MustParseDecimal("-12")).Details()) + assert.Equal(t, DecimalDetails{scale: 2, precision: ptr.ToInt(10)}, NewDecimal(ptr.ToInt(10), numbers.MustParseDecimal("12345.12")).Details()) + assert.Equal(t, DecimalDetails{scale: 3, precision: ptr.ToInt(10)}, NewDecimal(ptr.ToInt(10), numbers.MustParseDecimal("-12345.123")).Details()) +} From f23b0a504214be04d49dac40515c90a9fe309f14 Mon Sep 17 00:00:00 2001 From: Nathan <148575555+nathan-artie@users.noreply.github.com> Date: Wed, 26 Jun 2024 13:46:13 -0700 Subject: [PATCH 26/26] More tests --- lib/typing/decimal/decimal_test.go | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/lib/typing/decimal/decimal_test.go b/lib/typing/decimal/decimal_test.go index 2303788a9..666761b20 100644 --- a/lib/typing/decimal/decimal_test.go +++ b/lib/typing/decimal/decimal_test.go @@ -39,6 +39,13 @@ func TestDecimal_Details(t *testing.T) { assert.Equal(t, DecimalDetails{scale: 2}, NewDecimal(nil, numbers.MustParseDecimal("12345.12")).Details()) assert.Equal(t, DecimalDetails{scale: 3}, NewDecimal(nil, numbers.MustParseDecimal("-12345.123")).Details()) + // -1 precision (PrecisionNotSpecified): + assert.Equal(t, DecimalDetails{scale: 0, precision: ptr.ToInt(-1)}, NewDecimal(ptr.ToInt(-1), numbers.MustParseDecimal("0")).Details()) + assert.Equal(t, DecimalDetails{scale: 0, precision: ptr.ToInt(-1)}, NewDecimal(ptr.ToInt(-1), numbers.MustParseDecimal("12345")).Details()) + assert.Equal(t, DecimalDetails{scale: 0, precision: ptr.ToInt(-1)}, NewDecimal(ptr.ToInt(-1), numbers.MustParseDecimal("-12")).Details()) + assert.Equal(t, DecimalDetails{scale: 2, precision: ptr.ToInt(-1)}, NewDecimal(ptr.ToInt(-1), numbers.MustParseDecimal("12345.12")).Details()) + assert.Equal(t, DecimalDetails{scale: 3, precision: ptr.ToInt(-1)}, NewDecimal(ptr.ToInt(-1), numbers.MustParseDecimal("-12345.123")).Details()) + // 10 precision: assert.Equal(t, DecimalDetails{scale: 0, precision: ptr.ToInt(10)}, NewDecimal(ptr.ToInt(10), numbers.MustParseDecimal("0")).Details()) assert.Equal(t, DecimalDetails{scale: 0, precision: ptr.ToInt(10)}, NewDecimal(ptr.ToInt(10), numbers.MustParseDecimal("12345")).Details())