From 68f2869bc392cfb14fc02df9a4739dc2acf3384a Mon Sep 17 00:00:00 2001 From: Robin Tang Date: Sun, 5 Mar 2023 11:06:21 -0800 Subject: [PATCH] Spacing working as expected. (#59) --- lib/stringutil/strings.go | 5 +++++ lib/stringutil/strings_test.go | 14 ++++++++++++++ models/memory.go | 13 +++++++++++++ 3 files changed, 32 insertions(+) diff --git a/lib/stringutil/strings.go b/lib/stringutil/strings.go index 4329e9be7..852bc0daa 100644 --- a/lib/stringutil/strings.go +++ b/lib/stringutil/strings.go @@ -31,3 +31,8 @@ func Empty(vals ...string) bool { return false } + +func EscapeSpaces(col string) (escaped bool, newString string) { + subStr := " " + return strings.Contains(col, subStr), strings.ReplaceAll(col, subStr, "__") +} diff --git a/lib/stringutil/strings_test.go b/lib/stringutil/strings_test.go index d57ac0129..b281c3599 100644 --- a/lib/stringutil/strings_test.go +++ b/lib/stringutil/strings_test.go @@ -26,3 +26,17 @@ func TestEmpty(t *testing.T) { assert.True(t, Empty("robin", "jacqueline", "charlie", "")) assert.True(t, Empty("")) } + +func TestEscapeSpaces(t *testing.T) { + colsToExpectation := map[string]map[string]interface{}{ + "columnA": {"escaped": "columnA", "space": false}, + "column_a": {"escaped": "column_a", "space": false}, + "column a": {"escaped": "column__a", "space": true}, + } + + for col, expected := range colsToExpectation { + containsSpace, escapedString := EscapeSpaces(col) + assert.Equal(t, expected["escaped"], escapedString) + assert.Equal(t, expected["space"], containsSpace) + } +} diff --git a/models/memory.go b/models/memory.go index 9baaa9a1c..710f096a5 100644 --- a/models/memory.go +++ b/models/memory.go @@ -6,6 +6,7 @@ import ( "github.com/artie-labs/transfer/lib/config/constants" "github.com/artie-labs/transfer/lib/kafkalib" "github.com/artie-labs/transfer/lib/optimization" + "github.com/artie-labs/transfer/lib/stringutil" "github.com/artie-labs/transfer/lib/typing" "github.com/segmentio/kafka-go" "sync" @@ -67,6 +68,18 @@ 
func (e *Event) Save(topicConfig *kafkalib.TopicConfig, message kafka.Message) ( // Update col if necessary for col, val := range e.Data { + // Columns here could contain spaces. Every destination treats spaces in a column name differently. + // So far, Snowflake accepts them when escaped properly, whereas BigQuery does not accept them at all. + // Instead of making this more complicated for future destinations, we will escape each space by replacing it with a double underscore (`__`). + // So, if customers want to recover the spaces, they can replace `__` with a space. + + var containsSpace bool + containsSpace, col = stringutil.EscapeSpaces(col) + if containsSpace { + // Write the message back if the column has changed. + e.Data[col] = val + } + if val == "__debezium_unavailable_value" { // This is an edge case within Postgres & ORCL // TL;DR - Sometimes a column that is unchanged within a DML will not be emitted