Skip to content

Commit

Permalink
Gracefully dealing with MongoDB NaN errors (#154)
Browse files Browse the repository at this point in the history
  • Loading branch information
Tang8330 authored Jul 18, 2023
1 parent 2be4db0 commit 9e6ba99
Show file tree
Hide file tree
Showing 3 changed files with 26 additions and 6 deletions.
7 changes: 4 additions & 3 deletions lib/cdc/mongo/debezium.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ package mongo
import (
"context"
"encoding/json"
"fmt"
"time"

"github.com/artie-labs/transfer/lib/typing/ext"
Expand Down Expand Up @@ -30,7 +31,7 @@ func (d *Debezium) GetEventFromBytes(_ context.Context, bytes []byte) (cdc.Event

err := json.Unmarshal(bytes, &schemaEventPayload)
if err != nil {
return nil, err
return nil, fmt.Errorf("failed to unmarshal json, err: %v", err)
}

// Now marshal before & after string.
Expand All @@ -46,7 +47,7 @@ func (d *Debezium) GetEventFromBytes(_ context.Context, bytes []byte) (cdc.Event
if schemaEventPayload.Payload.After != nil {
after, err := mongo.JSONEToMap([]byte(*schemaEventPayload.Payload.After))
if err != nil {
return nil, err
return nil, fmt.Errorf("mongo JSONEToMap err: %v", err)
}

// Now, we need to iterate over each key and if the value is JSON
Expand All @@ -55,7 +56,7 @@ func (d *Debezium) GetEventFromBytes(_ context.Context, bytes []byte) (cdc.Event
if typing.ParseValue(key, nil, value) == typing.Struct {
valBytes, err := json.Marshal(value)
if err != nil {
return nil, err
return nil, fmt.Errorf("failed to marshal, err: %v", err)
}

after[key] = string(valBytes)
Expand Down
14 changes: 13 additions & 1 deletion lib/typing/mongo/bson.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,9 @@ import (
"encoding/json"
"fmt"
"reflect"
"regexp"
"strconv"
"strings"
"time"

"github.com/google/uuid"
Expand All @@ -19,11 +21,21 @@ import (
// JSONEToMap will take JSONE data in bytes, parse all the custom types,
// and return the result as a map[string]interface{}.
func JSONEToMap(val []byte) (map[string]interface{}, error) {
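// Match NaN both as a bare token and as the exact quoted string "NaN".
// A single pattern cannot select only the bare form, so both forms are matched
// and the callback leaves the quoted form untouched while the bare form becomes null.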
re := regexp.MustCompile(`\bNaN\b|"\bNaN\b"`)
val = []byte(re.ReplaceAllStringFunc(string(val), func(match string) string {
if strings.Contains(match, "\"") {
return match
}
return "null"
}))

var jsonMap map[string]interface{}
var bsonDoc bson.D
err := bson.UnmarshalExtJSON(val, false, &bsonDoc)
if err != nil {
return nil, err
return nil, fmt.Errorf("failed to unmarshal ext json, err: %v", err)
}

bytes, err := bson.MarshalExtJSONWithRegistry(createCustomRegistry().Build(),
Expand Down
11 changes: 9 additions & 2 deletions lib/typing/mongo/bson_test.go
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
package mongo

import (
"github.com/stretchr/testify/assert"
"testing"

"github.com/stretchr/testify/assert"
)

// TestMarshal, every single type is listed here: https://github.com/mongodb/specifications/blob/master/source/extended-json.rst#canonical-extended-json-example
Expand Down Expand Up @@ -45,7 +46,10 @@ func TestMarshal(t *testing.T) {
},
"test_timestamp": {
"$timestamp": { "t": 1678929517, "i": 1 }
}
},
"test_nan": NaN,
"test_nan_string": "NaN",
"test_nan_string33": "NaNaNaNa"
}
`)
result, err := JSONEToMap(bsonData)
Expand All @@ -66,4 +70,7 @@ func TestMarshal(t *testing.T) {
assert.Equal(t, result["test_list"], []interface{}{float64(1), float64(2), float64(3), float64(4), "hello"})
assert.Equal(t, result["test_nested_object"], map[string]interface{}{"a": map[string]interface{}{"b": map[string]interface{}{"c": "hello"}}})
assert.Equal(t, "2023-03-16T01:18:37+00:00", result["test_timestamp"])
assert.Equal(t, nil, result["test_nan"])
assert.Equal(t, "NaN", result["test_nan_string"]) // This should not be escaped.
assert.Equal(t, "NaNaNaNa", result["test_nan_string33"])
}

0 comments on commit 9e6ba99

Please sign in to comment.