diff --git a/go.mod b/go.mod index 8af5c278023..81b574dcc76 100644 --- a/go.mod +++ b/go.mod @@ -41,6 +41,7 @@ require ( github.com/databricks/databricks-sql-go v1.6.1 github.com/denisenkom/go-mssqldb v0.12.3 github.com/dgraph-io/badger/v4 v4.3.1 + github.com/dlclark/regexp2 v1.11.4 github.com/docker/docker v27.3.1+incompatible github.com/go-chi/chi/v5 v5.1.0 github.com/go-redis/redis v6.15.9+incompatible @@ -76,7 +77,7 @@ require ( github.com/rudderlabs/analytics-go v3.3.3+incompatible github.com/rudderlabs/bing-ads-go-sdk v0.2.3 github.com/rudderlabs/compose-test v0.1.3 - github.com/rudderlabs/rudder-go-kit v0.43.0 + github.com/rudderlabs/rudder-go-kit v0.43.1-0.20241017045502-08a98c5f8442 github.com/rudderlabs/rudder-observability-kit v0.0.3 github.com/rudderlabs/rudder-schemas v0.5.3 github.com/rudderlabs/rudder-transformer/go v0.0.0-20240910055720-f77d2ab4125a @@ -187,7 +188,6 @@ require ( github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc // indirect github.com/dgraph-io/ristretto v1.0.0 // indirect github.com/dgryski/go-rendezvous v0.0.0-20200823014737-9f7001d12a5f // indirect - github.com/dlclark/regexp2 v1.11.4 // indirect github.com/dnephin/pflag v1.0.7 // indirect github.com/docker/cli v27.2.1+incompatible // indirect github.com/docker/cli-docs-tool v0.8.0 // indirect diff --git a/go.sum b/go.sum index 991eecef655..ebbe6691b31 100644 --- a/go.sum +++ b/go.sum @@ -1168,8 +1168,8 @@ github.com/rudderlabs/goqu/v10 v10.3.1 h1:rnfX+b4EwBWQ2UQfIGeEW299JBBkK5biEbnf7K github.com/rudderlabs/goqu/v10 v10.3.1/go.mod h1:LH2vI5gGHBxEQuESqFyk5ZA2anGINc8o25hbidDWOYw= github.com/rudderlabs/parquet-go v0.0.2 h1:ZXRdZdimB0PdJtmxeSSxfI0fDQ3kZjwzBxRi6Ut1J8k= github.com/rudderlabs/parquet-go v0.0.2/go.mod h1:g6guum7o8uhj/uNhunnt7bw5Vabu/goI5i21/3fnxWQ= -github.com/rudderlabs/rudder-go-kit v0.43.0 h1:N6CAvQdjufitdiUl424+AcMebEmieB0TO5PhARwXvw8= -github.com/rudderlabs/rudder-go-kit v0.43.0/go.mod h1:NrHCi0KSzHSMFXQu0t2kgJcE4ClAKklVXfb2glADvQ4= 
+github.com/rudderlabs/rudder-go-kit v0.43.1-0.20241017045502-08a98c5f8442 h1:WAYL/6chiRSIeKwSNGd9sclWNWbKBwenGbUhiyxQIi4= +github.com/rudderlabs/rudder-go-kit v0.43.1-0.20241017045502-08a98c5f8442/go.mod h1:NrHCi0KSzHSMFXQu0t2kgJcE4ClAKklVXfb2glADvQ4= github.com/rudderlabs/rudder-observability-kit v0.0.3 h1:vZtuZRkGX+6rjaeKtxxFE2YYP6QlmAcVcgecTOjvz+Q= github.com/rudderlabs/rudder-observability-kit v0.0.3/go.mod h1:6UjAh3H6rkE0fFLh7z8ZGQEQbKtUkRfhWOf/OUhfqW8= github.com/rudderlabs/rudder-schemas v0.5.3 h1:IWWjAo2TzsjwHNhS2EAr1+0MjvA8BoTpJvB2o/GFwNU= diff --git a/processor/transformer/transformer.go b/processor/transformer/transformer.go index 45164410ef6..70b3cb92726 100644 --- a/processor/transformer/transformer.go +++ b/processor/transformer/transformer.go @@ -140,13 +140,25 @@ func WithClient(client *http.Client) Opt { } } -// Transformer provides methods to transform events -type Transformer interface { - Transform(ctx context.Context, clientEvents []TransformerEvent, batchSize int) Response +type UserTransformer interface { UserTransform(ctx context.Context, clientEvents []TransformerEvent, batchSize int) Response +} + +type DestinationTransformer interface { + Transform(ctx context.Context, clientEvents []TransformerEvent, batchSize int) Response +} + +type TrackingPlanValidator interface { Validate(ctx context.Context, clientEvents []TransformerEvent, batchSize int) Response } +// Transformer provides methods to transform events +type Transformer interface { + UserTransformer + DestinationTransformer + TrackingPlanValidator +} + // handle is the handle for this class type handle struct { sentStat stats.Measurement @@ -526,7 +538,7 @@ func (trans *handle) destTransformURL(destType string) string { destinationEndPoint := fmt.Sprintf("%s/v0/destinations/%s", trans.config.destTransformationURL, strings.ToLower(destType)) if _, ok := warehouseutils.WarehouseDestinationMap[destType]; ok { - whSchemaVersionQueryParam := 
fmt.Sprintf("whSchemaVersion=%s&whIDResolve=%v", trans.conf.GetString("Warehouse.schemaVersion", "v1"), warehouseutils.IDResolutionEnabled()) + whSchemaVersionQueryParam := fmt.Sprintf("whIDResolve=%v", trans.conf.GetBool("Warehouse.enableIDResolution", false)) switch destType { case warehouseutils.RS: return destinationEndPoint + "?" + whSchemaVersionQueryParam diff --git a/warehouse/transformer/alias.go b/warehouse/transformer/alias.go new file mode 100644 index 00000000000..a5000ac8a01 --- /dev/null +++ b/warehouse/transformer/alias.go @@ -0,0 +1,58 @@ +package transformer + +import ( + "fmt" + + "github.com/samber/lo" + + "github.com/rudderlabs/rudder-server/warehouse/transformer/internal/rules" +) + +func (t *transformer) handleAliasEvent(pi *processingInfo) ([]map[string]any, error) { + aliasEvent := make(map[string]any) + columnTypes := make(map[string]string) + + if err := t.setDataAndColumnTypeFromInput(pi, pi.event.Message["traits"], aliasEvent, columnTypes, + "alias_traits_", 2, "", 0, + ); err != nil { + return nil, fmt.Errorf("setting data and column types from message: %w", err) + } + if err := t.setDataAndColumnTypeFromInput(pi, pi.event.Message["context"], aliasEvent, columnTypes, + "alias_context_", 2, "context_", 0, + ); err != nil { + return nil, fmt.Errorf("setting data and column types from message: %w", err) + } + if err := t.setDataAndColumnTypeFromRules(pi, aliasEvent, columnTypes, + lo.Assign(rules.DefaultRules, rules.AliasRules), rules.DefaultFunctionalRules, + ); err != nil { + return nil, fmt.Errorf("setting data and column types from rules: %w", err) + } + if err := storeRudderEvent(pi, aliasEvent, columnTypes); err != nil { + return nil, fmt.Errorf("storing rudder event: %w", err) + } + + table, err := SafeTableName(pi.event.Metadata.DestinationType, pi.itrOpts, "aliases") + if err != nil { + return nil, fmt.Errorf("safe table name: %w", err) + } + columns, err := t.getColumns(pi.event.Metadata.DestinationType, aliasEvent, columnTypes) 
+ if err != nil { + return nil, fmt.Errorf("getting columns: %w", err) + } + + mergeEvents, err := t.handleMergeEvent(pi) + if err != nil { + return nil, fmt.Errorf("handling merge event: %w", err) + } + + aliasOutput := map[string]any{ + "data": aliasEvent, + "metadata": map[string]any{ + "table": table, + "columns": columns, + "receivedAt": pi.event.Metadata.ReceivedAt, + }, + "userId": "", + } + return append([]map[string]any{aliasOutput}, mergeEvents...), nil +} diff --git a/warehouse/transformer/alias_test.go b/warehouse/transformer/alias_test.go new file mode 100644 index 00000000000..1eda8dfe337 --- /dev/null +++ b/warehouse/transformer/alias_test.go @@ -0,0 +1,595 @@ +package transformer + +import ( + "net/http" + "testing" + + "github.com/ory/dockertest/v3" + "github.com/stretchr/testify/require" + + "github.com/rudderlabs/rudder-go-kit/config" + "github.com/rudderlabs/rudder-go-kit/logger" + "github.com/rudderlabs/rudder-go-kit/stats" + transformertest "github.com/rudderlabs/rudder-go-kit/testhelper/docker/resource/transformer" + + backendconfig "github.com/rudderlabs/rudder-server/backend-config" + ptrans "github.com/rudderlabs/rudder-server/processor/transformer" +) + +func TestAlias(t *testing.T) { + pool, err := dockertest.NewPool("") + require.NoError(t, err) + + transformerResource, err := transformertest.Setup(pool, t) + require.NoError(t, err) + + testsCases := []struct { + name string + configOverride map[string]any + eventPayload string + metadata ptrans.Metadata + destination backendconfig.DestinationT + expectedResponse ptrans.Response + }{ + { + name: "alias (Postgres)", + eventPayload: `{"type":"alias","messageId":"messageId","anonymousId":"anonymousId","userId":"userId","previousId":"previousId","sentAt":"2021-09-01T00:00:00.000Z","timestamp":"2021-09-01T00:00:00.000Z","receivedAt":"2021-09-01T00:00:00.000Z","originalTimestamp":"2021-09-01T00:00:00.000Z","channel":"web","request_ip":"5.6.7.8","traits":{"title":"Home | 
RudderStack","url":"http://www.rudderstack.com"},"context":{"traits":{"email":"rhedricks@example.com","logins":2},"ip":"1.2.3.4"}}`, + metadata: ptrans.Metadata{ + EventType: "alias", + DestinationType: "POSTGRES", + ReceivedAt: "2021-09-01T00:00:00.000Z", + SourceID: "sourceID", + DestinationID: "destinationID", + SourceType: "sourceType", + }, + destination: backendconfig.DestinationT{ + Name: "POSTGRES", + Config: map[string]any{}, + DestinationDefinition: backendconfig.DestinationDefinitionT{ + Name: "POSTGRES", + }, + }, + expectedResponse: ptrans.Response{ + Events: []ptrans.TransformerResponse{ + { + Output: map[string]any{ + "data": map[string]any{ + "anonymous_id": "anonymousId", + "channel": "web", + "context_ip": "1.2.3.4", + "context_passed_ip": "1.2.3.4", + "context_request_ip": "5.6.7.8", + "context_traits_email": "rhedricks@example.com", + "context_traits_logins": float64(2), + "id": "messageId", + "original_timestamp": "2021-09-01T00:00:00.000Z", + "received_at": "2021-09-01T00:00:00.000Z", + "sent_at": "2021-09-01T00:00:00.000Z", + "timestamp": "2021-09-01T00:00:00.000Z", + "title": "Home | RudderStack", + "url": "http://www.rudderstack.com", + "user_id": "userId", + "previous_id": "previousId", + "context_destination_id": "destinationID", + "context_destination_type": "POSTGRES", + "context_source_id": "sourceID", + "context_source_type": "sourceType", + }, + "metadata": map[string]any{ + "columns": map[string]any{ + "anonymous_id": "string", + "channel": "string", + "context_destination_id": "string", + "context_destination_type": "string", + "context_source_id": "string", + "context_source_type": "string", + "context_ip": "string", + "context_passed_ip": "string", + "context_request_ip": "string", + "context_traits_email": "string", + "context_traits_logins": "int", + "id": "string", + "original_timestamp": "datetime", + "received_at": "datetime", + "sent_at": "datetime", + "timestamp": "datetime", + "title": "string", + "url": "string", + 
"user_id": "string", + "previous_id": "string", + "uuid_ts": "datetime", + }, + "receivedAt": "2021-09-01T00:00:00.000Z", + "table": "aliases", + }, + "userId": "", + }, + Metadata: ptrans.Metadata{ + EventType: "alias", + DestinationType: "POSTGRES", + ReceivedAt: "2021-09-01T00:00:00.000Z", + SourceID: "sourceID", + DestinationID: "destinationID", + SourceType: "sourceType", + }, + StatusCode: http.StatusOK, + }, + }, + }, + }, + { + name: "alias (Postgres) without traits", + eventPayload: `{"type":"alias","messageId":"messageId","anonymousId":"anonymousId","userId":"userId","previousId":"previousId","sentAt":"2021-09-01T00:00:00.000Z","timestamp":"2021-09-01T00:00:00.000Z","receivedAt":"2021-09-01T00:00:00.000Z","originalTimestamp":"2021-09-01T00:00:00.000Z","channel":"web","request_ip":"5.6.7.8","context":{"traits":{"email":"rhedricks@example.com","logins":2},"ip":"1.2.3.4"}}`, + metadata: ptrans.Metadata{ + EventType: "alias", + DestinationType: "POSTGRES", + ReceivedAt: "2021-09-01T00:00:00.000Z", + SourceID: "sourceID", + DestinationID: "destinationID", + SourceType: "sourceType", + }, + destination: backendconfig.DestinationT{ + Name: "POSTGRES", + Config: map[string]any{}, + DestinationDefinition: backendconfig.DestinationDefinitionT{ + Name: "POSTGRES", + }, + }, + expectedResponse: ptrans.Response{ + Events: []ptrans.TransformerResponse{ + { + Output: map[string]any{ + "data": map[string]any{ + "anonymous_id": "anonymousId", + "channel": "web", + "context_ip": "1.2.3.4", + "context_passed_ip": "1.2.3.4", + "context_request_ip": "5.6.7.8", + "context_traits_email": "rhedricks@example.com", + "context_traits_logins": float64(2), + "id": "messageId", + "original_timestamp": "2021-09-01T00:00:00.000Z", + "received_at": "2021-09-01T00:00:00.000Z", + "sent_at": "2021-09-01T00:00:00.000Z", + "timestamp": "2021-09-01T00:00:00.000Z", + "user_id": "userId", + "previous_id": "previousId", + "context_destination_id": "destinationID", + "context_destination_type": 
"POSTGRES", + "context_source_id": "sourceID", + "context_source_type": "sourceType", + }, + "metadata": map[string]any{ + "columns": map[string]any{ + "anonymous_id": "string", + "channel": "string", + "context_ip": "string", + "context_passed_ip": "string", + "context_request_ip": "string", + "context_traits_email": "string", + "context_traits_logins": "int", + "id": "string", + "original_timestamp": "datetime", + "received_at": "datetime", + "sent_at": "datetime", + "timestamp": "datetime", + "user_id": "string", + "previous_id": "string", + "uuid_ts": "datetime", + "context_destination_id": "string", + "context_destination_type": "string", + "context_source_id": "string", + "context_source_type": "string", + }, + "receivedAt": "2021-09-01T00:00:00.000Z", + "table": "aliases", + }, + "userId": "", + }, + Metadata: ptrans.Metadata{ + EventType: "alias", + DestinationType: "POSTGRES", + ReceivedAt: "2021-09-01T00:00:00.000Z", + SourceID: "sourceID", + DestinationID: "destinationID", + SourceType: "sourceType", + }, + StatusCode: http.StatusOK, + }, + }, + }, + }, + { + name: "alias (Postgres) without context", + eventPayload: `{"type":"alias","messageId":"messageId","anonymousId":"anonymousId","userId":"userId","previousId":"previousId","sentAt":"2021-09-01T00:00:00.000Z","timestamp":"2021-09-01T00:00:00.000Z","receivedAt":"2021-09-01T00:00:00.000Z","originalTimestamp":"2021-09-01T00:00:00.000Z","channel":"web","request_ip":"5.6.7.8","traits":{"title":"Home | RudderStack","url":"http://www.rudderstack.com"}}`, + metadata: ptrans.Metadata{ + EventType: "alias", + DestinationType: "POSTGRES", + ReceivedAt: "2021-09-01T00:00:00.000Z", + SourceID: "sourceID", + DestinationID: "destinationID", + SourceType: "sourceType", + }, + destination: backendconfig.DestinationT{ + Name: "POSTGRES", + Config: map[string]any{}, + DestinationDefinition: backendconfig.DestinationDefinitionT{ + Name: "POSTGRES", + }, + }, + expectedResponse: ptrans.Response{ + Events: 
[]ptrans.TransformerResponse{ + { + Output: map[string]any{ + "data": map[string]any{ + "anonymous_id": "anonymousId", + "channel": "web", + "context_ip": "5.6.7.8", + "context_request_ip": "5.6.7.8", + "id": "messageId", + "original_timestamp": "2021-09-01T00:00:00.000Z", + "received_at": "2021-09-01T00:00:00.000Z", + "sent_at": "2021-09-01T00:00:00.000Z", + "timestamp": "2021-09-01T00:00:00.000Z", + "title": "Home | RudderStack", + "url": "http://www.rudderstack.com", + "user_id": "userId", + "previous_id": "previousId", + "context_destination_id": "destinationID", + "context_destination_type": "POSTGRES", + "context_source_id": "sourceID", + "context_source_type": "sourceType", + }, + "metadata": map[string]any{ + "columns": map[string]any{ + "anonymous_id": "string", + "channel": "string", + "context_ip": "string", + "context_request_ip": "string", + "id": "string", + "original_timestamp": "datetime", + "received_at": "datetime", + "sent_at": "datetime", + "timestamp": "datetime", + "title": "string", + "url": "string", + "user_id": "string", + "previous_id": "string", + "uuid_ts": "datetime", + "context_destination_id": "string", + "context_destination_type": "string", + "context_source_id": "string", + "context_source_type": "string", + }, + "receivedAt": "2021-09-01T00:00:00.000Z", + "table": "aliases", + }, + "userId": "", + }, + Metadata: ptrans.Metadata{ + EventType: "alias", + DestinationType: "POSTGRES", + ReceivedAt: "2021-09-01T00:00:00.000Z", + SourceID: "sourceID", + DestinationID: "destinationID", + SourceType: "sourceType", + }, + StatusCode: http.StatusOK, + }, + }, + }, + }, + { + name: "alias (Postgres) store rudder event", + eventPayload: 
`{"type":"alias","messageId":"messageId","anonymousId":"anonymousId","userId":"userId","previousId":"previousId","sentAt":"2021-09-01T00:00:00.000Z","timestamp":"2021-09-01T00:00:00.000Z","receivedAt":"2021-09-01T00:00:00.000Z","originalTimestamp":"2021-09-01T00:00:00.000Z","channel":"web","request_ip":"5.6.7.8","traits":{"title":"Home | RudderStack","url":"http://www.rudderstack.com"},"context":{"traits":{"email":"rhedricks@example.com","logins":2},"ip":"1.2.3.4"}}`, + metadata: ptrans.Metadata{ + EventType: "alias", + DestinationType: "POSTGRES", + ReceivedAt: "2021-09-01T00:00:00.000Z", + SourceID: "sourceID", + DestinationID: "destinationID", + SourceType: "sourceType", + }, + destination: backendconfig.DestinationT{ + Name: "POSTGRES", + Config: map[string]any{ + "storeFullEvent": true, + }, + DestinationDefinition: backendconfig.DestinationDefinitionT{ + Name: "POSTGRES", + }, + }, + expectedResponse: ptrans.Response{ + Events: []ptrans.TransformerResponse{ + { + Output: map[string]any{ + "data": map[string]any{ + "anonymous_id": "anonymousId", + "channel": "web", + "context_ip": "1.2.3.4", + "context_passed_ip": "1.2.3.4", + "context_request_ip": "5.6.7.8", + "context_traits_email": "rhedricks@example.com", + "context_traits_logins": float64(2), + "id": "messageId", + "original_timestamp": "2021-09-01T00:00:00.000Z", + "received_at": "2021-09-01T00:00:00.000Z", + "sent_at": "2021-09-01T00:00:00.000Z", + "timestamp": "2021-09-01T00:00:00.000Z", + "title": "Home | RudderStack", + "url": "http://www.rudderstack.com", + "user_id": "userId", + "previous_id": "previousId", + "context_destination_id": "destinationID", + "context_destination_type": "POSTGRES", + "context_source_id": "sourceID", + "context_source_type": "sourceType", + "rudder_event": 
"{\"type\":\"alias\",\"anonymousId\":\"anonymousId\",\"channel\":\"web\",\"context\":{\"destinationId\":\"destinationID\",\"destinationType\":\"POSTGRES\",\"ip\":\"1.2.3.4\",\"sourceId\":\"sourceID\",\"sourceType\":\"sourceType\",\"traits\":{\"email\":\"rhedricks@example.com\",\"logins\":2}},\"messageId\":\"messageId\",\"originalTimestamp\":\"2021-09-01T00:00:00.000Z\",\"previousId\":\"previousId\",\"receivedAt\":\"2021-09-01T00:00:00.000Z\",\"request_ip\":\"5.6.7.8\",\"sentAt\":\"2021-09-01T00:00:00.000Z\",\"timestamp\":\"2021-09-01T00:00:00.000Z\",\"traits\":{\"title\":\"Home | RudderStack\",\"url\":\"http://www.rudderstack.com\"},\"userId\":\"userId\"}", + }, + "metadata": map[string]any{ + "columns": map[string]any{ + "anonymous_id": "string", + "channel": "string", + "context_ip": "string", + "context_passed_ip": "string", + "context_request_ip": "string", + "context_traits_email": "string", + "context_traits_logins": "int", + "id": "string", + "original_timestamp": "datetime", + "received_at": "datetime", + "sent_at": "datetime", + "timestamp": "datetime", + "title": "string", + "url": "string", + "user_id": "string", + "previous_id": "string", + "uuid_ts": "datetime", + "context_destination_id": "string", + "context_destination_type": "string", + "context_source_id": "string", + "context_source_type": "string", + "rudder_event": "json", + }, + "receivedAt": "2021-09-01T00:00:00.000Z", + "table": "aliases", + }, + "userId": "", + }, + Metadata: ptrans.Metadata{ + EventType: "alias", + DestinationType: "POSTGRES", + ReceivedAt: "2021-09-01T00:00:00.000Z", + SourceID: "sourceID", + DestinationID: "destinationID", + SourceType: "sourceType", + }, + StatusCode: http.StatusOK, + }, + }, + }, + }, + { + name: "alias (Postgres) partial rules", + eventPayload: 
`{"type":"alias","messageId":"messageId","userId":"userId","previousId":"previousId","sentAt":"2021-09-01T00:00:00.000Z","timestamp":"2021-09-01T00:00:00.000Z","receivedAt":"2021-09-01T00:00:00.000Z","originalTimestamp":"2021-09-01T00:00:00.000Z","traits":{"title":"Home | RudderStack","url":"http://www.rudderstack.com"},"context":{"traits":{"email":"rhedricks@example.com","logins":2},"ip":"1.2.3.4"}}`, + metadata: ptrans.Metadata{ + EventType: "alias", + DestinationType: "POSTGRES", + ReceivedAt: "2021-09-01T00:00:00.000Z", + SourceID: "sourceID", + DestinationID: "destinationID", + SourceType: "sourceType", + }, + destination: backendconfig.DestinationT{ + Name: "POSTGRES", + Config: map[string]any{}, + DestinationDefinition: backendconfig.DestinationDefinitionT{ + Name: "POSTGRES", + }, + }, + expectedResponse: ptrans.Response{ + Events: []ptrans.TransformerResponse{ + { + Output: map[string]any{ + "data": map[string]any{ + "context_ip": "1.2.3.4", + "context_passed_ip": "1.2.3.4", + "context_traits_email": "rhedricks@example.com", + "context_traits_logins": float64(2), + "id": "messageId", + "original_timestamp": "2021-09-01T00:00:00.000Z", + "received_at": "2021-09-01T00:00:00.000Z", + "sent_at": "2021-09-01T00:00:00.000Z", + "timestamp": "2021-09-01T00:00:00.000Z", + "title": "Home | RudderStack", + "url": "http://www.rudderstack.com", + "user_id": "userId", + "previous_id": "previousId", + "context_destination_id": "destinationID", + "context_destination_type": "POSTGRES", + "context_source_id": "sourceID", + "context_source_type": "sourceType", + }, + "metadata": map[string]any{ + "columns": map[string]any{ + "context_ip": "string", + "context_passed_ip": "string", + "context_traits_email": "string", + "context_traits_logins": "int", + "id": "string", + "original_timestamp": "datetime", + "received_at": "datetime", + "sent_at": "datetime", + "timestamp": "datetime", + "title": "string", + "url": "string", + "user_id": "string", + "previous_id": "string", + 
"uuid_ts": "datetime", + "context_destination_id": "string", + "context_destination_type": "string", + "context_source_id": "string", + "context_source_type": "string", + }, + "receivedAt": "2021-09-01T00:00:00.000Z", + "table": "aliases", + }, + "userId": "", + }, + Metadata: ptrans.Metadata{ + EventType: "alias", + DestinationType: "POSTGRES", + ReceivedAt: "2021-09-01T00:00:00.000Z", + SourceID: "sourceID", + DestinationID: "destinationID", + SourceType: "sourceType", + }, + StatusCode: http.StatusOK, + }, + }, + }, + }, + { + name: "alias (BQ) merge event", + configOverride: map[string]any{ + "Warehouse.enableIDResolution": true, + }, + eventPayload: `{"type":"alias","messageId":"messageId","anonymousId":"anonymousId","userId":"userId","previousId":"previousId","sentAt":"2021-09-01T00:00:00.000Z","timestamp":"2021-09-01T00:00:00.000Z","receivedAt":"2021-09-01T00:00:00.000Z","originalTimestamp":"2021-09-01T00:00:00.000Z","channel":"web","request_ip":"5.6.7.8","traits":{"title":"Home | RudderStack","url":"http://www.rudderstack.com"},"context":{"traits":{"email":"rhedricks@example.com","logins":2},"ip":"1.2.3.4"}}`, + metadata: ptrans.Metadata{ + EventType: "alias", + DestinationType: "BQ", + ReceivedAt: "2021-09-01T00:00:00.000Z", + SourceID: "sourceID", + DestinationID: "destinationID", + SourceType: "sourceType", + }, + destination: backendconfig.DestinationT{ + Name: "BQ", + Config: map[string]any{}, + DestinationDefinition: backendconfig.DestinationDefinitionT{ + Name: "BQ", + }, + }, + expectedResponse: ptrans.Response{ + Events: []ptrans.TransformerResponse{ + { + Output: map[string]any{ + "data": map[string]any{ + "anonymous_id": "anonymousId", + "channel": "web", + "context_ip": "1.2.3.4", + "context_passed_ip": "1.2.3.4", + "context_request_ip": "5.6.7.8", + "context_traits_email": "rhedricks@example.com", + "context_traits_logins": float64(2), + "id": "messageId", + "original_timestamp": "2021-09-01T00:00:00.000Z", + "received_at": 
"2021-09-01T00:00:00.000Z", + "sent_at": "2021-09-01T00:00:00.000Z", + "timestamp": "2021-09-01T00:00:00.000Z", + "title": "Home | RudderStack", + "url": "http://www.rudderstack.com", + "user_id": "userId", + "previous_id": "previousId", + "context_destination_id": "destinationID", + "context_destination_type": "BQ", + "context_source_id": "sourceID", + "context_source_type": "sourceType", + }, + "metadata": map[string]any{ + "columns": map[string]any{ + "anonymous_id": "string", + "channel": "string", + "context_ip": "string", + "context_passed_ip": "string", + "context_request_ip": "string", + "context_traits_email": "string", + "context_traits_logins": "int", + "id": "string", + "original_timestamp": "datetime", + "received_at": "datetime", + "sent_at": "datetime", + "timestamp": "datetime", + "title": "string", + "url": "string", + "user_id": "string", + "previous_id": "string", + "uuid_ts": "datetime", + "context_destination_id": "string", + "context_destination_type": "string", + "context_source_id": "string", + "context_source_type": "string", + "loaded_at": "datetime", + }, + "receivedAt": "2021-09-01T00:00:00.000Z", + "table": "aliases", + }, + "userId": "", + }, + Metadata: ptrans.Metadata{ + EventType: "alias", + DestinationType: "BQ", + ReceivedAt: "2021-09-01T00:00:00.000Z", + SourceID: "sourceID", + DestinationID: "destinationID", + SourceType: "sourceType", + }, + StatusCode: http.StatusOK, + }, + { + Output: map[string]any{ + "data": map[string]any{ + "merge_property_1_type": "user_id", + "merge_property_1_value": "userId", + "merge_property_2_type": "user_id", + "merge_property_2_value": "previousId", + }, + "metadata": map[string]any{ + "table": "rudder_identity_merge_rules", + "columns": map[string]any{"merge_property_1_type": "string", "merge_property_1_value": "string", "merge_property_2_type": "string", "merge_property_2_value": "string"}, + "isMergeRule": true, + "receivedAt": "2021-09-01T00:00:00.000Z", + "mergePropOne": "userId", + 
"mergePropTwo": "previousId", + }, + "userId": "", + }, + Metadata: ptrans.Metadata{ + EventType: "alias", + DestinationType: "BQ", + ReceivedAt: "2021-09-01T00:00:00.000Z", + SourceID: "sourceID", + DestinationID: "destinationID", + SourceType: "sourceType", + }, + StatusCode: http.StatusOK, + }, + }, + }, + }, + } + + for _, tc := range testsCases { + t.Run(tc.name, func(t *testing.T) { + c := config.New() + c.Set("DEST_TRANSFORM_URL", transformerResource.TransformerURL) + c.Set("USER_TRANSFORM_URL", transformerResource.TransformerURL) + + for k, v := range tc.configOverride { + c.Set(k, v) + } + + eventsInfos := []eventsInfo{ + { + payload: []byte(tc.eventPayload), + metadata: tc.metadata, + destination: tc.destination, + }, + } + destinationTransformer := ptrans.NewTransformer(c, logger.NOP, stats.Default) + warehouseTransformer := New(c, logger.NOP, stats.NOP) + + testEvents(t, eventsInfos, destinationTransformer, warehouseTransformer, tc.expectedResponse) + }) + } +} diff --git a/warehouse/transformer/error.go b/warehouse/transformer/error.go new file mode 100644 index 00000000000..df42e863594 --- /dev/null +++ b/warehouse/transformer/error.go @@ -0,0 +1,34 @@ +package transformer + +import ( + "net/http" +) + +type transErr struct { + message string + code int +} + +func (e *transErr) Error() string { + return e.message +} +func (e *transErr) StatusCode() int { return e.code } + +var _ error = (*transErr)(nil) + +var ( + errInternalServer = &transErr{message: "Internal Server Error", code: http.StatusInternalServerError} + errMergePropertiesMissing = &transErr{message: "either or both identifiers missing in mergeProperties", code: http.StatusBadRequest} + errMergePropertiesNotSufficient = &transErr{message: "either or both identifiers missing in mergeProperties", code: http.StatusBadRequest} + errMergePropertyOneInvalid = &transErr{message: "mergeProperties contains null values for expected inputs", code: http.StatusBadRequest} + errMergePropertyTwoInvalid = 
&transErr{message: "mergeProperties contains null values for expected inputs", code: http.StatusBadRequest} + errMergePropertyNull = &transErr{message: "mergeProperties contains null values for expected inputs", code: http.StatusBadRequest} + errMergePropertiesNotArray = &transErr{message: "mergeProperties contains null values for expected inputs", code: http.StatusBadRequest} +) + +func newTransErr(message string, statusCode int) *transErr { + return &transErr{ + message: message, + code: statusCode, + } +} diff --git a/warehouse/transformer/extract.go b/warehouse/transformer/extract.go new file mode 100644 index 00000000000..d62affdf8e0 --- /dev/null +++ b/warehouse/transformer/extract.go @@ -0,0 +1,70 @@ +package transformer + +import ( + "fmt" + + "github.com/rudderlabs/rudder-server/warehouse/transformer/internal/datatype" + "github.com/rudderlabs/rudder-server/warehouse/transformer/internal/rules" +) + +func (t *transformer) handleExtractEvent(pi *processingInfo) ([]map[string]any, error) { + extractEvent := make(map[string]any) + extractColumnTypes := make(map[string]string) + + if err := t.setDataAndColumnTypeFromInput(pi, pi.event.Message["context"], extractEvent, extractColumnTypes, + "extract_context_", 2, "context_", 0, + ); err != nil { + return nil, fmt.Errorf("setting data and column types from message: %w", err) + } + if err := t.setDataAndColumnTypeFromInput(pi, pi.event.Message["properties"], extractEvent, extractColumnTypes, + "extract_properties_", 2, "", 0, + ); err != nil { + return nil, fmt.Errorf("setting data and column types from message: %w", err) + } + + eventColName, err := SafeColumnName(pi.event.Metadata.DestinationType, pi.itrOpts, "event") + if err != nil { + return nil, fmt.Errorf("safe column name: %w", err) + } + + d, ok := pi.event.Message[eventColName] + if !ok { + return nil, fmt.Errorf("cannot create event table with empty event name, event name is missing in the payload") + } + eventName, ok := d.(string) + if !ok || 
len(eventName) == 0 { + return nil, fmt.Errorf("cannot create event table with empty event name, event name is not a string") + } + + extractEvent[eventColName] = TransformTableName(pi.event.Metadata.DestinationType, pi.itrOpts, pi.dstOpts, eventName) + extractColumnTypes[eventColName] = datatype.TypeString + + if err = t.setDataAndColumnTypeFromRules(pi, extractEvent, extractColumnTypes, + rules.ExtractRules, rules.ExtractFunctionalRules, + ); err != nil { + return nil, fmt.Errorf("setting data and column types from rules: %w", err) + } + + columnName := TransformColumnName(pi.event.Metadata.DestinationType, pi.itrOpts, pi.dstOpts, extractEvent[eventColName].(string)) + table, err := SafeTableName(pi.event.Metadata.DestinationType, pi.itrOpts, columnName) + if err != nil { + return nil, fmt.Errorf("safe table name: %w", err) + } + excludeTable := excludeRudderCreatedTableNames(table, pi.itrOpts.skipReservedKeywordsEscaping) + + columns, err := t.getColumns(pi.event.Metadata.DestinationType, extractEvent, extractColumnTypes) + if err != nil { + return nil, fmt.Errorf("getting columns: %w", err) + } + + extractOutput := map[string]any{ + "data": extractEvent, + "metadata": map[string]any{ + "table": excludeTable, + "columns": columns, + "receivedAt": pi.event.Metadata.ReceivedAt, + }, + "userId": "", + } + return []map[string]any{extractOutput}, nil +} diff --git a/warehouse/transformer/extract_test.go b/warehouse/transformer/extract_test.go new file mode 100644 index 00000000000..e630327c7c1 --- /dev/null +++ b/warehouse/transformer/extract_test.go @@ -0,0 +1,498 @@ +package transformer + +import ( + "net/http" + "testing" + + "github.com/ory/dockertest/v3" + "github.com/stretchr/testify/require" + + "github.com/rudderlabs/rudder-go-kit/config" + "github.com/rudderlabs/rudder-go-kit/logger" + "github.com/rudderlabs/rudder-go-kit/stats" + transformertest "github.com/rudderlabs/rudder-go-kit/testhelper/docker/resource/transformer" + + backendconfig 
"github.com/rudderlabs/rudder-server/backend-config" + ptrans "github.com/rudderlabs/rudder-server/processor/transformer" +) + +func TestExtract(t *testing.T) { + pool, err := dockertest.NewPool("") + require.NoError(t, err) + + transformerResource, err := transformertest.Setup(pool, t) + require.NoError(t, err) + + testsCases := []struct { + name string + eventPayload string + metadata ptrans.Metadata + destination backendconfig.DestinationT + expectedResponse ptrans.Response + }{ + { + name: "extract (Postgres)", + eventPayload: `{"type":"extract","recordId":"recordID","event":"event","receivedAt":"2021-09-01T00:00:00.000Z","properties":{"name":"Home","title":"Home | RudderStack","url":"http://www.rudderstack.com"},"context":{"traits":{"name":"Richard Hendricks","email":"rhedricks@example.com","logins":2},"ip":"1.2.3.4"}}`, + metadata: ptrans.Metadata{ + EventType: "extract", + DestinationType: "POSTGRES", + ReceivedAt: "2021-09-01T00:00:00.000Z", + SourceID: "sourceID", + DestinationID: "destinationID", + SourceType: "sourceType", + RecordID: "recordID", + }, + destination: backendconfig.DestinationT{ + Name: "POSTGRES", + Config: map[string]any{}, + DestinationDefinition: backendconfig.DestinationDefinitionT{ + Name: "POSTGRES", + }, + }, + expectedResponse: ptrans.Response{ + Events: []ptrans.TransformerResponse{ + { + Output: map[string]any{ + "data": map[string]any{ + "name": "Home", + "context_ip": "1.2.3.4", + "context_traits_email": "rhedricks@example.com", + "context_traits_logins": float64(2), + "context_traits_name": "Richard Hendricks", + "id": "recordID", + "event": "event", + "received_at": "2021-09-01T00:00:00.000Z", + "title": "Home | RudderStack", + "url": "http://www.rudderstack.com", + "context_destination_id": "destinationID", + "context_destination_type": "POSTGRES", + "context_source_id": "sourceID", + "context_source_type": "sourceType", + }, + "metadata": map[string]any{ + "columns": map[string]any{ + "name": "string", + 
"context_destination_id": "string", + "context_destination_type": "string", + "context_source_id": "string", + "context_source_type": "string", + "context_ip": "string", + "context_traits_email": "string", + "context_traits_logins": "int", + "context_traits_name": "string", + "id": "string", + "event": "string", + "received_at": "datetime", + "title": "string", + "url": "string", + "uuid_ts": "datetime", + }, + "receivedAt": "2021-09-01T00:00:00.000Z", + "table": "event", + }, + "userId": "", + }, + Metadata: ptrans.Metadata{ + EventType: "extract", + DestinationType: "POSTGRES", + ReceivedAt: "2021-09-01T00:00:00.000Z", + SourceID: "sourceID", + DestinationID: "destinationID", + SourceType: "sourceType", + RecordID: "recordID", + }, + StatusCode: http.StatusOK, + }, + }, + }, + }, + { + name: "extract (Postgres) without properties", + eventPayload: `{"type":"extract","recordId":"recordID","event":"event","receivedAt":"2021-09-01T00:00:00.000Z","context":{"traits":{"name":"Richard Hendricks","email":"rhedricks@example.com","logins":2},"ip":"1.2.3.4"}}`, + metadata: ptrans.Metadata{ + EventType: "extract", + DestinationType: "POSTGRES", + ReceivedAt: "2021-09-01T00:00:00.000Z", + SourceID: "sourceID", + DestinationID: "destinationID", + SourceType: "sourceType", + RecordID: "recordID", + }, + destination: backendconfig.DestinationT{ + Name: "POSTGRES", + Config: map[string]any{}, + DestinationDefinition: backendconfig.DestinationDefinitionT{ + Name: "POSTGRES", + }, + }, + expectedResponse: ptrans.Response{ + Events: []ptrans.TransformerResponse{ + { + Output: map[string]any{ + "data": map[string]any{ + "context_ip": "1.2.3.4", + "context_traits_email": "rhedricks@example.com", + "context_traits_logins": float64(2), + "context_traits_name": "Richard Hendricks", + "id": "recordID", + "event": "event", + "received_at": "2021-09-01T00:00:00.000Z", + "context_destination_id": "destinationID", + "context_destination_type": "POSTGRES", + "context_source_id": "sourceID", + 
"context_source_type": "sourceType", + }, + "metadata": map[string]any{ + "columns": map[string]any{ + "context_destination_id": "string", + "context_destination_type": "string", + "context_source_id": "string", + "context_source_type": "string", + "context_ip": "string", + "context_traits_email": "string", + "context_traits_logins": "int", + "context_traits_name": "string", + "id": "string", + "event": "string", + "received_at": "datetime", + "uuid_ts": "datetime", + }, + "receivedAt": "2021-09-01T00:00:00.000Z", + "table": "event", + }, + "userId": "", + }, + Metadata: ptrans.Metadata{ + EventType: "extract", + DestinationType: "POSTGRES", + ReceivedAt: "2021-09-01T00:00:00.000Z", + SourceID: "sourceID", + DestinationID: "destinationID", + SourceType: "sourceType", + RecordID: "recordID", + }, + StatusCode: http.StatusOK, + }, + }, + }, + }, + { + name: "extract (Postgres) without context", + eventPayload: `{"type":"extract","recordId":"recordID","event":"event","receivedAt":"2021-09-01T00:00:00.000Z","properties":{"name":"Home","title":"Home | RudderStack","url":"http://www.rudderstack.com"}}`, + metadata: ptrans.Metadata{ + EventType: "extract", + DestinationType: "POSTGRES", + ReceivedAt: "2021-09-01T00:00:00.000Z", + SourceID: "sourceID", + DestinationID: "destinationID", + SourceType: "sourceType", + RecordID: "recordID", + }, + destination: backendconfig.DestinationT{ + Name: "POSTGRES", + Config: map[string]any{}, + DestinationDefinition: backendconfig.DestinationDefinitionT{ + Name: "POSTGRES", + }, + }, + expectedResponse: ptrans.Response{ + Events: []ptrans.TransformerResponse{ + { + Output: map[string]any{ + "data": map[string]any{ + "name": "Home", + "id": "recordID", + "event": "event", + "received_at": "2021-09-01T00:00:00.000Z", + "title": "Home | RudderStack", + "url": "http://www.rudderstack.com", + "context_destination_id": "destinationID", + "context_destination_type": "POSTGRES", + "context_source_id": "sourceID", + "context_source_type": 
"sourceType", + }, + "metadata": map[string]any{ + "columns": map[string]any{ + "name": "string", + "context_destination_id": "string", + "context_destination_type": "string", + "context_source_id": "string", + "context_source_type": "string", + "id": "string", + "event": "string", + "received_at": "datetime", + "title": "string", + "url": "string", + "uuid_ts": "datetime", + }, + "receivedAt": "2021-09-01T00:00:00.000Z", + "table": "event", + }, + "userId": "", + }, + Metadata: ptrans.Metadata{ + EventType: "extract", + DestinationType: "POSTGRES", + ReceivedAt: "2021-09-01T00:00:00.000Z", + SourceID: "sourceID", + DestinationID: "destinationID", + SourceType: "sourceType", + RecordID: "recordID", + }, + StatusCode: http.StatusOK, + }, + }, + }, + }, + { + name: "extract (Postgres) RudderCreatedTable", + eventPayload: `{"type":"extract","recordId":"recordID","event":"accounts","receivedAt":"2021-09-01T00:00:00.000Z","properties":{"name":"Home","title":"Home | RudderStack","url":"http://www.rudderstack.com"},"context":{"traits":{"name":"Richard Hendricks","email":"rhedricks@example.com","logins":2},"ip":"1.2.3.4"}}`, + metadata: ptrans.Metadata{ + EventType: "extract", + DestinationType: "POSTGRES", + ReceivedAt: "2021-09-01T00:00:00.000Z", + SourceID: "sourceID", + DestinationID: "destinationID", + SourceType: "sourceType", + RecordID: "recordID", + }, + destination: backendconfig.DestinationT{ + Name: "POSTGRES", + Config: map[string]any{ + "storeFullEvent": true, + }, + DestinationDefinition: backendconfig.DestinationDefinitionT{ + Name: "POSTGRES", + }, + }, + expectedResponse: ptrans.Response{ + Events: []ptrans.TransformerResponse{ + { + Output: map[string]any{ + "data": map[string]any{ + "name": "Home", + "context_ip": "1.2.3.4", + "context_traits_email": "rhedricks@example.com", + "context_traits_logins": float64(2), + "context_traits_name": "Richard Hendricks", + "id": "recordID", + "event": "accounts", + "received_at": "2021-09-01T00:00:00.000Z", + 
"title": "Home | RudderStack", + "url": "http://www.rudderstack.com", + "context_destination_id": "destinationID", + "context_destination_type": "POSTGRES", + "context_source_id": "sourceID", + "context_source_type": "sourceType", + }, + "metadata": map[string]any{ + "columns": map[string]any{ + "name": "string", + "context_destination_id": "string", + "context_destination_type": "string", + "context_source_id": "string", + "context_source_type": "string", + "context_ip": "string", + "context_traits_email": "string", + "context_traits_logins": "int", + "context_traits_name": "string", + "id": "string", + "event": "string", + "received_at": "datetime", + "title": "string", + "url": "string", + "uuid_ts": "datetime", + }, + "receivedAt": "2021-09-01T00:00:00.000Z", + "table": "_accounts", + }, + "userId": "", + }, + Metadata: ptrans.Metadata{ + EventType: "extract", + DestinationType: "POSTGRES", + ReceivedAt: "2021-09-01T00:00:00.000Z", + SourceID: "sourceID", + DestinationID: "destinationID", + SourceType: "sourceType", + RecordID: "recordID", + }, + StatusCode: http.StatusOK, + }, + }, + }, + }, + { + name: "extract (Postgres) RudderCreatedTable with skipReservedKeywordsEscaping", + eventPayload: `{"type":"extract","recordId":"recordID","event":"accounts","receivedAt":"2021-09-01T00:00:00.000Z","properties":{"name":"Home","title":"Home | RudderStack","url":"http://www.rudderstack.com"},"context":{"traits":{"name":"Richard Hendricks","email":"rhedricks@example.com","logins":2},"ip":"1.2.3.4"},"integrations":{"POSTGRES":{"options":{"skipReservedKeywordsEscaping":true}}}}`, + metadata: ptrans.Metadata{ + EventType: "extract", + DestinationType: "POSTGRES", + ReceivedAt: "2021-09-01T00:00:00.000Z", + SourceID: "sourceID", + DestinationID: "destinationID", + SourceType: "sourceType", + RecordID: "recordID", + }, + destination: backendconfig.DestinationT{ + Name: "POSTGRES", + Config: map[string]any{}, + DestinationDefinition: backendconfig.DestinationDefinitionT{ + 
Name: "POSTGRES", + }, + }, + expectedResponse: ptrans.Response{ + Events: []ptrans.TransformerResponse{ + { + Output: map[string]any{ + "data": map[string]any{ + "name": "Home", + "context_ip": "1.2.3.4", + "context_traits_email": "rhedricks@example.com", + "context_traits_logins": float64(2), + "context_traits_name": "Richard Hendricks", + "id": "recordID", + "event": "accounts", + "received_at": "2021-09-01T00:00:00.000Z", + "title": "Home | RudderStack", + "url": "http://www.rudderstack.com", + "context_destination_id": "destinationID", + "context_destination_type": "POSTGRES", + "context_source_id": "sourceID", + "context_source_type": "sourceType", + }, + "metadata": map[string]any{ + "columns": map[string]any{ + "name": "string", + "context_destination_id": "string", + "context_destination_type": "string", + "context_source_id": "string", + "context_source_type": "string", + "context_ip": "string", + "context_traits_email": "string", + "context_traits_logins": "int", + "context_traits_name": "string", + "id": "string", + "event": "string", + "received_at": "datetime", + "title": "string", + "url": "string", + "uuid_ts": "datetime", + }, + "receivedAt": "2021-09-01T00:00:00.000Z", + "table": "accounts", + }, + "userId": "", + }, + Metadata: ptrans.Metadata{ + EventType: "extract", + DestinationType: "POSTGRES", + ReceivedAt: "2021-09-01T00:00:00.000Z", + SourceID: "sourceID", + DestinationID: "destinationID", + SourceType: "sourceType", + RecordID: "recordID", + }, + StatusCode: http.StatusOK, + }, + }, + }, + }, + { + name: "extract (Postgres) RudderIsolatedTable", + eventPayload: `{"type":"extract","recordId":"recordID","event":"users","receivedAt":"2021-09-01T00:00:00.000Z","properties":{"name":"Home","title":"Home | RudderStack","url":"http://www.rudderstack.com"},"context":{"traits":{"name":"Richard Hendricks","email":"rhedricks@example.com","logins":2},"ip":"1.2.3.4"}}`, + metadata: ptrans.Metadata{ + EventType: "extract", + DestinationType: "POSTGRES", 
+ ReceivedAt: "2021-09-01T00:00:00.000Z", + SourceID: "sourceID", + DestinationID: "destinationID", + SourceType: "sourceType", + RecordID: "recordID", + }, + destination: backendconfig.DestinationT{ + Name: "POSTGRES", + Config: map[string]any{}, + DestinationDefinition: backendconfig.DestinationDefinitionT{ + Name: "POSTGRES", + }, + }, + expectedResponse: ptrans.Response{ + Events: []ptrans.TransformerResponse{ + { + Output: map[string]any{ + "data": map[string]any{ + "name": "Home", + "context_ip": "1.2.3.4", + "context_traits_email": "rhedricks@example.com", + "context_traits_logins": float64(2), + "context_traits_name": "Richard Hendricks", + "id": "recordID", + "event": "users", + "received_at": "2021-09-01T00:00:00.000Z", + "title": "Home | RudderStack", + "url": "http://www.rudderstack.com", + "context_destination_id": "destinationID", + "context_destination_type": "POSTGRES", + "context_source_id": "sourceID", + "context_source_type": "sourceType", + }, + "metadata": map[string]any{ + "columns": map[string]any{ + "name": "string", + "context_destination_id": "string", + "context_destination_type": "string", + "context_source_id": "string", + "context_source_type": "string", + "context_ip": "string", + "context_traits_email": "string", + "context_traits_logins": "int", + "context_traits_name": "string", + "id": "string", + "event": "string", + "received_at": "datetime", + "title": "string", + "url": "string", + "uuid_ts": "datetime", + }, + "receivedAt": "2021-09-01T00:00:00.000Z", + "table": "_users", + }, + "userId": "", + }, + Metadata: ptrans.Metadata{ + EventType: "extract", + DestinationType: "POSTGRES", + ReceivedAt: "2021-09-01T00:00:00.000Z", + SourceID: "sourceID", + DestinationID: "destinationID", + SourceType: "sourceType", + RecordID: "recordID", + }, + StatusCode: http.StatusOK, + }, + }, + }, + }, + } + + for _, tc := range testsCases { + t.Run(tc.name, func(t *testing.T) { + c := config.New() + c.Set("DEST_TRANSFORM_URL", 
transformerResource.TransformerURL) + c.Set("USER_TRANSFORM_URL", transformerResource.TransformerURL) + + eventsInfos := []eventsInfo{ + { + payload: []byte(tc.eventPayload), + metadata: tc.metadata, + destination: tc.destination, + }, + } + destinationTransformer := ptrans.NewTransformer(c, logger.NOP, stats.Default) + warehouseTransformer := New(c, logger.NOP, stats.NOP) + + testEvents(t, eventsInfos, destinationTransformer, warehouseTransformer, tc.expectedResponse) + }) + } +} diff --git a/warehouse/transformer/getcolumns.go b/warehouse/transformer/getcolumns.go new file mode 100644 index 00000000000..8bf3803436e --- /dev/null +++ b/warehouse/transformer/getcolumns.go @@ -0,0 +1,36 @@ +package transformer + +import ( + "fmt" + "net/http" + "strings" + + "github.com/rudderlabs/rudder-server/warehouse/transformer/internal/utils" + whutils "github.com/rudderlabs/rudder-server/warehouse/utils" +) + +func (t *transformer) getColumns(destType string, data map[string]any, columnTypes map[string]string) (map[string]any, error) { + columns := make(map[string]any) + + uuidTS := "uuid_ts" + if destType == whutils.SNOWFLAKE { + uuidTS = "UUID_TS" + } + columns[uuidTS] = "datetime" + + if destType == whutils.BQ { + columns["loaded_at"] = "datetime" + } + + for key, value := range data { + if dataType, ok := columnTypes[key]; ok { + columns[key] = dataType + } else { + columns[key] = t.getDataType(destType, key, value, false) + } + } + if len(columns) > t.config.maxColumnsInEvent.Load() && !utils.IsRudderSources(data) && !utils.IsDataLake(destType) { + return nil, newTransErr(fmt.Sprintf("%s transformer: Too many columns outputted from the event", strings.ToLower(destType)), http.StatusBadRequest) + } + return columns, nil +} diff --git a/warehouse/transformer/getdatatype.go b/warehouse/transformer/getdatatype.go new file mode 100644 index 00000000000..590d1a3a76b --- /dev/null +++ b/warehouse/transformer/getdatatype.go @@ -0,0 +1,111 @@ +package transformer + +import ( + 
"encoding/json" + + "github.com/rudderlabs/rudder-server/warehouse/transformer/internal/datatype" + "github.com/rudderlabs/rudder-server/warehouse/transformer/internal/utils" + whutils "github.com/rudderlabs/rudder-server/warehouse/utils" +) + +func (t *transformer) getDataType(destType, key string, val any, jsonKey bool) string { + if typeName := getPrimitiveType(val); typeName != "" { + return typeName + } + if strVal, ok := val.(string); ok && utils.ValidTimestamp(strVal) { + return datatype.TypeDateTime + } + if override := t.getDataTypeOverride(destType, key, val, jsonKey); override != "" { + return override + } + return datatype.TypeString +} + +// hub.dev-rudder.rudderlabs.com/dockerhub-proxy/rudderstack/develop-rudder-transformer:fix.warehouse-src-dest-info +func getPrimitiveType(val any) string { + switch v := val.(type) { + case int, int8, int16, int32, int64, uint, uint8, uint16, uint32, uint64: + return datatype.TypeInt + case float64: + return getFloatType(v) + case float32: + return getFloatType(float64(v)) + case bool: + return datatype.TypeBoolean + default: + return "" + } +} + +func getFloatType(v float64) string { + if v == float64(int64(v)) { + return datatype.TypeInt + } + return datatype.TypeFloat +} + +func (t *transformer) getDataTypeOverride(destType, key string, val any, jsonKey bool) string { + switch destType { + case whutils.POSTGRES, whutils.SNOWFLAKE: + return handlePostgresSnowflakeCase(key, jsonKey) + case whutils.RS: + return handleRSCase(val, jsonKey) + case whutils.CLICKHOUSE: + return t.handleClickHouseCase(destType, key, val) + default: + return "" + } +} + +func handlePostgresSnowflakeCase(key string, jsonKey bool) string { + if key == "violationErrors" || jsonKey { + return datatype.TypeJSON + } + return datatype.TypeString +} + +func handleRSCase(val any, jsonKey bool) string { + if jsonKey { + return datatype.TypeJSON + } + if sv, _ := json.Marshal(val); len(sv) > 512 { + return datatype.TypeText + } + return 
datatype.TypeString +} + +func (t *transformer) handleClickHouseCase(destType, key string, val any) string { + if !t.config.enableArraySupport.Load() { + return datatype.TypeString + } + + arrayVal, ok := val.([]any) + if !ok || len(arrayVal) == 0 { + return datatype.TypeString + } + return t.determineClickHouseArrayType(destType, key, arrayVal) +} + +func (t *transformer) determineClickHouseArrayType(destType, key string, arrayVal []any) string { + finalDataType := t.getDataType(destType, key, arrayVal[0], false) + + for i := 1; i < len(arrayVal); i++ { + dataType := t.getDataType(destType, key, arrayVal[i], false) + + if finalDataType == dataType { + continue + } + if finalDataType == datatype.TypeString { + break + } + if dataType == datatype.TypeFloat && finalDataType == datatype.TypeInt { + finalDataType = datatype.TypeFloat + continue + } + if dataType == datatype.TypeInt && finalDataType == datatype.TypeFloat { + continue + } + finalDataType = datatype.TypeString + } + return datatype.TypeArray + "(" + finalDataType + ")" +} diff --git a/warehouse/transformer/getdatatype_test.go b/warehouse/transformer/getdatatype_test.go new file mode 100644 index 00000000000..d544a7a5f1f --- /dev/null +++ b/warehouse/transformer/getdatatype_test.go @@ -0,0 +1,79 @@ +package transformer + +import ( + "testing" + + "github.com/stretchr/testify/require" + + "github.com/rudderlabs/rudder-go-kit/config" + + whutils "github.com/rudderlabs/rudder-server/warehouse/utils" +) + +func TestGetDataType(t *testing.T) { + testCases := []struct { + name, destType, key string + val any + jsonKey, enableArraySupport bool + expected string + }{ + // Primitive types + {"Primitive Type Int", whutils.POSTGRES, "someKey", 42, false, false, "int"}, + {"Primitive Type Float", whutils.POSTGRES, "someKey", 42.0, false, false, "int"}, + {"Primitive Type Float (non-int)", whutils.POSTGRES, "someKey", 42.5, false, false, "float"}, + {"Primitive Type Bool", whutils.POSTGRES, "someKey", true, false, 
false, "boolean"}, + + // Valid timestamp + {"Valid Timestamp String", whutils.POSTGRES, "someKey", "2022-10-05T14:48:00.000Z", false, false, "datetime"}, + + // JSON Key cases for different destinations + {"Postgres JSON Key", whutils.POSTGRES, "someKey", "someValue", true, false, "json"}, + {"Snowflake JSON Key", whutils.SNOWFLAKE, "someKey", "someValue", true, false, "json"}, + {"Redshift JSON Key", whutils.RS, "someKey", "someValue", true, false, "json"}, + + // Redshift with text and string types + {"Redshift Text Type", whutils.RS, "someKey", string(make([]byte, 513)), false, false, "text"}, + {"Redshift String Type", whutils.RS, "someKey", "shortValue", false, false, "string"}, + + // ClickHouse - Array support enabled + {"ClickHouse Array Type Int", whutils.CLICKHOUSE, "someKey", []any{1, 2, 3}, false, true, "array(int)"}, + {"ClickHouse Array Type Mixed Int and Float", whutils.CLICKHOUSE, "someKey", []any{1, 2.5}, false, true, "array(float)"}, + {"ClickHouse Array Type Mixed Int, Float, and String", whutils.CLICKHOUSE, "someKey", []any{1, 2.5, "text"}, false, true, "array(string)"}, + {"ClickHouse Array Type All Strings", whutils.CLICKHOUSE, "someKey", []any{"one", "two"}, false, true, "array(string)"}, + {"ClickHouse Empty Array", whutils.CLICKHOUSE, "someKey", []any{}, false, true, "string"}, // Empty array should return "string" + {"ClickHouse Array Type All Floats", whutils.CLICKHOUSE, "someKey", []any{1.1, 2.2, 3.3}, false, true, "array(float)"}, + {"ClickHouse Array Int then Float", whutils.CLICKHOUSE, "someKey", []any{1, 2.5, 3}, false, true, "array(float)"}, + {"ClickHouse Array Break on String", whutils.CLICKHOUSE, "someKey", []any{1, 2.5, "text"}, false, true, "array(string)"}, // Breaks on string + + // ClickHouse - Array support disabled + {"ClickHouse No Array Support", whutils.CLICKHOUSE, "someKey", []any{1, 2, 3}, false, false, "string"}, + {"ClickHouse No Array Support Mixed", whutils.CLICKHOUSE, "someKey", []any{1, 2.5}, false, false, 
"string"}, + {"ClickHouse No Array Support with Strings", whutils.CLICKHOUSE, "someKey", []any{"one", "two"}, false, false, "string"}, + {"ClickHouse No Array Support Empty Array", whutils.CLICKHOUSE, "someKey", []any{}, false, false, "string"}, + + // Complex Nested Arrays (Array support enabled) + {"ClickHouse Nested Arrays Mixed Int and Float", whutils.CLICKHOUSE, "someKey", []any{[]any{1, 2.5}, []any{3, 4}}, false, true, "array(string)"}, + {"ClickHouse Nested Arrays Int", whutils.CLICKHOUSE, "someKey", []any{[]any{1, 2}, []any{3, 4}}, false, true, "array(array(int))"}, + + // Empty string values + {"Empty String Value", whutils.POSTGRES, "someKey", "", false, false, "string"}, + {"Empty String with JSON Key", whutils.POSTGRES, "someKey", "", true, false, "json"}, + + // Unsupported types (should default to string) + {"Unsupported Type Struct", whutils.POSTGRES, "someKey", struct{}{}, false, false, "string"}, + {"Unsupported Type Map", whutils.POSTGRES, "someKey", map[string]any{"key": "value"}, false, false, "string"}, + + // Special string values + {"Special Timestamp-like String", whutils.POSTGRES, "someKey", "not-a-timestamp", false, false, "string"}, + } + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + destinationTransformer := &transformer{} + destinationTransformer.config.enableArraySupport = config.SingleValueLoader(tc.enableArraySupport) + + actual := destinationTransformer.getDataType(tc.destType, tc.key, tc.val, tc.jsonKey) + require.Equal(t, tc.expected, actual) + }) + } +} diff --git a/warehouse/transformer/group.go b/warehouse/transformer/group.go new file mode 100644 index 00000000000..9880e5e0bca --- /dev/null +++ b/warehouse/transformer/group.go @@ -0,0 +1,58 @@ +package transformer + +import ( + "fmt" + + "github.com/samber/lo" + + "github.com/rudderlabs/rudder-server/warehouse/transformer/internal/rules" +) + +func (t *transformer) handleGroupEvent(pi *processingInfo) ([]map[string]any, error) { + groupEvent := 
make(map[string]any) + columnTypes := make(map[string]string) + + if err := t.setDataAndColumnTypeFromInput(pi, pi.event.Message["traits"], groupEvent, columnTypes, + "group_traits_", 2, "", 0, + ); err != nil { + return nil, fmt.Errorf("setting data and column types from message: %w", err) + } + if err := t.setDataAndColumnTypeFromInput(pi, pi.event.Message["context"], groupEvent, columnTypes, + "group_context_", 2, "context_", 0, + ); err != nil { + return nil, fmt.Errorf("setting data and column types from message: %w", err) + } + if err := t.setDataAndColumnTypeFromRules(pi, groupEvent, columnTypes, + lo.Assign(rules.DefaultRules, rules.GroupRules), rules.DefaultFunctionalRules, + ); err != nil { + return nil, fmt.Errorf("setting data and column types from rules: %w", err) + } + if err := storeRudderEvent(pi, groupEvent, columnTypes); err != nil { + return nil, fmt.Errorf("storing rudder event: %w", err) + } + + table, err := SafeTableName(pi.event.Metadata.DestinationType, pi.itrOpts, "groups") + if err != nil { + return nil, fmt.Errorf("safe table name: %w", err) + } + columns, err := t.getColumns(pi.event.Metadata.DestinationType, groupEvent, columnTypes) + if err != nil { + return nil, fmt.Errorf("getting columns: %w", err) + } + + mergeEvents, err := t.handleMergeEvent(pi) + if err != nil { + return nil, fmt.Errorf("handling merge event: %w", err) + } + + groupOutput := map[string]any{ + "data": groupEvent, + "metadata": map[string]any{ + "table": table, + "columns": columns, + "receivedAt": pi.event.Metadata.ReceivedAt, + }, + "userId": "", + } + return append([]map[string]any{groupOutput}, mergeEvents...), nil +} diff --git a/warehouse/transformer/group_test.go b/warehouse/transformer/group_test.go new file mode 100644 index 00000000000..ded11706a84 --- /dev/null +++ b/warehouse/transformer/group_test.go @@ -0,0 +1,595 @@ +package transformer + +import ( + "net/http" + "testing" + + "github.com/ory/dockertest/v3" + "github.com/stretchr/testify/require" + 
+ "github.com/rudderlabs/rudder-go-kit/config" + "github.com/rudderlabs/rudder-go-kit/logger" + "github.com/rudderlabs/rudder-go-kit/stats" + transformertest "github.com/rudderlabs/rudder-go-kit/testhelper/docker/resource/transformer" + + backendconfig "github.com/rudderlabs/rudder-server/backend-config" + ptrans "github.com/rudderlabs/rudder-server/processor/transformer" +) + +func TestGroup(t *testing.T) { + pool, err := dockertest.NewPool("") + require.NoError(t, err) + + transformerResource, err := transformertest.Setup(pool, t) + require.NoError(t, err) + + testsCases := []struct { + name string + configOverride map[string]any + eventPayload string + metadata ptrans.Metadata + destination backendconfig.DestinationT + expectedResponse ptrans.Response + }{ + { + name: "group (Postgres)", + eventPayload: `{"type":"group","messageId":"messageId","anonymousId":"anonymousId","userId":"userId","groupId":"groupId","sentAt":"2021-09-01T00:00:00.000Z","timestamp":"2021-09-01T00:00:00.000Z","receivedAt":"2021-09-01T00:00:00.000Z","originalTimestamp":"2021-09-01T00:00:00.000Z","channel":"web","request_ip":"5.6.7.8","traits":{"title":"Home | RudderStack","url":"http://www.rudderstack.com"},"context":{"traits":{"email":"rhedricks@example.com","logins":2},"ip":"1.2.3.4"}}`, + metadata: ptrans.Metadata{ + EventType: "group", + DestinationType: "POSTGRES", + ReceivedAt: "2021-09-01T00:00:00.000Z", + SourceID: "sourceID", + DestinationID: "destinationID", + SourceType: "sourceType", + }, + destination: backendconfig.DestinationT{ + Name: "POSTGRES", + Config: map[string]any{}, + DestinationDefinition: backendconfig.DestinationDefinitionT{ + Name: "POSTGRES", + }, + }, + expectedResponse: ptrans.Response{ + Events: []ptrans.TransformerResponse{ + { + Output: map[string]any{ + "data": map[string]any{ + "anonymous_id": "anonymousId", + "channel": "web", + "context_ip": "1.2.3.4", + "context_passed_ip": "1.2.3.4", + "context_request_ip": "5.6.7.8", + "context_traits_email": 
"rhedricks@example.com", + "context_traits_logins": float64(2), + "id": "messageId", + "original_timestamp": "2021-09-01T00:00:00.000Z", + "received_at": "2021-09-01T00:00:00.000Z", + "sent_at": "2021-09-01T00:00:00.000Z", + "timestamp": "2021-09-01T00:00:00.000Z", + "title": "Home | RudderStack", + "url": "http://www.rudderstack.com", + "user_id": "userId", + "group_id": "groupId", + "context_destination_id": "destinationID", + "context_destination_type": "POSTGRES", + "context_source_id": "sourceID", + "context_source_type": "sourceType", + }, + "metadata": map[string]any{ + "columns": map[string]any{ + "anonymous_id": "string", + "channel": "string", + "context_destination_id": "string", + "context_destination_type": "string", + "context_source_id": "string", + "context_source_type": "string", + "context_ip": "string", + "context_passed_ip": "string", + "context_request_ip": "string", + "context_traits_email": "string", + "context_traits_logins": "int", + "id": "string", + "original_timestamp": "datetime", + "received_at": "datetime", + "sent_at": "datetime", + "timestamp": "datetime", + "title": "string", + "url": "string", + "user_id": "string", + "group_id": "string", + "uuid_ts": "datetime", + }, + "receivedAt": "2021-09-01T00:00:00.000Z", + "table": "groups", + }, + "userId": "", + }, + Metadata: ptrans.Metadata{ + EventType: "group", + DestinationType: "POSTGRES", + ReceivedAt: "2021-09-01T00:00:00.000Z", + SourceID: "sourceID", + DestinationID: "destinationID", + SourceType: "sourceType", + }, + StatusCode: http.StatusOK, + }, + }, + }, + }, + { + name: "group (Postgres) without traits", + eventPayload: 
`{"type":"group","messageId":"messageId","anonymousId":"anonymousId","userId":"userId","groupId":"groupId","sentAt":"2021-09-01T00:00:00.000Z","timestamp":"2021-09-01T00:00:00.000Z","receivedAt":"2021-09-01T00:00:00.000Z","originalTimestamp":"2021-09-01T00:00:00.000Z","channel":"web","request_ip":"5.6.7.8","context":{"traits":{"email":"rhedricks@example.com","logins":2},"ip":"1.2.3.4"}}`, + metadata: ptrans.Metadata{ + EventType: "group", + DestinationType: "POSTGRES", + ReceivedAt: "2021-09-01T00:00:00.000Z", + SourceID: "sourceID", + DestinationID: "destinationID", + SourceType: "sourceType", + }, + destination: backendconfig.DestinationT{ + Name: "POSTGRES", + Config: map[string]any{}, + DestinationDefinition: backendconfig.DestinationDefinitionT{ + Name: "POSTGRES", + }, + }, + expectedResponse: ptrans.Response{ + Events: []ptrans.TransformerResponse{ + { + Output: map[string]any{ + "data": map[string]any{ + "anonymous_id": "anonymousId", + "channel": "web", + "context_ip": "1.2.3.4", + "context_passed_ip": "1.2.3.4", + "context_request_ip": "5.6.7.8", + "context_traits_email": "rhedricks@example.com", + "context_traits_logins": float64(2), + "id": "messageId", + "original_timestamp": "2021-09-01T00:00:00.000Z", + "received_at": "2021-09-01T00:00:00.000Z", + "sent_at": "2021-09-01T00:00:00.000Z", + "timestamp": "2021-09-01T00:00:00.000Z", + "user_id": "userId", + "group_id": "groupId", + "context_destination_id": "destinationID", + "context_destination_type": "POSTGRES", + "context_source_id": "sourceID", + "context_source_type": "sourceType", + }, + "metadata": map[string]any{ + "columns": map[string]any{ + "anonymous_id": "string", + "channel": "string", + "context_ip": "string", + "context_passed_ip": "string", + "context_request_ip": "string", + "context_traits_email": "string", + "context_traits_logins": "int", + "id": "string", + "original_timestamp": "datetime", + "received_at": "datetime", + "sent_at": "datetime", + "timestamp": "datetime", + "user_id": 
"string", + "group_id": "string", + "uuid_ts": "datetime", + "context_destination_id": "string", + "context_destination_type": "string", + "context_source_id": "string", + "context_source_type": "string", + }, + "receivedAt": "2021-09-01T00:00:00.000Z", + "table": "groups", + }, + "userId": "", + }, + Metadata: ptrans.Metadata{ + EventType: "group", + DestinationType: "POSTGRES", + ReceivedAt: "2021-09-01T00:00:00.000Z", + SourceID: "sourceID", + DestinationID: "destinationID", + SourceType: "sourceType", + }, + StatusCode: http.StatusOK, + }, + }, + }, + }, + { + name: "group (Postgres) without context", + eventPayload: `{"type":"group","messageId":"messageId","anonymousId":"anonymousId","userId":"userId","groupId":"groupId","sentAt":"2021-09-01T00:00:00.000Z","timestamp":"2021-09-01T00:00:00.000Z","receivedAt":"2021-09-01T00:00:00.000Z","originalTimestamp":"2021-09-01T00:00:00.000Z","channel":"web","request_ip":"5.6.7.8","traits":{"title":"Home | RudderStack","url":"http://www.rudderstack.com"}}`, + metadata: ptrans.Metadata{ + EventType: "group", + DestinationType: "POSTGRES", + ReceivedAt: "2021-09-01T00:00:00.000Z", + SourceID: "sourceID", + DestinationID: "destinationID", + SourceType: "sourceType", + }, + destination: backendconfig.DestinationT{ + Name: "POSTGRES", + Config: map[string]any{}, + DestinationDefinition: backendconfig.DestinationDefinitionT{ + Name: "POSTGRES", + }, + }, + expectedResponse: ptrans.Response{ + Events: []ptrans.TransformerResponse{ + { + Output: map[string]any{ + "data": map[string]any{ + "anonymous_id": "anonymousId", + "channel": "web", + "context_ip": "5.6.7.8", + "context_request_ip": "5.6.7.8", + "id": "messageId", + "original_timestamp": "2021-09-01T00:00:00.000Z", + "received_at": "2021-09-01T00:00:00.000Z", + "sent_at": "2021-09-01T00:00:00.000Z", + "timestamp": "2021-09-01T00:00:00.000Z", + "title": "Home | RudderStack", + "url": "http://www.rudderstack.com", + "user_id": "userId", + "group_id": "groupId", + 
"context_destination_id": "destinationID", + "context_destination_type": "POSTGRES", + "context_source_id": "sourceID", + "context_source_type": "sourceType", + }, + "metadata": map[string]any{ + "columns": map[string]any{ + "anonymous_id": "string", + "channel": "string", + "context_ip": "string", + "context_request_ip": "string", + "id": "string", + "original_timestamp": "datetime", + "received_at": "datetime", + "sent_at": "datetime", + "timestamp": "datetime", + "title": "string", + "url": "string", + "user_id": "string", + "group_id": "string", + "uuid_ts": "datetime", + "context_destination_id": "string", + "context_destination_type": "string", + "context_source_id": "string", + "context_source_type": "string", + }, + "receivedAt": "2021-09-01T00:00:00.000Z", + "table": "groups", + }, + "userId": "", + }, + Metadata: ptrans.Metadata{ + EventType: "group", + DestinationType: "POSTGRES", + ReceivedAt: "2021-09-01T00:00:00.000Z", + SourceID: "sourceID", + DestinationID: "destinationID", + SourceType: "sourceType", + }, + StatusCode: http.StatusOK, + }, + }, + }, + }, + { + name: "group (Postgres) store rudder event", + eventPayload: `{"type":"group","messageId":"messageId","anonymousId":"anonymousId","userId":"userId","groupId":"groupId","sentAt":"2021-09-01T00:00:00.000Z","timestamp":"2021-09-01T00:00:00.000Z","receivedAt":"2021-09-01T00:00:00.000Z","originalTimestamp":"2021-09-01T00:00:00.000Z","channel":"web","request_ip":"5.6.7.8","traits":{"title":"Home | RudderStack","url":"http://www.rudderstack.com"},"context":{"traits":{"email":"rhedricks@example.com","logins":2},"ip":"1.2.3.4"}}`, + metadata: ptrans.Metadata{ + EventType: "group", + DestinationType: "POSTGRES", + ReceivedAt: "2021-09-01T00:00:00.000Z", + SourceID: "sourceID", + DestinationID: "destinationID", + SourceType: "sourceType", + }, + destination: backendconfig.DestinationT{ + Name: "POSTGRES", + Config: map[string]any{ + "storeFullEvent": true, + }, + DestinationDefinition: 
backendconfig.DestinationDefinitionT{ + Name: "POSTGRES", + }, + }, + expectedResponse: ptrans.Response{ + Events: []ptrans.TransformerResponse{ + { + Output: map[string]any{ + "data": map[string]any{ + "anonymous_id": "anonymousId", + "channel": "web", + "context_ip": "1.2.3.4", + "context_passed_ip": "1.2.3.4", + "context_request_ip": "5.6.7.8", + "context_traits_email": "rhedricks@example.com", + "context_traits_logins": float64(2), + "id": "messageId", + "original_timestamp": "2021-09-01T00:00:00.000Z", + "received_at": "2021-09-01T00:00:00.000Z", + "sent_at": "2021-09-01T00:00:00.000Z", + "timestamp": "2021-09-01T00:00:00.000Z", + "title": "Home | RudderStack", + "url": "http://www.rudderstack.com", + "user_id": "userId", + "group_id": "groupId", + "context_destination_id": "destinationID", + "context_destination_type": "POSTGRES", + "context_source_id": "sourceID", + "context_source_type": "sourceType", + "rudder_event": "{\"type\":\"group\",\"anonymousId\":\"anonymousId\",\"channel\":\"web\",\"context\":{\"destinationId\":\"destinationID\",\"destinationType\":\"POSTGRES\",\"ip\":\"1.2.3.4\",\"sourceId\":\"sourceID\",\"sourceType\":\"sourceType\",\"traits\":{\"email\":\"rhedricks@example.com\",\"logins\":2}},\"groupId\":\"groupId\",\"messageId\":\"messageId\",\"originalTimestamp\":\"2021-09-01T00:00:00.000Z\",\"receivedAt\":\"2021-09-01T00:00:00.000Z\",\"request_ip\":\"5.6.7.8\",\"sentAt\":\"2021-09-01T00:00:00.000Z\",\"timestamp\":\"2021-09-01T00:00:00.000Z\",\"traits\":{\"title\":\"Home | RudderStack\",\"url\":\"http://www.rudderstack.com\"},\"userId\":\"userId\"}", + }, + "metadata": map[string]any{ + "columns": map[string]any{ + "anonymous_id": "string", + "channel": "string", + "context_ip": "string", + "context_passed_ip": "string", + "context_request_ip": "string", + "context_traits_email": "string", + "context_traits_logins": "int", + "id": "string", + "original_timestamp": "datetime", + "received_at": "datetime", + "sent_at": "datetime", + 
"timestamp": "datetime", + "title": "string", + "url": "string", + "user_id": "string", + "group_id": "string", + "uuid_ts": "datetime", + "context_destination_id": "string", + "context_destination_type": "string", + "context_source_id": "string", + "context_source_type": "string", + "rudder_event": "json", + }, + "receivedAt": "2021-09-01T00:00:00.000Z", + "table": "groups", + }, + "userId": "", + }, + Metadata: ptrans.Metadata{ + EventType: "group", + DestinationType: "POSTGRES", + ReceivedAt: "2021-09-01T00:00:00.000Z", + SourceID: "sourceID", + DestinationID: "destinationID", + SourceType: "sourceType", + }, + StatusCode: http.StatusOK, + }, + }, + }, + }, + { + name: "group (Postgres) partial rules", + eventPayload: `{"type":"group","messageId":"messageId","userId":"userId","groupId":"groupId","sentAt":"2021-09-01T00:00:00.000Z","timestamp":"2021-09-01T00:00:00.000Z","receivedAt":"2021-09-01T00:00:00.000Z","originalTimestamp":"2021-09-01T00:00:00.000Z","traits":{"title":"Home | RudderStack","url":"http://www.rudderstack.com"},"context":{"traits":{"email":"rhedricks@example.com","logins":2},"ip":"1.2.3.4"}}`, + metadata: ptrans.Metadata{ + EventType: "group", + DestinationType: "POSTGRES", + ReceivedAt: "2021-09-01T00:00:00.000Z", + SourceID: "sourceID", + DestinationID: "destinationID", + SourceType: "sourceType", + }, + destination: backendconfig.DestinationT{ + Name: "POSTGRES", + Config: map[string]any{}, + DestinationDefinition: backendconfig.DestinationDefinitionT{ + Name: "POSTGRES", + }, + }, + expectedResponse: ptrans.Response{ + Events: []ptrans.TransformerResponse{ + { + Output: map[string]any{ + "data": map[string]any{ + "context_ip": "1.2.3.4", + "context_passed_ip": "1.2.3.4", + "context_traits_email": "rhedricks@example.com", + "context_traits_logins": float64(2), + "id": "messageId", + "original_timestamp": "2021-09-01T00:00:00.000Z", + "received_at": "2021-09-01T00:00:00.000Z", + "sent_at": "2021-09-01T00:00:00.000Z", + "timestamp": 
"2021-09-01T00:00:00.000Z", + "title": "Home | RudderStack", + "url": "http://www.rudderstack.com", + "user_id": "userId", + "group_id": "groupId", + "context_destination_id": "destinationID", + "context_destination_type": "POSTGRES", + "context_source_id": "sourceID", + "context_source_type": "sourceType", + }, + "metadata": map[string]any{ + "columns": map[string]any{ + "context_ip": "string", + "context_passed_ip": "string", + "context_traits_email": "string", + "context_traits_logins": "int", + "id": "string", + "original_timestamp": "datetime", + "received_at": "datetime", + "sent_at": "datetime", + "timestamp": "datetime", + "title": "string", + "url": "string", + "user_id": "string", + "group_id": "string", + "uuid_ts": "datetime", + "context_destination_id": "string", + "context_destination_type": "string", + "context_source_id": "string", + "context_source_type": "string", + }, + "receivedAt": "2021-09-01T00:00:00.000Z", + "table": "groups", + }, + "userId": "", + }, + Metadata: ptrans.Metadata{ + EventType: "group", + DestinationType: "POSTGRES", + ReceivedAt: "2021-09-01T00:00:00.000Z", + SourceID: "sourceID", + DestinationID: "destinationID", + SourceType: "sourceType", + }, + StatusCode: http.StatusOK, + }, + }, + }, + }, + { + name: "group (BQ) merge event", + configOverride: map[string]any{ + "Warehouse.enableIDResolution": true, + }, + eventPayload: `{"type":"group","messageId":"messageId","anonymousId":"anonymousId","userId":"userId","groupId":"groupId","sentAt":"2021-09-01T00:00:00.000Z","timestamp":"2021-09-01T00:00:00.000Z","receivedAt":"2021-09-01T00:00:00.000Z","originalTimestamp":"2021-09-01T00:00:00.000Z","channel":"web","request_ip":"5.6.7.8","traits":{"title":"Home | RudderStack","url":"http://www.rudderstack.com"},"context":{"traits":{"email":"rhedricks@example.com","logins":2},"ip":"1.2.3.4"}}`, + metadata: ptrans.Metadata{ + EventType: "group", + DestinationType: "BQ", + ReceivedAt: "2021-09-01T00:00:00.000Z", + SourceID: "sourceID", + 
DestinationID: "destinationID", + SourceType: "sourceType", + }, + destination: backendconfig.DestinationT{ + Name: "BQ", + Config: map[string]any{}, + DestinationDefinition: backendconfig.DestinationDefinitionT{ + Name: "BQ", + }, + }, + expectedResponse: ptrans.Response{ + Events: []ptrans.TransformerResponse{ + { + Output: map[string]any{ + "data": map[string]any{ + "anonymous_id": "anonymousId", + "channel": "web", + "context_ip": "1.2.3.4", + "context_passed_ip": "1.2.3.4", + "context_request_ip": "5.6.7.8", + "context_traits_email": "rhedricks@example.com", + "context_traits_logins": float64(2), + "id": "messageId", + "original_timestamp": "2021-09-01T00:00:00.000Z", + "received_at": "2021-09-01T00:00:00.000Z", + "sent_at": "2021-09-01T00:00:00.000Z", + "timestamp": "2021-09-01T00:00:00.000Z", + "title": "Home | RudderStack", + "url": "http://www.rudderstack.com", + "user_id": "userId", + "group_id": "groupId", + "context_destination_id": "destinationID", + "context_destination_type": "BQ", + "context_source_id": "sourceID", + "context_source_type": "sourceType", + }, + "metadata": map[string]any{ + "columns": map[string]any{ + "anonymous_id": "string", + "channel": "string", + "context_ip": "string", + "context_passed_ip": "string", + "context_request_ip": "string", + "context_traits_email": "string", + "context_traits_logins": "int", + "id": "string", + "original_timestamp": "datetime", + "received_at": "datetime", + "sent_at": "datetime", + "timestamp": "datetime", + "title": "string", + "url": "string", + "user_id": "string", + "group_id": "string", + "uuid_ts": "datetime", + "context_destination_id": "string", + "context_destination_type": "string", + "context_source_id": "string", + "context_source_type": "string", + "loaded_at": "datetime", + }, + "receivedAt": "2021-09-01T00:00:00.000Z", + "table": "_groups", + }, + "userId": "", + }, + Metadata: ptrans.Metadata{ + EventType: "group", + DestinationType: "BQ", + ReceivedAt: "2021-09-01T00:00:00.000Z", 
+ SourceID: "sourceID", + DestinationID: "destinationID", + SourceType: "sourceType", + }, + StatusCode: http.StatusOK, + }, + { + Output: map[string]any{ + "data": map[string]any{ + "merge_property_1_type": "anonymous_id", + "merge_property_1_value": "anonymousId", + "merge_property_2_type": "user_id", + "merge_property_2_value": "userId", + }, + "metadata": map[string]any{ + "table": "rudder_identity_merge_rules", + "columns": map[string]any{"merge_property_1_type": "string", "merge_property_1_value": "string", "merge_property_2_type": "string", "merge_property_2_value": "string"}, + "isMergeRule": true, + "receivedAt": "2021-09-01T00:00:00.000Z", + "mergePropOne": "anonymousId", + "mergePropTwo": "userId", + }, + "userId": "", + }, + Metadata: ptrans.Metadata{ + EventType: "group", + DestinationType: "BQ", + ReceivedAt: "2021-09-01T00:00:00.000Z", + SourceID: "sourceID", + DestinationID: "destinationID", + SourceType: "sourceType", + }, + StatusCode: http.StatusOK, + }, + }, + }, + }, + } + + for _, tc := range testsCases { + t.Run(tc.name, func(t *testing.T) { + c := config.New() + c.Set("DEST_TRANSFORM_URL", transformerResource.TransformerURL) + c.Set("USER_TRANSFORM_URL", transformerResource.TransformerURL) + + for k, v := range tc.configOverride { + c.Set(k, v) + } + + eventsInfos := []eventsInfo{ + { + payload: []byte(tc.eventPayload), + metadata: tc.metadata, + destination: tc.destination, + }, + } + destinationTransformer := ptrans.NewTransformer(c, logger.NOP, stats.Default) + warehouseTransformer := New(c, logger.NOP, stats.NOP) + + testEvents(t, eventsInfos, destinationTransformer, warehouseTransformer, tc.expectedResponse) + }) + } +} diff --git a/warehouse/transformer/identify.go b/warehouse/transformer/identify.go new file mode 100644 index 00000000000..dad5c1cee42 --- /dev/null +++ b/warehouse/transformer/identify.go @@ -0,0 +1,180 @@ +package transformer + +import ( + "fmt" + + "github.com/samber/lo" + +
"github.com/rudderlabs/rudder-server/utils/misc" + "github.com/rudderlabs/rudder-server/warehouse/transformer/internal/rules" + "github.com/rudderlabs/rudder-server/warehouse/transformer/internal/utils" +) + +/* handleIdentifyEvent builds every output record for one identify event. It computes the shared properties once via identifyCommonProps, then concatenates, in this order, the result of identifiesResponse, the merge events from handleMergeEvent, and the result of usersResponse. The first error from any step is wrapped with fmt.Errorf and returned with a nil slice. */ func (t *transformer) handleIdentifyEvent(pi *processingInfo) ([]map[string]any, error) { + commonProps, commonColumnTypes, err := t.identifyCommonProps(pi) + if err != nil { + return nil, fmt.Errorf("identifying common properties: %w", err) + } + + identifiesResponse, err := t.identifiesResponse(pi, commonProps, commonColumnTypes) + if err != nil { + return nil, fmt.Errorf("identifies response: %w", err) + } + + mergeEvents, err := t.handleMergeEvent(pi) + if err != nil { + return nil, fmt.Errorf("handling merge event: %w", err) + } + + usersResponse, err := t.usersResponse(pi, commonProps, commonColumnTypes) + if err != nil { + return nil, fmt.Errorf("users response: %w", err) + } + return append(append(identifiesResponse, mergeEvents...), usersResponse...), nil +} + +/* identifyCommonProps collects the data map and the column-type map that are later handed to both identifiesResponse and usersResponse. It feeds, in order, message["userProperties"], context.traits (only when pi.dstOpts.allowUsersContextTraits is set), message["traits"] and message["context"] through setDataAndColumnTypeFromInput, all writing into the same two maps; only the context call passes a "context_" prefix. NOTE(review): the meaning of the remaining positional arguments (2 or 3, "", 0) is defined by setDataAndColumnTypeFromInput, which is not visible here; confirm against that helper. */ func (t *transformer) identifyCommonProps(pi *processingInfo) (map[string]any, map[string]string, error) { + commonProps := make(map[string]any) + commonColumnTypes := make(map[string]string) + + if err := t.setDataAndColumnTypeFromInput(pi, pi.event.Message["userProperties"], commonProps, commonColumnTypes, + "identify_userProperties_", 2, "", 0, + ); err != nil { + return nil, nil, fmt.Errorf("setting data and column types from message: %w", err) + } + /* context.traits is only read when the destination option allows it. */ if pi.dstOpts.allowUsersContextTraits { + contextTraits := misc.MapLookup(pi.event.Message, "context", "traits") + + if err := t.setDataAndColumnTypeFromInput(pi, contextTraits, commonProps, commonColumnTypes, + "identify_context_traits_", 3, "", 0, + ); err != nil { + return nil, nil, fmt.Errorf("setting data and column types from message: %w", err) + } + } + if err := t.setDataAndColumnTypeFromInput(pi, pi.event.Message["traits"], commonProps, commonColumnTypes, + "identify_traits_", 2, "", 0, + ); err != nil { + return nil, nil,
fmt.Errorf("setting data and column types from message: %w", err) + } + if err := t.setDataAndColumnTypeFromInput(pi, pi.event.Message["context"], commonProps, commonColumnTypes, + "identify_context_", 2, "context_", 0, + ); err != nil { + return nil, nil, fmt.Errorf("setting data and column types from message: %w", err) + } + + /* If the collected properties hold a non-nil value under the destination-safe name for "user_id", move that value and its column type to the same name prefixed with "_" and delete the original key from both maps. */ userIDColName, err := SafeColumnName(pi.event.Metadata.DestinationType, pi.itrOpts, "user_id") + if err != nil { + return nil, nil, fmt.Errorf("safe column name: %w", err) + } + if k, ok := commonProps[userIDColName]; ok && k != nil { + revUserIDCol := "_" + userIDColName + + commonProps[revUserIDCol] = commonProps[userIDColName] + commonColumnTypes[revUserIDCol] = commonColumnTypes[userIDColName] + + delete(commonProps, userIDColName) + delete(commonColumnTypes, userIDColName) + } + return commonProps, commonColumnTypes, nil +} + +/* identifiesResponse builds the single output record for the "identifies" table. It copies commonProps into a fresh map, applies rules.DefaultRules and rules.DefaultFunctionalRules through setDataAndColumnTypeFromRules, calls storeRudderEvent, resolves the table name with SafeTableName, and derives the columns via getColumns from commonColumnTypes merged with the rule-derived types. On success the returned slice has exactly one element whose "userId" is the empty string. */ func (t *transformer) identifiesResponse(pi *processingInfo, commonProps map[string]any, commonColumnTypes map[string]string) ([]map[string]any, error) { + identifiesEvent := make(map[string]any) + identifiesColumnTypes := make(map[string]string) + + identifiesEvent = lo.Assign(identifiesEvent, commonProps) + + if err := t.setDataAndColumnTypeFromRules(pi, identifiesEvent, identifiesColumnTypes, + rules.DefaultRules, rules.DefaultFunctionalRules, + ); err != nil { + return nil, fmt.Errorf("setting data and column types from rules: %w", err) + } + if err := storeRudderEvent(pi, identifiesEvent, identifiesColumnTypes); err != nil { + return nil, fmt.Errorf("storing rudder event: %w", err) + } + + identifiesTable, err := SafeTableName(pi.event.Metadata.DestinationType, pi.itrOpts, "identifies") + if err != nil { + return nil, fmt.Errorf("safe table name: %w", err) + } + identifiesColumns, err := t.getColumns(pi.event.Metadata.DestinationType, identifiesEvent, lo.Assign(commonColumnTypes, identifiesColumnTypes)) + if err != nil { + return nil, fmt.Errorf("getting columns: %w", err) + } + + identifiesOutput :=
map[string]any{ + "data": identifiesEvent, + "metadata": map[string]any{ + "table": identifiesTable, + "columns": identifiesColumns, + "receivedAt": pi.event.Metadata.ReceivedAt, + }, + "userId": "", + } + return []map[string]any{identifiesOutput}, nil +} + +/* usersResponse builds the output record for the "users" table. It returns (nil, nil) when misc.MapLookup returns nil for "userId" in the message, or when skipUsersTable is set on either pi.itrOpts or pi.dstOpts. Otherwise it copies commonProps into a fresh map, applies the identify rules, sets the destination-safe "id" column to the userId value and the destination-safe "received_at" column to pi.event.Metadata.ReceivedAt with column type "datetime", and returns a one-element slice. */ func (t *transformer) usersResponse(pi *processingInfo, commonProps map[string]any, commonColumnTypes map[string]string) ([]map[string]any, error) { + userID := misc.MapLookup(pi.event.Message, "userId") + if userID == nil { + return nil, nil + } + if pi.itrOpts.skipUsersTable || pi.dstOpts.skipUsersTable { + return nil, nil + } + + usersEvent := make(map[string]any) + usersColumnTypes := make(map[string]string) + + usersEvent = lo.Assign(usersEvent, commonProps) + + var rulesMap map[string]string + var functionalRulesMap map[string]rules.FunctionalRules + /* Only rulesMap differs between data-lake and other destinations; both branches assign rules.IdentifyFunctionalRules. */ if utils.IsDataLake(pi.event.Metadata.DestinationType) { + rulesMap = rules.IdentifyDataLakeRules + functionalRulesMap = rules.IdentifyFunctionalRules + } else { + rulesMap = rules.IdentifyNonDataLakeRules + functionalRulesMap = rules.IdentifyFunctionalRules + } + + if err := t.setDataAndColumnTypeFromRules(pi, usersEvent, usersColumnTypes, + rulesMap, functionalRulesMap, + ); err != nil { + return nil, fmt.Errorf("setting data and column types from rules: %w", err) + } + + idColName, err := SafeColumnName(pi.event.Metadata.DestinationType, pi.itrOpts, "id") + if err != nil { + return nil, fmt.Errorf("safe column name: %w", err) + } + usersEvent[idColName] = userID + usersColumnTypes[idColName] = t.getDataType(pi.event.Metadata.DestinationType, idColName, userID, false) + + receivedAtColName, err := SafeColumnName(pi.event.Metadata.DestinationType, pi.itrOpts, "received_at") + if err != nil { + return nil, fmt.Errorf("safe column name: %w", err) + } + usersEvent[receivedAtColName] = pi.event.Metadata.ReceivedAt + usersColumnTypes[receivedAtColName] = "datetime" + + tableName, err := SafeTableName(pi.event.Metadata.DestinationType, pi.itrOpts, "users") + if
err != nil { + return nil, fmt.Errorf("safe table name: %w", err) + } + columns, err := t.getColumns(pi.event.Metadata.DestinationType, usersEvent, lo.Assign(commonColumnTypes, usersColumnTypes)) + if err != nil { + return nil, fmt.Errorf("getting columns: %w", err) + } + + /* The users record uses the same envelope as the identifies record: data, metadata (table, columns, receivedAt) and an empty "userId". */ usersOutput := map[string]any{ + "data": usersEvent, + "metadata": map[string]any{ + "table": tableName, + "columns": columns, + "receivedAt": pi.event.Metadata.ReceivedAt, + }, + "userId": "", + } + return []map[string]any{usersOutput}, nil +} diff --git a/warehouse/transformer/identify_test.go b/warehouse/transformer/identify_test.go new file mode 100644 index 00000000000..896109e62db --- /dev/null +++ b/warehouse/transformer/identify_test.go @@ -0,0 +1,2298 @@ +package transformer + +import ( + "net/http" + "testing" + + "github.com/ory/dockertest/v3" + "github.com/stretchr/testify/require" + + "github.com/rudderlabs/rudder-go-kit/config" + "github.com/rudderlabs/rudder-go-kit/logger" + "github.com/rudderlabs/rudder-go-kit/stats" + transformertest "github.com/rudderlabs/rudder-go-kit/testhelper/docker/resource/transformer" + + backendconfig "github.com/rudderlabs/rudder-server/backend-config" + ptrans "github.com/rudderlabs/rudder-server/processor/transformer" +) + +func TestIdentify(t *testing.T) { + pool, err := dockertest.NewPool("") + require.NoError(t, err) + + transformerResource, err := transformertest.Setup(pool, t) + require.NoError(t, err) + + testsCases := []struct { + name string + configOverride map[string]any + eventPayload string + metadata ptrans.Metadata + destination backendconfig.DestinationT + expectedResponse ptrans.Response + }{ + { + name: "identify (POSTGRES)", + eventPayload:
`{"type":"identify","messageId":"messageId","anonymousId":"anonymousId","userId":"userId","sentAt":"2021-09-01T00:00:00.000Z","timestamp":"2021-09-01T00:00:00.000Z","receivedAt":"2021-09-01T00:00:00.000Z","originalTimestamp":"2021-09-01T00:00:00.000Z","channel":"web","request_ip":"5.6.7.8","traits":{"review_id":"86ac1cd43","product_id":"9578257311"},"userProperties":{"rating":3.0,"review_body":"OK for the price. It works but the material feels flimsy."},"context":{"traits":{"name":"Richard Hendricks","email":"rhedricks@example.com","logins":2},"ip":"1.2.3.4"}}`, + metadata: ptrans.Metadata{ + EventType: "identify", + DestinationType: "POSTGRES", + ReceivedAt: "2021-09-01T00:00:00.000Z", + SourceID: "sourceID", + DestinationID: "destinationID", + SourceType: "sourceType", + MessageID: "messageId", + }, + destination: backendconfig.DestinationT{ + Name: "POSTGRES", + Config: map[string]any{ + "allowUsersContextTraits": true, + }, + DestinationDefinition: backendconfig.DestinationDefinitionT{ + Name: "POSTGRES", + }, + }, + expectedResponse: ptrans.Response{ + Events: []ptrans.TransformerResponse{ + { + Output: map[string]any{ + "data": map[string]any{ + "anonymous_id": "anonymousId", + "channel": "web", + "context_destination_id": "destinationID", + "context_destination_type": "POSTGRES", + "context_ip": "1.2.3.4", + "context_passed_ip": "1.2.3.4", + "context_request_ip": "5.6.7.8", + "context_source_id": "sourceID", + "context_source_type": "sourceType", + "context_traits_email": "rhedricks@example.com", + "context_traits_logins": float64(2), + "context_traits_name": "Richard Hendricks", + "email": "rhedricks@example.com", + "id": "messageId", + "logins": float64(2), + "name": "Richard Hendricks", + "original_timestamp": "2021-09-01T00:00:00.000Z", + "product_id": "9578257311", + "rating": 3.0, + "received_at": "2021-09-01T00:00:00.000Z", + "review_body": "OK for the price. 
It works but the material feels flimsy.", + "review_id": "86ac1cd43", + "sent_at": "2021-09-01T00:00:00.000Z", + "timestamp": "2021-09-01T00:00:00.000Z", + "user_id": "userId", + }, + "metadata": map[string]any{ + "columns": map[string]any{ + "anonymous_id": "string", + "channel": "string", + "context_destination_id": "string", + "context_destination_type": "string", + "context_ip": "string", + "context_passed_ip": "string", + "context_request_ip": "string", + "context_source_id": "string", + "context_source_type": "string", + "context_traits_email": "string", + "context_traits_logins": "int", + "context_traits_name": "string", + "email": "string", + "id": "string", + "logins": "int", + "name": "string", + "original_timestamp": "datetime", + "product_id": "string", + "rating": "int", + "received_at": "datetime", + "review_body": "string", + "review_id": "string", + "sent_at": "datetime", + "timestamp": "datetime", + "user_id": "string", + "uuid_ts": "datetime", + }, + "receivedAt": "2021-09-01T00:00:00.000Z", + "table": "identifies", + }, + "userId": "", + }, + Metadata: ptrans.Metadata{ + EventType: "identify", + DestinationType: "POSTGRES", + ReceivedAt: "2021-09-01T00:00:00.000Z", + SourceID: "sourceID", + DestinationID: "destinationID", + SourceType: "sourceType", + MessageID: "messageId", + }, + StatusCode: http.StatusOK, + }, + { + Output: map[string]any{ + "data": map[string]any{ + "context_destination_id": "destinationID", + "context_destination_type": "POSTGRES", + "context_ip": "1.2.3.4", + "context_passed_ip": "1.2.3.4", + "context_request_ip": "5.6.7.8", + "context_source_id": "sourceID", + "context_source_type": "sourceType", + "context_traits_email": "rhedricks@example.com", + "context_traits_logins": float64(2), + "context_traits_name": "Richard Hendricks", + "email": "rhedricks@example.com", + "id": "userId", + "logins": float64(2), + "name": "Richard Hendricks", + "original_timestamp": "2021-09-01T00:00:00.000Z", + "product_id": "9578257311", + 
"rating": 3.0, + "received_at": "2021-09-01T00:00:00.000Z", + "review_body": "OK for the price. It works but the material feels flimsy.", + "review_id": "86ac1cd43", + "sent_at": "2021-09-01T00:00:00.000Z", + "timestamp": "2021-09-01T00:00:00.000Z", + }, + "metadata": map[string]any{ + "columns": map[string]any{ + "context_destination_id": "string", + "context_destination_type": "string", + "context_ip": "string", + "context_passed_ip": "string", + "context_request_ip": "string", + "context_source_id": "string", + "context_source_type": "string", + "context_traits_email": "string", + "context_traits_logins": "int", + "context_traits_name": "string", + "email": "string", + "id": "string", + "logins": "int", + "name": "string", + "original_timestamp": "datetime", + "product_id": "string", + "rating": "int", + "received_at": "datetime", + "review_body": "string", + "review_id": "string", + "sent_at": "datetime", + "timestamp": "datetime", + "uuid_ts": "datetime", + }, + "receivedAt": "2021-09-01T00:00:00.000Z", + "table": "users", + }, + "userId": "", + }, + Metadata: ptrans.Metadata{ + EventType: "identify", + DestinationType: "POSTGRES", + ReceivedAt: "2021-09-01T00:00:00.000Z", + SourceID: "sourceID", + DestinationID: "destinationID", + SourceType: "sourceType", + MessageID: "messageId", + }, + StatusCode: http.StatusOK, + }, + }, + }, + }, + { + name: "identify (S3_DATALAKE)", + eventPayload: `{"type":"identify","messageId":"messageId","anonymousId":"anonymousId","userId":"userId","sentAt":"2021-09-01T00:00:00.000Z","timestamp":"2021-09-01T00:00:00.000Z","receivedAt":"2021-09-01T00:00:00.000Z","originalTimestamp":"2021-09-01T00:00:00.000Z","channel":"web","request_ip":"5.6.7.8","traits":{"review_id":"86ac1cd43","product_id":"9578257311"},"userProperties":{"rating":3.0,"review_body":"OK for the price. 
It works but the material feels flimsy."},"context":{"traits":{"name":"Richard Hendricks","email":"rhedricks@example.com","logins":2},"ip":"1.2.3.4"}}`, + metadata: ptrans.Metadata{ + EventType: "identify", + DestinationType: "S3_DATALAKE", + ReceivedAt: "2021-09-01T00:00:00.000Z", + SourceID: "sourceID", + DestinationID: "destinationID", + SourceType: "sourceType", + MessageID: "messageId", + }, + destination: backendconfig.DestinationT{ + Name: "S3_DATALAKE", + Config: map[string]any{ + "allowUsersContextTraits": true, + }, + DestinationDefinition: backendconfig.DestinationDefinitionT{ + Name: "S3_DATALAKE", + }, + }, + expectedResponse: ptrans.Response{ + Events: []ptrans.TransformerResponse{ + { + Output: map[string]any{ + "data": map[string]any{ + "anonymous_id": "anonymousId", + "channel": "web", + "context_destination_id": "destinationID", + "context_destination_type": "S3_DATALAKE", + "context_ip": "1.2.3.4", + "context_passed_ip": "1.2.3.4", + "context_request_ip": "5.6.7.8", + "context_source_id": "sourceID", + "context_source_type": "sourceType", + "context_traits_email": "rhedricks@example.com", + "context_traits_logins": float64(2), + "context_traits_name": "Richard Hendricks", + "email": "rhedricks@example.com", + "id": "messageId", + "logins": float64(2), + "name": "Richard Hendricks", + "original_timestamp": "2021-09-01T00:00:00.000Z", + "product_id": "9578257311", + "rating": 3.0, + "received_at": "2021-09-01T00:00:00.000Z", + "review_body": "OK for the price. 
It works but the material feels flimsy.", + "review_id": "86ac1cd43", + "sent_at": "2021-09-01T00:00:00.000Z", + "_timestamp": "2021-09-01T00:00:00.000Z", + "user_id": "userId", + }, + "metadata": map[string]any{ + "columns": map[string]any{ + "anonymous_id": "string", + "channel": "string", + "context_destination_id": "string", + "context_destination_type": "string", + "context_ip": "string", + "context_passed_ip": "string", + "context_request_ip": "string", + "context_source_id": "string", + "context_source_type": "string", + "context_traits_email": "string", + "context_traits_logins": "int", + "context_traits_name": "string", + "email": "string", + "id": "string", + "logins": "int", + "name": "string", + "original_timestamp": "datetime", + "product_id": "string", + "rating": "int", + "received_at": "datetime", + "review_body": "string", + "review_id": "string", + "sent_at": "datetime", + "_timestamp": "datetime", + "user_id": "string", + "uuid_ts": "datetime", + }, + "receivedAt": "2021-09-01T00:00:00.000Z", + "table": "identifies", + }, + "userId": "", + }, + Metadata: ptrans.Metadata{ + EventType: "identify", + DestinationType: "S3_DATALAKE", + ReceivedAt: "2021-09-01T00:00:00.000Z", + SourceID: "sourceID", + DestinationID: "destinationID", + SourceType: "sourceType", + MessageID: "messageId", + }, + StatusCode: http.StatusOK, + }, + { + Output: map[string]any{ + "data": map[string]any{ + "context_destination_id": "destinationID", + "context_destination_type": "S3_DATALAKE", + "context_ip": "1.2.3.4", + "context_passed_ip": "1.2.3.4", + "context_request_ip": "5.6.7.8", + "context_source_id": "sourceID", + "context_source_type": "sourceType", + "context_traits_email": "rhedricks@example.com", + "context_traits_logins": float64(2), + "context_traits_name": "Richard Hendricks", + "email": "rhedricks@example.com", + "id": "userId", + "logins": float64(2), + "name": "Richard Hendricks", + "product_id": "9578257311", + "rating": 3.0, + "received_at": 
"2021-09-01T00:00:00.000Z", + "review_body": "OK for the price. It works but the material feels flimsy.", + "review_id": "86ac1cd43", + }, + "metadata": map[string]any{ + "columns": map[string]any{ + "context_destination_id": "string", + "context_destination_type": "string", + "context_ip": "string", + "context_passed_ip": "string", + "context_request_ip": "string", + "context_source_id": "string", + "context_source_type": "string", + "context_traits_email": "string", + "context_traits_logins": "int", + "context_traits_name": "string", + "email": "string", + "id": "string", + "logins": "int", + "name": "string", + "product_id": "string", + "rating": "int", + "received_at": "datetime", + "review_body": "string", + "review_id": "string", + "uuid_ts": "datetime", + }, + "receivedAt": "2021-09-01T00:00:00.000Z", + "table": "users", + }, + "userId": "", + }, + Metadata: ptrans.Metadata{ + EventType: "identify", + DestinationType: "S3_DATALAKE", + ReceivedAt: "2021-09-01T00:00:00.000Z", + SourceID: "sourceID", + DestinationID: "destinationID", + SourceType: "sourceType", + MessageID: "messageId", + }, + StatusCode: http.StatusOK, + }, + }, + }, + }, + { + name: "identify (POSTGRES) without traits", + eventPayload: `{"type":"identify","messageId":"messageId","anonymousId":"anonymousId","userId":"userId","sentAt":"2021-09-01T00:00:00.000Z","timestamp":"2021-09-01T00:00:00.000Z","receivedAt":"2021-09-01T00:00:00.000Z","originalTimestamp":"2021-09-01T00:00:00.000Z","channel":"web","request_ip":"5.6.7.8","userProperties":{"rating":3.0,"review_body":"OK for the price. 
It works but the material feels flimsy."},"context":{"traits":{"name":"Richard Hendricks","email":"rhedricks@example.com","logins":2},"ip":"1.2.3.4"}}`, + metadata: ptrans.Metadata{ + EventType: "identify", + DestinationType: "POSTGRES", + ReceivedAt: "2021-09-01T00:00:00.000Z", + SourceID: "sourceID", + DestinationID: "destinationID", + SourceType: "sourceType", + MessageID: "messageId", + }, + destination: backendconfig.DestinationT{ + Name: "POSTGRES", + Config: map[string]any{ + "allowUsersContextTraits": true, + }, + DestinationDefinition: backendconfig.DestinationDefinitionT{ + Name: "POSTGRES", + }, + }, + expectedResponse: ptrans.Response{ + Events: []ptrans.TransformerResponse{ + { + Output: map[string]any{ + "data": map[string]any{ + "anonymous_id": "anonymousId", + "channel": "web", + "context_destination_id": "destinationID", + "context_destination_type": "POSTGRES", + "context_ip": "1.2.3.4", + "context_passed_ip": "1.2.3.4", + "context_request_ip": "5.6.7.8", + "context_source_id": "sourceID", + "context_source_type": "sourceType", + "context_traits_email": "rhedricks@example.com", + "context_traits_logins": float64(2), + "context_traits_name": "Richard Hendricks", + "email": "rhedricks@example.com", + "id": "messageId", + "logins": float64(2), + "name": "Richard Hendricks", + "original_timestamp": "2021-09-01T00:00:00.000Z", + "rating": 3.0, + "received_at": "2021-09-01T00:00:00.000Z", + "review_body": "OK for the price. 
It works but the material feels flimsy.", + "sent_at": "2021-09-01T00:00:00.000Z", + "timestamp": "2021-09-01T00:00:00.000Z", + "user_id": "userId", + }, + "metadata": map[string]any{ + "columns": map[string]any{ + "anonymous_id": "string", + "channel": "string", + "context_destination_id": "string", + "context_destination_type": "string", + "context_ip": "string", + "context_passed_ip": "string", + "context_request_ip": "string", + "context_source_id": "string", + "context_source_type": "string", + "context_traits_email": "string", + "context_traits_logins": "int", + "context_traits_name": "string", + "email": "string", + "id": "string", + "logins": "int", + "name": "string", + "original_timestamp": "datetime", + "rating": "int", + "received_at": "datetime", + "review_body": "string", + "sent_at": "datetime", + "timestamp": "datetime", + "user_id": "string", + "uuid_ts": "datetime", + }, + "receivedAt": "2021-09-01T00:00:00.000Z", + "table": "identifies", + }, + "userId": "", + }, + Metadata: ptrans.Metadata{ + EventType: "identify", + DestinationType: "POSTGRES", + ReceivedAt: "2021-09-01T00:00:00.000Z", + SourceID: "sourceID", + DestinationID: "destinationID", + SourceType: "sourceType", + MessageID: "messageId", + }, + StatusCode: http.StatusOK, + }, + { + Output: map[string]any{ + "data": map[string]any{ + "context_destination_id": "destinationID", + "context_destination_type": "POSTGRES", + "context_ip": "1.2.3.4", + "context_passed_ip": "1.2.3.4", + "context_request_ip": "5.6.7.8", + "context_source_id": "sourceID", + "context_source_type": "sourceType", + "context_traits_email": "rhedricks@example.com", + "context_traits_logins": float64(2), + "context_traits_name": "Richard Hendricks", + "email": "rhedricks@example.com", + "id": "userId", + "logins": float64(2), + "name": "Richard Hendricks", + "original_timestamp": "2021-09-01T00:00:00.000Z", + "rating": 3.0, + "received_at": "2021-09-01T00:00:00.000Z", + "review_body": "OK for the price. 
It works but the material feels flimsy.", + "sent_at": "2021-09-01T00:00:00.000Z", + "timestamp": "2021-09-01T00:00:00.000Z", + }, + "metadata": map[string]any{ + "columns": map[string]any{ + "context_destination_id": "string", + "context_destination_type": "string", + "context_ip": "string", + "context_passed_ip": "string", + "context_request_ip": "string", + "context_source_id": "string", + "context_source_type": "string", + "context_traits_email": "string", + "context_traits_logins": "int", + "context_traits_name": "string", + "email": "string", + "id": "string", + "logins": "int", + "name": "string", + "original_timestamp": "datetime", + "rating": "int", + "received_at": "datetime", + "review_body": "string", + "sent_at": "datetime", + "timestamp": "datetime", + "uuid_ts": "datetime", + }, + "receivedAt": "2021-09-01T00:00:00.000Z", + "table": "users", + }, + "userId": "", + }, + Metadata: ptrans.Metadata{ + EventType: "identify", + DestinationType: "POSTGRES", + ReceivedAt: "2021-09-01T00:00:00.000Z", + SourceID: "sourceID", + DestinationID: "destinationID", + SourceType: "sourceType", + MessageID: "messageId", + }, + StatusCode: http.StatusOK, + }, + }, + }, + }, + { + name: "identify (POSTGRES) without userProperties", + eventPayload: `{"type":"identify","messageId":"messageId","anonymousId":"anonymousId","userId":"userId","sentAt":"2021-09-01T00:00:00.000Z","timestamp":"2021-09-01T00:00:00.000Z","receivedAt":"2021-09-01T00:00:00.000Z","originalTimestamp":"2021-09-01T00:00:00.000Z","channel":"web","request_ip":"5.6.7.8","traits":{"review_id":"86ac1cd43","product_id":"9578257311"},"context":{"traits":{"name":"Richard Hendricks","email":"rhedricks@example.com","logins":2},"ip":"1.2.3.4"}}`, + metadata: ptrans.Metadata{ + EventType: "identify", + DestinationType: "POSTGRES", + ReceivedAt: "2021-09-01T00:00:00.000Z", + SourceID: "sourceID", + DestinationID: "destinationID", + SourceType: "sourceType", + MessageID: "messageId", + }, + destination: 
backendconfig.DestinationT{ + Name: "POSTGRES", + Config: map[string]any{ + "allowUsersContextTraits": true, + }, + DestinationDefinition: backendconfig.DestinationDefinitionT{ + Name: "POSTGRES", + }, + }, + expectedResponse: ptrans.Response{ + Events: []ptrans.TransformerResponse{ + { + Output: map[string]any{ + "data": map[string]any{ + "anonymous_id": "anonymousId", + "channel": "web", + "context_destination_id": "destinationID", + "context_destination_type": "POSTGRES", + "context_ip": "1.2.3.4", + "context_passed_ip": "1.2.3.4", + "context_request_ip": "5.6.7.8", + "context_source_id": "sourceID", + "context_source_type": "sourceType", + "context_traits_email": "rhedricks@example.com", + "context_traits_logins": float64(2), + "context_traits_name": "Richard Hendricks", + "email": "rhedricks@example.com", + "id": "messageId", + "logins": float64(2), + "name": "Richard Hendricks", + "original_timestamp": "2021-09-01T00:00:00.000Z", + "product_id": "9578257311", + "received_at": "2021-09-01T00:00:00.000Z", + "review_id": "86ac1cd43", + "sent_at": "2021-09-01T00:00:00.000Z", + "timestamp": "2021-09-01T00:00:00.000Z", + "user_id": "userId", + }, + "metadata": map[string]any{ + "columns": map[string]any{ + "anonymous_id": "string", + "channel": "string", + "context_destination_id": "string", + "context_destination_type": "string", + "context_ip": "string", + "context_passed_ip": "string", + "context_request_ip": "string", + "context_source_id": "string", + "context_source_type": "string", + "context_traits_email": "string", + "context_traits_logins": "int", + "context_traits_name": "string", + "email": "string", + "id": "string", + "logins": "int", + "name": "string", + "original_timestamp": "datetime", + "product_id": "string", + "received_at": "datetime", + "review_id": "string", + "sent_at": "datetime", + "timestamp": "datetime", + "user_id": "string", + "uuid_ts": "datetime", + }, + "receivedAt": "2021-09-01T00:00:00.000Z", + "table": "identifies", + }, + 
"userId": "", + }, + Metadata: ptrans.Metadata{ + EventType: "identify", + DestinationType: "POSTGRES", + ReceivedAt: "2021-09-01T00:00:00.000Z", + SourceID: "sourceID", + DestinationID: "destinationID", + SourceType: "sourceType", + MessageID: "messageId", + }, + StatusCode: http.StatusOK, + }, + { + Output: map[string]any{ + "data": map[string]any{ + "context_destination_id": "destinationID", + "context_destination_type": "POSTGRES", + "context_ip": "1.2.3.4", + "context_passed_ip": "1.2.3.4", + "context_request_ip": "5.6.7.8", + "context_source_id": "sourceID", + "context_source_type": "sourceType", + "context_traits_email": "rhedricks@example.com", + "context_traits_logins": float64(2), + "context_traits_name": "Richard Hendricks", + "email": "rhedricks@example.com", + "id": "userId", + "logins": float64(2), + "name": "Richard Hendricks", + "original_timestamp": "2021-09-01T00:00:00.000Z", + "product_id": "9578257311", + "received_at": "2021-09-01T00:00:00.000Z", + "review_id": "86ac1cd43", + "sent_at": "2021-09-01T00:00:00.000Z", + "timestamp": "2021-09-01T00:00:00.000Z", + }, + "metadata": map[string]any{ + "columns": map[string]any{ + "context_destination_id": "string", + "context_destination_type": "string", + "context_ip": "string", + "context_passed_ip": "string", + "context_request_ip": "string", + "context_source_id": "string", + "context_source_type": "string", + "context_traits_email": "string", + "context_traits_logins": "int", + "context_traits_name": "string", + "email": "string", + "id": "string", + "logins": "int", + "name": "string", + "original_timestamp": "datetime", + "product_id": "string", + "received_at": "datetime", + "review_id": "string", + "sent_at": "datetime", + "timestamp": "datetime", + "uuid_ts": "datetime", + }, + "receivedAt": "2021-09-01T00:00:00.000Z", + "table": "users", + }, + "userId": "", + }, + Metadata: ptrans.Metadata{ + EventType: "identify", + DestinationType: "POSTGRES", + ReceivedAt: "2021-09-01T00:00:00.000Z", + 
SourceID: "sourceID", + DestinationID: "destinationID", + SourceType: "sourceType", + MessageID: "messageId", + }, + StatusCode: http.StatusOK, + }, + }, + }, + }, + { + name: "identify (POSTGRES) without context.traits", + eventPayload: `{"type":"identify","messageId":"messageId","anonymousId":"anonymousId","userId":"userId","sentAt":"2021-09-01T00:00:00.000Z","timestamp":"2021-09-01T00:00:00.000Z","receivedAt":"2021-09-01T00:00:00.000Z","originalTimestamp":"2021-09-01T00:00:00.000Z","channel":"web","request_ip":"5.6.7.8","traits":{"review_id":"86ac1cd43","product_id":"9578257311"},"userProperties":{"rating":3.0,"review_body":"OK for the price. It works but the material feels flimsy."},"context":{"ip":"1.2.3.4"}}`, + metadata: ptrans.Metadata{ + EventType: "identify", + DestinationType: "POSTGRES", + ReceivedAt: "2021-09-01T00:00:00.000Z", + SourceID: "sourceID", + DestinationID: "destinationID", + SourceType: "sourceType", + MessageID: "messageId", + }, + destination: backendconfig.DestinationT{ + Name: "POSTGRES", + Config: map[string]any{ + "allowUsersContextTraits": true, + }, + DestinationDefinition: backendconfig.DestinationDefinitionT{ + Name: "POSTGRES", + }, + }, + expectedResponse: ptrans.Response{ + Events: []ptrans.TransformerResponse{ + { + Output: map[string]any{ + "data": map[string]any{ + "anonymous_id": "anonymousId", + "channel": "web", + "context_destination_id": "destinationID", + "context_destination_type": "POSTGRES", + "context_ip": "1.2.3.4", + "context_passed_ip": "1.2.3.4", + "context_request_ip": "5.6.7.8", + "context_source_id": "sourceID", + "context_source_type": "sourceType", + "id": "messageId", + "original_timestamp": "2021-09-01T00:00:00.000Z", + "product_id": "9578257311", + "rating": 3.0, + "received_at": "2021-09-01T00:00:00.000Z", + "review_body": "OK for the price. 
It works but the material feels flimsy.", + "review_id": "86ac1cd43", + "sent_at": "2021-09-01T00:00:00.000Z", + "timestamp": "2021-09-01T00:00:00.000Z", + "user_id": "userId", + }, + "metadata": map[string]any{ + "columns": map[string]any{ + "anonymous_id": "string", + "channel": "string", + "context_destination_id": "string", + "context_destination_type": "string", + "context_ip": "string", + "context_passed_ip": "string", + "context_request_ip": "string", + "context_source_id": "string", + "context_source_type": "string", + "id": "string", + "original_timestamp": "datetime", + "product_id": "string", + "rating": "int", + "received_at": "datetime", + "review_body": "string", + "review_id": "string", + "sent_at": "datetime", + "timestamp": "datetime", + "user_id": "string", + "uuid_ts": "datetime", + }, + "receivedAt": "2021-09-01T00:00:00.000Z", + "table": "identifies", + }, + "userId": "", + }, + Metadata: ptrans.Metadata{ + EventType: "identify", + DestinationType: "POSTGRES", + ReceivedAt: "2021-09-01T00:00:00.000Z", + SourceID: "sourceID", + DestinationID: "destinationID", + SourceType: "sourceType", + MessageID: "messageId", + }, + StatusCode: http.StatusOK, + }, + { + Output: map[string]any{ + "data": map[string]any{ + "context_destination_id": "destinationID", + "context_destination_type": "POSTGRES", + "context_ip": "1.2.3.4", + "context_passed_ip": "1.2.3.4", + "context_request_ip": "5.6.7.8", + "context_source_id": "sourceID", + "context_source_type": "sourceType", + "id": "userId", + "original_timestamp": "2021-09-01T00:00:00.000Z", + "product_id": "9578257311", + "rating": 3.0, + "received_at": "2021-09-01T00:00:00.000Z", + "review_body": "OK for the price. 
It works but the material feels flimsy.", + "review_id": "86ac1cd43", + "sent_at": "2021-09-01T00:00:00.000Z", + "timestamp": "2021-09-01T00:00:00.000Z", + }, + "metadata": map[string]any{ + "columns": map[string]any{ + "context_destination_id": "string", + "context_destination_type": "string", + "context_ip": "string", + "context_passed_ip": "string", + "context_request_ip": "string", + "context_source_id": "string", + "context_source_type": "string", + "id": "string", + "original_timestamp": "datetime", + "product_id": "string", + "rating": "int", + "received_at": "datetime", + "review_body": "string", + "review_id": "string", + "sent_at": "datetime", + "timestamp": "datetime", + "uuid_ts": "datetime", + }, + "receivedAt": "2021-09-01T00:00:00.000Z", + "table": "users", + }, + "userId": "", + }, + Metadata: ptrans.Metadata{ + EventType: "identify", + DestinationType: "POSTGRES", + ReceivedAt: "2021-09-01T00:00:00.000Z", + SourceID: "sourceID", + DestinationID: "destinationID", + SourceType: "sourceType", + MessageID: "messageId", + }, + StatusCode: http.StatusOK, + }, + }, + }, + }, + { + name: "identify (POSTGRES) without context", + eventPayload: `{"type":"identify","messageId":"messageId","anonymousId":"anonymousId","userId":"userId","sentAt":"2021-09-01T00:00:00.000Z","timestamp":"2021-09-01T00:00:00.000Z","receivedAt":"2021-09-01T00:00:00.000Z","originalTimestamp":"2021-09-01T00:00:00.000Z","channel":"web","request_ip":"5.6.7.8","traits":{"review_id":"86ac1cd43","product_id":"9578257311"},"userProperties":{"rating":3.0,"review_body":"OK for the price. 
It works but the material feels flimsy."}}`, + metadata: ptrans.Metadata{ + EventType: "identify", + DestinationType: "POSTGRES", + ReceivedAt: "2021-09-01T00:00:00.000Z", + SourceID: "sourceID", + DestinationID: "destinationID", + SourceType: "sourceType", + MessageID: "messageId", + }, + destination: backendconfig.DestinationT{ + Name: "POSTGRES", + Config: map[string]any{ + "allowUsersContextTraits": true, + }, + DestinationDefinition: backendconfig.DestinationDefinitionT{ + Name: "POSTGRES", + }, + }, + expectedResponse: ptrans.Response{ + Events: []ptrans.TransformerResponse{ + { + Output: map[string]any{ + "data": map[string]any{ + "anonymous_id": "anonymousId", + "channel": "web", + "context_destination_id": "destinationID", + "context_destination_type": "POSTGRES", + "context_ip": "5.6.7.8", + "context_request_ip": "5.6.7.8", + "context_source_id": "sourceID", + "context_source_type": "sourceType", + "id": "messageId", + "original_timestamp": "2021-09-01T00:00:00.000Z", + "product_id": "9578257311", + "rating": 3.0, + "received_at": "2021-09-01T00:00:00.000Z", + "review_body": "OK for the price. 
It works but the material feels flimsy.", + "review_id": "86ac1cd43", + "sent_at": "2021-09-01T00:00:00.000Z", + "timestamp": "2021-09-01T00:00:00.000Z", + "user_id": "userId", + }, + "metadata": map[string]any{ + "columns": map[string]any{ + "anonymous_id": "string", + "channel": "string", + "context_destination_id": "string", + "context_destination_type": "string", + "context_ip": "string", + "context_request_ip": "string", + "context_source_id": "string", + "context_source_type": "string", + "id": "string", + "original_timestamp": "datetime", + "product_id": "string", + "rating": "int", + "received_at": "datetime", + "review_body": "string", + "review_id": "string", + "sent_at": "datetime", + "timestamp": "datetime", + "user_id": "string", + "uuid_ts": "datetime", + }, + "receivedAt": "2021-09-01T00:00:00.000Z", + "table": "identifies", + }, + "userId": "", + }, + Metadata: ptrans.Metadata{ + EventType: "identify", + DestinationType: "POSTGRES", + ReceivedAt: "2021-09-01T00:00:00.000Z", + SourceID: "sourceID", + DestinationID: "destinationID", + SourceType: "sourceType", + MessageID: "messageId", + }, + StatusCode: http.StatusOK, + }, + { + Output: map[string]any{ + "data": map[string]any{ + "context_destination_id": "destinationID", + "context_destination_type": "POSTGRES", + "context_ip": "5.6.7.8", + "context_request_ip": "5.6.7.8", + "context_source_id": "sourceID", + "context_source_type": "sourceType", + "id": "userId", + "original_timestamp": "2021-09-01T00:00:00.000Z", + "product_id": "9578257311", + "rating": 3.0, + "received_at": "2021-09-01T00:00:00.000Z", + "review_body": "OK for the price. 
It works but the material feels flimsy.", + "review_id": "86ac1cd43", + "sent_at": "2021-09-01T00:00:00.000Z", + "timestamp": "2021-09-01T00:00:00.000Z", + }, + "metadata": map[string]any{ + "columns": map[string]any{ + "context_destination_id": "string", + "context_destination_type": "string", + "context_ip": "string", + "context_request_ip": "string", + "context_source_id": "string", + "context_source_type": "string", + "id": "string", + "original_timestamp": "datetime", + "product_id": "string", + "rating": "int", + "received_at": "datetime", + "review_body": "string", + "review_id": "string", + "sent_at": "datetime", + "timestamp": "datetime", + "uuid_ts": "datetime", + }, + "receivedAt": "2021-09-01T00:00:00.000Z", + "table": "users", + }, + "userId": "", + }, + Metadata: ptrans.Metadata{ + EventType: "identify", + DestinationType: "POSTGRES", + ReceivedAt: "2021-09-01T00:00:00.000Z", + SourceID: "sourceID", + DestinationID: "destinationID", + SourceType: "sourceType", + MessageID: "messageId", + }, + StatusCode: http.StatusOK, + }, + }, + }, + }, + { + name: "identify (POSTGRES) not allowUsersContextTraits", + eventPayload: `{"type":"identify","messageId":"messageId","anonymousId":"anonymousId","userId":"userId","sentAt":"2021-09-01T00:00:00.000Z","timestamp":"2021-09-01T00:00:00.000Z","receivedAt":"2021-09-01T00:00:00.000Z","originalTimestamp":"2021-09-01T00:00:00.000Z","channel":"web","request_ip":"5.6.7.8","traits":{"review_id":"86ac1cd43","product_id":"9578257311"},"userProperties":{"rating":3.0,"review_body":"OK for the price. 
It works but the material feels flimsy."},"context":{"traits":{"name":"Richard Hendricks","email":"rhedricks@example.com","logins":2},"ip":"1.2.3.4"}}`, + metadata: ptrans.Metadata{ + EventType: "identify", + DestinationType: "POSTGRES", + ReceivedAt: "2021-09-01T00:00:00.000Z", + SourceID: "sourceID", + DestinationID: "destinationID", + SourceType: "sourceType", + MessageID: "messageId", + }, + destination: backendconfig.DestinationT{ + Name: "POSTGRES", + Config: map[string]any{}, + DestinationDefinition: backendconfig.DestinationDefinitionT{ + Name: "POSTGRES", + }, + }, + expectedResponse: ptrans.Response{ + Events: []ptrans.TransformerResponse{ + { + Output: map[string]any{ + "data": map[string]any{ + "anonymous_id": "anonymousId", + "channel": "web", + "context_destination_id": "destinationID", + "context_destination_type": "POSTGRES", + "context_ip": "1.2.3.4", + "context_passed_ip": "1.2.3.4", + "context_request_ip": "5.6.7.8", + "context_source_id": "sourceID", + "context_source_type": "sourceType", + "context_traits_email": "rhedricks@example.com", + "context_traits_logins": float64(2), + "context_traits_name": "Richard Hendricks", + "id": "messageId", + "original_timestamp": "2021-09-01T00:00:00.000Z", + "product_id": "9578257311", + "rating": 3.0, + "received_at": "2021-09-01T00:00:00.000Z", + "review_body": "OK for the price. 
It works but the material feels flimsy.", + "review_id": "86ac1cd43", + "sent_at": "2021-09-01T00:00:00.000Z", + "timestamp": "2021-09-01T00:00:00.000Z", + "user_id": "userId", + }, + "metadata": map[string]any{ + "columns": map[string]any{ + "anonymous_id": "string", + "channel": "string", + "context_destination_id": "string", + "context_destination_type": "string", + "context_ip": "string", + "context_passed_ip": "string", + "context_request_ip": "string", + "context_source_id": "string", + "context_source_type": "string", + "context_traits_email": "string", + "context_traits_logins": "int", + "context_traits_name": "string", + "id": "string", + "original_timestamp": "datetime", + "product_id": "string", + "rating": "int", + "received_at": "datetime", + "review_body": "string", + "review_id": "string", + "sent_at": "datetime", + "timestamp": "datetime", + "user_id": "string", + "uuid_ts": "datetime", + }, + "receivedAt": "2021-09-01T00:00:00.000Z", + "table": "identifies", + }, + "userId": "", + }, + Metadata: ptrans.Metadata{ + EventType: "identify", + DestinationType: "POSTGRES", + ReceivedAt: "2021-09-01T00:00:00.000Z", + SourceID: "sourceID", + DestinationID: "destinationID", + SourceType: "sourceType", + MessageID: "messageId", + }, + StatusCode: http.StatusOK, + }, + { + Output: map[string]any{ + "data": map[string]any{ + "context_destination_id": "destinationID", + "context_destination_type": "POSTGRES", + "context_ip": "1.2.3.4", + "context_passed_ip": "1.2.3.4", + "context_request_ip": "5.6.7.8", + "context_source_id": "sourceID", + "context_source_type": "sourceType", + "context_traits_email": "rhedricks@example.com", + "context_traits_logins": float64(2), + "context_traits_name": "Richard Hendricks", + "id": "userId", + "original_timestamp": "2021-09-01T00:00:00.000Z", + "product_id": "9578257311", + "rating": 3.0, + "received_at": "2021-09-01T00:00:00.000Z", + "review_body": "OK for the price. 
It works but the material feels flimsy.", + "review_id": "86ac1cd43", + "sent_at": "2021-09-01T00:00:00.000Z", + "timestamp": "2021-09-01T00:00:00.000Z", + }, + "metadata": map[string]any{ + "columns": map[string]any{ + "context_destination_id": "string", + "context_destination_type": "string", + "context_ip": "string", + "context_passed_ip": "string", + "context_request_ip": "string", + "context_source_id": "string", + "context_source_type": "string", + "context_traits_email": "string", + "context_traits_logins": "int", + "context_traits_name": "string", + "id": "string", + "original_timestamp": "datetime", + "product_id": "string", + "rating": "int", + "received_at": "datetime", + "review_body": "string", + "review_id": "string", + "sent_at": "datetime", + "timestamp": "datetime", + "uuid_ts": "datetime", + }, + "receivedAt": "2021-09-01T00:00:00.000Z", + "table": "users", + }, + "userId": "", + }, + Metadata: ptrans.Metadata{ + EventType: "identify", + DestinationType: "POSTGRES", + ReceivedAt: "2021-09-01T00:00:00.000Z", + SourceID: "sourceID", + DestinationID: "destinationID", + SourceType: "sourceType", + MessageID: "messageId", + }, + StatusCode: http.StatusOK, + }, + }, + }, + }, + { + name: "identify (POSTGRES) context.traits not map", + eventPayload: `{"type":"identify","messageId":"messageId","anonymousId":"anonymousId","userId":"userId","sentAt":"2021-09-01T00:00:00.000Z","timestamp":"2021-09-01T00:00:00.000Z","receivedAt":"2021-09-01T00:00:00.000Z","originalTimestamp":"2021-09-01T00:00:00.000Z","channel":"web","request_ip":"5.6.7.8","traits":{"review_id":"86ac1cd43","product_id":"9578257311"},"userProperties":{"rating":3.0,"review_body":"OK for the price. 
It works but the material feels flimsy."},"context":{"traits":"traits","ip":"1.2.3.4"}}`, + metadata: ptrans.Metadata{ + EventType: "identify", + DestinationType: "POSTGRES", + ReceivedAt: "2021-09-01T00:00:00.000Z", + SourceID: "sourceID", + DestinationID: "destinationID", + SourceType: "sourceType", + MessageID: "messageId", + }, + destination: backendconfig.DestinationT{ + Name: "POSTGRES", + Config: map[string]any{}, + DestinationDefinition: backendconfig.DestinationDefinitionT{ + Name: "POSTGRES", + }, + }, + expectedResponse: ptrans.Response{ + Events: []ptrans.TransformerResponse{ + { + Output: map[string]any{ + "data": map[string]any{ + "anonymous_id": "anonymousId", + "channel": "web", + "context_destination_id": "destinationID", + "context_destination_type": "POSTGRES", + "context_ip": "1.2.3.4", + "context_passed_ip": "1.2.3.4", + "context_request_ip": "5.6.7.8", + "context_source_id": "sourceID", + "context_source_type": "sourceType", + "context_traits": "traits", + "id": "messageId", + "original_timestamp": "2021-09-01T00:00:00.000Z", + "product_id": "9578257311", + "rating": 3.0, + "received_at": "2021-09-01T00:00:00.000Z", + "review_body": "OK for the price. 
It works but the material feels flimsy.", + "review_id": "86ac1cd43", + "sent_at": "2021-09-01T00:00:00.000Z", + "timestamp": "2021-09-01T00:00:00.000Z", + "user_id": "userId", + }, + "metadata": map[string]any{ + "columns": map[string]any{ + "anonymous_id": "string", + "channel": "string", + "context_destination_id": "string", + "context_destination_type": "string", + "context_ip": "string", + "context_passed_ip": "string", + "context_request_ip": "string", + "context_source_id": "string", + "context_source_type": "string", + "context_traits": "string", + "id": "string", + "original_timestamp": "datetime", + "product_id": "string", + "rating": "int", + "received_at": "datetime", + "review_body": "string", + "review_id": "string", + "sent_at": "datetime", + "timestamp": "datetime", + "user_id": "string", + "uuid_ts": "datetime", + }, + "receivedAt": "2021-09-01T00:00:00.000Z", + "table": "identifies", + }, + "userId": "", + }, + Metadata: ptrans.Metadata{ + EventType: "identify", + DestinationType: "POSTGRES", + ReceivedAt: "2021-09-01T00:00:00.000Z", + SourceID: "sourceID", + DestinationID: "destinationID", + SourceType: "sourceType", + MessageID: "messageId", + }, + StatusCode: http.StatusOK, + }, + { + Output: map[string]any{ + "data": map[string]any{ + "context_destination_id": "destinationID", + "context_destination_type": "POSTGRES", + "context_ip": "1.2.3.4", + "context_passed_ip": "1.2.3.4", + "context_request_ip": "5.6.7.8", + "context_source_id": "sourceID", + "context_source_type": "sourceType", + "context_traits": "traits", + "id": "userId", + "original_timestamp": "2021-09-01T00:00:00.000Z", + "product_id": "9578257311", + "rating": 3.0, + "received_at": "2021-09-01T00:00:00.000Z", + "review_body": "OK for the price. 
It works but the material feels flimsy.", + "review_id": "86ac1cd43", + "sent_at": "2021-09-01T00:00:00.000Z", + "timestamp": "2021-09-01T00:00:00.000Z", + }, + "metadata": map[string]any{ + "columns": map[string]any{ + "context_destination_id": "string", + "context_destination_type": "string", + "context_ip": "string", + "context_passed_ip": "string", + "context_request_ip": "string", + "context_source_id": "string", + "context_source_type": "string", + "context_traits": "string", + "id": "string", + "original_timestamp": "datetime", + "product_id": "string", + "rating": "int", + "received_at": "datetime", + "review_body": "string", + "review_id": "string", + "sent_at": "datetime", + "timestamp": "datetime", + "uuid_ts": "datetime", + }, + "receivedAt": "2021-09-01T00:00:00.000Z", + "table": "users", + }, + "userId": "", + }, + Metadata: ptrans.Metadata{ + EventType: "identify", + DestinationType: "POSTGRES", + ReceivedAt: "2021-09-01T00:00:00.000Z", + SourceID: "sourceID", + DestinationID: "destinationID", + SourceType: "sourceType", + MessageID: "messageId", + }, + StatusCode: http.StatusOK, + }, + }, + }, + }, + { + name: "identify (POSTGRES) user_id already exists", + eventPayload: `{"type":"identify","messageId":"messageId","anonymousId":"anonymousId","userId":"userId","sentAt":"2021-09-01T00:00:00.000Z","timestamp":"2021-09-01T00:00:00.000Z","receivedAt":"2021-09-01T00:00:00.000Z","originalTimestamp":"2021-09-01T00:00:00.000Z","channel":"web","request_ip":"5.6.7.8","traits":{"review_id":"86ac1cd43","product_id":"9578257311"},"userProperties":{"user_id":"user_id","rating":3.0,"review_body":"OK for the price. 
It works but the material feels flimsy."},"context":{"traits":{"name":"Richard Hendricks","email":"rhedricks@example.com","logins":2},"ip":"1.2.3.4"}}`, + metadata: ptrans.Metadata{ + EventType: "identify", + DestinationType: "POSTGRES", + ReceivedAt: "2021-09-01T00:00:00.000Z", + SourceID: "sourceID", + DestinationID: "destinationID", + SourceType: "sourceType", + MessageID: "messageId", + }, + destination: backendconfig.DestinationT{ + Name: "POSTGRES", + Config: map[string]any{ + "allowUsersContextTraits": true, + }, + DestinationDefinition: backendconfig.DestinationDefinitionT{ + Name: "POSTGRES", + }, + }, + expectedResponse: ptrans.Response{ + Events: []ptrans.TransformerResponse{ + { + Output: map[string]any{ + "data": map[string]any{ + "anonymous_id": "anonymousId", + "channel": "web", + "context_destination_id": "destinationID", + "context_destination_type": "POSTGRES", + "context_ip": "1.2.3.4", + "context_passed_ip": "1.2.3.4", + "context_request_ip": "5.6.7.8", + "context_source_id": "sourceID", + "context_source_type": "sourceType", + "context_traits_email": "rhedricks@example.com", + "context_traits_logins": float64(2), + "context_traits_name": "Richard Hendricks", + "email": "rhedricks@example.com", + "id": "messageId", + "logins": float64(2), + "name": "Richard Hendricks", + "original_timestamp": "2021-09-01T00:00:00.000Z", + "product_id": "9578257311", + "rating": 3.0, + "received_at": "2021-09-01T00:00:00.000Z", + "review_body": "OK for the price. 
It works but the material feels flimsy.", + "review_id": "86ac1cd43", + "sent_at": "2021-09-01T00:00:00.000Z", + "timestamp": "2021-09-01T00:00:00.000Z", + "user_id": "userId", + }, + "metadata": map[string]any{ + "columns": map[string]any{ + "anonymous_id": "string", + "channel": "string", + "context_destination_id": "string", + "context_destination_type": "string", + "context_ip": "string", + "context_passed_ip": "string", + "context_request_ip": "string", + "context_source_id": "string", + "context_source_type": "string", + "context_traits_email": "string", + "context_traits_logins": "int", + "context_traits_name": "string", + "email": "string", + "id": "string", + "logins": "int", + "name": "string", + "original_timestamp": "datetime", + "product_id": "string", + "rating": "int", + "received_at": "datetime", + "review_body": "string", + "review_id": "string", + "sent_at": "datetime", + "timestamp": "datetime", + "user_id": "string", + "uuid_ts": "datetime", + }, + "receivedAt": "2021-09-01T00:00:00.000Z", + "table": "identifies", + }, + "userId": "", + }, + Metadata: ptrans.Metadata{ + EventType: "identify", + DestinationType: "POSTGRES", + ReceivedAt: "2021-09-01T00:00:00.000Z", + SourceID: "sourceID", + DestinationID: "destinationID", + SourceType: "sourceType", + MessageID: "messageId", + }, + StatusCode: http.StatusOK, + }, + { + Output: map[string]any{ + "data": map[string]any{ + "context_destination_id": "destinationID", + "context_destination_type": "POSTGRES", + "context_ip": "1.2.3.4", + "context_passed_ip": "1.2.3.4", + "context_request_ip": "5.6.7.8", + "context_source_id": "sourceID", + "context_source_type": "sourceType", + "context_traits_email": "rhedricks@example.com", + "context_traits_logins": float64(2), + "context_traits_name": "Richard Hendricks", + "email": "rhedricks@example.com", + "id": "userId", + "logins": float64(2), + "name": "Richard Hendricks", + "original_timestamp": "2021-09-01T00:00:00.000Z", + "product_id": "9578257311", + 
"rating": 3.0, + "received_at": "2021-09-01T00:00:00.000Z", + "review_body": "OK for the price. It works but the material feels flimsy.", + "review_id": "86ac1cd43", + "sent_at": "2021-09-01T00:00:00.000Z", + "timestamp": "2021-09-01T00:00:00.000Z", + }, + "metadata": map[string]any{ + "columns": map[string]any{ + "context_destination_id": "string", + "context_destination_type": "string", + "context_ip": "string", + "context_passed_ip": "string", + "context_request_ip": "string", + "context_source_id": "string", + "context_source_type": "string", + "context_traits_email": "string", + "context_traits_logins": "int", + "context_traits_name": "string", + "email": "string", + "id": "string", + "logins": "int", + "name": "string", + "original_timestamp": "datetime", + "product_id": "string", + "rating": "int", + "received_at": "datetime", + "review_body": "string", + "review_id": "string", + "sent_at": "datetime", + "timestamp": "datetime", + "uuid_ts": "datetime", + }, + "receivedAt": "2021-09-01T00:00:00.000Z", + "table": "users", + }, + "userId": "", + }, + Metadata: ptrans.Metadata{ + EventType: "identify", + DestinationType: "POSTGRES", + ReceivedAt: "2021-09-01T00:00:00.000Z", + SourceID: "sourceID", + DestinationID: "destinationID", + SourceType: "sourceType", + MessageID: "messageId", + }, + StatusCode: http.StatusOK, + }, + }, + }, + }, + { + name: "identify (POSTGRES) store rudder event", + eventPayload: `{"type":"identify","messageId":"messageId","anonymousId":"anonymousId","userId":"userId","sentAt":"2021-09-01T00:00:00.000Z","timestamp":"2021-09-01T00:00:00.000Z","receivedAt":"2021-09-01T00:00:00.000Z","originalTimestamp":"2021-09-01T00:00:00.000Z","channel":"web","request_ip":"5.6.7.8","traits":{"review_id":"86ac1cd43","product_id":"9578257311"},"userProperties":{"rating":3.0,"review_body":"OK for the price. 
It works but the material feels flimsy."},"context":{"traits":{"name":"Richard Hendricks","email":"rhedricks@example.com","logins":2},"ip":"1.2.3.4"}}`, + metadata: ptrans.Metadata{ + EventType: "identify", + DestinationType: "POSTGRES", + ReceivedAt: "2021-09-01T00:00:00.000Z", + SourceID: "sourceID", + DestinationID: "destinationID", + SourceType: "sourceType", + MessageID: "messageId", + }, + destination: backendconfig.DestinationT{ + Name: "POSTGRES", + Config: map[string]any{ + "storeFullEvent": true, + "allowUsersContextTraits": true, + }, + DestinationDefinition: backendconfig.DestinationDefinitionT{ + Name: "POSTGRES", + }, + }, + expectedResponse: ptrans.Response{ + Events: []ptrans.TransformerResponse{ + { + Output: map[string]any{ + "data": map[string]any{ + "anonymous_id": "anonymousId", + "channel": "web", + "context_destination_id": "destinationID", + "context_destination_type": "POSTGRES", + "context_ip": "1.2.3.4", + "context_passed_ip": "1.2.3.4", + "context_request_ip": "5.6.7.8", + "context_source_id": "sourceID", + "context_source_type": "sourceType", + "context_traits_email": "rhedricks@example.com", + "context_traits_logins": float64(2), + "context_traits_name": "Richard Hendricks", + "email": "rhedricks@example.com", + "id": "messageId", + "logins": float64(2), + "name": "Richard Hendricks", + "original_timestamp": "2021-09-01T00:00:00.000Z", + "product_id": "9578257311", + "rating": 3.0, + "received_at": "2021-09-01T00:00:00.000Z", + "review_body": "OK for the price. 
It works but the material feels flimsy.", + "review_id": "86ac1cd43", + "sent_at": "2021-09-01T00:00:00.000Z", + "timestamp": "2021-09-01T00:00:00.000Z", + "user_id": "userId", + "rudder_event": "{\"type\":\"identify\",\"anonymousId\":\"anonymousId\",\"channel\":\"web\",\"context\":{\"destinationId\":\"destinationID\",\"destinationType\":\"POSTGRES\",\"ip\":\"1.2.3.4\",\"sourceId\":\"sourceID\",\"sourceType\":\"sourceType\",\"traits\":{\"email\":\"rhedricks@example.com\",\"logins\":2,\"name\":\"Richard Hendricks\"}},\"messageId\":\"messageId\",\"originalTimestamp\":\"2021-09-01T00:00:00.000Z\",\"receivedAt\":\"2021-09-01T00:00:00.000Z\",\"request_ip\":\"5.6.7.8\",\"sentAt\":\"2021-09-01T00:00:00.000Z\",\"timestamp\":\"2021-09-01T00:00:00.000Z\",\"traits\":{\"product_id\":\"9578257311\",\"review_id\":\"86ac1cd43\"},\"userId\":\"userId\",\"userProperties\":{\"rating\":3,\"review_body\":\"OK for the price. It works but the material feels flimsy.\"}}", + }, + "metadata": map[string]any{ + "columns": map[string]any{ + "anonymous_id": "string", + "channel": "string", + "context_destination_id": "string", + "context_destination_type": "string", + "context_ip": "string", + "context_passed_ip": "string", + "context_request_ip": "string", + "context_source_id": "string", + "context_source_type": "string", + "context_traits_email": "string", + "context_traits_logins": "int", + "context_traits_name": "string", + "email": "string", + "id": "string", + "logins": "int", + "name": "string", + "original_timestamp": "datetime", + "product_id": "string", + "rating": "int", + "received_at": "datetime", + "review_body": "string", + "review_id": "string", + "sent_at": "datetime", + "timestamp": "datetime", + "user_id": "string", + "uuid_ts": "datetime", + "rudder_event": "json", + }, + "receivedAt": "2021-09-01T00:00:00.000Z", + "table": "identifies", + }, + "userId": "", + }, + Metadata: ptrans.Metadata{ + EventType: "identify", + DestinationType: "POSTGRES", + ReceivedAt: 
"2021-09-01T00:00:00.000Z", + SourceID: "sourceID", + DestinationID: "destinationID", + SourceType: "sourceType", + MessageID: "messageId", + }, + StatusCode: http.StatusOK, + }, + { + Output: map[string]any{ + "data": map[string]any{ + "context_destination_id": "destinationID", + "context_destination_type": "POSTGRES", + "context_ip": "1.2.3.4", + "context_passed_ip": "1.2.3.4", + "context_request_ip": "5.6.7.8", + "context_source_id": "sourceID", + "context_source_type": "sourceType", + "context_traits_email": "rhedricks@example.com", + "context_traits_logins": float64(2), + "context_traits_name": "Richard Hendricks", + "email": "rhedricks@example.com", + "id": "userId", + "logins": float64(2), + "name": "Richard Hendricks", + "original_timestamp": "2021-09-01T00:00:00.000Z", + "product_id": "9578257311", + "rating": 3.0, + "received_at": "2021-09-01T00:00:00.000Z", + "review_body": "OK for the price. It works but the material feels flimsy.", + "review_id": "86ac1cd43", + "sent_at": "2021-09-01T00:00:00.000Z", + "timestamp": "2021-09-01T00:00:00.000Z", + }, + "metadata": map[string]any{ + "columns": map[string]any{ + "context_destination_id": "string", + "context_destination_type": "string", + "context_ip": "string", + "context_passed_ip": "string", + "context_request_ip": "string", + "context_source_id": "string", + "context_source_type": "string", + "context_traits_email": "string", + "context_traits_logins": "int", + "context_traits_name": "string", + "email": "string", + "id": "string", + "logins": "int", + "name": "string", + "original_timestamp": "datetime", + "product_id": "string", + "rating": "int", + "received_at": "datetime", + "review_body": "string", + "review_id": "string", + "sent_at": "datetime", + "timestamp": "datetime", + "uuid_ts": "datetime", + }, + "receivedAt": "2021-09-01T00:00:00.000Z", + "table": "users", + }, + "userId": "", + }, + Metadata: ptrans.Metadata{ + EventType: "identify", + DestinationType: "POSTGRES", + ReceivedAt: 
"2021-09-01T00:00:00.000Z", + SourceID: "sourceID", + DestinationID: "destinationID", + SourceType: "sourceType", + MessageID: "messageId", + }, + StatusCode: http.StatusOK, + }, + }, + }, + }, + { + name: "identify (POSTGRES) partial rules", + eventPayload: `{"type":"identify","messageId":"messageId","userId":"userId","sentAt":"2021-09-01T00:00:00.000Z","timestamp":"2021-09-01T00:00:00.000Z","receivedAt":"2021-09-01T00:00:00.000Z","originalTimestamp":"2021-09-01T00:00:00.000Z","traits":{"review_id":"86ac1cd43","product_id":"9578257311"},"userProperties":{"rating":3.0,"review_body":"OK for the price. It works but the material feels flimsy."},"context":{"traits":{"name":"Richard Hendricks","email":"rhedricks@example.com","logins":2},"ip":"1.2.3.4"}}`, + metadata: ptrans.Metadata{ + EventType: "identify", + DestinationType: "POSTGRES", + ReceivedAt: "2021-09-01T00:00:00.000Z", + SourceID: "sourceID", + DestinationID: "destinationID", + SourceType: "sourceType", + MessageID: "messageId", + }, + destination: backendconfig.DestinationT{ + Name: "POSTGRES", + Config: map[string]any{ + "allowUsersContextTraits": true, + }, + DestinationDefinition: backendconfig.DestinationDefinitionT{ + Name: "POSTGRES", + }, + }, + expectedResponse: ptrans.Response{ + Events: []ptrans.TransformerResponse{ + { + Output: map[string]any{ + "data": map[string]any{ + "context_destination_id": "destinationID", + "context_destination_type": "POSTGRES", + "context_ip": "1.2.3.4", + "context_passed_ip": "1.2.3.4", + "context_source_id": "sourceID", + "context_source_type": "sourceType", + "context_traits_email": "rhedricks@example.com", + "context_traits_logins": float64(2), + "context_traits_name": "Richard Hendricks", + "email": "rhedricks@example.com", + "id": "messageId", + "logins": float64(2), + "name": "Richard Hendricks", + "original_timestamp": "2021-09-01T00:00:00.000Z", + "product_id": "9578257311", + "rating": 3.0, + "received_at": "2021-09-01T00:00:00.000Z", + "review_body": "OK for 
the price. It works but the material feels flimsy.", + "review_id": "86ac1cd43", + "sent_at": "2021-09-01T00:00:00.000Z", + "timestamp": "2021-09-01T00:00:00.000Z", + "user_id": "userId", + }, + "metadata": map[string]any{ + "columns": map[string]any{ + "context_destination_id": "string", + "context_destination_type": "string", + "context_ip": "string", + "context_passed_ip": "string", + "context_source_id": "string", + "context_source_type": "string", + "context_traits_email": "string", + "context_traits_logins": "int", + "context_traits_name": "string", + "email": "string", + "id": "string", + "logins": "int", + "name": "string", + "original_timestamp": "datetime", + "product_id": "string", + "rating": "int", + "received_at": "datetime", + "review_body": "string", + "review_id": "string", + "sent_at": "datetime", + "timestamp": "datetime", + "user_id": "string", + "uuid_ts": "datetime", + }, + "receivedAt": "2021-09-01T00:00:00.000Z", + "table": "identifies", + }, + "userId": "", + }, + Metadata: ptrans.Metadata{ + EventType: "identify", + DestinationType: "POSTGRES", + ReceivedAt: "2021-09-01T00:00:00.000Z", + SourceID: "sourceID", + DestinationID: "destinationID", + SourceType: "sourceType", + MessageID: "messageId", + }, + StatusCode: http.StatusOK, + }, + { + Output: map[string]any{ + "data": map[string]any{ + "context_destination_id": "destinationID", + "context_destination_type": "POSTGRES", + "context_ip": "1.2.3.4", + "context_passed_ip": "1.2.3.4", + "context_source_id": "sourceID", + "context_source_type": "sourceType", + "context_traits_email": "rhedricks@example.com", + "context_traits_logins": float64(2), + "context_traits_name": "Richard Hendricks", + "email": "rhedricks@example.com", + "id": "userId", + "logins": float64(2), + "name": "Richard Hendricks", + "original_timestamp": "2021-09-01T00:00:00.000Z", + "product_id": "9578257311", + "rating": 3.0, + "received_at": "2021-09-01T00:00:00.000Z", + "review_body": "OK for the price. 
It works but the material feels flimsy.", + "review_id": "86ac1cd43", + "sent_at": "2021-09-01T00:00:00.000Z", + "timestamp": "2021-09-01T00:00:00.000Z", + }, + "metadata": map[string]any{ + "columns": map[string]any{ + "context_destination_id": "string", + "context_destination_type": "string", + "context_ip": "string", + "context_passed_ip": "string", + "context_source_id": "string", + "context_source_type": "string", + "context_traits_email": "string", + "context_traits_logins": "int", + "context_traits_name": "string", + "email": "string", + "id": "string", + "logins": "int", + "name": "string", + "original_timestamp": "datetime", + "product_id": "string", + "rating": "int", + "received_at": "datetime", + "review_body": "string", + "review_id": "string", + "sent_at": "datetime", + "timestamp": "datetime", + "uuid_ts": "datetime", + }, + "receivedAt": "2021-09-01T00:00:00.000Z", + "table": "users", + }, + "userId": "", + }, + Metadata: ptrans.Metadata{ + EventType: "identify", + DestinationType: "POSTGRES", + ReceivedAt: "2021-09-01T00:00:00.000Z", + SourceID: "sourceID", + DestinationID: "destinationID", + SourceType: "sourceType", + MessageID: "messageId", + }, + StatusCode: http.StatusOK, + }, + }, + }, + }, + { + name: "identify (POSTGRES) no userID", + eventPayload: `{"type":"identify","messageId":"messageId","anonymousId":"anonymousId","sentAt":"2021-09-01T00:00:00.000Z","timestamp":"2021-09-01T00:00:00.000Z","receivedAt":"2021-09-01T00:00:00.000Z","originalTimestamp":"2021-09-01T00:00:00.000Z","channel":"web","request_ip":"5.6.7.8","traits":{"review_id":"86ac1cd43","product_id":"9578257311"},"userProperties":{"rating":3.0,"review_body":"OK for the price. 
It works but the material feels flimsy."},"context":{"traits":{"name":"Richard Hendricks","email":"rhedricks@example.com","logins":2},"ip":"1.2.3.4"}}`, + metadata: ptrans.Metadata{ + EventType: "identify", + DestinationType: "POSTGRES", + ReceivedAt: "2021-09-01T00:00:00.000Z", + SourceID: "sourceID", + DestinationID: "destinationID", + SourceType: "sourceType", + MessageID: "messageId", + }, + destination: backendconfig.DestinationT{ + Name: "POSTGRES", + Config: map[string]any{ + "allowUsersContextTraits": true, + }, + DestinationDefinition: backendconfig.DestinationDefinitionT{ + Name: "POSTGRES", + }, + }, + expectedResponse: ptrans.Response{ + Events: []ptrans.TransformerResponse{ + { + Output: map[string]any{ + "data": map[string]any{ + "anonymous_id": "anonymousId", + "channel": "web", + "context_destination_id": "destinationID", + "context_destination_type": "POSTGRES", + "context_ip": "1.2.3.4", + "context_passed_ip": "1.2.3.4", + "context_request_ip": "5.6.7.8", + "context_source_id": "sourceID", + "context_source_type": "sourceType", + "context_traits_email": "rhedricks@example.com", + "context_traits_logins": float64(2), + "context_traits_name": "Richard Hendricks", + "email": "rhedricks@example.com", + "id": "messageId", + "logins": float64(2), + "name": "Richard Hendricks", + "original_timestamp": "2021-09-01T00:00:00.000Z", + "product_id": "9578257311", + "rating": 3.0, + "received_at": "2021-09-01T00:00:00.000Z", + "review_body": "OK for the price. 
It works but the material feels flimsy.", + "review_id": "86ac1cd43", + "sent_at": "2021-09-01T00:00:00.000Z", + "timestamp": "2021-09-01T00:00:00.000Z", + }, + "metadata": map[string]any{ + "columns": map[string]any{ + "anonymous_id": "string", + "channel": "string", + "context_destination_id": "string", + "context_destination_type": "string", + "context_ip": "string", + "context_passed_ip": "string", + "context_request_ip": "string", + "context_source_id": "string", + "context_source_type": "string", + "context_traits_email": "string", + "context_traits_logins": "int", + "context_traits_name": "string", + "email": "string", + "id": "string", + "logins": "int", + "name": "string", + "original_timestamp": "datetime", + "product_id": "string", + "rating": "int", + "received_at": "datetime", + "review_body": "string", + "review_id": "string", + "sent_at": "datetime", + "timestamp": "datetime", + "uuid_ts": "datetime", + }, + "receivedAt": "2021-09-01T00:00:00.000Z", + "table": "identifies", + }, + "userId": "", + }, + Metadata: ptrans.Metadata{ + EventType: "identify", + DestinationType: "POSTGRES", + ReceivedAt: "2021-09-01T00:00:00.000Z", + SourceID: "sourceID", + DestinationID: "destinationID", + SourceType: "sourceType", + MessageID: "messageId", + }, + StatusCode: http.StatusOK, + }, + }, + }, + }, + { + name: "identify (POSTGRES) skipUsersTable (dstOpts)", + eventPayload: `{"type":"identify","messageId":"messageId","anonymousId":"anonymousId","userId":"userId","sentAt":"2021-09-01T00:00:00.000Z","timestamp":"2021-09-01T00:00:00.000Z","receivedAt":"2021-09-01T00:00:00.000Z","originalTimestamp":"2021-09-01T00:00:00.000Z","channel":"web","request_ip":"5.6.7.8","traits":{"review_id":"86ac1cd43","product_id":"9578257311"},"userProperties":{"rating":3.0,"review_body":"OK for the price. 
It works but the material feels flimsy."},"context":{"traits":{"name":"Richard Hendricks","email":"rhedricks@example.com","logins":2},"ip":"1.2.3.4"}}`, + metadata: ptrans.Metadata{ + EventType: "identify", + DestinationType: "POSTGRES", + ReceivedAt: "2021-09-01T00:00:00.000Z", + SourceID: "sourceID", + DestinationID: "destinationID", + SourceType: "sourceType", + MessageID: "messageId", + }, + destination: backendconfig.DestinationT{ + Name: "POSTGRES", + Config: map[string]any{ + "allowUsersContextTraits": true, + "skipUsersTable": true, + }, + DestinationDefinition: backendconfig.DestinationDefinitionT{ + Name: "POSTGRES", + }, + }, + expectedResponse: ptrans.Response{ + Events: []ptrans.TransformerResponse{ + { + Output: map[string]any{ + "data": map[string]any{ + "anonymous_id": "anonymousId", + "channel": "web", + "context_destination_id": "destinationID", + "context_destination_type": "POSTGRES", + "context_ip": "1.2.3.4", + "context_passed_ip": "1.2.3.4", + "context_request_ip": "5.6.7.8", + "context_source_id": "sourceID", + "context_source_type": "sourceType", + "context_traits_email": "rhedricks@example.com", + "context_traits_logins": float64(2), + "context_traits_name": "Richard Hendricks", + "email": "rhedricks@example.com", + "id": "messageId", + "logins": float64(2), + "name": "Richard Hendricks", + "original_timestamp": "2021-09-01T00:00:00.000Z", + "product_id": "9578257311", + "rating": 3.0, + "received_at": "2021-09-01T00:00:00.000Z", + "review_body": "OK for the price. 
It works but the material feels flimsy.", + "review_id": "86ac1cd43", + "sent_at": "2021-09-01T00:00:00.000Z", + "timestamp": "2021-09-01T00:00:00.000Z", + "user_id": "userId", + }, + "metadata": map[string]any{ + "columns": map[string]any{ + "anonymous_id": "string", + "channel": "string", + "context_destination_id": "string", + "context_destination_type": "string", + "context_ip": "string", + "context_passed_ip": "string", + "context_request_ip": "string", + "context_source_id": "string", + "context_source_type": "string", + "context_traits_email": "string", + "context_traits_logins": "int", + "context_traits_name": "string", + "email": "string", + "id": "string", + "logins": "int", + "name": "string", + "original_timestamp": "datetime", + "product_id": "string", + "rating": "int", + "received_at": "datetime", + "review_body": "string", + "review_id": "string", + "sent_at": "datetime", + "timestamp": "datetime", + "user_id": "string", + "uuid_ts": "datetime", + }, + "receivedAt": "2021-09-01T00:00:00.000Z", + "table": "identifies", + }, + "userId": "", + }, + Metadata: ptrans.Metadata{ + EventType: "identify", + DestinationType: "POSTGRES", + ReceivedAt: "2021-09-01T00:00:00.000Z", + SourceID: "sourceID", + DestinationID: "destinationID", + SourceType: "sourceType", + MessageID: "messageId", + }, + StatusCode: http.StatusOK, + }, + }, + }, + }, + { + name: "identify (POSTGRES) skipUsersTable (itrOpts)", + eventPayload: `{"type":"identify","messageId":"messageId","anonymousId":"anonymousId","userId":"userId","sentAt":"2021-09-01T00:00:00.000Z","timestamp":"2021-09-01T00:00:00.000Z","receivedAt":"2021-09-01T00:00:00.000Z","originalTimestamp":"2021-09-01T00:00:00.000Z","channel":"web","request_ip":"5.6.7.8","traits":{"review_id":"86ac1cd43","product_id":"9578257311"},"userProperties":{"rating":3.0,"review_body":"OK for the price. 
It works but the material feels flimsy."},"context":{"traits":{"name":"Richard Hendricks","email":"rhedricks@example.com","logins":2},"ip":"1.2.3.4"},"integrations":{"POSTGRES":{"options":{"skipUsersTable":true}}}}`, + metadata: ptrans.Metadata{ + EventType: "identify", + DestinationType: "POSTGRES", + ReceivedAt: "2021-09-01T00:00:00.000Z", + SourceID: "sourceID", + DestinationID: "destinationID", + SourceType: "sourceType", + MessageID: "messageId", + }, + destination: backendconfig.DestinationT{ + Name: "POSTGRES", + Config: map[string]any{ + "allowUsersContextTraits": true, + }, + DestinationDefinition: backendconfig.DestinationDefinitionT{ + Name: "POSTGRES", + }, + }, + expectedResponse: ptrans.Response{ + Events: []ptrans.TransformerResponse{ + { + Output: map[string]any{ + "data": map[string]any{ + "anonymous_id": "anonymousId", + "channel": "web", + "context_destination_id": "destinationID", + "context_destination_type": "POSTGRES", + "context_ip": "1.2.3.4", + "context_passed_ip": "1.2.3.4", + "context_request_ip": "5.6.7.8", + "context_source_id": "sourceID", + "context_source_type": "sourceType", + "context_traits_email": "rhedricks@example.com", + "context_traits_logins": float64(2), + "context_traits_name": "Richard Hendricks", + "email": "rhedricks@example.com", + "id": "messageId", + "logins": float64(2), + "name": "Richard Hendricks", + "original_timestamp": "2021-09-01T00:00:00.000Z", + "product_id": "9578257311", + "rating": 3.0, + "received_at": "2021-09-01T00:00:00.000Z", + "review_body": "OK for the price. 
It works but the material feels flimsy.", + "review_id": "86ac1cd43", + "sent_at": "2021-09-01T00:00:00.000Z", + "timestamp": "2021-09-01T00:00:00.000Z", + "user_id": "userId", + }, + "metadata": map[string]any{ + "columns": map[string]any{ + "anonymous_id": "string", + "channel": "string", + "context_destination_id": "string", + "context_destination_type": "string", + "context_ip": "string", + "context_passed_ip": "string", + "context_request_ip": "string", + "context_source_id": "string", + "context_source_type": "string", + "context_traits_email": "string", + "context_traits_logins": "int", + "context_traits_name": "string", + "email": "string", + "id": "string", + "logins": "int", + "name": "string", + "original_timestamp": "datetime", + "product_id": "string", + "rating": "int", + "received_at": "datetime", + "review_body": "string", + "review_id": "string", + "sent_at": "datetime", + "timestamp": "datetime", + "user_id": "string", + "uuid_ts": "datetime", + }, + "receivedAt": "2021-09-01T00:00:00.000Z", + "table": "identifies", + }, + "userId": "", + }, + Metadata: ptrans.Metadata{ + EventType: "identify", + DestinationType: "POSTGRES", + ReceivedAt: "2021-09-01T00:00:00.000Z", + SourceID: "sourceID", + DestinationID: "destinationID", + SourceType: "sourceType", + MessageID: "messageId", + }, + StatusCode: http.StatusOK, + }, + }, + }, + }, + { + name: "identify (BQ) merge event", + configOverride: map[string]any{ + "Warehouse.enableIDResolution": true, + }, + eventPayload: `{"type":"identify","messageId":"messageId","anonymousId":"anonymousId","userId":"userId","sentAt":"2021-09-01T00:00:00.000Z","timestamp":"2021-09-01T00:00:00.000Z","receivedAt":"2021-09-01T00:00:00.000Z","originalTimestamp":"2021-09-01T00:00:00.000Z","channel":"web","request_ip":"5.6.7.8","traits":{"review_id":"86ac1cd43","product_id":"9578257311"},"userProperties":{"rating":3.0,"review_body":"OK for the price. 
It works but the material feels flimsy."},"context":{"traits":{"name":"Richard Hendricks","email":"rhedricks@example.com","logins":2},"ip":"1.2.3.4"},"integrations":{"POSTGRES":{"options":{"skipUsersTable":true}}}}`, + metadata: ptrans.Metadata{ + EventType: "identify", + DestinationType: "BQ", + ReceivedAt: "2021-09-01T00:00:00.000Z", + SourceID: "sourceID", + DestinationID: "destinationID", + SourceType: "sourceType", + MessageID: "messageId", + }, + destination: backendconfig.DestinationT{ + Name: "BQ", + Config: map[string]any{ + "allowUsersContextTraits": true, + }, + DestinationDefinition: backendconfig.DestinationDefinitionT{ + Name: "BQ", + }, + }, + expectedResponse: ptrans.Response{ + Events: []ptrans.TransformerResponse{ + { + Output: map[string]any{ + "data": map[string]any{ + "anonymous_id": "anonymousId", + "channel": "web", + "context_destination_id": "destinationID", + "context_destination_type": "BQ", + "context_ip": "1.2.3.4", + "context_passed_ip": "1.2.3.4", + "context_request_ip": "5.6.7.8", + "context_source_id": "sourceID", + "context_source_type": "sourceType", + "context_traits_email": "rhedricks@example.com", + "context_traits_logins": float64(2), + "context_traits_name": "Richard Hendricks", + "email": "rhedricks@example.com", + "id": "messageId", + "logins": float64(2), + "name": "Richard Hendricks", + "original_timestamp": "2021-09-01T00:00:00.000Z", + "product_id": "9578257311", + "rating": 3.0, + "received_at": "2021-09-01T00:00:00.000Z", + "review_body": "OK for the price. 
It works but the material feels flimsy.", + "review_id": "86ac1cd43", + "sent_at": "2021-09-01T00:00:00.000Z", + "timestamp": "2021-09-01T00:00:00.000Z", + "user_id": "userId", + }, + "metadata": map[string]any{ + "columns": map[string]any{ + "anonymous_id": "string", + "channel": "string", + "context_destination_id": "string", + "context_destination_type": "string", + "context_ip": "string", + "context_passed_ip": "string", + "context_request_ip": "string", + "context_source_id": "string", + "context_source_type": "string", + "context_traits_email": "string", + "context_traits_logins": "int", + "context_traits_name": "string", + "email": "string", + "id": "string", + "logins": "int", + "name": "string", + "original_timestamp": "datetime", + "product_id": "string", + "rating": "int", + "received_at": "datetime", + "review_body": "string", + "review_id": "string", + "sent_at": "datetime", + "timestamp": "datetime", + "user_id": "string", + "uuid_ts": "datetime", + "loaded_at": "datetime", + }, + "receivedAt": "2021-09-01T00:00:00.000Z", + "table": "identifies", + }, + "userId": "", + }, + Metadata: ptrans.Metadata{ + EventType: "identify", + DestinationType: "BQ", + ReceivedAt: "2021-09-01T00:00:00.000Z", + SourceID: "sourceID", + DestinationID: "destinationID", + SourceType: "sourceType", + MessageID: "messageId", + }, + StatusCode: http.StatusOK, + }, + { + Output: map[string]any{ + "data": map[string]any{ + "merge_property_1_type": "anonymous_id", + "merge_property_1_value": "anonymousId", + "merge_property_2_type": "user_id", + "merge_property_2_value": "userId", + }, + "metadata": map[string]any{ + "columns": map[string]any{ + "merge_property_1_type": "string", + "merge_property_1_value": "string", + "merge_property_2_type": "string", + "merge_property_2_value": "string", + }, + "isMergeRule": true, + "mergePropOne": "anonymousId", + "mergePropTwo": "userId", + "receivedAt": "2021-09-01T00:00:00.000Z", + "table": "rudder_identity_merge_rules", + }, + "userId": 
"", + }, + Metadata: ptrans.Metadata{ + EventType: "identify", + DestinationType: "BQ", + ReceivedAt: "2021-09-01T00:00:00.000Z", + SourceID: "sourceID", + DestinationID: "destinationID", + SourceType: "sourceType", + MessageID: "messageId", + }, + StatusCode: http.StatusOK, + }, + { + Output: map[string]any{ + "data": map[string]any{ + "context_destination_id": "destinationID", + "context_destination_type": "BQ", + "context_ip": "1.2.3.4", + "context_passed_ip": "1.2.3.4", + "context_request_ip": "5.6.7.8", + "context_source_id": "sourceID", + "context_source_type": "sourceType", + "context_traits_email": "rhedricks@example.com", + "context_traits_logins": float64(2), + "context_traits_name": "Richard Hendricks", + "email": "rhedricks@example.com", + "id": "userId", + "logins": float64(2), + "name": "Richard Hendricks", + "original_timestamp": "2021-09-01T00:00:00.000Z", + "product_id": "9578257311", + "rating": 3.0, + "received_at": "2021-09-01T00:00:00.000Z", + "review_body": "OK for the price. 
It works but the material feels flimsy.", + "review_id": "86ac1cd43", + "sent_at": "2021-09-01T00:00:00.000Z", + "timestamp": "2021-09-01T00:00:00.000Z", + }, + "metadata": map[string]any{ + "columns": map[string]any{ + "context_destination_id": "string", + "context_destination_type": "string", + "context_ip": "string", + "context_passed_ip": "string", + "context_request_ip": "string", + "context_source_id": "string", + "context_source_type": "string", + "context_traits_email": "string", + "context_traits_logins": "int", + "context_traits_name": "string", + "email": "string", + "id": "string", + "logins": "int", + "name": "string", + "original_timestamp": "datetime", + "product_id": "string", + "rating": "int", + "received_at": "datetime", + "review_body": "string", + "review_id": "string", + "sent_at": "datetime", + "timestamp": "datetime", + "uuid_ts": "datetime", + "loaded_at": "datetime", + }, + "receivedAt": "2021-09-01T00:00:00.000Z", + "table": "users", + }, + "userId": "", + }, + Metadata: ptrans.Metadata{ + EventType: "identify", + DestinationType: "BQ", + ReceivedAt: "2021-09-01T00:00:00.000Z", + SourceID: "sourceID", + DestinationID: "destinationID", + SourceType: "sourceType", + MessageID: "messageId", + }, + StatusCode: http.StatusOK, + }, + }, + }, + }, + } + + for _, tc := range testsCases { + t.Run(tc.name, func(t *testing.T) { + c := config.New() + c.Set("DEST_TRANSFORM_URL", transformerResource.TransformerURL) + c.Set("USER_TRANSFORM_URL", transformerResource.TransformerURL) + + for k, v := range tc.configOverride { + c.Set(k, v) + } + + eventsInfos := []eventsInfo{ + { + payload: []byte(tc.eventPayload), + metadata: tc.metadata, + destination: tc.destination, + }, + } + destinationTransformer := ptrans.NewTransformer(c, logger.NOP, stats.Default) + warehouseTransformer := New(c, logger.NOP, stats.NOP) + + testEvents(t, eventsInfos, destinationTransformer, warehouseTransformer, tc.expectedResponse) + }) + } +} diff --git 
a/warehouse/transformer/internal/datatype/datatype.go b/warehouse/transformer/internal/datatype/datatype.go new file mode 100644 index 00000000000..09eb67697c5 --- /dev/null +++ b/warehouse/transformer/internal/datatype/datatype.go @@ -0,0 +1,12 @@ +package datatype + +const ( + TypeInt = "int" + TypeFloat = "float" + TypeString = "string" + TypeBoolean = "boolean" + TypeDateTime = "datetime" + TypeJSON = "json" + TypeText = "text" + TypeArray = "array" +) diff --git a/warehouse/transformer/internal/rules/alias.go b/warehouse/transformer/internal/rules/alias.go new file mode 100644 index 00000000000..61d82ff6035 --- /dev/null +++ b/warehouse/transformer/internal/rules/alias.go @@ -0,0 +1,5 @@ +package rules + +var AliasRules = map[string]string{ + "previous_id": "previousId", +} diff --git a/warehouse/transformer/internal/rules/default.go b/warehouse/transformer/internal/rules/default.go new file mode 100644 index 00000000000..d9c811401bd --- /dev/null +++ b/warehouse/transformer/internal/rules/default.go @@ -0,0 +1,25 @@ +package rules + +import ( + ptrans "github.com/rudderlabs/rudder-server/processor/transformer" +) + +var ( + DefaultRules = map[string]string{ + "id": "messageId", + "anonymous_id": "anonymousId", + "user_id": "userId", + "sent_at": "sentAt", + "timestamp": "timestamp", + "received_at": "receivedAt", + "original_timestamp": "originalTimestamp", + "channel": "channel", + "context_request_ip": "request_ip", + "context_passed_ip": "context.ip", + } + DefaultFunctionalRules = map[string]FunctionalRules{ + "context_ip": func(event ptrans.TransformerEvent) (any, error) { + return firstValidValue(event.Message, []string{"context.ip", "request_ip"}), nil + }, + } +) diff --git a/warehouse/transformer/internal/rules/extract.go b/warehouse/transformer/internal/rules/extract.go new file mode 100644 index 00000000000..99059d80da0 --- /dev/null +++ b/warehouse/transformer/internal/rules/extract.go @@ -0,0 +1,33 @@ +package rules + +import ( + "fmt" + + ptrans 
"github.com/rudderlabs/rudder-server/processor/transformer" + "github.com/rudderlabs/rudder-server/warehouse/transformer/internal/utils" +) + +var ( + ExtractRules = map[string]string{ + "received_at": "receivedAt", + "event": "event", + } + ExtractFunctionalRules = map[string]FunctionalRules{ + "id": func(event ptrans.TransformerEvent) (any, error) { + return extractRecordID(event.Metadata) + }, + } + + errRecordIDEmpty = fmt.Errorf("recordId cannot be empty for cloud sources events") + errRecordIDObject = fmt.Errorf("recordId cannot be an object for cloud sources events") +) + +func extractRecordID(metadata ptrans.Metadata) (any, error) { + if metadata.RecordID == nil || metadata.RecordID == "" { + return nil, errRecordIDEmpty + } + if utils.IsObject(metadata.RecordID) { + return nil, errRecordIDObject + } + return metadata.RecordID, nil +} diff --git a/warehouse/transformer/internal/rules/group.go b/warehouse/transformer/internal/rules/group.go new file mode 100644 index 00000000000..5b03ed97b32 --- /dev/null +++ b/warehouse/transformer/internal/rules/group.go @@ -0,0 +1,5 @@ +package rules + +var GroupRules = map[string]string{ + "group_id": "groupId", +} diff --git a/warehouse/transformer/internal/rules/identify.go b/warehouse/transformer/internal/rules/identify.go new file mode 100644 index 00000000000..510cb58a941 --- /dev/null +++ b/warehouse/transformer/internal/rules/identify.go @@ -0,0 +1,24 @@ +package rules + +import ( + ptrans "github.com/rudderlabs/rudder-server/processor/transformer" +) + +var ( + IdentifyDataLakeRules = map[string]string{ + "context_request_ip": "request_ip", + "context_passed_ip": "context.ip", + } + IdentifyNonDataLakeRules = map[string]string{ + "context_request_ip": "request_ip", + "context_passed_ip": "context.ip", + "sent_at": "sentAt", + "timestamp": "timestamp", + "original_timestamp": "originalTimestamp", + } + IdentifyFunctionalRules = map[string]FunctionalRules{ + "context_ip": func(event ptrans.TransformerEvent) (any, 
error) { + return firstValidValue(event.Message, []string{"context.ip", "request_ip"}), nil + }, + } +) diff --git a/warehouse/transformer/internal/rules/page.go b/warehouse/transformer/internal/rules/page.go new file mode 100644 index 00000000000..bad513cce88 --- /dev/null +++ b/warehouse/transformer/internal/rules/page.go @@ -0,0 +1,11 @@ +package rules + +import ( + ptrans "github.com/rudderlabs/rudder-server/processor/transformer" +) + +var PageFunctionalRules = map[string]FunctionalRules{ + "name": func(event ptrans.TransformerEvent) (any, error) { + return firstValidValue(event.Message, []string{"name", "properties.name"}), nil + }, +} diff --git a/warehouse/transformer/internal/rules/rules.go b/warehouse/transformer/internal/rules/rules.go new file mode 100644 index 00000000000..db1b7aabf5b --- /dev/null +++ b/warehouse/transformer/internal/rules/rules.go @@ -0,0 +1,48 @@ +package rules + +import ( + "strings" + + "github.com/samber/lo" + + ptrans "github.com/rudderlabs/rudder-server/processor/transformer" + "github.com/rudderlabs/rudder-server/utils/misc" +) + +type FunctionalRules func(event ptrans.TransformerEvent) (any, error) + +var rudderReservedColumns = map[string]map[string]struct{}{ + "track": createReservedColumns(lo.Keys(DefaultRules, TrackRules), lo.Keys(DefaultFunctionalRules, TrackTableFunctionalRules, TrackEventTableFunctionalRules)), + "page": createReservedColumns(lo.Keys(DefaultRules), lo.Keys(DefaultFunctionalRules, PageFunctionalRules)), + "screen": createReservedColumns(lo.Keys(DefaultRules), lo.Keys(DefaultFunctionalRules, ScreenFunctionalRules)), + "identify": createReservedColumns(lo.Keys(DefaultRules, IdentifyDataLakeRules, IdentifyNonDataLakeRules), lo.Keys(DefaultFunctionalRules, IdentifyFunctionalRules)), + "group": createReservedColumns(lo.Keys(DefaultRules, GroupRules), lo.Keys(DefaultFunctionalRules)), + "alias": createReservedColumns(lo.Keys(DefaultRules, AliasRules), lo.Keys(DefaultFunctionalRules)), + "extract": 
createReservedColumns(lo.Keys(ExtractRules), lo.Keys(ExtractFunctionalRules)), +} + +func firstValidValue(message map[string]any, props []string) any { + for _, prop := range props { + propKeys := strings.Split(prop, ".") + if val := misc.MapLookup(message, propKeys...); val != nil { + return val + } + } + return nil +} + +func createReservedColumns(rules, functionRules []string) map[string]struct{} { + return lo.SliceToMap(append(lo.Uniq(rules), lo.Uniq(functionRules)...), func(item string) (string, struct{}) { + return item, struct{}{} + }) +} + +func IsRudderReservedColumn(eventType, columnName string) bool { + if _, ok := rudderReservedColumns[eventType]; !ok { + return false + } + if _, ok := rudderReservedColumns[eventType][columnName]; ok { + return true + } + return false +} diff --git a/warehouse/transformer/internal/rules/rules_test.go b/warehouse/transformer/internal/rules/rules_test.go new file mode 100644 index 00000000000..0442a988d00 --- /dev/null +++ b/warehouse/transformer/internal/rules/rules_test.go @@ -0,0 +1,124 @@ +package rules + +import ( + "testing" + + "github.com/stretchr/testify/require" + + ptrans "github.com/rudderlabs/rudder-server/processor/transformer" + "github.com/rudderlabs/rudder-server/utils/types" +) + +func TestIsRudderReservedColumn(t *testing.T) { + testCases := []struct { + name string + eventType string + columnName string + expected bool + }{ + {name: "track", eventType: "track", columnName: "id", expected: true}, + {name: "page", eventType: "page", columnName: "id", expected: true}, + {name: "screen", eventType: "screen", columnName: "id", expected: true}, + {name: "identify", eventType: "identify", columnName: "id", expected: true}, + {name: "group", eventType: "group", columnName: "id", expected: true}, + {name: "alias", eventType: "alias", columnName: "id", expected: true}, + {name: "extract", eventType: "extract", columnName: "id", expected: true}, + {name: "not reserved event type", eventType: "not reserved", 
columnName: "id", expected: false}, + {name: "not reserved column name", eventType: "track", columnName: "not reserved", expected: false}, + } + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + require.Equal(t, tc.expected, IsRudderReservedColumn(tc.eventType, tc.columnName)) + }) + } +} + +func TestExtractRecordID(t *testing.T) { + testCases := []struct { + name string + metadata ptrans.Metadata + expectedRecordID any + expectedError error + }{ + {name: "recordId is nil", metadata: ptrans.Metadata{RecordID: nil}, expectedRecordID: nil, expectedError: errRecordIDEmpty}, + {name: "recordId is empty", metadata: ptrans.Metadata{RecordID: ""}, expectedRecordID: nil, expectedError: errRecordIDEmpty}, + {name: "recordId is not empty", metadata: ptrans.Metadata{RecordID: "123"}, expectedRecordID: "123", expectedError: nil}, + {name: "recordId is an object", metadata: ptrans.Metadata{RecordID: map[string]any{"key": "value"}}, expectedRecordID: nil, expectedError: errRecordIDObject}, + {name: "recordId is a string", metadata: ptrans.Metadata{RecordID: "123"}, expectedRecordID: "123", expectedError: nil}, + {name: "recordId is a number", metadata: ptrans.Metadata{RecordID: 123}, expectedRecordID: 123, expectedError: nil}, + } + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + recordID, err := extractRecordID(tc.metadata) + require.Equal(t, tc.expectedError, err) + require.Equal(t, tc.expectedRecordID, recordID) + }) + } +} + +func TestExtractCloudRecordID(t *testing.T) { + testCases := []struct { + name string + message types.SingularEventT + metadata ptrans.Metadata + fallbackValue any + expectedRecordID any + expectedError error + }{ + {name: "sources version is nil", message: types.SingularEventT{"context": map[string]any{"sources": map[string]any{"version": nil}}}, metadata: ptrans.Metadata{}, fallbackValue: "fallback", expectedRecordID: "fallback", expectedError: nil}, + {name: "sources version is empty", message: 
types.SingularEventT{"context": map[string]any{"sources": map[string]any{"version": ""}}}, metadata: ptrans.Metadata{}, fallbackValue: "fallback", expectedRecordID: "fallback", expectedError: nil}, + {name: "sources version is not empty", message: types.SingularEventT{"context": map[string]any{"sources": map[string]any{"version": "1.0"}}}, metadata: ptrans.Metadata{RecordID: "123"}, fallbackValue: "fallback", expectedRecordID: "123", expectedError: nil}, + {name: "recordId is nil", message: types.SingularEventT{"context": map[string]any{"sources": map[string]any{"version": "1.0"}}}, metadata: ptrans.Metadata{}, fallbackValue: "fallback", expectedRecordID: nil, expectedError: errRecordIDEmpty}, + {name: "recordId is empty", message: types.SingularEventT{"context": map[string]any{"sources": map[string]any{"version": "1.0"}}}, metadata: ptrans.Metadata{RecordID: ""}, fallbackValue: "fallback", expectedRecordID: nil, expectedError: errRecordIDEmpty}, + {name: "recordId is an object", message: types.SingularEventT{"context": map[string]any{"sources": map[string]any{"version": "1.0"}}}, metadata: ptrans.Metadata{RecordID: map[string]any{"key": "value"}}, fallbackValue: "fallback", expectedRecordID: nil, expectedError: errRecordIDObject}, + {name: "recordId is a string", message: types.SingularEventT{"context": map[string]any{"sources": map[string]any{"version": "1.0"}}}, metadata: ptrans.Metadata{RecordID: "123"}, fallbackValue: "fallback", expectedRecordID: "123", expectedError: nil}, + {name: "recordId is a number", message: types.SingularEventT{"context": map[string]any{"sources": map[string]any{"version": "1.0"}}}, metadata: ptrans.Metadata{RecordID: 123}, fallbackValue: "fallback", expectedRecordID: 123, expectedError: nil}, + } + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + recordID, err := extractCloudRecordID(tc.message, tc.metadata, tc.fallbackValue) + require.Equal(t, tc.expectedError, err) + require.Equal(t, tc.expectedRecordID, 
recordID) + }) + } +} + +func TestFunctionalRules(t *testing.T) { + t.Run("default", func(t *testing.T) { + testCases := []struct { + name string + functionalRule FunctionalRules + event ptrans.TransformerEvent + expected any + expectedError error + }{ + {name: "default (context.ip)", functionalRule: DefaultFunctionalRules["context_ip"], event: ptrans.TransformerEvent{Message: map[string]any{"context": map[string]any{"ip": "1.2.3.4"}}}, expected: "1.2.3.4", expectedError: nil}, + {name: "default (request_ip)", functionalRule: DefaultFunctionalRules["context_ip"], event: ptrans.TransformerEvent{Message: map[string]any{"request_ip": "1.2.3.4"}}, expected: "1.2.3.4", expectedError: nil}, + {name: "default (not available)", functionalRule: DefaultFunctionalRules["context_ip"], event: ptrans.TransformerEvent{Message: map[string]any{}}, expected: nil, expectedError: nil}, + {name: "extract (id)", functionalRule: ExtractFunctionalRules["id"], event: ptrans.TransformerEvent{Metadata: ptrans.Metadata{RecordID: "123"}}, expected: "123", expectedError: nil}, + {name: "extract (empty)", functionalRule: ExtractFunctionalRules["id"], event: ptrans.TransformerEvent{Metadata: ptrans.Metadata{RecordID: ""}}, expected: nil, expectedError: errRecordIDEmpty}, + {name: "identify (context.ip)", functionalRule: IdentifyFunctionalRules["context_ip"], event: ptrans.TransformerEvent{Message: map[string]any{"context": map[string]any{"ip": "1.2.3.4"}}}, expected: "1.2.3.4", expectedError: nil}, + {name: "identify (request_ip)", functionalRule: IdentifyFunctionalRules["context_ip"], event: ptrans.TransformerEvent{Message: map[string]any{"request_ip": "1.2.3.4"}}, expected: "1.2.3.4", expectedError: nil}, + {name: "identify (not available)", functionalRule: IdentifyFunctionalRules["context_ip"], event: ptrans.TransformerEvent{Message: map[string]any{}}, expected: nil, expectedError: nil}, + {name: "page (name)", functionalRule: PageFunctionalRules["name"], event: 
ptrans.TransformerEvent{Message: map[string]any{"name": "page name"}}, expected: "page name", expectedError: nil}, + {name: "page (properties.name)", functionalRule: PageFunctionalRules["name"], event: ptrans.TransformerEvent{Message: map[string]any{"properties": map[string]any{"name": "page name"}}}, expected: "page name", expectedError: nil}, + {name: "page (not available)", functionalRule: PageFunctionalRules["name"], event: ptrans.TransformerEvent{Message: map[string]any{}}, expected: nil, expectedError: nil}, + {name: "screen (name)", functionalRule: ScreenFunctionalRules["name"], event: ptrans.TransformerEvent{Message: map[string]any{"name": "screen name"}}, expected: "screen name", expectedError: nil}, + {name: "screen (properties.name)", functionalRule: ScreenFunctionalRules["name"], event: ptrans.TransformerEvent{Message: map[string]any{"properties": map[string]any{"name": "screen name"}}}, expected: "screen name", expectedError: nil}, + {name: "screen (not available)", functionalRule: ScreenFunctionalRules["name"], event: ptrans.TransformerEvent{Message: map[string]any{}}, expected: nil, expectedError: nil}, + {name: "track (record_id)", functionalRule: TrackTableFunctionalRules["record_id"], event: ptrans.TransformerEvent{Metadata: ptrans.Metadata{EventType: "track", SourceCategory: "cloud", RecordID: "123"}, Message: types.SingularEventT{"context": map[string]any{"sources": map[string]any{"version": "1.0"}}}}, expected: "123", expectedError: nil}, + {name: "track (record_id) convert to string", functionalRule: TrackTableFunctionalRules["record_id"], event: ptrans.TransformerEvent{Metadata: ptrans.Metadata{EventType: "track", SourceCategory: "cloud", RecordID: 123}, Message: types.SingularEventT{"context": map[string]any{"sources": map[string]any{"version": "1.0"}}}}, expected: "123", expectedError: nil}, + {name: "track (not cloud)", functionalRule: TrackTableFunctionalRules["record_id"], event: ptrans.TransformerEvent{Metadata: 
ptrans.Metadata{EventType: "track", SourceCategory: "not cloud"}}, expected: nil, expectedError: nil}, + {name: "track (id)", functionalRule: TrackEventTableFunctionalRules["id"], event: ptrans.TransformerEvent{Metadata: ptrans.Metadata{EventType: "track", SourceCategory: "cloud", RecordID: "123"}, Message: types.SingularEventT{"context": map[string]any{"sources": map[string]any{"version": "1.0"}}}}, expected: "123", expectedError: nil}, + {name: "track (id) don't convert to string", functionalRule: TrackEventTableFunctionalRules["id"], event: ptrans.TransformerEvent{Metadata: ptrans.Metadata{EventType: "track", SourceCategory: "cloud", RecordID: 123}, Message: types.SingularEventT{"context": map[string]any{"sources": map[string]any{"version": "1.0"}}}}, expected: 123, expectedError: nil}, + {name: "track (not cloud)", functionalRule: TrackEventTableFunctionalRules["id"], event: ptrans.TransformerEvent{Metadata: ptrans.Metadata{EventType: "track", SourceCategory: "not cloud", MessageID: "message-id"}}, expected: "message-id", expectedError: nil}, + } + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + result, err := tc.functionalRule(tc.event) + require.Equal(t, tc.expectedError, err) + require.Equal(t, tc.expected, result) + }) + } + }) +} diff --git a/warehouse/transformer/internal/rules/screen.go b/warehouse/transformer/internal/rules/screen.go new file mode 100644 index 00000000000..15b6ad4c7b2 --- /dev/null +++ b/warehouse/transformer/internal/rules/screen.go @@ -0,0 +1,11 @@ +package rules + +import ( + ptrans "github.com/rudderlabs/rudder-server/processor/transformer" +) + +var ScreenFunctionalRules = map[string]FunctionalRules{ + "name": func(event ptrans.TransformerEvent) (any, error) { + return firstValidValue(event.Message, []string{"name", "properties.name"}), nil + }, +} diff --git a/warehouse/transformer/internal/rules/track.go b/warehouse/transformer/internal/rules/track.go new file mode 100644 index 00000000000..d8d3a6f6e2e --- 
/dev/null
+++ b/warehouse/transformer/internal/rules/track.go
@@ -0,0 +1,60 @@
+package rules
+
+import (
+	"fmt"
+
+	ptrans "github.com/rudderlabs/rudder-server/processor/transformer"
+	"github.com/rudderlabs/rudder-server/utils/misc"
+	"github.com/rudderlabs/rudder-server/utils/types"
+	"github.com/rudderlabs/rudder-server/warehouse/transformer/internal/utils"
+)
+
+var (
+	// TrackRules holds the static track rules; "event_text" is paired with "event".
+	TrackRules = map[string]string{
+		"event_text": "event",
+	}
+	// TrackTableFunctionalRules computes "record_id": the cloud record ID rendered as a string for
+	// "track" events whose source category passes utils.CanUseRecordID, and nil for everything else.
+	TrackTableFunctionalRules = map[string]FunctionalRules{
+		"record_id": func(event ptrans.TransformerEvent) (any, error) {
+			eventType := event.Metadata.EventType
+			canUseRecordID := utils.CanUseRecordID(event.Metadata.SourceCategory)
+			if eventType == "track" && canUseRecordID {
+				cr, err := extractCloudRecordID(event.Message, event.Metadata, nil)
+				if err != nil {
+					return nil, fmt.Errorf("extracting cloud record id: %w", err)
+				}
+				if cr == nil {
+					// The nil fallback came back (no sources version). fmt.Sprint(nil) would
+					// emit the literal string "<nil>", so report "no value" instead.
+					return nil, nil // nolint: nilnil
+				}
+				return fmt.Sprint(cr), nil
+			}
+			return nil, nil // nolint: nilnil
+		},
+	}
+	// TrackEventTableFunctionalRules computes "id": the cloud record ID for "track" events whose source
+	// category passes utils.CanUseRecordID (with the message ID as fallback), else the message ID.
+	TrackEventTableFunctionalRules = map[string]FunctionalRules{
+		"id": func(event ptrans.TransformerEvent) (any, error) {
+			eventType := event.Metadata.EventType
+			canUseRecordID := utils.CanUseRecordID(event.Metadata.SourceCategory)
+			if eventType == "track" && canUseRecordID {
+				return extractCloudRecordID(event.Message, event.Metadata, event.Metadata.MessageID)
+			}
+			return event.Metadata.MessageID, nil
+		},
+	}
+)
+
+// extractCloudRecordID returns fallbackValue when message carries no (or an empty)
+// context.sources.version; otherwise it returns the result of extractRecordID(metadata).
+func extractCloudRecordID(message types.SingularEventT, metadata ptrans.Metadata, fallbackValue any) (any, error) {
+	sourcesVersion := misc.MapLookup(message, "context", "sources", "version")
+	if sourcesVersion == nil || sourcesVersion == "" {
+		return fallbackValue, nil
+	}
+	return extractRecordID(metadata)
+}
diff --git a/warehouse/transformer/internal/snakecase/constant.go b/warehouse/transformer/internal/snakecase/constant.go
new file mode 100644
index 00000000000..e4fcdc94c1f
--- /dev/null
+++ b/warehouse/transformer/internal/snakecase/constant.go
@@ -0,0 +1,50 @@
+package snakecase
+
+const (
+	// Used
to compose unicode character classes. + rsAstralRange = "\\ud800-\\udfff" + rsComboMarksRange = "\\u0300-\\u036f" + reComboHalfMarksRange = "\\ufe20-\\ufe2f" + rsComboSymbolsRange = "\\u20d0-\\u20ff" + rsComboMarksExtendedRange = "\\u1ab0-\\u1aff" + rsComboMarksSupplementRange = "\\u1dc0-\\u1dff" + rsComboRange = rsComboMarksRange + reComboHalfMarksRange + rsComboSymbolsRange + rsComboMarksExtendedRange + rsComboMarksSupplementRange + rsDingbatRange = "\\u2700-\\u27bf" + rsLowerRange = "a-z\\xdf-\\xf6\\xf8-\\xff" + rsMathOpRange = "\\xac\\xb1\\xd7\\xf7" + rsNonCharRange = "\\x00-\\x2f\\x3a-\\x40\\x5b-\\x60\\x7b-\\xbf" + rsPunctuationRange = "\\u2000-\\u206f" + rsSpaceRange = " \\t\\x0b\\f\\xa0\\ufeff\\n\\r\\u2028\\u2029\\u1680\\u180e\\u2000\\u2001\\u2002\\u2003\\u2004\\u2005\\u2006\\u2007\\u2008\\u2009\\u200a\\u202f\\u205f\\u3000" + rsUpperRange = "A-Z\\xc0-\\xd6\\xd8-\\xde" + rsVarRange = "\\ufe0e\\ufe0f" + rsBreakRange = rsMathOpRange + rsNonCharRange + rsPunctuationRange + rsSpaceRange + + // Used to compose unicode capture groups + rsApos = "['\u2019]" + rsBreak = "[" + rsBreakRange + "]" + rsCombo = "[" + rsComboRange + "]" + rsDigit = "\\d" + rsDingbat = "[" + rsDingbatRange + "]" + rsLower = "[" + rsLowerRange + "]" + rsMisc = "[^" + rsAstralRange + rsBreakRange + rsDigit + rsDingbatRange + rsLowerRange + rsUpperRange + "]" + rsFitz = "\\ud83c[\\udffb-\\udfff]" + rsModifier = "(?:" + rsCombo + "|" + rsFitz + ")" + rsNonAstral = "[^" + rsAstralRange + "]" + rsRegional = "(?:\\ud83c[\\udde6-\\uddff]){2}" + rsSurrPair = "[\\ud800-\\udbff][\\udc00-\\udfff]" + rsUpper = "[" + rsUpperRange + "]" + rsZWJ = "\\u200d" + + // Used to compose unicode regexes + rsMiscLower = "(?:" + rsLower + "|" + rsMisc + ")" + rsMiscUpper = "(?:" + rsUpper + "|" + rsMisc + ")" + rsOptContrLower = "(?:" + rsApos + "(?:d|ll|m|re|s|t|ve))?" + rsOptContrUpper = "(?:" + rsApos + "(?:D|LL|M|RE|S|T|VE))?" + reOptMod = rsModifier + "?" + rsOptVar = "[" + rsVarRange + "]?" 
+ rsOptJoin = "(?:" + rsZWJ + "(?:" + rsNonAstral + "|" + rsRegional + "|" + rsSurrPair + ")" + rsOptVar + reOptMod + ")*" + rsOrdLower = "\\d*(?:1st|2nd|3rd|(?![123])\\dth)(?=\\b|[A-Z_])" + rsOrdUpper = "\\d*(?:1ST|2ND|3RD|(?![123])\\dTH)(?=\\b|[a-z_])" + rsSeq = rsOptVar + reOptMod + rsOptJoin + rsEmoji = "(?:" + rsDingbat + "|" + rsRegional + "|" + rsSurrPair + ")" + rsSeq +) diff --git a/warehouse/transformer/internal/snakecase/regex.go b/warehouse/transformer/internal/snakecase/regex.go new file mode 100644 index 00000000000..bdc5e793802 --- /dev/null +++ b/warehouse/transformer/internal/snakecase/regex.go @@ -0,0 +1,50 @@ +package snakecase + +import ( + "strings" + + "github.com/dlclark/regexp2" +) + +var ( + reUnicodeWords = regexp2.MustCompile( + strings.Join( + []string{ + rsUpper + "?" + rsLower + "+" + rsOptContrLower + "(?=" + rsBreak + "|" + rsUpper + "|" + "$)", // Regular words, lowercase letters followed by optional contractions + rsMiscUpper + "+" + rsOptContrUpper + "(?=" + rsBreak + "|" + rsUpper + rsMiscLower + "|" + "$)", // Miscellaneous uppercase characters with optional contractions + rsUpper + "?" + rsMiscLower + "+" + rsOptContrLower, // Miscellaneous lowercase sequences with optional contractions + rsUpper + "+" + rsOptContrUpper, // All uppercase words with optional contractions (e.g., "THIS") + rsOrdUpper, // Ordinals for uppercase (e.g., "1ST", "2ND") + rsOrdLower, // Ordinals for lowercase (e.g., "1st", "2nd") + rsDigit + "+", // Pure digits (e.g., "123") + rsEmoji, // Emojis (e.g., 😀, ❤️) + }, + "|", + ), + regexp2.None, + ) + reUnicodeWordsWithNumbers = regexp2.MustCompile( + strings.Join( + []string{ + rsUpper + "?" + rsLower + "+" + rsDigit + "+", // Lowercase letters followed by digits (e.g., "abc123") + rsUpper + "+" + rsDigit + "+", // Uppercase letters followed by digits (e.g., "ABC123") + rsDigit + "+" + rsUpper + "?" 
+ rsLower + "+", // Digits followed by lowercase letters (e.g., "123abc") + rsDigit + "+" + rsUpper + "+", // Digits followed by uppercase letters (e.g., "123ABC") + rsUpper + "?" + rsLower + "+" + rsOptContrLower + "(?=" + rsBreak + "|" + rsUpper + "|" + "$)", // Regular words, lowercase letters followed by optional contractions + rsMiscUpper + "+" + rsOptContrUpper + "(?=" + rsBreak + "|" + rsUpper + rsMiscLower + "|" + "$)", // Miscellaneous uppercase characters with optional contractions + rsUpper + "?" + rsMiscLower + "+" + rsOptContrLower, // Miscellaneous lowercase sequences with optional contractions + rsUpper + "+" + rsOptContrUpper, // All uppercase words with optional contractions (e.g., "THIS") + rsOrdUpper, // Ordinals for uppercase (e.g., "1ST", "2ND") + rsOrdLower, // Ordinals for lowercase (e.g., "1st", "2nd") + rsDigit + "+", // Pure digits (e.g., "123") + rsEmoji, // Emojis (e.g., 😀, ❤️) + }, + "|", + ), + regexp2.None, + ) + reAsciiWord = regexp2.MustCompile(`[^\x00-\x2f\x3a-\x40\x5b-\x60\x7b-\x7f]+`, regexp2.None) + reHasUnicodeWord = regexp2.MustCompile( + `[a-z][A-Z]|[A-Z]{2}[a-z]|[0-9][a-zA-Z]|[a-zA-Z][0-9]|[^a-zA-Z0-9 ]`, regexp2.None, + ) +) diff --git a/warehouse/transformer/internal/snakecase/snakecase.go b/warehouse/transformer/internal/snakecase/snakecase.go new file mode 100644 index 00000000000..4ee544876dd --- /dev/null +++ b/warehouse/transformer/internal/snakecase/snakecase.go @@ -0,0 +1,27 @@ +package snakecase + +import ( + "strings" + + "github.com/samber/lo" +) + +// ToSnakeCase converts a string to snake_case using regular word separation. +func ToSnakeCase(s string) string { + return snakeCase(s, extractWords) +} + +// ToSnakeCaseWithNumbers converts a string to snake_case, preserving numbers. +func ToSnakeCaseWithNumbers(s string) string { + return snakeCase(s, extractWordsWithNumbers) +} + +// snakeCase converts a string to snake_case based on a word extraction function. 
+func snakeCase(s string, wordExtractor func(s string) []string) string { + s = strings.NewReplacer("'", "", "\u2019", "").Replace(s) + words := wordExtractor(s) + words = lo.Map(words, func(word string, _ int) string { + return strings.ToLower(word) + }) + return strings.Join(words, "_") +} diff --git a/warehouse/transformer/internal/snakecase/snakecase_test.go b/warehouse/transformer/internal/snakecase/snakecase_test.go new file mode 100644 index 00000000000..50e0b7094f3 --- /dev/null +++ b/warehouse/transformer/internal/snakecase/snakecase_test.go @@ -0,0 +1,119 @@ +package snakecase + +import ( + "strings" + "testing" + "time" + + "github.com/samber/lo" + "github.com/stretchr/testify/require" +) + +var burredLetters = []rune{ + // Latin-1 Supplement letters. + '\xc0', '\xc1', '\xc2', '\xc3', '\xc4', '\xc5', '\xc6', '\xc7', + '\xc8', '\xc9', '\xca', '\xcb', '\xcc', '\xcd', '\xce', '\xcf', + '\xd0', '\xd1', '\xd2', '\xd3', '\xd4', '\xd5', '\xd6', '\xd8', + '\xd9', '\xda', '\xdb', '\xdc', '\xdd', '\xde', '\xdf', '\xe0', + '\xe1', '\xe2', '\xe3', '\xe4', '\xe5', '\xe6', '\xe7', '\xe8', + '\xe9', '\xea', '\xeb', '\xec', '\xed', '\xee', '\xef', '\xf0', + '\xf1', '\xf2', '\xf3', '\xf4', '\xf5', '\xf6', '\xf8', '\xf9', + '\xfa', '\xfb', '\xfc', '\xfd', '\xfe', '\xff', + + // Latin Extended-A letters. 
+ '\u0100', '\u0101', '\u0102', '\u0103', '\u0104', '\u0105', '\u0106', '\u0107', + '\u0108', '\u0109', '\u010a', '\u010b', '\u010c', '\u010d', '\u010e', '\u010f', + '\u0110', '\u0111', '\u0112', '\u0113', '\u0114', '\u0115', '\u0116', '\u0117', + '\u0118', '\u0119', '\u011a', '\u011b', '\u011c', '\u011d', '\u011e', '\u011f', + '\u0120', '\u0121', '\u0122', '\u0123', '\u0124', '\u0125', '\u0126', '\u0127', + '\u0128', '\u0129', '\u012a', '\u012b', '\u012c', '\u012d', '\u012e', '\u012f', + '\u0130', '\u0131', '\u0132', '\u0133', '\u0134', '\u0135', '\u0136', '\u0137', + '\u0138', '\u0139', '\u013a', '\u013b', '\u013c', '\u013d', '\u013e', '\u013f', + '\u0140', '\u0141', '\u0142', '\u0143', '\u0144', '\u0145', '\u0146', '\u0147', + '\u0148', '\u0149', '\u014a', '\u014b', '\u014c', '\u014d', '\u014e', '\u014f', + '\u0150', '\u0151', '\u0152', '\u0153', '\u0154', '\u0155', '\u0156', '\u0157', + '\u0158', '\u0159', '\u015a', '\u015b', '\u015c', '\u015d', '\u015e', '\u015f', + '\u0160', '\u0161', '\u0162', '\u0163', '\u0164', '\u0165', '\u0166', '\u0167', + '\u0168', '\u0169', '\u016a', '\u016b', '\u016c', '\u016d', '\u016e', '\u016f', + '\u0170', '\u0171', '\u0172', '\u0173', '\u0174', '\u0175', '\u0176', '\u0177', + '\u0178', '\u0179', '\u017a', '\u017b', '\u017c', '\u017d', '\u017e', '\u017f', +} + +func TestToSnakeCase(t *testing.T) { + t.Run("extractWords", func(t *testing.T) { + t.Run("should match words containing Latin Unicode letters", func(t *testing.T) { + for _, letter := range burredLetters { + require.Equal(t, []string{string(letter)}, extractWords(string(letter))) + } + }) + t.Run("should work with compound words", func(t *testing.T) { + require.Equal(t, []string{"12", "ft"}, extractWords("12ft")) + require.Equal(t, []string{"aeiou", "Are", "Vowels"}, extractWords("aeiouAreVowels")) + require.Equal(t, []string{"enable", "6", "h", "format"}, extractWords("enable 6h format")) + require.Equal(t, []string{"enable", "24", "H", "format"}, extractWords("enable 
24H format")) + require.Equal(t, []string{"is", "ISO", "8601"}, extractWords("isISO8601")) + require.Equal(t, []string{"LETTERS", "Aeiou", "Are", "Vowels"}, extractWords("LETTERSAeiouAreVowels")) + require.Equal(t, []string{"too", "Legit", "2", "Quit"}, extractWords("tooLegit2Quit")) + require.Equal(t, []string{"walk", "500", "Miles"}, extractWords("walk500Miles")) + require.Equal(t, []string{"xhr", "2", "Request"}, extractWords("xhr2Request")) + require.Equal(t, []string{"XML", "Http"}, extractWords("XMLHttp")) + require.Equal(t, []string{"Xml", "HTTP"}, extractWords("XmlHTTP")) + require.Equal(t, []string{"Xml", "Http"}, extractWords("XmlHttp")) + }) + t.Run("should work with compound words containing diacritical marks", func(t *testing.T) { + require.Equal(t, []string{"LETTERS", "Æiou", "Are", "Vowels"}, extractWords("LETTERSÆiouAreVowels")) + require.Equal(t, []string{"æiou", "Are", "Vowels"}, extractWords("æiouAreVowels")) + require.Equal(t, []string{"æiou", "2", "Consonants"}, extractWords("æiou2Consonants")) + }) + t.Run("should not treat contractions as separate words", func(t *testing.T) { + for _, apos := range []string{"'", string('\u2019')} { + t.Run("ToLower", func(t *testing.T) { + for _, postfix := range []string{"d", "ll", "m", "re", "s", "t", "ve"} { + input := "a b" + apos + postfix + " c" + actual := extractWords(strings.ToLower(input)) + expected := lo.Map([]string{"a", "b" + apos + postfix, "c"}, func(item string, index int) string { + return strings.ToLower(item) + }) + require.Equal(t, expected, actual) + } + }) + t.Run("ToUpper", func(t *testing.T) { + for _, postfix := range []string{"d", "ll", "m", "re", "s", "t", "ve"} { + input := "a b" + apos + postfix + " c" + actual := extractWords(strings.ToUpper(input)) + expected := lo.Map([]string{"a", "b" + apos + postfix, "c"}, func(item string, index int) string { + return strings.ToUpper(item) + }) + require.Equal(t, expected, actual) + } + }) + } + }) + t.Run("should not treat ordinal numbers 
as separate words", func(t *testing.T) { + ordinals := []string{"1st", "2nd", "3rd", "4th"} + for _, ordinal := range ordinals { + expected := []string{strings.ToLower(ordinal)} + actual := extractWords(strings.ToLower(ordinal)) + require.Equal(t, expected, actual) + + expected = []string{strings.ToUpper(ordinal)} + actual = extractWords(strings.ToUpper(ordinal)) + require.Equal(t, expected, actual) + } + }) + t.Run("should prevent ReDoS", func(t *testing.T) { + largeWordLen := 50000 + largeWord := strings.Repeat("A", largeWordLen) + maxMs := 1000 + startTime := time.Now() + + expected := []string{largeWord, "Æiou", "Are", "Vowels"} + actual := extractWords(largeWord + "ÆiouAreVowels") + require.Equal(t, expected, actual) + + endTime := time.Now() + timeSpent := endTime.Sub(startTime) + require.Less(t, timeSpent.Milliseconds(), int64(maxMs)) + }) + }) +} diff --git a/warehouse/transformer/internal/snakecase/words.go b/warehouse/transformer/internal/snakecase/words.go new file mode 100644 index 00000000000..1b7b383c471 --- /dev/null +++ b/warehouse/transformer/internal/snakecase/words.go @@ -0,0 +1,46 @@ +package snakecase + +import ( + "github.com/dlclark/regexp2" +) + +func extractWords(s string) []string { + if hasUnicodeWord(s) { + return unicodeWords(s) + } + return asciiWords(s) +} + +func hasUnicodeWord(s string) bool { + isMatch, _ := reHasUnicodeWord.MatchString(s) + return isMatch +} + +func extractWordsWithNumbers(s string) []string { + if hasUnicodeWord(s) { + return unicodeWordsWithNumbers(s) + } + return asciiWords(s) +} + +func unicodeWords(s string) []string { + return regexp2FindAllString(reUnicodeWords, s) +} + +func unicodeWordsWithNumbers(s string) []string { + return regexp2FindAllString(reUnicodeWordsWithNumbers, s) +} + +func asciiWords(s string) []string { + return regexp2FindAllString(reAsciiWord, s) +} + +func regexp2FindAllString(re *regexp2.Regexp, s string) []string { + var matches []string + m, _ := re.FindStringMatch(s) + for m != nil 
{ + matches = append(matches, m.String()) + m, _ = re.FindNextMatch(m) + } + return matches +} diff --git a/warehouse/transformer/internal/utils/reservedkeywords.go b/warehouse/transformer/internal/utils/reservedkeywords.go new file mode 100644 index 00000000000..d454d7d93d9 --- /dev/null +++ b/warehouse/transformer/internal/utils/reservedkeywords.go @@ -0,0 +1,44 @@ +package utils + +import ( + "embed" + "encoding/json" + "log" + "strings" + + "github.com/samber/lo" +) + +var ( + //go:embed reservedkeywords.json + reservedKeywordsFile embed.FS + + reservedKeywords map[string]map[string]struct{} +) + +func init() { + reservedKeywords = loadReservedKeywords() +} + +func loadReservedKeywords() map[string]map[string]struct{} { + data, err := reservedKeywordsFile.ReadFile("reservedkeywords.json") + if err != nil { + log.Fatalf("failed to load reserved keywords: %v", err) + } + + var tempKeywords map[string][]string + if err := json.Unmarshal(data, &tempKeywords); err != nil { + log.Fatalf("failed to parse reserved keywords: %v", err) + } + + return lo.MapValues(tempKeywords, func(keywords []string, _ string) map[string]struct{} { + return lo.SliceToMap(keywords, func(k string) (string, struct{}) { + return k, struct{}{} + }) + }) +} + +func IsReservedKeyword(destType, keyword string) bool { + _, exists := reservedKeywords[destType][strings.ToUpper(keyword)] + return exists +} diff --git a/warehouse/transformer/internal/utils/reservedkeywords.json b/warehouse/transformer/internal/utils/reservedkeywords.json new file mode 100644 index 00000000000..120076b9f22 --- /dev/null +++ b/warehouse/transformer/internal/utils/reservedkeywords.json @@ -0,0 +1,2240 @@ +{ + "AZURE_DATALAKE": [ + "INTO", + "CONSTRAINT", + "CURRENT", + "READ", + "WITHIN", + "NO", + "ROWGUIDCOL", + "WITH", + "NCLOB", + "REGR_SYY", + "SETUSER", + "CONDITION", + "MAP", + "MINUTE", + "PARTITION", + "SYSTEM_USER", + "NULL", + "OBJECT", + "SEMANTICSIMILARITYDETAILSTABLE", + "SESSION", + "ELEMENT", + "HOUR", +
"MOD", + "PUBLIC", + "SEMANTICSIMILARITYTABLE", + "EXIT", + "LINENO", + "SECURITYAUDIT", + "TRIM", + "VAR_POP", + "CORRESPONDING", + "CREATE", + "IMMEDIATE", + "XMLATTRIBUTES", + "CLUSTERED", + "DENY", + "LAST", + "OCCURRENCES_REGEX", + "REF", + "RELATIVE", + "SELECT", + "XMLEXISTS", + "BETWEEN", + "FREETEXTTABLE", + "PAD", + "WITHOUT", + "SYSTEM", + "WHENEVER", + "DEC", + "DIAGNOSTICS", + "GENERAL", + "LARGE", + "METHOD", + "ATOMIC", + "DETERMINISTIC", + "OLD", + "UNPIVOT", + "USING", + "DESCRIPTOR", + "BY", + "CALL", + "CONNECT", + "CYCLE", + "MIN", + "ABSOLUTE", + "DICTIONARY", + "MODIFY", + "POSITION_REGEX", + "SECOND", + "UNDER", + "CUME_DIST", + "EXTRACT", + "LEFT", + "NORMALIZE", + "REPLICATION", + "BOTH", + "BROWSE", + "EXEC", + "FORTRAN", + "LANGUAGE", + "RESTORE", + "NOCHECK", + "XMLTEXT", + "FOR", + "NEXT", + "SHUTDOWN", + "STRUCTURE", + "BINARY", + "CURRENT_TIMESTAMP", + "DATE", + "RETURN", + "SAVE", + "OVER", + "BEGIN", + "FULLTEXTTABLE", + "IGNORE", + "INPUT", + "ISOLATION", + "OUTPUT", + "DEFERRABLE", + "NONE", + "COMMIT", + "REGR_AVGX", + "XMLCAST", + "VAR_SAMP", + "AT", + "GLOBAL", + "IS", + "LN", + "OR", + "SEQUENCE", + "TABLESAMPLE", + "DOMAIN", + "FREE", + "IN", + "LOCAL", + "MEMBER", + "OPENROWSET", + "VARYING", + "WRITE", + "ARRAY", + "COLUMN", + "DYNAMIC", + "OUTER", + "DAY", + "DEALLOCATE", + "XMLPARSE", + "CHARACTER_LENGTH", + "PERCENT", + "TRAN", + "WRITETEXT", + "PROCEDURE", + "RECURSIVE", + "TRUE", + "CONTAINS", + "OUT", + "PLAN", + "CURRENT_SCHEMA", + "FALSE", + "PRIOR", + "RELEASE", + "TRUNCATE", + "XMLBINARY", + "ANY", + "DOUBLE", + "IDENTITY_INSERT", + "UPDATE", + "CHAR", + "LOCATOR", + "ORDINALITY", + "TSEQUAL", + "XMLDOCUMENT", + "CURRENT_ROLE", + "KILL", + "SMALLINT", + "EACH", + "MODULE", + "RECONFIGURE", + "SUBMULTISET", + "THEN", + "PASCAL", + "CONNECTION", + "LATERAL", + "STATEMENT", + "VALUE", + "BIT_LENGTH", + "XMLTABLE", + "POSITION", + "SIMILAR", + "SOME", + "UNIQUE", + "BEFORE", + "LOCALTIMESTAMP", + "OPENQUERY", +
"ORDER", + "ROLE", + "SESSION_USER", + "LEVEL", + "PREORDER", + "FUNCTION", + "GROUP", + "IDENTITY", + "XMLAGG", + "XMLQUERY", + "CASCADED", + "CAST", + "CHECK", + "RETURNS", + "CASE", + "CONTAINSTABLE", + "LOCALTIME", + "CALLED", + "EQUALS", + "READTEXT", + "REGR_SLOPE", + "WITHINGROUP", + "AGGREGATE", + "COLLATE", + "INTERSECTION", + "OVERLAPS", + "STDDEV_POP", + "UNKNOWN", + "LOWER", + "READS", + "MATCH", + "MERGE", + "TRY_CONVERT", + "CORR", + "END", + "HAVING", + "SYMMETRIC", + "UNION", + "ADD", + "DISCONNECT", + "RULE", + "STATISTICS", + "WAITFOR", + "FIRST", + "INTERSECT", + "PERCENT_RANK", + "AFTER", + "CATALOG", + "DESTROY", + "INSENSITIVE", + "SIZE", + "ARE", + "OFFSETS", + "REGR_COUNT", + "ALTER", + "AS", + "COMPLETION", + "COVAR_POP", + "FILE", + "SQLSTATE", + "LESS", + "MONTH", + "ROUTINE", + "CHARACTER", + "CROSS", + "REGR_SXX", + "VARIABLE", + "CONVERT", + "FREETEXT", + "REAL", + "TIMEZONE_MINUTE", + "TRANSLATE", + "REVOKE", + "VALUES", + "BREAK", + "COLLECT", + "CURRENT_TRANSFORM_GROUP_FOR_TYPE", + "DATA", + "INOUT", + "STATE", + "TRANSLATION", + "MAX", + "SUM", + "XMLCOMMENT", + "COLLATION", + "INITIALIZE", + "LIMIT", + "REGR_R2", + "SETS", + "SPECIFIC", + "ASYMMETRIC", + "COALESCE", + "SEMANTICKEYPHRASETABLE", + "SUBSTRING", + "TERMINATE", + "OPENXML", + "PARAMETERS", + "RESULT", + "CURRENT_CATALOG", + "GROUPING", + "DESCRIBE", + "DESTRUCTOR", + "DISTRIBUTED", + "BACKUP", + "CLASS", + "DELETE", + "FOREIGN", + "DBCC", + "END-EXEC", + "EXCEPT", + "IF", + "PRIVILEGES", + "TIME", + "WIDTH_BUCKET", + "ALIAS", + "CARDINALITY", + "DEFERRED", + "EXTERNAL", + "IDENTITYCOL", + "INDEX", + "NAMES", + "XMLSERIALIZE", + "AVG", + "CURRENT_USER", + "HOST", + "NULLIF", + "OPTION", + "REVERT", + "USER", + "NUMERIC", + "PRINT", + "REGR_INTERCEPT", + "ALL", + "CASCADE", + "ELSE", + "PRESERVE", + "SENSITIVE", + "HOLD", + "REFERENCES", + "SQLCA", + "ACTION", + "DECLARE", + "LIKE", + "PIVOT", + "TRANSLATE_REGEX", + "UNNEST", + "AUTHORIZATION", + "COMPUTE", + "DEPTH", +
"JOIN", + "OVERLAY", + "XMLNAMESPACES", + "LEADING", + "PERCENTILE_DISC", + "USE", + "SQL", + "VARCHAR", + "ADMIN", + "EXECUTE", + "REFERENCING", + "SQLCODE", + "DEREF", + "ERRLVL", + "INDICATOR", + "LOAD", + "UPPER", + "CLOSE", + "CONSTRAINTS", + "DECIMAL", + "INITIALLY", + "NATIONAL", + "NCHAR", + "SCHEMA", + "CHAR_LENGTH", + "DATABASE", + "DUMP", + "LIKE_REGEX", + "TO", + "PROC", + "BLOB", + "FUSION", + "INNER", + "RIGHT", + "BOOLEAN", + "CURRENT_DATE", + "FILLFACTOR", + "ROWCOUNT", + "XMLVALIDATE", + "CHECKPOINT", + "EXCEPTION", + "NATURAL", + "ROLLBACK", + "STDDEV_SAMP", + "WINDOW", + "CUBE", + "CURRENT_PATH", + "PREFIX", + "TEXTSIZE", + "WHERE", + "DISK", + "EVERY", + "INSERT", + "OCTET_LENGTH", + "WHILE", + "ASENSITIVE", + "DROP", + "HOLDLOCK", + "BULK", + "ONLY", + "CONTINUE", + "COUNT", + "KEY", + "BIT", + "OF", + "PRECISION", + "TEMPORARY", + "MULTISET", + "PARAMETER", + "RESTRICT", + "SPACE", + "TRAILING", + "CLOB", + "DISTINCT", + "ESCAPE", + "FETCH", + "GRANT", + "TRANSACTION", + "ASSERTION", + "FROM", + "MODIFIES", + "SQLWARNING", + "XMLELEMENT", + "THAN", + "ASC", + "EXISTS", + "NEW", + "OFF", + "PERCENTILE_CONT", + "PRIMARY", + "ROW", + "TREAT", + "CURRENT_TIME", + "ITERATE", + "SET", + "TOP", + "XMLPI", + "OPEN", + "OPENDATASOURCE", + "PARTIAL", + "PREPARE", + "REGR_AVGY", + "SQLERROR", + "STATIC", + "ADA", + "RANGE", + "SCOPE", + "GO", + "NONCLUSTERED", + "UPDATETEXT", + "VIEW", + "INT", + "AND", + "COVAR_SAMP", + "DESC", + "NOT", + "ON", + "TABLE", + "INCLUDE", + "SEARCH", + "SPECIFICTYPE", + "SQLEXCEPTION", + "TRIGGER", + "GET", + "START", + "DEFAULT", + "FLOAT", + "FOUND", + "FULL", + "USAGE", + "GOTO", + "RAISERROR", + "SUBSTRING_REGEX", + "ZONE", + "BREADTH", + "CURSOR", + "OPERATION", + "TIMEZONE_HOUR", + "UESCAPE", + "XMLCONCAT", + "CURRENT_DEFAULT_TRANSFORM_GROUP", + "ROLLUP", + "INTEGER", + "ROWS", + "SAVEPOINT", + "XMLITERATE", + "YEAR", + "ALLOCATE", + "REGR_SXY", + "POSTFIX", + "SECTION", + "WORK", + "INTERVAL", + "SCROLL", + "WHEN", +
"FILTER", + "XMLFOREST" + ], + "AZURE_SYNAPSE": [ + "CAST", + "GROUPING", + "PIVOT", + "ARE", + "ATOMIC", + "ORDINALITY", + "OVERLAY", + "SIZE", + "AFTER", + "EVERY", + "GROUP", + "REGR_SXX", + "RESTRICT", + "SEARCH", + "STDDEV_POP", + "COLLECT", + "KEY", + "LOCAL", + "FROM", + "TEMPORARY", + "TRUE", + "TSEQUAL", + "WAITFOR", + "BEFORE", + "DEFERRED", + "STATE", + "XMLDOCUMENT", + "ALLOCATE", + "INOUT", + "SYSTEM_USER", + "CALL", + "CALLED", + "LIMIT", + "INNER", + "NONE", + "REGR_SXY", + "WHILE", + "XMLQUERY", + "AGGREGATE", + "DICTIONARY", + "INITIALIZE", + "REVERT", + "SIMILAR", + "FOUND", + "MODIFIES", + "REFERENCES", + "CONVERT", + "PRESERVE", + "DISTRIBUTED", + "RULE", + "READ", + "SEQUENCE", + "USE", + "BREAK", + "CURRENT_ROLE", + "INTERVAL", + "POSITION", + "REFERENCING", + "TRY_CONVERT", + "WHERE", + "CURRENT_DATE", + "CURRENT_USER", + "INSERT", + "MIN", + "PLAN", + "SUBMULTISET", + "CURRENT_TRANSFORM_GROUP_FOR_TYPE", + "DATABASE", + "FILE", + "MERGE", + "OPTION", + "OFFSETS", + "UPPER", + "AND", + "CHARACTER", + "IDENTITY", + "PRIMARY", + "SELECT", + "CASCADED", + "DEALLOCATE", + "SECOND", + "SQL", + "TABLE", + "TIMEZONE_MINUTE", + "NATIONAL", + "NATURAL", + "ROLE", + "ERRLVL", + "HAVING", + "ONLY", + "OFF", + "SEMANTICSIMILARITYTABLE", + "STDDEV_SAMP", + "TRANSLATION", + "CLASS", + "CURRENT_TIMESTAMP", + "ESCAPE", + "WINDOW", + "WITHOUT", + "ASSERTION", + "COVAR_POP", + "OLD", + "BETWEEN", + "FREETEXT", + "NORMALIZE", + "PREORDER", + "CONTAINSTABLE", + "MAX", + "PARAMETERS", + "ANY", + "BIT", + "INTO", + "FILLFACTOR", + "IF", + "NCLOB", + "GENERAL", + "MAP", + "BOOLEAN", + "COMMIT", + "CROSS", + "SUBSTRING_REGEX", + "THAN", + "CONNECT", + "EXEC", + "RIGHT", + "SAVEPOINT", + "DEC", + "EXTERNAL", + "PARTIAL", + "DESCRIPTOR", + "ORDER", + "REGR_R2", + "PREFIX", + "SESSION", + "COLLATE", + "DEFAULT", + "XMLITERATE", + "CHARACTER_LENGTH", + "LANGUAGE", + "REPLICATION", + "SET", + "SPACE", + "LEADING", + "LOCATOR", + "SECTION", +
"CURRENT_DEFAULT_TRANSFORM_GROUP", + "PAD", + "OVERLAPS", + "SCROLL", + "INPUT", + "ROUTINE", + "DESCRIBE", + "FREE", + "IN", + "IDENTITY_INSERT", + "OUTER", + "SCOPE", + "AT", + "COLLATION", + "FUNCTION", + "RESTORE", + "SEMANTICSIMILARITYDETAILSTABLE", + "VARCHAR", + "COALESCE", + "MODULE", + "REGR_AVGY", + "DOUBLE", + "SEMANTICKEYPHRASETABLE", + "ISOLATION", + "OVER", + "INITIALLY", + "LIKE_REGEX", + "SETS", + "VALUE", + "CARDINALITY", + "CURSOR", + "INCLUDE", + "PARTITION", + "CURRENT_SCHEMA", + "EXISTS", + "INTEGER", + "PASCAL", + "SQLSTATE", + "SQLWARNING", + "BACKUP", + "CHAR", + "NULLIF", + "HOLDLOCK", + "LEVEL", + "SPECIFIC", + "TRANSLATE", + "CURRENT_TIME", + "END", + "PROC", + "XMLEXISTS", + "OR", + "READS", + "TRANSLATE_REGEX", + "EXCEPTION", + "FILTER", + "OPENQUERY", + "PRECISION", + "TRUNCATE", + "ADA", + "CLOSE", + "EACH", + "INT", + "OPENXML", + "DUMP", + "FUSION", + "GET", + "XMLPARSE", + "DBCC", + "FIRST", + "REF", + "BIT_LENGTH", + "PERCENT", + "REGR_COUNT", + "XMLTABLE", + "PRINT", + "ROWS", + "WIDTH_BUCKET", + "XMLCAST", + "RELEASE", + "ROW", + "SCHEMA", + "EQUALS", + "OUT", + "DEFERRABLE", + "DISTINCT", + "DROP", + "PREPARE", + "SAVE", + "ASC", + "LOCALTIME", + "NEW", + "LARGE", + "SYSTEM", + "WRITE", + "PUBLIC", + "RECURSIVE", + "HOLD", + "MOD", + "NAMES", + "SHUTDOWN", + "CHECKPOINT", + "CLOB", + "FOREIGN", + "CURRENT", + "RANGE", + "ABSOLUTE", + "MODIFY", + "XMLSERIALIZE", + "TRANSACTION", + "UNION", + "DELETE", + "EXIT", + "IS", + "LAST", + "ADMIN", + "DECIMAL", + "KILL", + "XMLCOMMENT", + "COVAR_SAMP", + "IDENTITYCOL", + "SQLCA", + "OPENROWSET", + "REGR_SLOPE", + "XMLELEMENT", + "BREADTH", + "DEPTH", + "INDEX", + "MATCH", + "PERCENT_RANK", + "XMLATTRIBUTES", + "CUBE", + "EXTRACT", + "PRIOR", + "UNPIVOT", + "LEFT", + "LOCALTIMESTAMP", + "OPERATION", + "LN", + "POSITION_REGEX", + "SMALLINT", + "DATA", + "SPECIFICTYPE", + "XMLAGG", + "TOP", + "TRIGGER", + "ASENSITIVE", + "NONCLUSTERED", + "SENSITIVE", + "BY", + "INTERSECT", + "LINENO", + 
"ROLLBACK", + "CONTAINS", + "EXECUTE", + "LIKE", + "ROWGUIDCOL", + "SECURITYAUDIT", + "XMLBINARY", + "ARRAY", + "BLOB", + "ELEMENT", + "ALTER", + "MONTH", + "XMLVALIDATE", + "ASYMMETRIC", + "DESC", + "SETUSER", + "OCCURRENCES_REGEX", + "RETURN", + "TO", + "DIAGNOSTICS", + "DISK", + "METHOD", + "BROWSE", + "MULTISET", + "FOR", + "VARIABLE", + "POSTFIX", + "PROCEDURE", + "ROWCOUNT", + "VARYING", + "DECLARE", + "HOUR", + "INTERSECTION", + "INDICATOR", + "LOWER", + "ZONE", + "NEXT", + "SUBSTRING", + "XMLNAMESPACES", + "COMPLETION", + "JOIN", + "BEGIN", + "CUME_DIST", + "DYNAMIC", + "VAR_POP", + "WITHINGROUP", + "WORK", + "XMLFOREST", + "ACTION", + "OBJECT", + "PERCENTILE_CONT", + "ALL", + "CHAR_LENGTH", + "TIMEZONE_HOUR", + "COLUMN", + "FULLTEXTTABLE", + "NULL", + "TREAT", + "UNIQUE", + "FORTRAN", + "OCTET_LENGTH", + "RAISERROR", + "CASE", + "SQLCODE", + "WRITETEXT", + "OUTPUT", + "AS", + "CORR", + "LATERAL", + "REGR_SYY", + "SUM", + "TRAILING", + "CURRENT_CATALOG", + "DESTROY", + "ELSE", + "UNNEST", + "BINARY", + "CORRESPONDING", + "DETERMINISTIC", + "CASCADE", + "INSENSITIVE", + "VALUES", + "END-EXEC", + "GLOBAL", + "UNDER", + "RECONFIGURE", + "BOTH", + "ITERATE", + "NO", + "DESTRUCTOR", + "REVOKE", + "WITH", + "CYCLE", + "EXCEPT", + "USAGE", + "CHECK", + "LESS", + "ADD", + "AVG", + "REGR_INTERCEPT", + "TABLESAMPLE", + "IGNORE", + "NOT", + "SOME", + "FULL", + "GOTO", + "OF", + "STATISTICS", + "TIME", + "CLUSTERED", + "DATE", + "DAY", + "VAR_SAMP", + "DISCONNECT", + "OPENDATASOURCE", + "START", + "SQLERROR", + "UNKNOWN", + "XMLTEXT", + "COMPUTE", + "HOST", + "ON", + "SYMMETRIC", + "DOMAIN", + "GRANT", + "NCHAR", + "FREETEXTTABLE", + "CURRENT_PATH", + "RETURNS", + "UPDATETEXT", + "YEAR", + "CONSTRAINT", + "READTEXT", + "THEN", + "CONTINUE", + "MEMBER", + "SESSION_USER", + "ROLLUP", + "TERMINATE", + "UPDATE", + "XMLCONCAT", + "DEREF", + "IMMEDIATE", + "REGR_AVGX", + "CONSTRAINTS", + "PRIVILEGES", + "FALSE", + "LOAD", + "RELATIVE", + "CONDITION", + "OPEN", + "TRAN", + 
"ALIAS", + "CONNECTION", + "WHENEVER", + "SQLEXCEPTION", + "COUNT", + "NOCHECK", + "PERCENTILE_DISC", + "UESCAPE", + "DENY", + "FETCH", + "TRIM", + "CATALOG", + "GO", + "STATIC", + "STATEMENT", + "USER", + "VIEW", + "WITHIN", + "AUTHORIZATION", + "FLOAT", + "RESULT", + "TEXTSIZE", + "USING", + "WHEN", + "MINUTE", + "REAL", + "STRUCTURE", + "PARAMETER", + "XMLPI", + "BULK", + "CREATE", + "NUMERIC" + ], + "BQ": [ + "ALL", + "FROM", + "NATURAL", + "THEN", + "UNBOUNDED", + "USING", + "ARRAY", + "CURRENT", + "IS", + "NULLS", + "TO", + "TREAT", + "RANGE", + "BY", + "CONTAINS", + "END", + "GROUP", + "GROUPING", + "HAVING", + "CAST", + "FETCH", + "FULL", + "INTERSECT", + "PARTITION", + "ENUM", + "FOR", + "LATERAL", + "RECURSIVE", + "CASE", + "NULL", + "UNION", + "INNER", + "NO", + "CREATE", + "CROSS", + "CUBE", + "EXCLUDE", + "GROUPS", + "IN", + "WINDOW", + "ANY", + "DESC", + "ESCAPE", + "JOIN", + "SOME", + "IF", + "LOOKUP", + "DEFINE", + "DISTINCT", + "EXCEPT", + "EXISTS", + "FOLLOWING", + "HASH", + "ROLLUP", + "ROWS", + "WHERE", + "ELSE", + "BETWEEN", + "NEW", + "OUTER", + "OVER", + "RIGHT", + "WITH", + "FALSE", + "IGNORE", + "LEFT", + "RESPECT", + "SET", + "EXTRACT", + "OF", + "OR", + "PRECEDING", + "TABLESAMPLE", + "UNNEST", + "AND", + "ASSERT_ROWS_MODIFIED", + "NOT", + "struct{}{}", + "ORDER", + "PROTO", + "AT", + "COLLATE", + "INTERVAL", + "INTO", + "LIKE", + "ON", + "STRUCT", + "WITHIN", + "WHEN", + "AS", + "ASC", + "DEFAULT", + "LIMIT", + "MERGE", + "SELECT" + ], + "CLICKHOUSE": [], + "DELTALAKE": [ + "INTERVAL", + "NO", + "TO", + "USER", + "CREATE", + "OUTER", + "FALSE", + "FETCH", + "WHEN", + "WITH", + "START", + "CUBE", + "CURRENT", + "GROUP", + "LATERAL", + "PRIMARY", + "ROW", + "DESCRIBE", + "DISTINCT", + "EVENT_DATE", + "NATURAL", + "OR", + "THEN", + "AT", + "CURRENT_DATE", + "INTERSECT", + "ROLLBACK", + "UNION", + "CASE", + "FROM", + "INNER", + "ORDER", + "ROWS", + "OF", + "REVOKE", + "SET", + "AS", + "END", + "BOTH", + "CURRENT_TIME", + "UPDATE", + "USING", 
+ "GLOBAL", + "VALUES", + "EXCEPT", + "OVERLAPS", + "TRUNCATE", + "BETWEEN", + "FOREIGN", + "JOIN", + "ONLY", + "ANY", + "COLUMN", + "ELSE", + "FULL", + "GROUPING", + "POSITION", + "SELECT", + "ANTI", + "UNIQUE", + "EXTRACT", + "FUNCTION", + "UNKNOWN", + "ALL", + "AUTHORIZATION", + "LOCAL", + "OUT", + "ROLLUP", + "TABLESAMPLE", + "EXISTS", + "IS", + "SESSION_USER", + "struct{}{}", + "ESCAPE", + "EXTERNAL", + "MINUS", + "NOT", + "REFERENCES", + "CONSTRAINT", + "FILTER", + "INTO", + "ALTER", + "DELETE", + "LEADING", + "NULL", + "RANGE", + "TIME", + "AND", + "DROP", + "HAVING", + "IN", + "CURRENT_USER", + "CROSS", + "ON", + "WHERE", + "COMMIT", + "LEFT", + "PARTITION", + "TRAILING", + "SOME", + "TABLE", + "BY", + "CAST", + "GRANT", + "INSERT", + "LIKE", + "RIGHT", + "ARRAY", + "CHECK", + "COLLATE", + "FOR", + "SEMI", + "WINDOW", + "CURRENT_TIMESTAMP" + ], + "GCS_DATALAKE": [ + "WHEN", + "DEFINE", + "DISTINCT", + "FROM", + "STRUCT", + "UNION", + "struct{}{}", + "USING", + "DEFAULT", + "HASH", + "NEW", + "ORDER", + "PRECEDING", + "ROWS", + "CURRENT", + "EXISTS", + "JOIN", + "LATERAL", + "RECURSIVE", + "INTO", + "IS", + "OR", + "PROTO", + "NATURAL", + "OUTER", + "UNBOUNDED", + "ASSERT_ROWS_MODIFIED", + "CASE", + "COLLATE", + "ENUM", + "EXTRACT", + "WHERE", + "AND", + "CAST", + "MERGE", + "GROUPS", + "INNER", + "NOT", + "OF", + "ROLLUP", + "FOLLOWING", + "FOR", + "LOOKUP", + "ARRAY", + "BY", + "CREATE", + "CROSS", + "END", + "NULL", + "ASC", + "IGNORE", + "UNNEST", + "WITH", + "AT", + "GROUP", + "INTERSECT", + "RIGHT", + "THEN", + "PARTITION", + "TABLESAMPLE", + "WINDOW", + "CUBE", + "FETCH", + "INTERVAL", + "LIKE", + "OVER", + "SOME", + "TO", + "TREAT", + "CONTAINS", + "DESC", + "LEFT", + "RESPECT", + "SELECT", + "WITHIN", + "ALL", + "IF", + "LIMIT", + "AS", + "ELSE", + "RANGE", + "ON", + "SET", + "ESCAPE", + "EXCLUDE", + "FALSE", + "HAVING", + "IN", + "NO", + "NULLS", + "ANY", + "BETWEEN", + "EXCEPT", + "FULL", + "GROUPING" + ], + "MSSQL": [ + "END-EXEC", + "LOCATOR", + 
"XMLSERIALIZE", + "CONTAINSTABLE", + "CURRENT_ROLE", + "LEADING", + "UESCAPE", + "LAST", + "CONNECT", + "REGR_AVGX", + "TRANSLATE_REGEX", + "WITHIN", + "XMLEXISTS", + "ASC", + "BIT_LENGTH", + "XMLCAST", + "XMLCOMMENT", + "XMLFOREST", + "DESCRIPTOR", + "FREETEXT", + "LARGE", + "CONSTRAINT", + "FOREIGN", + "INITIALLY", + "LINENO", + "SMALLINT", + "VALUE", + "DATA", + "SECTION", + "TRANSLATE", + "UNKNOWN", + "BIT", + "CURRENT_TRANSFORM_GROUP_FOR_TYPE", + "GENERAL", + "LOCALTIME", + "NATIONAL", + "UNDER", + "ADMIN", + "GRANT", + "LIMIT", + "LOCAL", + "MERGE", + "NEW", + "WORK", + "NCHAR", + "NULLIF", + "ROWGUIDCOL", + "SEMANTICSIMILARITYDETAILSTABLE", + "SUBSTRING", + "WRITETEXT", + "FILLFACTOR", + "IF", + "NOCHECK", + "PERCENTILE_DISC", + "RECURSIVE", + "TREAT", + "READTEXT", + "WRITE", + "NCLOB", + "RETURNS", + "XMLBINARY", + "DECLARE", + "FETCH", + "NULL", + "REVOKE", + "STATE", + "SYMMETRIC", + "CLUSTERED", + "COVAR_POP", + "CURSOR", + "IN", + "NOT", + "SIZE", + "POSITION_REGEX", + "ARRAY", + "DISTRIBUTED", + "DROP", + "OPENROWSET", + "PREPARE", + "SECOND", + "UNNEST", + "DISCONNECT", + "THEN", + "EXCEPTION", + "FREE", + "OVERLAY", + "SCROLL", + "ROW", + "CURRENT", + "IGNORE", + "LESS", + "MODIFIES", + "OPENQUERY", + "RANGE", + "DEC", + "OFFSETS", + "SQL", + "AFTER", + "CLASS", + "CHAR_LENGTH", + "MONTH", + "REFERENCES", + "REGR_COUNT", + "ROLLBACK", + "ANY", + "CURRENT_SCHEMA", + "RESTRICT", + "TIME", + "XMLQUERY", + "XMLVALIDATE", + "WHERE", + "ASSERTION", + "ELSE", + "NO", + "PARAMETERS", + "PREORDER", + "USAGE", + "CURRENT_TIMESTAMP", + "DISK", + "SIMILAR", + "SYSTEM_USER", + "CHAR", + "CONNECTION", + "DELETE", + "KILL", + "XMLELEMENT", + "XMLTABLE", + "DISTINCT", + "EXCEPT", + "HOLDLOCK", + "OLD", + "PRESERVE", + "STATEMENT", + "CAST", + "COLLATION", + "COUNT", + "OCCURRENCES_REGEX", + "START", + "SUBMULTISET", + "SUM", + "CONDITION", + "DESC", + "LOWER", + "REGR_R2", + "SOME", + "SQLCODE", + "WINDOW", + "ACTION", + "INTERSECT", + "REGR_SXX", + "DAY", + 
"DESCRIBE", + "TERMINATE", + "WITHOUT", + "DEALLOCATE", + "DENY", + "REGR_SYY", + "USER", + "XMLPI", + "PUBLIC", + "TRY_CONVERT", + "ALTER", + "AS", + "OCTET_LENGTH", + "RESTORE", + "RULE", + "SPECIFICTYPE", + "CASE", + "EXTERNAL", + "KEY", + "XMLCONCAT", + "EQUALS", + "INPUT", + "LEVEL", + "ONLY", + "READ", + "SQLERROR", + "SEARCH", + "SENSITIVE", + "DEFERRED", + "INDICATOR", + "PAD", + "STATISTICS", + "XMLITERATE", + "EACH", + "LOCALTIMESTAMP", + "PREFIX", + "REFERENCING", + "ADA", + "COLLECT", + "DBCC", + "MAP", + "METHOD", + "OPENDATASOURCE", + "REF", + "ABSOLUTE", + "CUBE", + "DIAGNOSTICS", + "MULTISET", + "RETURN", + "WHEN", + "GOTO", + "BEFORE", + "CHARACTER_LENGTH", + "OR", + "PLAN", + "SPECIFIC", + "TO", + "LOAD", + "MAX", + "OPTION", + "SCOPE", + "STDDEV_SAMP", + "XMLDOCUMENT", + "CARDINALITY", + "TIMEZONE_MINUTE", + "TRIGGER", + "VIEW", + "IDENTITY", + "STDDEV_POP", + "ADD", + "ALLOCATE", + "DOMAIN", + "RESULT", + "ROWCOUNT", + "TSEQUAL", + "TABLE", + "XMLTEXT", + "ALIAS", + "ASENSITIVE", + "CURRENT_TIME", + "FILE", + "PRINT", + "ROLLUP", + "THAN", + "TRIM", + "IDENTITY_INSERT", + "ON", + "SESSION", + "UNIQUE", + "YEAR", + "VARCHAR", + "CHECKPOINT", + "DETERMINISTIC", + "EVERY", + "MATCH", + "NUMERIC", + "PARAMETER", + "DYNAMIC", + "HAVING", + "INITIALIZE", + "REGR_AVGY", + "CURRENT_DATE", + "EXISTS", + "PERCENTILE_CONT", + "ARE", + "END", + "GROUPING", + "SETUSER", + "SQLWARNING", + "BREAK", + "DECIMAL", + "LANGUAGE", + "PARTITION", + "SEQUENCE", + "SETS", + "TRANSACTION", + "struct{}{}", + "XMLAGG", + "BOOLEAN", + "BY", + "CLOSE", + "CONTAINS", + "INNER", + "VALUES", + "OPENXML", + "PRIOR", + "TABLESAMPLE", + "TRANSLATION", + "BETWEEN", + "CUME_DIST", + "ELEMENT", + "EXEC", + "HOST", + "LATERAL", + "USING", + "CONTINUE", + "INDEX", + "NONE", + "SELECT", + "REGR_SLOPE", + "WHILE", + "AND", + "DICTIONARY", + "MIN", + "PERCENT_RANK", + "UPDATE", + "UPDATETEXT", + "TRUNCATE", + "CASCADE", + "CONVERT", + "DEFERRABLE", + "FILTER", + "FLOAT", + "NAMES", + 
"VAR_POP", + "DESTRUCTOR", + "ISOLATION", + "OFF", + "OUTPUT", + "VAR_SAMP", + "CONSTRAINTS", + "SQLSTATE", + "CHARACTER", + "OUTER", + "TIMEZONE_HOUR", + "CALLED", + "CYCLE", + "INSENSITIVE", + "TEXTSIZE", + "ATOMIC", + "COMPLETION", + "PROCEDURE", + "REGR_INTERCEPT", + "BREADTH", + "FULLTEXTTABLE", + "DATABASE", + "DEREF", + "WITH", + "SPACE", + "VARIABLE", + "COALESCE", + "CURRENT_DEFAULT_TRANSFORM_GROUP", + "ESCAPE", + "FORTRAN", + "OVERLAPS", + "BINARY", + "CATALOG", + "CREATE", + "UNION", + "WAITFOR", + "LN", + "SCHEMA", + "SYSTEM", + "INTERSECTION", + "ROWS", + "CALL", + "UPPER", + "UNPIVOT", + "BLOB", + "FALSE", + "GET", + "INSERT", + "LEFT", + "PIVOT", + "TEMPORARY", + "USE", + "PRIMARY", + "SEMANTICKEYPHRASETABLE", + "MEMBER", + "COVAR_SAMP", + "CURRENT_PATH", + "SECURITYAUDIT", + "BOTH", + "OF", + "WIDTH_BUCKET", + "BROWSE", + "CORR", + "PRIVILEGES", + "READS", + "CURRENT_CATALOG", + "SHUTDOWN", + "FOR", + "FUSION", + "IDENTITYCOL", + "MODIFY", + "ALL", + "XMLPARSE", + "COMMIT", + "RIGHT", + "XMLATTRIBUTES", + "XMLNAMESPACES", + "TRAILING", + "TRAN", + "BULK", + "INTEGER", + "RAISERROR", + "FOUND", + "INCLUDE", + "VARYING", + "DATE", + "FROM", + "JOIN", + "STRUCTURE", + "AVG", + "DEFAULT", + "GO", + "NATURAL", + "ORDINALITY", + "SAVEPOINT", + "EXIT", + "GROUP", + "WHENEVER", + "AGGREGATE", + "DOUBLE", + "EXECUTE", + "POSTFIX", + "CURRENT_USER", + "DESTROY", + "FIRST", + "ROUTINE", + "TOP", + "REAL", + "MODULE", + "ASYMMETRIC", + "CROSS", + "FREETEXTTABLE", + "PERCENT", + "INT", + "OVER", + "PROC", + "SUBSTRING_REGEX", + "WITHINGROUP", + "FULL", + "FUNCTION", + "NORMALIZE", + "RELEASE", + "REPLICATION", + "SAVE", + "HOUR", + "LIKE_REGEX", + "NONCLUSTERED", + "SQLEXCEPTION", + "ZONE", + "AT", + "COLUMN", + "EXTRACT", + "MINUTE", + "MOD", + "CASCADED", + "COMPUTE", + "GLOBAL", + "OPEN", + "ORDER", + "PARTIAL", + "PRECISION", + "SQLCA", + "AUTHORIZATION", + "DEPTH", + "INOUT", + "INTO", + "OBJECT", + "OUT", + "STATIC", + "ITERATE", + "REGR_SXY", + "SET", + 
"CLOB", + "LIKE", + "DUMP", + "ERRLVL", + "POSITION", + "RECONFIGURE", + "BEGIN", + "CHECK", + "INTERVAL", + "NEXT", + "RELATIVE", + "BACKUP", + "CORRESPONDING", + "HOLD", + "IMMEDIATE", + "SEMANTICSIMILARITYTABLE", + "PASCAL", + "SESSION_USER", + "COLLATE", + "IS", + "OPERATION", + "REVERT", + "ROLE" + ], + "POSTGRES": [ + "USING", + "BINARY", + "DESC", + "EXCEPT", + "FOREIGN", + "NULL", + "REFERENCES", + "SYMMETRIC", + "struct{}{}", + "ANY", + "CROSS", + "DEFAULT", + "DISTINCT", + "FALSE", + "OLD", + "SESSION_USER", + "TABLE", + "CHECK", + "CURRENT_TIME", + "FOR", + "FREEZE", + "LIMIT", + "LOCALTIME", + "NOT", + "ARRAY", + "COLLATE", + "OFF", + "OFFSET", + "BETWEEN", + "CONSTRAINT", + "CURRENT_TIMESTAMP", + "SIMILAR", + "UNION", + "LEADING", + "LIKE", + "ON", + "UNIQUE", + "AUTHORIZATION", + "CURRENT_DATE", + "ILIKE", + "INNER", + "VERBOSE", + "CURRENT_USER", + "IN", + "NOTNULL", + "ORDER", + "THEN", + "TRAILING", + "WHEN", + "LOCALTIMESTAMP", + "NEW", + "OUTER", + "JOIN", + "ASC", + "DO", + "FROM", + "HAVING", + "INTERSECT", + "INTO", + "ELSE", + "ALL", + "ANALYSE", + "AS", + "ASYMMETRIC", + "CAST", + "CREATE", + "DEFERRABLE", + "FULL", + "ISNULL", + "OR", + "PRIMARY", + "RIGHT", + "END", + "GRANT", + "OVERLAPS", + "PLACING", + "USER", + "WHERE", + "ANALYZE", + "BOTH", + "GROUP", + "INITIALLY", + "IS", + "NATURAL", + "SOME", + "AND", + "CASE", + "COLUMN", + "CURRENT_ROLE", + "LEFT", + "ONLY", + "SELECT" + ], + "RS": [ + "TAG", + "AES128", + "OFFSET", + "OLD", + "PLACING", + "THEN", + "struct{}{}", + "WALLET", + "CREATE", + "DEFAULT", + "DEFRAG", + "LUNS", + "TEXT255", + "LOCALTIME", + "LZO", + "RESPECT", + "WHERE", + "ASC", + "DEFERRABLE", + "DELTA32K", + "AND", + "CURRENT_TIMESTAMP", + "OPEN", + "WITH", + "ALL", + "ENABLE", + "EXCEPT", + "NATURAL", + "REJECTLOG", + "CONSTRAINT", + "CREDENTIALS", + "MOSTLY8", + "PERCENT", + "REFERENCES", + "TO", + "USING", + "BLANKSASNULL", + "INTO", + "LANGUAGE", + "ANY", + "READRATIO", + "BACKUP", + "GZIP", + "UNION", + 
"CURRENT_USER", + "BETWEEN", + "BOTH", + "BZIP2", + "JOIN", + "TOP", + "TABLE", + "USER", + "COLLATE", + "NOT", + "ONLY", + "NEW", + "ORDER", + "SIMILAR", + "COLUMN", + "CROSS", + "IGNORE", + "WHEN", + "GLOBALDICT64K", + "GRANT", + "RIGHT", + "ON", + "SYSTEM", + "ELSE", + "ISNULL", + "MINUS", + "ILIKE", + "MOSTLY13", + "RESTORE", + "DO", + "FOR", + "FREEZE", + "INTERSECT", + "IS", + "UNIQUE", + "END", + "LEADING", + "OFF", + "RECOVER", + "AUTHORIZATION", + "BYTEDICT", + "CURRENT_TIME", + "ARRAY", + "DELTA", + "IN", + "PARALLEL", + "RAW", + "SELECT", + "TDES", + "CAST", + "EXPLICIT", + "OR", + "NULL", + "OVERLAPS", + "CHECK", + "EMPTYASNULL", + "GLOBALDICT256", + "INNER", + "RESORT", + "CASE", + "ENCODE", + "FALSE", + "SESSION_USER", + "TEXT32K", + "ALLOWOVERWRITE", + "AZ64", + "PERMISSIONS", + "LEFT", + "DESC", + "HAVING", + "INITIALLY", + "AES256", + "ENCRYPT ", + "LUN", + "SYSDATE", + "FROM", + "GROUP", + "PRIMARY", + "DEFLATE", + "FOREIGN", + "LOCALTIMESTAMP", + "NOTNULL", + "OUTER", + "AS", + "BINARY", + "CURRENT_DATE", + "TRUNCATECOLUMNS", + "VERBOSE", + "NULLS", + "SOME", + "ANALYSE", + "ANALYZE", + "FULL", + "OFFLINE", + "SNAPSHOT ", + "CURRENT_USER_ID", + "DISABLE", + "LIKE", + "LZOP", + "TRAILING", + "WITHOUT", + "ENCRYPTION", + "IDENTITY", + "LIMIT", + "PARTITION", + "TIMESTAMP", + "DISTINCT", + "MOSTLY32", + "OID" + ], + "S3_DATALAKE": [ + "INTERSECT", + "LOCAL", + "SET", + "TIMESTAMP", + "struct{}{}", + "CONSTRAINT", + "CREATE", + "NUMERIC", + "EXCHANGE", + "LEFT", + "COMMIT", + "CURSOR", + "DESCRIBE", + "LATERAL", + "RANGE", + "ORDER", + "PERCENT", + "BOTH", + "EXTRACT", + "FETCH", + "FOLLOWING", + "GROUP", + "NONE", + "TRIGGER", + "TRUNCATE", + "WHEN", + "BY", + "COLUMN", + "EXISTS", + "MACRO", + "THEN", + "CASE", + "CHAR", + "DECIMAL", + "PRIMARY", + "RLIKE", + "REFERENCES", + "RIGHT", + "TABLE", + "VIEWS", + "EXTENDED", + "OVER", + "TO", + "PARTITION", + "UNBOUNDED", + "USING", + "INT", + "NOT", + "SMALLINT", + "ROW", + "ROWS", + 
"CURRENT_TIMESTAMP", + "DOUBLE", + "FOR", + "IMPORT", + "LESS", + "OR", + "ARRAY", + "DISTINCT", + "ONLY", + "CONF", + "HAVING", + "JOIN", + "OF", + "OUT", + "SELECT", + "CAST", + "TABLESAMPLE", + "GROUPING", + "INNER", + "END", + "PRECEDING", + "OUTER", + "INTERVAL", + "REDUCE", + "ROLLBACK", + "UNION", + "WINDOW", + "FUNCTION", + "ON", + "START", + "BETWEEN", + "DATABASE", + "DAYOFWEEK", + "FOREIGN", + "GRANT", + "MORE", + "CURRENT_DATE", + "READS", + "USER", + "WHERE", + "ALL", + "FALSE", + "TRANSFORM", + "VARCHAR", + "WITH", + "ELSE", + "INSERT", + "INTEGER", + "INTO", + "IS", + "DATE", + "NULL", + "REGEXP", + "REVOKE", + "AUTHORIZATION", + "BIGINT", + "BOOLEAN", + "FLOOR", + "MAP", + "CUBE", + "FLOAT", + "FULL", + "ALTER", + "CURRENT", + "PARTIALSCAN", + "ROLLUP", + "UNIQUEJOIN", + "UTC_TIMESTAMP", + "CASHE", + "DELETE", + "EXTERNAL", + "LIKE", + "BINARY", + "CROSS", + "FROM", + "PRECISION", + "PRESERVE", + "TIME", + "UPDATE", + "AND", + "AS", + "DROP", + "IF", + "IN", + "PROCEDURE", + "VALUES" + ], + "SNOWFLAKE": [ + "CHECK", + "DATABASE", + "ORGANIZATION", + "SELECT", + "UPDATE", + "WHERE", + "AS", + "CONSTRAINT", + "CROSS", + "ELSE", + "IN", + "LOCALTIMESTAMP", + "OF", + "ON", + "ALTER", + "ROWS", + "WHEN", + "RIGHT", + "LEFT", + "NATURAL", + "GSCLUSTER", + "COLUMN", + "CURRENT", + "ROW", + "TRIGGER", + "TRY_CAST", + "WHENEVER", + "ACCOUNT", + "LOCALTIME", + "ORDER", + "GRANT", + "HAVING", + "INCREMENT", + "MINUS", + "OR", + "RLIKE", + "SAMPLE", + "VALUES", + "FULL", + "VIEW", + "DELETE", + "DROP", + "ISSUE", + "ANY", + "DISTINCT", + "EXISTS", + "INTO", + "NOT", + "NULL", + "TO", + "CURRENT_DATE", + "FOR", + "SCHEMA", + "TABLESAMPLE", + "struct{}{}", + "ALL", + "CURRENT_TIMESTAMP", + "FROM", + "LIKE", + "QUALIFY", + "CONNECTION", + "CASE", + "CURRENT_USER", + "INNER", + "LATERAL", + "THEN", + "UNION", + "AND", + "FALSE", + "USING", + "CREATE", + "BY", + "CONNECT", + "CURRENT_TIME", + "FOLLOWING", + "ILIKE", + "INSERT", + "JOIN", + "BETWEEN", + "START", + 
"TABLE", + "SOME", + "GROUP", + "INTERSECT", + "REGEXP", + "CAST", + "REVOKE", + "SET", + "UNIQUE", + "WITH", + "IS" + ] +} diff --git a/warehouse/transformer/internal/utils/reservedkeywords_test.go b/warehouse/transformer/internal/utils/reservedkeywords_test.go new file mode 100644 index 00000000000..7a0d535cd3e --- /dev/null +++ b/warehouse/transformer/internal/utils/reservedkeywords_test.go @@ -0,0 +1,15 @@ +package utils + +import ( + "testing" + + "github.com/stretchr/testify/require" + + whutils "github.com/rudderlabs/rudder-server/warehouse/utils" +) + +func TestReservedKeywordsMapping(t *testing.T) { + for _, destType := range whutils.WarehouseDestinations { + require.NotNilf(t, reservedKeywords[destType], "Reserved keywords not found for destination type %s", destType) + } +} diff --git a/warehouse/transformer/internal/utils/stringlikeobject.go b/warehouse/transformer/internal/utils/stringlikeobject.go new file mode 100644 index 00000000000..ac455ce4c8b --- /dev/null +++ b/warehouse/transformer/internal/utils/stringlikeobject.go @@ -0,0 +1,69 @@ +package utils + +import ( + "sort" + "strconv" + "unicode" +) + +func IsStringLikeObject(obj map[string]any) bool { + if len(obj) == 0 { + return false + } + + minKey, maxKey := int(^uint(0)>>1), -1 // Initialize minKey as max int, maxKey as -1 + + for key, value := range obj { + if !isNonNegativeInteger(key) { + return false + } + + strValue, ok := value.(string) + if !ok || len(strValue) != 1 { + return false + } + + numKey, err := strconv.Atoi(key) + if err != nil { + return false + } + + minKey = min(minKey, numKey) + maxKey = max(maxKey, numKey) + } + + for i := minKey; i <= maxKey; i++ { + if _, exists := obj[strconv.Itoa(i)]; !exists { + return false + } + } + return (minKey == 0 || minKey == 1) && maxKey-minKey+1 == len(obj) +} + +func isNonNegativeInteger(str string) bool { + if len(str) == 0 { + return false + } + for _, char := range str { + if !unicode.IsDigit(char) { + return false + } + } + return 
true +} + +func StringLikeObjectToString(obj map[string]any) any { + keys := make([]int, 0, len(obj)) + for key := range obj { + numKey, _ := strconv.Atoi(key) + keys = append(keys, numKey) + } + + sort.Ints(keys) + + result := "" + for _, key := range keys { + result += obj[strconv.Itoa(key)].(string) + } + return result +} diff --git a/warehouse/transformer/internal/utils/stringlikeobject_test.go b/warehouse/transformer/internal/utils/stringlikeobject_test.go new file mode 100644 index 00000000000..2989f87f99d --- /dev/null +++ b/warehouse/transformer/internal/utils/stringlikeobject_test.go @@ -0,0 +1,134 @@ +package utils + +import ( + "testing" + + "github.com/stretchr/testify/require" +) + +func TestIsStringLikeObject(t *testing.T) { + testCases := []struct { + name string + input map[string]any + expected bool + }{ + { + name: "empty map", + input: map[string]any{}, + expected: false, + }, + { + name: "valid string-like object with 0 and 1", + input: map[string]any{ + "0": "a", + "1": "b", + }, + expected: true, + }, + { + name: "valid string-like object with 1 and 2", + input: map[string]any{ + "1": "x", + "2": "y", + }, + expected: true, + }, + { + name: "empty key", + input: map[string]any{ + "": "", + "1": "x", + "2": "y", + }, + expected: false, + }, + { + name: "invalid key type", + input: map[string]any{ + "0": "a", + "one": "b", + }, + expected: false, + }, + { + name: "value is not a string", + input: map[string]any{ + "0": 123, + }, + expected: false, + }, + { + name: "value string length not 1", + input: map[string]any{ + "0": "ab", + }, + expected: false, + }, + { + name: "missing key (1) in sequence", + input: map[string]any{ + "0": "a", + "2": "b", + }, + expected: false, + }, + { + name: "non-consecutive keys (1 ia missing)", + input: map[string]any{ + "0": "a", + "2": "b", + "3": "c", + }, + expected: false, + }, + { + name: "valid string-like object with non-negative integer keys", + input: map[string]any{ + "0": "a", + "1": "b", + "2": "c", 
+ }, + expected: true, + }, + { + name: "valid string-like object with gaps (at 3)", + input: map[string]any{ + "1": "x", + "2": "y", + "4": "z", + }, + expected: false, + }, + } + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + require.Equal(t, tc.expected, IsStringLikeObject(tc.input)) + }) + } +} + +func TestStringLikeObjectToString(t *testing.T) { + testCases := []struct { + name string + input map[string]any + expected any + }{ + { + name: "valid string-like object with non-negative integer keys", + input: map[string]any{ + "0": "a", + "1": "b", + "2": "c", + }, + expected: "abc", + }, + } + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + require.True(t, IsStringLikeObject(tc.input)) + require.Equal(t, tc.expected, StringLikeObjectToString(tc.input)) + }) + } +} diff --git a/warehouse/transformer/internal/utils/utils.go b/warehouse/transformer/internal/utils/utils.go new file mode 100644 index 00000000000..b033a15017f --- /dev/null +++ b/warehouse/transformer/internal/utils/utils.go @@ -0,0 +1,136 @@ +package utils + +import ( + "fmt" + "regexp" + "strings" + "time" + + "github.com/samber/lo" + + "github.com/rudderlabs/rudder-server/warehouse/transformer/internal/datatype" + whutils "github.com/rudderlabs/rudder-server/warehouse/utils" +) + +var ( + rudderCreatedTables = sliceToMap([]string{"tracks", "pages", "screens", "aliases", "groups", "accounts"}) + rudderIsolatedTables = sliceToMap([]string{"users", "identifies"}) + sourceCategoriesToUseRecordID = sliceToMap([]string{"cloud", "singer-protocol"}) + identityEnabledWarehouses = sliceToMap([]string{whutils.SNOWFLAKE, whutils.BQ}) + + supportedJSONPathPrefixes = []string{"track.", "identify.", "page.", "screen.", "alias.", "group.", "extract."} + fullEventColumnTypeByDestType = map[string]string{ + whutils.SNOWFLAKE: datatype.TypeJSON, + whutils.RS: datatype.TypeText, + whutils.BQ: datatype.TypeString, + whutils.POSTGRES: datatype.TypeJSON, + whutils.MSSQL: 
datatype.TypeJSON, + whutils.AzureSynapse: datatype.TypeJSON, + whutils.CLICKHOUSE: datatype.TypeString, + whutils.S3Datalake: datatype.TypeString, + whutils.DELTALAKE: datatype.TypeString, + whutils.GCSDatalake: datatype.TypeString, + whutils.AzureDatalake: datatype.TypeString, + } + + timestampRegex = regexp.MustCompile( + `^([+-]?\d{4})((-)((0[1-9]|1[0-2])(-([12]\d|0[1-9]|3[01])))([T\s]((([01]\d|2[0-3])((:)[0-5]\d))([:]\d+)?)?(:[0-5]\d([.]\d+)?)?([zZ]|([+-])([01]\d|2[0-3]):?([0-5]\d)?)?)?)$`, + ) + timestampToParse = []string{time.RFC3339, time.RFC3339Nano, time.DateOnly, time.DateTime} + + minTimeInMs = time.Date(1, 1, 1, 0, 0, 0, 0, time.UTC) + maxTimeInMs = time.Date(9999, 12, 31, 23, 59, 59, 999000000, time.UTC) +) + +func sliceToMap(slice []string) map[string]struct{} { + return lo.SliceToMap(slice, func(item string) (string, struct{}) { + return item, struct{}{} + }) +} + +func IsDataLake(destType string) bool { + switch destType { + case whutils.S3Datalake, whutils.GCSDatalake, whutils.AzureDatalake: + return true + default: + return false + } +} + +func IsRudderSources(event map[string]any) bool { + return event["channel"] == "sources" || event["CHANNEL"] == "sources" +} + +func IsRudderCreatedTable(tableName string) bool { + _, ok := rudderCreatedTables[tableName] + return ok +} + +func IsRudderIsolatedTable(tableName string) bool { + _, ok := rudderIsolatedTables[tableName] + return ok +} + +func IsObject(val any) bool { + _, ok := val.(map[string]any) + return ok +} + +func IsIdentityEnabled(destType string) bool { + _, ok := identityEnabledWarehouses[destType] + return ok +} + +func CanUseRecordID(sourceCategory string) bool { + _, ok := sourceCategoriesToUseRecordID[sourceCategory] + return ok +} + +func HasJSONPathPrefix(jsonPath string) bool { + for _, prefix := range supportedJSONPathPrefixes { + if strings.HasPrefix(jsonPath, prefix) { + return true + } + } + return false +} + +func GetFullEventColumnTypeByDestType(destType string) string { + 
return fullEventColumnTypeByDestType[destType] +} + +func ValidTimestamp(input string) bool { + if !timestampRegex.MatchString(input) { + return false + } + + for _, format := range timestampToParse { + t, err := time.Parse(format, input) + if err != nil { + continue + } + return t.After(minTimeInMs) && t.Before(maxTimeInMs) + } + return false +} + +// ToString converts any value to a string representation. +// - If the value is nil, it returns an empty string. +// - If the value implements the fmt.Stringer interface, it returns the result of the String() method. +// - Otherwise, it returns a string representation using fmt.Sprintf. +func ToString(value interface{}) string { + if value == nil { + return "" + } + if str, ok := value.(fmt.Stringer); ok { + return str.String() + } + return fmt.Sprintf("%v", value) +} + +// IsBlank checks if the given value is considered "blank." +// - A value is considered blank if its string representation is an empty string. +// - The function first converts the value to its string representation using ToString and checks if its length is zero. 
+func IsBlank(value interface{}) bool { + return len(ToString(value)) == 0 +} diff --git a/warehouse/transformer/internal/utils/utils_test.go b/warehouse/transformer/internal/utils/utils_test.go new file mode 100644 index 00000000000..b4c247b1c8d --- /dev/null +++ b/warehouse/transformer/internal/utils/utils_test.go @@ -0,0 +1,149 @@ +package utils + +import ( + "fmt" + "testing" + + "github.com/stretchr/testify/require" + + whutils "github.com/rudderlabs/rudder-server/warehouse/utils" +) + +func TestIsRudderSources(t *testing.T) { + testCases := []struct { + name string + event map[string]any + want bool + }{ + {name: "channel is sources", event: map[string]any{"channel": "sources"}, want: true}, + {name: "CHANNEL is sources", event: map[string]any{"CHANNEL": "sources"}, want: true}, + {name: "channel is not sources", event: map[string]any{"channel": "not-sources"}, want: false}, + {name: "CHANNEL is not sources", event: map[string]any{"CHANNEL": "not-sources"}, want: false}, + {name: "empty event", event: map[string]any{}, want: false}, + {name: "nil event", event: nil, want: false}, + } + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + require.Equal(t, tc.want, IsRudderSources(tc.event)) + }) + } +} + +func TestIsObject(t *testing.T) { + testCases := []struct { + name string + val any + want bool + }{ + {name: "map", val: map[string]any{}, want: true}, + {name: "not map", val: "not map", want: false}, + {name: "nil", val: nil, want: false}, + } + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + require.Equal(t, tc.want, IsObject(tc.val)) + }) + } +} + +func TestFullEventColumnTypeByDestTypeMapping(t *testing.T) { + for _, destType := range whutils.WarehouseDestinations { + require.NotNilf(t, fullEventColumnTypeByDestType[destType], "Full event column type not found for destination type %s", destType) + } +} + +func TestValidTimestamp(t *testing.T) { + testCases := []struct { + name, timestamp string + expected 
bool + }{ + {name: "Timestamp without timezone", timestamp: "2021-06-01T00:00:00.000Z", expected: true}, + {name: "Timestamp with timezone", timestamp: "2021-06-01T00:00:00.000+00:00", expected: true}, + {name: "Invalid timestamp", timestamp: "invalid-timestamp", expected: false}, + {name: "Invalid RFC3339 timestamp (day-month-year)", timestamp: "23-05-2024T10:00:00Z", expected: false}, + {name: "Invalid RFC3339 timestamp (Invalid hour)", timestamp: "2024-05-23T25:00:00Z", expected: false}, + {name: "Empty timestamp", timestamp: "", expected: false}, + {name: "Timestamps out of bounds (Before minTimeInMs)", timestamp: "0001-01-01T00:00:00.000Z", expected: false}, + {name: "Timestamps out of bounds (After maxTimeInMs)", timestamp: "9999-12-31T23:59:59.999Z", expected: false}, + {name: "Time-only", timestamp: "05:23:59.244Z", expected: false}, + {name: "Date Time only", timestamp: "2021-06-01 00:00:00", expected: true}, + {name: "Date-only", timestamp: "2023-06-14", expected: true}, + {name: "Positive year and time input", timestamp: "+2023-06-14T05:23:59.244Z", expected: false}, + {name: "Negative year and time input", timestamp: "-2023-06-14T05:23:59.244Z", expected: false}, + {name: "Malicious string input should return false", timestamp: "%u002e%u002e%u2216%u002e%u002e%u2216%u002e%u002e%u2216%u002e%u002e%u2216%u002e%u002e%u2216%u002e%u002e%u2216%u002e%u002e%u2216%u002e%u002e%u2216%u002e%u002e%u2216%u002e%u002e%u2216%u002e%u002e%u2216%u002e%u002e%u2216%u002e%u002e%u2216%u002e%u002e%u2216%u002e%u002e%u2216%u002e%u002e%u2216%u002e%u002e%u2216%u002e%u002e%u2216%u002e%u002e%u2216%u002e%u002e%u2216Windows%u2216win%u002ein", expected: false}, + } + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + require.Equal(t, tc.expected, ValidTimestamp(tc.timestamp)) + }) + } +} + +type Person struct { + Name string + Age int +} + +func (p Person) String() string { + return fmt.Sprintf("Person(Name: %s, Age: %d)", p.Name, p.Age) +} + +func TestToString(t 
*testing.T) { + testCases := []struct { + input interface{} + expected string + }{ + {nil, ""}, // nil + {"", ""}, // empty string + {"Hello", "Hello"}, // non-empty string + {123, "123"}, // int + {123.45, "123.45"}, // float + {true, "true"}, // bool true + {false, "false"}, // bool false + {[]int{1, 2, 3}, "[1 2 3]"}, // slice + {map[string]int{"key": 1}, "map[key:1]"}, // map + {struct{}{}, "{}"}, // empty struct + {struct{ Field string }{"value"}, "{value}"}, // struct with field + {Person{Name: "Alice", Age: 30}, "Person(Name: Alice, Age: 30)"}, // struct with String method + } + + for _, tc := range testCases { + t.Run(fmt.Sprintf("ToString(%v)", tc.input), func(t *testing.T) { + require.Equal(t, tc.expected, ToString(tc.input)) + }) + } +} + +func TestIsBlank(t *testing.T) { + testCases := []struct { + name string + input interface{} + expected bool + }{ + {"NilValue", nil, true}, // nil + {"EmptyString", "", true}, // empty string + {"NonEmptyString", "Hello", false}, // non-empty string + {"IntZero", 0, false}, // integer zero + {"IntNonZero", 123, false}, // non-zero integer + {"FloatZero", 0.0, false}, // float zero + {"FloatNonZero", 123.45, false}, // non-zero float + {"BoolFalse", false, false}, // boolean false + {"BoolTrue", true, false}, // boolean true + {"EmptySlice", []int{}, false}, // empty slice + {"NonEmptySlice", []int{1, 2, 3}, false}, // non-empty slice + {"EmptyMap", map[string]int{}, false}, // empty map + {"NonEmptyMap", map[string]int{"key": 1}, false}, // non-empty map + {"EmptyStruct", struct{}{}, false}, // empty struct + {"StructWithField", struct{ Field string }{"value"}, false}, // non-empty struct + {"StructWithMethod", Person{Name: "Alice", Age: 30}, false}, // struct with String method + } + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + require.Equal(t, tc.expected, IsBlank(tc.input)) + }) + } +} diff --git a/warehouse/transformer/jsonpath.go b/warehouse/transformer/jsonpath.go new file mode 100644 
// isValidJSONPathKey reports whether key is present in jsonKeys and is
// registered there with exactly the given level. A key that is absent is never
// valid, even when level is 0.
func isValidJSONPathKey(key string, level int, jsonKeys map[string]int) bool {
	registeredLevel, found := jsonKeys[key]
	return found && registeredLevel == level
}

// isValidLegacyJSONPathKey performs the same lookup as isValidJSONPathKey, but
// only for events of type "track"; any other event type is rejected.
func isValidLegacyJSONPathKey(eventType, key string, level int, jsonKeys map[string]int) bool {
	if eventType != "track" {
		return false
	}
	return isValidJSONPathKey(key, level, jsonKeys)
}
2, "track_properties_age": 2}, + legacyKeysMap: map[string]int{"properties_name": 1, "properties_age": 1}, + }, + }, + { + name: "Valid JSON paths with identify prefix", + jsonPaths: []string{"identify.traits.address.city", "identify.traits.address.zip", "traits.address.city", "traits.address.zip"}, + expected: jsonPathInfo{ + keysMap: map[string]int{"identify_traits_address_city": 3, "identify_traits_address_zip": 3}, + legacyKeysMap: map[string]int{"traits_address_city": 2, "traits_address_zip": 2}, + }, + }, + { + name: "Whitespace and empty path", + jsonPaths: []string{" ", "track.properties.name", ""}, + expected: jsonPathInfo{ + keysMap: map[string]int{"track_properties_name": 2}, + legacyKeysMap: make(map[string]int), + }, + }, + { + name: "Unknown prefix JSON paths", + jsonPaths: []string{"unknown.prefix.eventType.name", "unknown.prefix.eventType.value"}, + expected: jsonPathInfo{ + keysMap: make(map[string]int), + legacyKeysMap: map[string]int{"unknown_prefix_eventType_name": 3, "unknown_prefix_eventType_value": 3}, + }, + }, + } + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + require.Equal(t, tc.expected, extractJSONPathInfo(tc.jsonPaths)) + }) + } +} + +func TestIsValidJSONPathKey(t *testing.T) { + testCases := []struct { + name, key string + level int + isValid bool + }{ + { + name: "Valid JSON path key with track prefix", + key: "track_properties_name", + level: 2, + isValid: true, + }, + { + name: "Valid JSON path key with identify prefix", + key: "identify_traits_address_city", + level: 3, + isValid: true, + }, + { + name: "Valid JSON path key with unknown prefix", + key: "unknown_prefix_eventType_name", + level: 3, + isValid: false, + }, + { + name: "Invalid JSON path key", + key: "invalid_key", + level: 0, + isValid: false, + }, + } + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + pathsInfo := extractJSONPathInfo( + []string{ + "track.properties.name", "properties.name", + 
// mergeRule is one side of an identity merge rule: the kind of identifier and
// the identifier itself. Both fields are untyped because they are taken
// directly from the event payload.
type mergeRule struct {
	Type  any
	Value any
}

// mergeRulesColumns holds the column names used for the type and value of the
// first and second merge property.
type mergeRulesColumns struct {
	Prop1Type  string
	Prop1Value string
	Prop2Type  string
	Prop2Value string
}
+func (t *transformer) handleMergeEvent(pi *processingInfo) ([]map[string]any, error) { + if !t.config.enableIDResolution.Load() { + return nil, nil + } + if !utils.IsIdentityEnabled(pi.event.Metadata.DestinationType) { + return nil, nil + } + + mergeProp1, mergeProp2, err := mergeProps(pi.event.Message, pi.event.Metadata) + if err != nil { + return nil, fmt.Errorf("merge properties: %w", err) + } + if mergeProp1.Type == nil || mergeProp1.Value == nil || utils.IsBlank(mergeProp1.Type) || utils.IsBlank(mergeProp1.Value) { + return nil, nil + } + + mergeTableName, err := mergeRuleTable(pi.event.Metadata.DestinationType, pi.itrOpts) + if err != nil { + return nil, fmt.Errorf("merge rules table: %w", err) + } + mergeColumns, err := mergeRuleColumns(pi.event.Metadata.DestinationType, pi.itrOpts) + if err != nil { + return nil, fmt.Errorf("merge columns: %w", err) + } + + data := map[string]any{ + mergeColumns.Prop1Type: fmt.Sprint(mergeProp1.Type), + mergeColumns.Prop1Value: fmt.Sprint(mergeProp1.Value), + } + mergeColumnTypes := map[string]any{ + mergeColumns.Prop1Type: datatype.TypeString, + mergeColumns.Prop1Value: datatype.TypeString, + } + metadata := map[string]any{ + "table": mergeTableName, + "columns": mergeColumnTypes, + "isMergeRule": true, + "receivedAt": pi.event.Metadata.ReceivedAt, + "mergePropOne": data[mergeColumns.Prop1Value], + } + + if mergeProp2 != nil && mergeProp2.Type != nil && mergeProp2.Value != nil && !utils.IsBlank(mergeProp2.Type) && !utils.IsBlank(mergeProp2.Value) { + data[mergeColumns.Prop2Type] = fmt.Sprint(mergeProp2.Type) + data[mergeColumns.Prop2Value] = fmt.Sprint(mergeProp2.Value) + mergeColumnTypes[mergeColumns.Prop2Type] = datatype.TypeString + mergeColumnTypes[mergeColumns.Prop2Value] = datatype.TypeString + + metadata["mergePropTwo"] = data[mergeColumns.Prop2Value] + } + + output := map[string]any{ + "data": data, + "metadata": metadata, + "userId": "", + } + return []map[string]any{output}, nil +} + +func mergeProps(message 
types.SingularEventT, metadata ptrans.Metadata) (*mergeRule, *mergeRule, error) { + switch metadata.EventType { + case "merge": + return mergePropsForMergeEventType(message) + case "alias": + return mergePropsForAliasEventType(message) + default: + return mergePropsForDefaultEventType(message) + } +} + +func mergePropsForMergeEventType(message types.SingularEventT) (*mergeRule, *mergeRule, error) { + mergeProperties := misc.MapLookup(message, "mergeProperties") + if mergeProperties == nil { + return nil, nil, errMergePropertiesMissing + } + mergePropertiesArr, ok := mergeProperties.([]any) + if !ok { + return nil, nil, errMergePropertiesNotArray + } + if len(mergePropertiesArr) != 2 { + return nil, nil, errMergePropertiesNotSufficient + } + + mergePropertiesMap0, ok := mergePropertiesArr[0].(map[string]any) + if !ok { + return nil, nil, errMergePropertyOneInvalid + } + mergePropertiesMap1, ok := mergePropertiesArr[1].(map[string]any) + if !ok { + return nil, nil, errMergePropertyTwoInvalid + } + + mergeProperties0Type := misc.MapLookup(mergePropertiesMap0, "type") + mergeProperties0Value := misc.MapLookup(mergePropertiesMap0, "value") + mergeProperties1Type := misc.MapLookup(mergePropertiesMap1, "type") + mergeProperties1Value := misc.MapLookup(mergePropertiesMap1, "value") + + if mergeProperties0Type == nil || mergeProperties0Value == nil || mergeProperties1Type == nil || mergeProperties1Value == nil { + return nil, nil, errMergePropertyNull + } + + mergeProp1 := &mergeRule{Type: mergeProperties0Type, Value: mergeProperties0Value} + mergeProp2 := &mergeRule{Type: mergeProperties1Type, Value: mergeProperties1Value} + return mergeProp1, mergeProp2, nil +} + +func mergePropsForAliasEventType(message types.SingularEventT) (*mergeRule, *mergeRule, error) { + userID := misc.MapLookup(message, "userId") + previousID := misc.MapLookup(message, "previousId") + + mergeProp1 := &mergeRule{Type: "user_id", Value: userID} + mergeProp2 := &mergeRule{Type: "user_id", Value: 
previousID} + return mergeProp1, mergeProp2, nil +} + +func mergePropsForDefaultEventType(message types.SingularEventT) (*mergeRule, *mergeRule, error) { + anonymousID := misc.MapLookup(message, "anonymousId") + userID := misc.MapLookup(message, "userId") + + var mergeProp1, mergeProp2 *mergeRule + if anonymousID == nil { + mergeProp1 = &mergeRule{Type: "user_id", Value: userID} + } else { + mergeProp1 = &mergeRule{Type: "anonymous_id", Value: anonymousID} + mergeProp2 = &mergeRule{Type: "user_id", Value: userID} + } + return mergeProp1, mergeProp2, nil +} + +func mergeRuleTable(destType string, options integrationsOptions) (string, error) { + return SafeTableName(destType, options, "rudder_identity_merge_rules") +} + +func mergeRuleColumns(destType string, options integrationsOptions) (*mergeRulesColumns, error) { + columnNames := []string{ + "merge_property_1_type", "merge_property_1_value", "merge_property_2_type", "merge_property_2_value", + } + var ( + columns [4]string + err error + ) + + for i, col := range columnNames { + if columns[i], err = SafeColumnName(destType, options, col); err != nil { + return nil, fmt.Errorf("safe column name for %s: %w", col, err) + } + } + + rulesColumns := &mergeRulesColumns{ + Prop1Type: columns[0], Prop1Value: columns[1], Prop2Type: columns[2], Prop2Value: columns[3], + } + return rulesColumns, nil +} diff --git a/warehouse/transformer/merge_test.go b/warehouse/transformer/merge_test.go new file mode 100644 index 00000000000..847c4db59bf --- /dev/null +++ b/warehouse/transformer/merge_test.go @@ -0,0 +1,1004 @@ +package transformer + +import ( + "net/http" + "testing" + + "github.com/ory/dockertest/v3" + "github.com/stretchr/testify/require" + + "github.com/rudderlabs/rudder-go-kit/config" + "github.com/rudderlabs/rudder-go-kit/logger" + "github.com/rudderlabs/rudder-go-kit/stats" + transformertest "github.com/rudderlabs/rudder-go-kit/testhelper/docker/resource/transformer" + + backendconfig 
"github.com/rudderlabs/rudder-server/backend-config" + ptrans "github.com/rudderlabs/rudder-server/processor/transformer" +) + +func TestMerge(t *testing.T) { + pool, err := dockertest.NewPool("") + require.NoError(t, err) + + transformerResource, err := transformertest.Setup(pool, t) + require.NoError(t, err) + + testsCases := []struct { + name string + configOverride map[string]any + eventPayload string + metadata ptrans.Metadata + destination backendconfig.DestinationT + expectedResponse ptrans.Response + }{ + { + name: "merge (Postgres)", + configOverride: map[string]any{ + "Warehouse.enableIDResolution": true, + }, + eventPayload: `{"type":"merge"}`, + metadata: ptrans.Metadata{ + EventType: "merge", + DestinationType: "POSTGRES", + ReceivedAt: "2021-09-01T00:00:00.000Z", + SourceID: "sourceID", + DestinationID: "destinationID", + SourceType: "sourceType", + }, + destination: backendconfig.DestinationT{ + Name: "POSTGRES", + Config: map[string]any{}, + DestinationDefinition: backendconfig.DestinationDefinitionT{ + Name: "POSTGRES", + }, + }, + expectedResponse: ptrans.Response{}, + }, + { + name: "merge (BQ)", + configOverride: map[string]any{ + "Warehouse.enableIDResolution": true, + }, + eventPayload: `{"type":"merge","mergeProperties":[{"type":"email","value":"alex@example.com"},{"type":"mobile","value":"+1-202-555-0146"}]}`, + metadata: ptrans.Metadata{ + EventType: "merge", + DestinationType: "BQ", + ReceivedAt: "2021-09-01T00:00:00.000Z", + SourceID: "sourceID", + DestinationID: "destinationID", + SourceType: "sourceType", + }, + destination: backendconfig.DestinationT{ + Name: "BQ", + Config: map[string]any{}, + DestinationDefinition: backendconfig.DestinationDefinitionT{ + Name: "BQ", + }, + }, + expectedResponse: ptrans.Response{ + Events: []ptrans.TransformerResponse{ + { + Output: map[string]any{ + "data": map[string]any{ + "merge_property_1_type": "email", + "merge_property_1_value": "alex@example.com", + "merge_property_2_type": "mobile", + 
"merge_property_2_value": "+1-202-555-0146", + }, + "metadata": map[string]any{ + "table": "rudder_identity_merge_rules", + "columns": map[string]any{"merge_property_1_type": "string", "merge_property_1_value": "string", "merge_property_2_type": "string", "merge_property_2_value": "string"}, + "isMergeRule": true, + "receivedAt": "2021-09-01T00:00:00.000Z", + "mergePropOne": "alex@example.com", + "mergePropTwo": "+1-202-555-0146", + }, + "userId": "", + }, + Metadata: ptrans.Metadata{ + EventType: "merge", + DestinationType: "BQ", + ReceivedAt: "2021-09-01T00:00:00.000Z", + SourceID: "sourceID", + DestinationID: "destinationID", + SourceType: "sourceType", + }, + StatusCode: http.StatusOK, + }, + }, + }, + }, + { + name: "merge (BQ) not enableIDResolution", + configOverride: map[string]any{ + "Warehouse.enableIDResolution": false, + }, + eventPayload: `{"type":"merge"}`, + metadata: ptrans.Metadata{ + EventType: "merge", + DestinationType: "BQ", + ReceivedAt: "2021-09-01T00:00:00.000Z", + SourceID: "sourceID", + DestinationID: "destinationID", + SourceType: "sourceType", + }, + destination: backendconfig.DestinationT{ + Name: "BQ", + Config: map[string]any{}, + DestinationDefinition: backendconfig.DestinationDefinitionT{ + Name: "BQ", + }, + }, + expectedResponse: ptrans.Response{}, + }, + { + name: "merge (BQ) missing mergeProperties", + configOverride: map[string]any{ + "Warehouse.enableIDResolution": true, + }, + eventPayload: `{"type":"merge"}`, + metadata: ptrans.Metadata{ + EventType: "merge", + DestinationType: "BQ", + ReceivedAt: "2021-09-01T00:00:00.000Z", + SourceID: "sourceID", + DestinationID: "destinationID", + SourceType: "sourceType", + }, + destination: backendconfig.DestinationT{ + Name: "BQ", + Config: map[string]any{}, + DestinationDefinition: backendconfig.DestinationDefinitionT{ + Name: "BQ", + }, + }, + expectedResponse: ptrans.Response{ + FailedEvents: []ptrans.TransformerResponse{ + { + Error: errMergePropertiesMissing.Error(), + StatusCode: 
errMergePropertiesMissing.StatusCode(), + Metadata: ptrans.Metadata{ + EventType: "merge", + DestinationType: "BQ", + ReceivedAt: "2021-09-01T00:00:00.000Z", + SourceID: "sourceID", + DestinationID: "destinationID", + SourceType: "sourceType", + }, + }, + }, + }, + }, + { + name: "merge (BQ) invalid mergeProperties", + configOverride: map[string]any{ + "Warehouse.enableIDResolution": true, + }, + eventPayload: `{"type":"merge", "mergeProperties": "invalid"}`, + metadata: ptrans.Metadata{ + EventType: "merge", + DestinationType: "BQ", + ReceivedAt: "2021-09-01T00:00:00.000Z", + SourceID: "sourceID", + DestinationID: "destinationID", + SourceType: "sourceType", + }, + destination: backendconfig.DestinationT{ + Name: "BQ", + Config: map[string]any{}, + DestinationDefinition: backendconfig.DestinationDefinitionT{ + Name: "BQ", + }, + }, + expectedResponse: ptrans.Response{ + FailedEvents: []ptrans.TransformerResponse{ + { + Error: errMergePropertiesNotArray.Error(), + StatusCode: errMergePropertiesNotArray.StatusCode(), + Metadata: ptrans.Metadata{ + EventType: "merge", + DestinationType: "BQ", + ReceivedAt: "2021-09-01T00:00:00.000Z", + SourceID: "sourceID", + DestinationID: "destinationID", + SourceType: "sourceType", + }, + }, + }, + }, + }, + { + name: "merge (BQ) empty mergeProperties", + configOverride: map[string]any{ + "Warehouse.enableIDResolution": true, + }, + eventPayload: `{"type":"merge", "mergeProperties": []}`, + metadata: ptrans.Metadata{ + EventType: "merge", + DestinationType: "BQ", + ReceivedAt: "2021-09-01T00:00:00.000Z", + SourceID: "sourceID", + DestinationID: "destinationID", + SourceType: "sourceType", + }, + destination: backendconfig.DestinationT{ + Name: "BQ", + Config: map[string]any{}, + DestinationDefinition: backendconfig.DestinationDefinitionT{ + Name: "BQ", + }, + }, + expectedResponse: ptrans.Response{ + FailedEvents: []ptrans.TransformerResponse{ + { + Error: errMergePropertiesNotSufficient.Error(), + StatusCode: 
errMergePropertiesNotSufficient.StatusCode(), + Metadata: ptrans.Metadata{ + EventType: "merge", + DestinationType: "BQ", + ReceivedAt: "2021-09-01T00:00:00.000Z", + SourceID: "sourceID", + DestinationID: "destinationID", + SourceType: "sourceType", + }, + }, + }, + }, + }, + { + name: "merge (BQ) single mergeProperties", + configOverride: map[string]any{ + "Warehouse.enableIDResolution": true, + }, + eventPayload: `{"type":"merge","mergeProperties":[{"type":"email","value":"alex@example.com"}]}`, + metadata: ptrans.Metadata{ + EventType: "merge", + DestinationType: "BQ", + ReceivedAt: "2021-09-01T00:00:00.000Z", + SourceID: "sourceID", + DestinationID: "destinationID", + SourceType: "sourceType", + }, + destination: backendconfig.DestinationT{ + Name: "BQ", + Config: map[string]any{}, + DestinationDefinition: backendconfig.DestinationDefinitionT{ + Name: "BQ", + }, + }, + expectedResponse: ptrans.Response{ + FailedEvents: []ptrans.TransformerResponse{ + { + Error: errMergePropertiesNotSufficient.Error(), + StatusCode: errMergePropertiesNotSufficient.StatusCode(), + Metadata: ptrans.Metadata{ + EventType: "merge", + DestinationType: "BQ", + ReceivedAt: "2021-09-01T00:00:00.000Z", + SourceID: "sourceID", + DestinationID: "destinationID", + SourceType: "sourceType", + }, + }, + }, + }, + }, + { + name: "merge (BQ) invalid merge property one", + configOverride: map[string]any{ + "Warehouse.enableIDResolution": true, + }, + eventPayload: `{"type":"merge","mergeProperties":["invalid",{"type":"email","value":"alex@example.com"}]}`, + metadata: ptrans.Metadata{ + EventType: "merge", + DestinationType: "BQ", + ReceivedAt: "2021-09-01T00:00:00.000Z", + SourceID: "sourceID", + DestinationID: "destinationID", + SourceType: "sourceType", + }, + destination: backendconfig.DestinationT{ + Name: "BQ", + Config: map[string]any{}, + DestinationDefinition: backendconfig.DestinationDefinitionT{ + Name: "BQ", + }, + }, + expectedResponse: ptrans.Response{ + FailedEvents: 
[]ptrans.TransformerResponse{ + { + Error: errMergePropertyOneInvalid.Error(), + StatusCode: errMergePropertyOneInvalid.StatusCode(), + Metadata: ptrans.Metadata{ + EventType: "merge", + DestinationType: "BQ", + ReceivedAt: "2021-09-01T00:00:00.000Z", + SourceID: "sourceID", + DestinationID: "destinationID", + SourceType: "sourceType", + }, + }, + }, + }, + }, + { + name: "merge (BQ) invalid merge property two", + configOverride: map[string]any{ + "Warehouse.enableIDResolution": true, + }, + eventPayload: `{"type":"merge","mergeProperties":[{"type":"email","value":"alex@example.com"},"invalid"]}`, + metadata: ptrans.Metadata{ + EventType: "merge", + DestinationType: "BQ", + ReceivedAt: "2021-09-01T00:00:00.000Z", + SourceID: "sourceID", + DestinationID: "destinationID", + SourceType: "sourceType", + }, + destination: backendconfig.DestinationT{ + Name: "BQ", + Config: map[string]any{}, + DestinationDefinition: backendconfig.DestinationDefinitionT{ + Name: "BQ", + }, + }, + expectedResponse: ptrans.Response{ + FailedEvents: []ptrans.TransformerResponse{ + { + Error: errMergePropertyTwoInvalid.Error(), + StatusCode: errMergePropertyTwoInvalid.StatusCode(), + Metadata: ptrans.Metadata{ + EventType: "merge", + DestinationType: "BQ", + ReceivedAt: "2021-09-01T00:00:00.000Z", + SourceID: "sourceID", + DestinationID: "destinationID", + SourceType: "sourceType", + }, + }, + }, + }, + }, + { + name: "merge (BQ) missing mergeProperty", + configOverride: map[string]any{ + "Warehouse.enableIDResolution": true, + }, + eventPayload: `{"type":"merge","mergeProperties":[{"type1":"email","value1":"alex@example.com"},{"type1":"mobile","value1":"+1-202-555-0146"}]}`, + metadata: ptrans.Metadata{ + EventType: "merge", + DestinationType: "BQ", + ReceivedAt: "2021-09-01T00:00:00.000Z", + SourceID: "sourceID", + DestinationID: "destinationID", + SourceType: "sourceType", + }, + destination: backendconfig.DestinationT{ + Name: "BQ", + Config: map[string]any{}, + DestinationDefinition: 
backendconfig.DestinationDefinitionT{ + Name: "BQ", + }, + }, + expectedResponse: ptrans.Response{ + FailedEvents: []ptrans.TransformerResponse{ + { + Error: errMergePropertyNull.Error(), + StatusCode: errMergePropertyNull.StatusCode(), + Metadata: ptrans.Metadata{ + EventType: "merge", + DestinationType: "BQ", + ReceivedAt: "2021-09-01T00:00:00.000Z", + SourceID: "sourceID", + DestinationID: "destinationID", + SourceType: "sourceType", + }, + }, + }, + }, + }, + { + name: "merge (SNOWFLAKE)", + configOverride: map[string]any{ + "Warehouse.enableIDResolution": true, + }, + eventPayload: `{"type":"merge","mergeProperties":[{"type":"email","value":"alex@example.com"},{"type":"mobile","value":"+1-202-555-0146"}]}`, + metadata: ptrans.Metadata{ + EventType: "merge", + DestinationType: "SNOWFLAKE", + ReceivedAt: "2021-09-01T00:00:00.000Z", + SourceID: "sourceID", + DestinationID: "destinationID", + SourceType: "sourceType", + }, + destination: backendconfig.DestinationT{ + Name: "SNOWFLAKE", + Config: map[string]any{}, + DestinationDefinition: backendconfig.DestinationDefinitionT{ + Name: "SNOWFLAKE", + }, + }, + expectedResponse: ptrans.Response{ + Events: []ptrans.TransformerResponse{ + { + Output: map[string]any{ + "data": map[string]any{ + "MERGE_PROPERTY_1_TYPE": "email", + "MERGE_PROPERTY_1_VALUE": "alex@example.com", + "MERGE_PROPERTY_2_TYPE": "mobile", + "MERGE_PROPERTY_2_VALUE": "+1-202-555-0146", + }, + "metadata": map[string]any{ + "table": "RUDDER_IDENTITY_MERGE_RULES", + "columns": map[string]any{"MERGE_PROPERTY_1_TYPE": "string", "MERGE_PROPERTY_1_VALUE": "string", "MERGE_PROPERTY_2_TYPE": "string", "MERGE_PROPERTY_2_VALUE": "string"}, + "isMergeRule": true, + "receivedAt": "2021-09-01T00:00:00.000Z", + "mergePropOne": "alex@example.com", + "mergePropTwo": "+1-202-555-0146", + }, + "userId": "", + }, + Metadata: ptrans.Metadata{ + EventType: "merge", + DestinationType: "SNOWFLAKE", + ReceivedAt: "2021-09-01T00:00:00.000Z", + SourceID: "sourceID", + 
DestinationID: "destinationID", + SourceType: "sourceType", + }, + StatusCode: http.StatusOK, + }, + }, + }, + }, + { + name: "alias (BQ)", + configOverride: map[string]any{ + "Warehouse.enableIDResolution": true, + }, + eventPayload: `{"type":"alias","messageId":"messageId","anonymousId":"anonymousId","userId":"userId","previousId":"previousId","sentAt":"2021-09-01T00:00:00.000Z","timestamp":"2021-09-01T00:00:00.000Z","receivedAt":"2021-09-01T00:00:00.000Z","originalTimestamp":"2021-09-01T00:00:00.000Z","channel":"web","request_ip":"5.6.7.8","traits":{"title":"Home | RudderStack","url":"http://www.rudderstack.com"},"context":{"traits":{"email":"rhedricks@example.com","logins":2},"ip":"1.2.3.4"}}`, + metadata: ptrans.Metadata{ + EventType: "alias", + DestinationType: "BQ", + ReceivedAt: "2021-09-01T00:00:00.000Z", + SourceID: "sourceID", + DestinationID: "destinationID", + SourceType: "sourceType", + }, + destination: backendconfig.DestinationT{ + Name: "BQ", + Config: map[string]any{}, + DestinationDefinition: backendconfig.DestinationDefinitionT{ + Name: "BQ", + }, + }, + expectedResponse: ptrans.Response{ + Events: []ptrans.TransformerResponse{ + { + Output: map[string]any{ + "data": map[string]any{ + "anonymous_id": "anonymousId", + "channel": "web", + "context_ip": "1.2.3.4", + "context_passed_ip": "1.2.3.4", + "context_request_ip": "5.6.7.8", + "context_traits_email": "rhedricks@example.com", + "context_traits_logins": float64(2), + "id": "messageId", + "original_timestamp": "2021-09-01T00:00:00.000Z", + "received_at": "2021-09-01T00:00:00.000Z", + "sent_at": "2021-09-01T00:00:00.000Z", + "timestamp": "2021-09-01T00:00:00.000Z", + "title": "Home | RudderStack", + "url": "http://www.rudderstack.com", + "user_id": "userId", + "previous_id": "previousId", + "context_destination_id": "destinationID", + "context_destination_type": "BQ", + "context_source_id": "sourceID", + "context_source_type": "sourceType", + }, + "metadata": map[string]any{ + "columns": 
map[string]any{ + "anonymous_id": "string", + "channel": "string", + "context_destination_id": "string", + "context_destination_type": "string", + "context_source_id": "string", + "context_source_type": "string", + "context_ip": "string", + "context_passed_ip": "string", + "context_request_ip": "string", + "context_traits_email": "string", + "context_traits_logins": "int", + "id": "string", + "original_timestamp": "datetime", + "received_at": "datetime", + "sent_at": "datetime", + "timestamp": "datetime", + "title": "string", + "url": "string", + "user_id": "string", + "previous_id": "string", + "uuid_ts": "datetime", + "loaded_at": "datetime", + }, + "receivedAt": "2021-09-01T00:00:00.000Z", + "table": "aliases", + }, + "userId": "", + }, + Metadata: ptrans.Metadata{ + EventType: "alias", + DestinationType: "BQ", + ReceivedAt: "2021-09-01T00:00:00.000Z", + SourceID: "sourceID", + DestinationID: "destinationID", + SourceType: "sourceType", + }, + StatusCode: http.StatusOK, + }, + { + Output: map[string]any{ + "data": map[string]any{ + "merge_property_1_type": "user_id", + "merge_property_1_value": "userId", + "merge_property_2_type": "user_id", + "merge_property_2_value": "previousId", + }, + "metadata": map[string]any{ + "table": "rudder_identity_merge_rules", + "columns": map[string]any{"merge_property_1_type": "string", "merge_property_1_value": "string", "merge_property_2_type": "string", "merge_property_2_value": "string"}, + "isMergeRule": true, + "receivedAt": "2021-09-01T00:00:00.000Z", + "mergePropOne": "userId", + "mergePropTwo": "previousId", + }, + "userId": "", + }, + Metadata: ptrans.Metadata{ + EventType: "alias", + DestinationType: "BQ", + ReceivedAt: "2021-09-01T00:00:00.000Z", + SourceID: "sourceID", + DestinationID: "destinationID", + SourceType: "sourceType", + }, + StatusCode: http.StatusOK, + }, + }, + }, + }, + { + name: "alias (BQ) no userId and previousId", + configOverride: map[string]any{ + "Warehouse.enableIDResolution": true, + }, + 
eventPayload: `{"type":"alias","messageId":"messageId","anonymousId":"anonymousId","sentAt":"2021-09-01T00:00:00.000Z","timestamp":"2021-09-01T00:00:00.000Z","receivedAt":"2021-09-01T00:00:00.000Z","originalTimestamp":"2021-09-01T00:00:00.000Z","channel":"web","request_ip":"5.6.7.8","traits":{"title":"Home | RudderStack","url":"http://www.rudderstack.com"},"context":{"traits":{"email":"rhedricks@example.com","logins":2},"ip":"1.2.3.4"}}`, + metadata: ptrans.Metadata{ + EventType: "alias", + DestinationType: "BQ", + ReceivedAt: "2021-09-01T00:00:00.000Z", + SourceID: "sourceID", + DestinationID: "destinationID", + SourceType: "sourceType", + }, + destination: backendconfig.DestinationT{ + Name: "BQ", + Config: map[string]any{}, + DestinationDefinition: backendconfig.DestinationDefinitionT{ + Name: "BQ", + }, + }, + expectedResponse: ptrans.Response{ + Events: []ptrans.TransformerResponse{ + { + Output: map[string]any{ + "data": map[string]any{ + "anonymous_id": "anonymousId", + "channel": "web", + "context_ip": "1.2.3.4", + "context_passed_ip": "1.2.3.4", + "context_request_ip": "5.6.7.8", + "context_traits_email": "rhedricks@example.com", + "context_traits_logins": float64(2), + "id": "messageId", + "original_timestamp": "2021-09-01T00:00:00.000Z", + "received_at": "2021-09-01T00:00:00.000Z", + "sent_at": "2021-09-01T00:00:00.000Z", + "timestamp": "2021-09-01T00:00:00.000Z", + "title": "Home | RudderStack", + "url": "http://www.rudderstack.com", + "context_destination_id": "destinationID", + "context_destination_type": "BQ", + "context_source_id": "sourceID", + "context_source_type": "sourceType", + }, + "metadata": map[string]any{ + "columns": map[string]any{ + "anonymous_id": "string", + "channel": "string", + "context_destination_id": "string", + "context_destination_type": "string", + "context_source_id": "string", + "context_source_type": "string", + "context_ip": "string", + "context_passed_ip": "string", + "context_request_ip": "string", + 
"context_traits_email": "string", + "context_traits_logins": "int", + "id": "string", + "original_timestamp": "datetime", + "received_at": "datetime", + "sent_at": "datetime", + "timestamp": "datetime", + "title": "string", + "url": "string", + "uuid_ts": "datetime", + "loaded_at": "datetime", + }, + "receivedAt": "2021-09-01T00:00:00.000Z", + "table": "aliases", + }, + "userId": "", + }, + Metadata: ptrans.Metadata{ + EventType: "alias", + DestinationType: "BQ", + ReceivedAt: "2021-09-01T00:00:00.000Z", + SourceID: "sourceID", + DestinationID: "destinationID", + SourceType: "sourceType", + }, + StatusCode: http.StatusOK, + }, + }, + }, + }, + { + name: "alias (BQ) empty userId and previousId", + configOverride: map[string]any{ + "Warehouse.enableIDResolution": true, + }, + eventPayload: `{"type":"alias","messageId":"messageId","anonymousId":"anonymousId","userId":"","previousId":"","sentAt":"2021-09-01T00:00:00.000Z","timestamp":"2021-09-01T00:00:00.000Z","receivedAt":"2021-09-01T00:00:00.000Z","originalTimestamp":"2021-09-01T00:00:00.000Z","channel":"web","request_ip":"5.6.7.8","traits":{"title":"Home | RudderStack","url":"http://www.rudderstack.com"},"context":{"traits":{"email":"rhedricks@example.com","logins":2},"ip":"1.2.3.4"}}`, + metadata: ptrans.Metadata{ + EventType: "alias", + DestinationType: "BQ", + ReceivedAt: "2021-09-01T00:00:00.000Z", + SourceID: "sourceID", + DestinationID: "destinationID", + SourceType: "sourceType", + }, + destination: backendconfig.DestinationT{ + Name: "BQ", + Config: map[string]any{}, + DestinationDefinition: backendconfig.DestinationDefinitionT{ + Name: "BQ", + }, + }, + expectedResponse: ptrans.Response{ + Events: []ptrans.TransformerResponse{ + { + Output: map[string]any{ + "data": map[string]any{ + "anonymous_id": "anonymousId", + "channel": "web", + "context_ip": "1.2.3.4", + "context_passed_ip": "1.2.3.4", + "context_request_ip": "5.6.7.8", + "context_traits_email": "rhedricks@example.com", + "context_traits_logins": 
float64(2), + "id": "messageId", + "original_timestamp": "2021-09-01T00:00:00.000Z", + "received_at": "2021-09-01T00:00:00.000Z", + "sent_at": "2021-09-01T00:00:00.000Z", + "timestamp": "2021-09-01T00:00:00.000Z", + "title": "Home | RudderStack", + "url": "http://www.rudderstack.com", + "context_destination_id": "destinationID", + "context_destination_type": "BQ", + "context_source_id": "sourceID", + "context_source_type": "sourceType", + }, + "metadata": map[string]any{ + "columns": map[string]any{ + "anonymous_id": "string", + "channel": "string", + "context_destination_id": "string", + "context_destination_type": "string", + "context_source_id": "string", + "context_source_type": "string", + "context_ip": "string", + "context_passed_ip": "string", + "context_request_ip": "string", + "context_traits_email": "string", + "context_traits_logins": "int", + "id": "string", + "original_timestamp": "datetime", + "received_at": "datetime", + "sent_at": "datetime", + "timestamp": "datetime", + "title": "string", + "url": "string", + "uuid_ts": "datetime", + "loaded_at": "datetime", + }, + "receivedAt": "2021-09-01T00:00:00.000Z", + "table": "aliases", + }, + "userId": "", + }, + Metadata: ptrans.Metadata{ + EventType: "alias", + DestinationType: "BQ", + ReceivedAt: "2021-09-01T00:00:00.000Z", + SourceID: "sourceID", + DestinationID: "destinationID", + SourceType: "sourceType", + }, + StatusCode: http.StatusOK, + }, + }, + }, + }, + { + name: "page (BQ)", + configOverride: map[string]any{ + "Warehouse.enableIDResolution": true, + }, + eventPayload: `{"type":"page","messageId":"messageId","anonymousId":"anonymousId","userId":"userId","sentAt":"2021-09-01T00:00:00.000Z","timestamp":"2021-09-01T00:00:00.000Z","receivedAt":"2021-09-01T00:00:00.000Z","originalTimestamp":"2021-09-01T00:00:00.000Z","channel":"web","request_ip":"5.6.7.8","properties":{"name":"Home","title":"Home | RudderStack","url":"http://www.rudderstack.com"},"context":{"traits":{"name":"Richard 
Hendricks","email":"rhedricks@example.com","logins":2},"ip":"1.2.3.4"}}`, + metadata: ptrans.Metadata{ + EventType: "page", + DestinationType: "BQ", + ReceivedAt: "2021-09-01T00:00:00.000Z", + SourceID: "sourceID", + DestinationID: "destinationID", + SourceType: "sourceType", + }, + destination: backendconfig.DestinationT{ + Name: "BQ", + Config: map[string]any{}, + DestinationDefinition: backendconfig.DestinationDefinitionT{ + Name: "BQ", + }, + }, + expectedResponse: ptrans.Response{ + Events: []ptrans.TransformerResponse{ + { + Output: map[string]any{ + "data": map[string]any{ + "name": "Home", + "anonymous_id": "anonymousId", + "channel": "web", + "context_ip": "1.2.3.4", + "context_passed_ip": "1.2.3.4", + "context_request_ip": "5.6.7.8", + "context_traits_email": "rhedricks@example.com", + "context_traits_logins": float64(2), + "context_traits_name": "Richard Hendricks", + "id": "messageId", + "original_timestamp": "2021-09-01T00:00:00.000Z", + "received_at": "2021-09-01T00:00:00.000Z", + "sent_at": "2021-09-01T00:00:00.000Z", + "timestamp": "2021-09-01T00:00:00.000Z", + "title": "Home | RudderStack", + "url": "http://www.rudderstack.com", + "user_id": "userId", + "context_destination_id": "destinationID", + "context_destination_type": "BQ", + "context_source_id": "sourceID", + "context_source_type": "sourceType", + }, + "metadata": map[string]any{ + "columns": map[string]any{ + "name": "string", + "anonymous_id": "string", + "channel": "string", + "context_destination_id": "string", + "context_destination_type": "string", + "context_source_id": "string", + "context_source_type": "string", + "context_ip": "string", + "context_passed_ip": "string", + "context_request_ip": "string", + "context_traits_email": "string", + "context_traits_logins": "int", + "context_traits_name": "string", + "id": "string", + "original_timestamp": "datetime", + "received_at": "datetime", + "sent_at": "datetime", + "timestamp": "datetime", + "title": "string", + "url": "string", + 
"user_id": "string", + "uuid_ts": "datetime", + "loaded_at": "datetime", + }, + "receivedAt": "2021-09-01T00:00:00.000Z", + "table": "pages", + }, + "userId": "", + }, + Metadata: ptrans.Metadata{ + EventType: "page", + DestinationType: "BQ", + ReceivedAt: "2021-09-01T00:00:00.000Z", + SourceID: "sourceID", + DestinationID: "destinationID", + SourceType: "sourceType", + }, + StatusCode: http.StatusOK, + }, + { + Output: map[string]any{ + "data": map[string]any{ + "merge_property_1_type": "anonymous_id", + "merge_property_1_value": "anonymousId", + "merge_property_2_type": "user_id", + "merge_property_2_value": "userId", + }, + "metadata": map[string]any{ + "table": "rudder_identity_merge_rules", + "columns": map[string]any{"merge_property_1_type": "string", "merge_property_1_value": "string", "merge_property_2_type": "string", "merge_property_2_value": "string"}, + "isMergeRule": true, + "receivedAt": "2021-09-01T00:00:00.000Z", + "mergePropOne": "anonymousId", + "mergePropTwo": "userId", + }, + "userId": "", + }, + Metadata: ptrans.Metadata{ + EventType: "page", + DestinationType: "BQ", + ReceivedAt: "2021-09-01T00:00:00.000Z", + SourceID: "sourceID", + DestinationID: "destinationID", + SourceType: "sourceType", + }, + StatusCode: http.StatusOK, + }, + }, + }, + }, + { + name: "page (BQ) no anonymousID", + configOverride: map[string]any{ + "Warehouse.enableIDResolution": true, + }, + eventPayload: `{"type":"page","messageId":"messageId","userId":"userId","sentAt":"2021-09-01T00:00:00.000Z","timestamp":"2021-09-01T00:00:00.000Z","receivedAt":"2021-09-01T00:00:00.000Z","originalTimestamp":"2021-09-01T00:00:00.000Z","channel":"web","request_ip":"5.6.7.8","properties":{"name":"Home","title":"Home | RudderStack","url":"http://www.rudderstack.com"},"context":{"traits":{"name":"Richard Hendricks","email":"rhedricks@example.com","logins":2},"ip":"1.2.3.4"}}`, + metadata: ptrans.Metadata{ + EventType: "page", + DestinationType: "BQ", + ReceivedAt: 
"2021-09-01T00:00:00.000Z", + SourceID: "sourceID", + DestinationID: "destinationID", + SourceType: "sourceType", + }, + destination: backendconfig.DestinationT{ + Name: "BQ", + Config: map[string]any{}, + DestinationDefinition: backendconfig.DestinationDefinitionT{ + Name: "BQ", + }, + }, + expectedResponse: ptrans.Response{ + Events: []ptrans.TransformerResponse{ + { + Output: map[string]any{ + "data": map[string]any{ + "name": "Home", + "channel": "web", + "context_ip": "1.2.3.4", + "context_passed_ip": "1.2.3.4", + "context_request_ip": "5.6.7.8", + "context_traits_email": "rhedricks@example.com", + "context_traits_logins": float64(2), + "context_traits_name": "Richard Hendricks", + "id": "messageId", + "original_timestamp": "2021-09-01T00:00:00.000Z", + "received_at": "2021-09-01T00:00:00.000Z", + "sent_at": "2021-09-01T00:00:00.000Z", + "timestamp": "2021-09-01T00:00:00.000Z", + "title": "Home | RudderStack", + "url": "http://www.rudderstack.com", + "user_id": "userId", + "context_destination_id": "destinationID", + "context_destination_type": "BQ", + "context_source_id": "sourceID", + "context_source_type": "sourceType", + }, + "metadata": map[string]any{ + "columns": map[string]any{ + "name": "string", + "channel": "string", + "context_destination_id": "string", + "context_destination_type": "string", + "context_source_id": "string", + "context_source_type": "string", + "context_ip": "string", + "context_passed_ip": "string", + "context_request_ip": "string", + "context_traits_email": "string", + "context_traits_logins": "int", + "context_traits_name": "string", + "id": "string", + "original_timestamp": "datetime", + "received_at": "datetime", + "sent_at": "datetime", + "timestamp": "datetime", + "title": "string", + "url": "string", + "user_id": "string", + "uuid_ts": "datetime", + "loaded_at": "datetime", + }, + "receivedAt": "2021-09-01T00:00:00.000Z", + "table": "pages", + }, + "userId": "", + }, + Metadata: ptrans.Metadata{ + EventType: "page", + 
DestinationType: "BQ", + ReceivedAt: "2021-09-01T00:00:00.000Z", + SourceID: "sourceID", + DestinationID: "destinationID", + SourceType: "sourceType", + }, + StatusCode: http.StatusOK, + }, + { + Output: map[string]any{ + "data": map[string]any{ + "merge_property_1_type": "user_id", + "merge_property_1_value": "userId", + }, + "metadata": map[string]any{ + "table": "rudder_identity_merge_rules", + "columns": map[string]any{"merge_property_1_type": "string", "merge_property_1_value": "string"}, + "isMergeRule": true, + "receivedAt": "2021-09-01T00:00:00.000Z", + "mergePropOne": "userId", + }, + "userId": "", + }, + Metadata: ptrans.Metadata{ + EventType: "page", + DestinationType: "BQ", + ReceivedAt: "2021-09-01T00:00:00.000Z", + SourceID: "sourceID", + DestinationID: "destinationID", + SourceType: "sourceType", + }, + StatusCode: http.StatusOK, + }, + }, + }, + }, + } + + for _, tc := range testsCases { + t.Run(tc.name, func(t *testing.T) { + c := config.New() + c.Set("DEST_TRANSFORM_URL", transformerResource.TransformerURL) + c.Set("USER_TRANSFORM_URL", transformerResource.TransformerURL) + + for k, v := range tc.configOverride { + c.Set(k, v) + } + + eventsInfos := []eventsInfo{ + { + payload: []byte(tc.eventPayload), + metadata: tc.metadata, + destination: tc.destination, + }, + } + destinationTransformer := ptrans.NewTransformer(c, logger.NOP, stats.Default) + warehouseTransformer := New(c, logger.NOP, stats.NOP) + + testEvents(t, eventsInfos, destinationTransformer, warehouseTransformer, tc.expectedResponse) + }) + } +} diff --git a/warehouse/transformer/options.go b/warehouse/transformer/options.go new file mode 100644 index 00000000000..30194d7a73d --- /dev/null +++ b/warehouse/transformer/options.go @@ -0,0 +1,82 @@ +package transformer + +import ( + "strings" + + ptrans "github.com/rudderlabs/rudder-server/processor/transformer" + "github.com/rudderlabs/rudder-server/utils/misc" + "github.com/rudderlabs/rudder-server/warehouse/transformer/internal/utils" 
+) + +type ( + integrationsOptions struct { + // skipReservedKeywordsEscaping when set to true, will skip the escaping of reserved keywords + skipReservedKeywordsEscaping bool + // useBlendoCasing when set to true, will use the casing as per Blendo's requirement + useBlendoCasing bool + // jsonPaths is a list of json paths that should be extracted from the event and stored as raw instead of normalizing them + jsonPaths []string + // skipTracksTable when set to true, will skip the tracks event + skipTracksTable bool + // skipUsersTable when set to true, will skip the users event + skipUsersTable bool + } + + destConfigOptions struct { + // skipTracksTable when set to true, will skip the tracks event + skipTracksTable bool + // skipUsersTable when set to true, will skip the users event + skipUsersTable bool + // storeFullEvent when set to true, will store the full event as rudder_event + storeFullEvent bool + // jsonPaths is a list of json paths that should be extracted from the event and stored as raw instead of normalizing them + jsonPaths []string + // underscoreDivideNumbers when set to false, if a column has a format like "_v_3_", it will be formatted to "_v3_" + // underscoreDivideNumbers when set to true, if a column has a format like "_v_3_", we keep it like that + // For older destinations, it will come as true and for new destinations this config will not be present which means we will treat it as false. + underscoreDivideNumbers bool + // allowUsersContextTraits when set to true, if context.traits.* is present, it will be added as context_traits_* and *, + // e.g., for context.traits.name, context_traits_name and name will be added to the user's table. + // allowUsersContextTraits when set to false, if context.traits.* is present, it will be added only as context_traits_* + // e.g., for context.traits.name, only context_traits_name will be added to the user's table. 
+ // For older destinations, it will come as true, and for new destinations this config will not be present, which means we will treat it as false. + allowUsersContextTraits bool + } +) + +func prepareIntegrationOptions(event ptrans.TransformerEvent) (opts integrationsOptions) { + src := misc.MapLookup(event.Message, "integrations", event.Metadata.DestinationType, "options") + if src == nil || !utils.IsObject(src) { + return + } + + srcMap := src.(map[string]any) + + setOption(srcMap, "skipReservedKeywordsEscaping", &opts.skipReservedKeywordsEscaping) + setOption(srcMap, "useBlendoCasing", &opts.useBlendoCasing) + setOption(srcMap, "skipTracksTable", &opts.skipTracksTable) + setOption(srcMap, "skipUsersTable", &opts.skipUsersTable) + setOption(srcMap, "jsonPaths", &opts.jsonPaths) + return +} + +func prepareDestinationOptions(destConfig map[string]any) (opts destConfigOptions) { + setOption(destConfig, "skipTracksTable", &opts.skipTracksTable) + setOption(destConfig, "skipUsersTable", &opts.skipUsersTable) + setOption(destConfig, "underscoreDivideNumbers", &opts.underscoreDivideNumbers) + setOption(destConfig, "allowUsersContextTraits", &opts.allowUsersContextTraits) + setOption(destConfig, "storeFullEvent", &opts.storeFullEvent) + + var jsonPaths string + setOption(destConfig, "jsonPaths", &jsonPaths) + if len(jsonPaths) > 0 { + opts.jsonPaths = strings.Split(jsonPaths, ",") + } + return +} + +func setOption[T any](src map[string]any, key string, dest *T) { + if val, ok := src[key].(T); ok { + *dest = val + } +} diff --git a/warehouse/transformer/options_test.go b/warehouse/transformer/options_test.go new file mode 100644 index 00000000000..fd00e004d1e --- /dev/null +++ b/warehouse/transformer/options_test.go @@ -0,0 +1,167 @@ +package transformer + +import ( + "testing" + + "github.com/stretchr/testify/require" + + ptrans "github.com/rudderlabs/rudder-server/processor/transformer" +) + +func TestIntegrationOptions(t *testing.T) { + t.Run("AllOptionsSet", func(t 
*testing.T) { + event := ptrans.TransformerEvent{ + Message: map[string]any{ + "integrations": map[string]any{ + "destinationType": map[string]any{ + "options": map[string]any{ + "skipReservedKeywordsEscaping": true, + "useBlendoCasing": false, + "skipTracksTable": true, + "skipUsersTable": false, + "jsonPaths": []string{"path1", "path2", "path3"}, + }, + }, + }, + }, + Metadata: ptrans.Metadata{ + DestinationType: "destinationType", + }, + } + + opts := prepareIntegrationOptions(event) + + require.True(t, opts.skipReservedKeywordsEscaping) + require.False(t, opts.useBlendoCasing) + require.True(t, opts.skipTracksTable) + require.False(t, opts.skipUsersTable) + require.Equal(t, []string{"path1", "path2", "path3"}, opts.jsonPaths) + }) + t.Run("MissingOptions", func(t *testing.T) { + event := ptrans.TransformerEvent{ + Message: map[string]any{ + "integrations": map[string]any{ + "destinationType": map[string]any{ + "options": map[string]any{}, + }, + }, + }, + Metadata: ptrans.Metadata{ + DestinationType: "destinationType", + }, + } + opts := prepareIntegrationOptions(event) + + require.False(t, opts.skipReservedKeywordsEscaping) + require.False(t, opts.useBlendoCasing) + require.False(t, opts.skipTracksTable) + require.False(t, opts.skipUsersTable) + require.Empty(t, opts.jsonPaths) + }) + t.Run("NilIntegrationOptions", func(t *testing.T) { + event := ptrans.TransformerEvent{ + Message: map[string]any{ + "integrations": map[string]any{ + "destinationType": map[string]any{ + "options": nil, + }, + }, + }, + Metadata: ptrans.Metadata{ + DestinationType: "destinationType", + }, + } + opts := prepareIntegrationOptions(event) + + require.False(t, opts.skipReservedKeywordsEscaping) + require.False(t, opts.useBlendoCasing) + require.False(t, opts.skipTracksTable) + require.False(t, opts.skipUsersTable) + require.Empty(t, opts.jsonPaths) + }) + t.Run("PartialOptionsSet", func(t *testing.T) { + event := ptrans.TransformerEvent{ + Message: map[string]any{ + "integrations": 
map[string]any{ + "destinationType": map[string]any{ + "options": map[string]any{ + "skipUsersTable": true, + "jsonPaths": []string{"path1"}, + }, + }, + }, + }, + Metadata: ptrans.Metadata{ + DestinationType: "destinationType", + }, + } + + opts := prepareIntegrationOptions(event) + + require.True(t, opts.skipUsersTable) + require.False(t, opts.skipReservedKeywordsEscaping) + require.False(t, opts.useBlendoCasing) + require.False(t, opts.skipTracksTable) + require.Equal(t, []string{"path1"}, opts.jsonPaths) + }) +} + +func TestDestinationOptions(t *testing.T) { + t.Run("AllOptionsSet", func(t *testing.T) { + destConfig := map[string]any{ + "skipTracksTable": true, + "skipUsersTable": false, + "underscoreDivideNumbers": true, + "allowUsersContextTraits": false, + "storeFullEvent": true, + "jsonPaths": "path1,path2", + } + + opts := prepareDestinationOptions(destConfig) + + require.True(t, opts.skipTracksTable) + require.False(t, opts.skipUsersTable) + require.True(t, opts.underscoreDivideNumbers) + require.False(t, opts.allowUsersContextTraits) + require.True(t, opts.storeFullEvent) + require.Equal(t, []string{"path1", "path2"}, opts.jsonPaths) + }) + t.Run("MissingOptions", func(t *testing.T) { + destConfig := map[string]any{} + + opts := prepareDestinationOptions(destConfig) + + require.False(t, opts.skipTracksTable) + require.False(t, opts.skipUsersTable) + require.False(t, opts.underscoreDivideNumbers) + require.False(t, opts.allowUsersContextTraits) + require.False(t, opts.storeFullEvent) + require.Empty(t, opts.jsonPaths) + }) + t.Run("NilDestinationConfig", func(t *testing.T) { + opts := prepareDestinationOptions(nil) + + require.False(t, opts.skipTracksTable) + require.False(t, opts.skipUsersTable) + require.False(t, opts.underscoreDivideNumbers) + require.False(t, opts.allowUsersContextTraits) + require.False(t, opts.storeFullEvent) + require.Empty(t, opts.jsonPaths) + }) + t.Run("PartialOptionsSet", func(t *testing.T) { + destConfig := map[string]any{ + 
"skipTracksTable": true, + "jsonPaths": "path1,path2", + "allowUsersContextTraits": true, + } + + opts := prepareDestinationOptions(destConfig) + + require.True(t, opts.skipTracksTable) + require.False(t, opts.skipUsersTable) + require.False(t, opts.underscoreDivideNumbers) + require.True(t, opts.allowUsersContextTraits) + require.False(t, opts.storeFullEvent) + require.Equal(t, []string{"path1", "path2"}, opts.jsonPaths) + }) +} diff --git a/warehouse/transformer/page.go b/warehouse/transformer/page.go new file mode 100644 index 00000000000..569c97eaadc --- /dev/null +++ b/warehouse/transformer/page.go @@ -0,0 +1,58 @@ +package transformer + +import ( + "fmt" + + "github.com/samber/lo" + + "github.com/rudderlabs/rudder-server/warehouse/transformer/internal/rules" +) + +func (t *transformer) handlePageEvent(pi *processingInfo) ([]map[string]any, error) { + pageEvent := make(map[string]any) + columnTypes := make(map[string]string) + + if err := t.setDataAndColumnTypeFromInput(pi, pi.event.Message["properties"], pageEvent, columnTypes, + "pages_properties_", 2, "", 0, + ); err != nil { + return nil, fmt.Errorf("setting data and column types from message: %w", err) + } + if err := t.setDataAndColumnTypeFromInput(pi, pi.event.Message["context"], pageEvent, columnTypes, + "pages_context_", 2, "context_", 0, + ); err != nil { + return nil, fmt.Errorf("setting data and column types from message: %w", err) + } + if err := t.setDataAndColumnTypeFromRules(pi, pageEvent, columnTypes, + rules.DefaultRules, lo.Assign(rules.DefaultFunctionalRules, rules.PageFunctionalRules), + ); err != nil { + return nil, fmt.Errorf("setting data and column types from rules: %w", err) + } + if err := storeRudderEvent(pi, pageEvent, columnTypes); err != nil { + return nil, fmt.Errorf("storing rudder event: %w", err) + } + + table, err := SafeTableName(pi.event.Metadata.DestinationType, pi.itrOpts, "pages") + if err != nil { + return nil, fmt.Errorf("safe table name: %w", err) + } + columns, err 
:= t.getColumns(pi.event.Metadata.DestinationType, pageEvent, columnTypes) + if err != nil { + return nil, fmt.Errorf("getting columns: %w", err) + } + + mergeEvents, err := t.handleMergeEvent(pi) + if err != nil { + return nil, fmt.Errorf("handling merge event: %w", err) + } + + pageOutput := map[string]any{ + "data": pageEvent, + "metadata": map[string]any{ + "table": table, + "columns": columns, + "receivedAt": pi.event.Metadata.ReceivedAt, + }, + "userId": "", + } + return append([]map[string]any{pageOutput}, mergeEvents...), nil +} diff --git a/warehouse/transformer/page_test.go b/warehouse/transformer/page_test.go new file mode 100644 index 00000000000..07f4f9d3259 --- /dev/null +++ b/warehouse/transformer/page_test.go @@ -0,0 +1,605 @@ +package transformer + +import ( + "net/http" + "testing" + + "github.com/ory/dockertest/v3" + "github.com/stretchr/testify/require" + + "github.com/rudderlabs/rudder-go-kit/config" + "github.com/rudderlabs/rudder-go-kit/logger" + "github.com/rudderlabs/rudder-go-kit/stats" + transformertest "github.com/rudderlabs/rudder-go-kit/testhelper/docker/resource/transformer" + + backendconfig "github.com/rudderlabs/rudder-server/backend-config" + ptrans "github.com/rudderlabs/rudder-server/processor/transformer" +) + +func TestPageEvents(t *testing.T) { + pool, err := dockertest.NewPool("") + require.NoError(t, err) + + transformerResource, err := transformertest.Setup(pool, t) + require.NoError(t, err) + + testsCases := []struct { + name string + configOverride map[string]any + eventPayload string + metadata ptrans.Metadata + destination backendconfig.DestinationT + expectedResponse ptrans.Response + }{ + { + name: "page (Postgres)", + eventPayload: 
`{"type":"page","messageId":"messageId","anonymousId":"anonymousId","userId":"userId","sentAt":"2021-09-01T00:00:00.000Z","timestamp":"2021-09-01T00:00:00.000Z","receivedAt":"2021-09-01T00:00:00.000Z","originalTimestamp":"2021-09-01T00:00:00.000Z","channel":"web","request_ip":"5.6.7.8","properties":{"name":"Home","title":"Home | RudderStack","url":"http://www.rudderstack.com"},"context":{"traits":{"name":"Richard Hendricks","email":"rhedricks@example.com","logins":2},"ip":"1.2.3.4"}}`, + metadata: ptrans.Metadata{ + EventType: "page", + DestinationType: "POSTGRES", + ReceivedAt: "2021-09-01T00:00:00.000Z", + SourceID: "sourceID", + DestinationID: "destinationID", + SourceType: "sourceType", + }, + destination: backendconfig.DestinationT{ + Name: "POSTGRES", + Config: map[string]any{}, + DestinationDefinition: backendconfig.DestinationDefinitionT{ + Name: "POSTGRES", + }, + }, + expectedResponse: ptrans.Response{ + Events: []ptrans.TransformerResponse{ + { + Output: map[string]any{ + "data": map[string]any{ + "name": "Home", + "anonymous_id": "anonymousId", + "channel": "web", + "context_ip": "1.2.3.4", + "context_passed_ip": "1.2.3.4", + "context_request_ip": "5.6.7.8", + "context_traits_email": "rhedricks@example.com", + "context_traits_logins": float64(2), + "context_traits_name": "Richard Hendricks", + "id": "messageId", + "original_timestamp": "2021-09-01T00:00:00.000Z", + "received_at": "2021-09-01T00:00:00.000Z", + "sent_at": "2021-09-01T00:00:00.000Z", + "timestamp": "2021-09-01T00:00:00.000Z", + "title": "Home | RudderStack", + "url": "http://www.rudderstack.com", + "user_id": "userId", + "context_destination_id": "destinationID", + "context_destination_type": "POSTGRES", + "context_source_id": "sourceID", + "context_source_type": "sourceType", + }, + "metadata": map[string]any{ + "columns": map[string]any{ + "name": "string", + "anonymous_id": "string", + "channel": "string", + "context_destination_id": "string", + "context_destination_type": "string", + 
"context_source_id": "string", + "context_source_type": "string", + "context_ip": "string", + "context_passed_ip": "string", + "context_request_ip": "string", + "context_traits_email": "string", + "context_traits_logins": "int", + "context_traits_name": "string", + "id": "string", + "original_timestamp": "datetime", + "received_at": "datetime", + "sent_at": "datetime", + "timestamp": "datetime", + "title": "string", + "url": "string", + "user_id": "string", + "uuid_ts": "datetime", + }, + "receivedAt": "2021-09-01T00:00:00.000Z", + "table": "pages", + }, + "userId": "", + }, + Metadata: ptrans.Metadata{ + EventType: "page", + DestinationType: "POSTGRES", + ReceivedAt: "2021-09-01T00:00:00.000Z", + SourceID: "sourceID", + DestinationID: "destinationID", + SourceType: "sourceType", + }, + StatusCode: http.StatusOK, + }, + }, + }, + }, + { + name: "page (Postgres) without properties", + eventPayload: `{"type":"page","name":"Home","messageId":"messageId","anonymousId":"anonymousId","userId":"userId","sentAt":"2021-09-01T00:00:00.000Z","timestamp":"2021-09-01T00:00:00.000Z","receivedAt":"2021-09-01T00:00:00.000Z","originalTimestamp":"2021-09-01T00:00:00.000Z","channel":"web","request_ip":"5.6.7.8","context":{"traits":{"name":"Richard Hendricks","email":"rhedricks@example.com","logins":2},"ip":"1.2.3.4"}}`, + metadata: ptrans.Metadata{ + EventType: "page", + DestinationType: "POSTGRES", + ReceivedAt: "2021-09-01T00:00:00.000Z", + SourceID: "sourceID", + DestinationID: "destinationID", + SourceType: "sourceType", + }, + destination: backendconfig.DestinationT{ + Name: "POSTGRES", + Config: map[string]any{}, + DestinationDefinition: backendconfig.DestinationDefinitionT{ + Name: "POSTGRES", + }, + }, + expectedResponse: ptrans.Response{ + Events: []ptrans.TransformerResponse{ + { + Output: map[string]any{ + "data": map[string]any{ + "name": "Home", + "anonymous_id": "anonymousId", + "channel": "web", + "context_ip": "1.2.3.4", + "context_passed_ip": "1.2.3.4", + 
"context_request_ip": "5.6.7.8", + "context_traits_email": "rhedricks@example.com", + "context_traits_logins": float64(2), + "context_traits_name": "Richard Hendricks", + "id": "messageId", + "original_timestamp": "2021-09-01T00:00:00.000Z", + "received_at": "2021-09-01T00:00:00.000Z", + "sent_at": "2021-09-01T00:00:00.000Z", + "timestamp": "2021-09-01T00:00:00.000Z", + "user_id": "userId", + "context_destination_id": "destinationID", + "context_destination_type": "POSTGRES", + "context_source_id": "sourceID", + "context_source_type": "sourceType", + }, + "metadata": map[string]any{ + "columns": map[string]any{ + "name": "string", + "anonymous_id": "string", + "channel": "string", + "context_ip": "string", + "context_passed_ip": "string", + "context_request_ip": "string", + "context_traits_email": "string", + "context_traits_logins": "int", + "context_traits_name": "string", + "id": "string", + "original_timestamp": "datetime", + "received_at": "datetime", + "sent_at": "datetime", + "timestamp": "datetime", + "user_id": "string", + "uuid_ts": "datetime", + "context_destination_id": "string", + "context_destination_type": "string", + "context_source_id": "string", + "context_source_type": "string", + }, + "receivedAt": "2021-09-01T00:00:00.000Z", + "table": "pages", + }, + "userId": "", + }, + Metadata: ptrans.Metadata{ + EventType: "page", + DestinationType: "POSTGRES", + ReceivedAt: "2021-09-01T00:00:00.000Z", + SourceID: "sourceID", + DestinationID: "destinationID", + SourceType: "sourceType", + }, + StatusCode: http.StatusOK, + }, + }, + }, + }, + { + name: "page (Postgres) without context", + eventPayload: `{"type":"page","messageId":"messageId","anonymousId":"anonymousId","userId":"userId","sentAt":"2021-09-01T00:00:00.000Z","timestamp":"2021-09-01T00:00:00.000Z","receivedAt":"2021-09-01T00:00:00.000Z","originalTimestamp":"2021-09-01T00:00:00.000Z","channel":"web","request_ip":"5.6.7.8","properties":{"name":"Home","title":"Home | 
RudderStack","url":"http://www.rudderstack.com"}}`, + metadata: ptrans.Metadata{ + EventType: "page", + DestinationType: "POSTGRES", + ReceivedAt: "2021-09-01T00:00:00.000Z", + SourceID: "sourceID", + DestinationID: "destinationID", + SourceType: "sourceType", + }, + destination: backendconfig.DestinationT{ + Name: "POSTGRES", + Config: map[string]any{}, + DestinationDefinition: backendconfig.DestinationDefinitionT{ + Name: "POSTGRES", + }, + }, + expectedResponse: ptrans.Response{ + Events: []ptrans.TransformerResponse{ + { + Output: map[string]any{ + "data": map[string]any{ + "name": "Home", + "anonymous_id": "anonymousId", + "channel": "web", + "context_ip": "5.6.7.8", + "context_request_ip": "5.6.7.8", + "id": "messageId", + "original_timestamp": "2021-09-01T00:00:00.000Z", + "received_at": "2021-09-01T00:00:00.000Z", + "sent_at": "2021-09-01T00:00:00.000Z", + "timestamp": "2021-09-01T00:00:00.000Z", + "title": "Home | RudderStack", + "url": "http://www.rudderstack.com", + "user_id": "userId", + "context_destination_id": "destinationID", + "context_destination_type": "POSTGRES", + "context_source_id": "sourceID", + "context_source_type": "sourceType", + }, + "metadata": map[string]any{ + "columns": map[string]any{ + "name": "string", + "anonymous_id": "string", + "channel": "string", + "context_ip": "string", + "context_request_ip": "string", + "id": "string", + "original_timestamp": "datetime", + "received_at": "datetime", + "sent_at": "datetime", + "timestamp": "datetime", + "title": "string", + "url": "string", + "user_id": "string", + "uuid_ts": "datetime", + "context_destination_id": "string", + "context_destination_type": "string", + "context_source_id": "string", + "context_source_type": "string", + }, + "receivedAt": "2021-09-01T00:00:00.000Z", + "table": "pages", + }, + "userId": "", + }, + Metadata: ptrans.Metadata{ + EventType: "page", + DestinationType: "POSTGRES", + ReceivedAt: "2021-09-01T00:00:00.000Z", + SourceID: "sourceID", + DestinationID: 
"destinationID", + SourceType: "sourceType", + }, + StatusCode: http.StatusOK, + }, + }, + }, + }, + { + name: "page (Postgres) store rudder event", + eventPayload: `{"type":"page","messageId":"messageId","anonymousId":"anonymousId","userId":"userId","sentAt":"2021-09-01T00:00:00.000Z","timestamp":"2021-09-01T00:00:00.000Z","receivedAt":"2021-09-01T00:00:00.000Z","originalTimestamp":"2021-09-01T00:00:00.000Z","channel":"web","request_ip":"5.6.7.8","properties":{"name":"Home","title":"Home | RudderStack","url":"http://www.rudderstack.com"},"context":{"traits":{"name":"Richard Hendricks","email":"rhedricks@example.com","logins":2},"ip":"1.2.3.4"}}`, + metadata: ptrans.Metadata{ + EventType: "page", + DestinationType: "POSTGRES", + ReceivedAt: "2021-09-01T00:00:00.000Z", + SourceID: "sourceID", + DestinationID: "destinationID", + SourceType: "sourceType", + }, + destination: backendconfig.DestinationT{ + Name: "POSTGRES", + Config: map[string]any{ + "storeFullEvent": true, + }, + DestinationDefinition: backendconfig.DestinationDefinitionT{ + Name: "POSTGRES", + }, + }, + expectedResponse: ptrans.Response{ + Events: []ptrans.TransformerResponse{ + { + Output: map[string]any{ + "data": map[string]any{ + "name": "Home", + "anonymous_id": "anonymousId", + "channel": "web", + "context_ip": "1.2.3.4", + "context_passed_ip": "1.2.3.4", + "context_request_ip": "5.6.7.8", + "context_traits_email": "rhedricks@example.com", + "context_traits_logins": float64(2), + "context_traits_name": "Richard Hendricks", + "id": "messageId", + "original_timestamp": "2021-09-01T00:00:00.000Z", + "received_at": "2021-09-01T00:00:00.000Z", + "sent_at": "2021-09-01T00:00:00.000Z", + "timestamp": "2021-09-01T00:00:00.000Z", + "title": "Home | RudderStack", + "url": "http://www.rudderstack.com", + "user_id": "userId", + "context_destination_id": "destinationID", + "context_destination_type": "POSTGRES", + "context_source_id": "sourceID", + "context_source_type": "sourceType", + "rudder_event": 
"{\"anonymousId\":\"anonymousId\",\"channel\":\"web\",\"context\":{\"destinationId\":\"destinationID\",\"destinationType\":\"POSTGRES\",\"ip\":\"1.2.3.4\",\"sourceId\":\"sourceID\",\"sourceType\":\"sourceType\",\"traits\":{\"email\":\"rhedricks@example.com\",\"logins\":2,\"name\":\"Richard Hendricks\"}},\"messageId\":\"messageId\",\"originalTimestamp\":\"2021-09-01T00:00:00.000Z\",\"properties\":{\"name\":\"Home\",\"title\":\"Home | RudderStack\",\"url\":\"http://www.rudderstack.com\"},\"receivedAt\":\"2021-09-01T00:00:00.000Z\",\"request_ip\":\"5.6.7.8\",\"sentAt\":\"2021-09-01T00:00:00.000Z\",\"timestamp\":\"2021-09-01T00:00:00.000Z\",\"type\":\"page\",\"userId\":\"userId\"}", + }, + "metadata": map[string]any{ + "columns": map[string]any{ + "name": "string", + "anonymous_id": "string", + "channel": "string", + "context_ip": "string", + "context_passed_ip": "string", + "context_request_ip": "string", + "context_traits_email": "string", + "context_traits_logins": "int", + "context_traits_name": "string", + "id": "string", + "original_timestamp": "datetime", + "received_at": "datetime", + "sent_at": "datetime", + "timestamp": "datetime", + "title": "string", + "url": "string", + "user_id": "string", + "uuid_ts": "datetime", + "context_destination_id": "string", + "context_destination_type": "string", + "context_source_id": "string", + "context_source_type": "string", + "rudder_event": "json", + }, + "receivedAt": "2021-09-01T00:00:00.000Z", + "table": "pages", + }, + "userId": "", + }, + Metadata: ptrans.Metadata{ + EventType: "page", + DestinationType: "POSTGRES", + ReceivedAt: "2021-09-01T00:00:00.000Z", + SourceID: "sourceID", + DestinationID: "destinationID", + SourceType: "sourceType", + }, + StatusCode: http.StatusOK, + }, + }, + }, + }, + { + name: "page (Postgres) partial rules", + eventPayload: 
`{"type":"page","messageId":"messageId","userId":"userId","sentAt":"2021-09-01T00:00:00.000Z","timestamp":"2021-09-01T00:00:00.000Z","receivedAt":"2021-09-01T00:00:00.000Z","originalTimestamp":"2021-09-01T00:00:00.000Z","properties":{"name":"Home","title":"Home | RudderStack","url":"http://www.rudderstack.com"},"context":{"traits":{"name":"Richard Hendricks","email":"rhedricks@example.com","logins":2},"ip":"1.2.3.4"}}`, + metadata: ptrans.Metadata{ + EventType: "page", + DestinationType: "POSTGRES", + ReceivedAt: "2021-09-01T00:00:00.000Z", + SourceID: "sourceID", + DestinationID: "destinationID", + SourceType: "sourceType", + }, + destination: backendconfig.DestinationT{ + Name: "POSTGRES", + Config: map[string]any{}, + DestinationDefinition: backendconfig.DestinationDefinitionT{ + Name: "POSTGRES", + }, + }, + expectedResponse: ptrans.Response{ + Events: []ptrans.TransformerResponse{ + { + Output: map[string]any{ + "data": map[string]any{ + "name": "Home", + "context_ip": "1.2.3.4", + "context_passed_ip": "1.2.3.4", + "context_traits_email": "rhedricks@example.com", + "context_traits_logins": float64(2), + "context_traits_name": "Richard Hendricks", + "id": "messageId", + "original_timestamp": "2021-09-01T00:00:00.000Z", + "received_at": "2021-09-01T00:00:00.000Z", + "sent_at": "2021-09-01T00:00:00.000Z", + "timestamp": "2021-09-01T00:00:00.000Z", + "title": "Home | RudderStack", + "url": "http://www.rudderstack.com", + "user_id": "userId", + "context_destination_id": "destinationID", + "context_destination_type": "POSTGRES", + "context_source_id": "sourceID", + "context_source_type": "sourceType", + }, + "metadata": map[string]any{ + "columns": map[string]any{ + "name": "string", + "context_ip": "string", + "context_passed_ip": "string", + "context_traits_email": "string", + "context_traits_logins": "int", + "context_traits_name": "string", + "id": "string", + "original_timestamp": "datetime", + "received_at": "datetime", + "sent_at": "datetime", + "timestamp": 
"datetime", + "title": "string", + "url": "string", + "user_id": "string", + "uuid_ts": "datetime", + "context_destination_id": "string", + "context_destination_type": "string", + "context_source_id": "string", + "context_source_type": "string", + }, + "receivedAt": "2021-09-01T00:00:00.000Z", + "table": "pages", + }, + "userId": "", + }, + Metadata: ptrans.Metadata{ + EventType: "page", + DestinationType: "POSTGRES", + ReceivedAt: "2021-09-01T00:00:00.000Z", + SourceID: "sourceID", + DestinationID: "destinationID", + SourceType: "sourceType", + }, + StatusCode: http.StatusOK, + }, + }, + }, + }, + { + name: "page (BQ) merge event", + configOverride: map[string]any{ + "Warehouse.enableIDResolution": true, + }, + eventPayload: `{"type":"page","messageId":"messageId","anonymousId":"anonymousId","userId":"userId","sentAt":"2021-09-01T00:00:00.000Z","timestamp":"2021-09-01T00:00:00.000Z","receivedAt":"2021-09-01T00:00:00.000Z","originalTimestamp":"2021-09-01T00:00:00.000Z","channel":"web","request_ip":"5.6.7.8","properties":{"name":"Home","title":"Home | RudderStack","url":"http://www.rudderstack.com"},"context":{"traits":{"name":"Richard Hendricks","email":"rhedricks@example.com","logins":2},"ip":"1.2.3.4"}}`, + metadata: ptrans.Metadata{ + EventType: "page", + DestinationType: "BQ", + ReceivedAt: "2021-09-01T00:00:00.000Z", + SourceID: "sourceID", + DestinationID: "destinationID", + SourceType: "sourceType", + }, + destination: backendconfig.DestinationT{ + Name: "BQ", + Config: map[string]any{}, + DestinationDefinition: backendconfig.DestinationDefinitionT{ + Name: "BQ", + }, + }, + expectedResponse: ptrans.Response{ + Events: []ptrans.TransformerResponse{ + { + Output: map[string]any{ + "data": map[string]any{ + "name": "Home", + "anonymous_id": "anonymousId", + "channel": "web", + "context_ip": "1.2.3.4", + "context_passed_ip": "1.2.3.4", + "context_request_ip": "5.6.7.8", + "context_traits_email": "rhedricks@example.com", + "context_traits_logins": float64(2), + 
"context_traits_name": "Richard Hendricks", + "id": "messageId", + "original_timestamp": "2021-09-01T00:00:00.000Z", + "received_at": "2021-09-01T00:00:00.000Z", + "sent_at": "2021-09-01T00:00:00.000Z", + "timestamp": "2021-09-01T00:00:00.000Z", + "title": "Home | RudderStack", + "url": "http://www.rudderstack.com", + "user_id": "userId", + "context_destination_id": "destinationID", + "context_destination_type": "BQ", + "context_source_id": "sourceID", + "context_source_type": "sourceType", + }, + "metadata": map[string]any{ + "columns": map[string]any{ + "name": "string", + "anonymous_id": "string", + "channel": "string", + "context_ip": "string", + "context_passed_ip": "string", + "context_request_ip": "string", + "context_traits_email": "string", + "context_traits_logins": "int", + "context_traits_name": "string", + "id": "string", + "original_timestamp": "datetime", + "received_at": "datetime", + "sent_at": "datetime", + "timestamp": "datetime", + "title": "string", + "url": "string", + "user_id": "string", + "uuid_ts": "datetime", + "context_destination_id": "string", + "context_destination_type": "string", + "context_source_id": "string", + "context_source_type": "string", + "loaded_at": "datetime", + }, + "receivedAt": "2021-09-01T00:00:00.000Z", + "table": "pages", + }, + "userId": "", + }, + Metadata: ptrans.Metadata{ + EventType: "page", + DestinationType: "BQ", + ReceivedAt: "2021-09-01T00:00:00.000Z", + SourceID: "sourceID", + DestinationID: "destinationID", + SourceType: "sourceType", + }, + StatusCode: http.StatusOK, + }, + { + Output: map[string]any{ + "data": map[string]any{ + "merge_property_1_type": "anonymous_id", + "merge_property_1_value": "anonymousId", + "merge_property_2_type": "user_id", + "merge_property_2_value": "userId", + }, + "metadata": map[string]any{ + "table": "rudder_identity_merge_rules", + "columns": map[string]any{"merge_property_1_type": "string", "merge_property_1_value": "string", "merge_property_2_type": "string", 
"merge_property_2_value": "string"}, + "isMergeRule": true, + "receivedAt": "2021-09-01T00:00:00.000Z", + "mergePropOne": "anonymousId", + "mergePropTwo": "userId", + }, + "userId": "", + }, + Metadata: ptrans.Metadata{ + EventType: "page", + DestinationType: "BQ", + ReceivedAt: "2021-09-01T00:00:00.000Z", + SourceID: "sourceID", + DestinationID: "destinationID", + SourceType: "sourceType", + }, + StatusCode: http.StatusOK, + }, + }, + }, + }, + } + + for _, tc := range testsCases { + t.Run(tc.name, func(t *testing.T) { + c := config.New() + c.Set("DEST_TRANSFORM_URL", transformerResource.TransformerURL) + c.Set("USER_TRANSFORM_URL", transformerResource.TransformerURL) + + for k, v := range tc.configOverride { + c.Set(k, v) + } + + eventsInfos := []eventsInfo{ + { + payload: []byte(tc.eventPayload), + metadata: tc.metadata, + destination: tc.destination, + }, + } + destinationTransformer := ptrans.NewTransformer(c, logger.NOP, stats.Default) + warehouseTransformer := New(c, logger.NOP, stats.NOP) + + testEvents(t, eventsInfos, destinationTransformer, warehouseTransformer, tc.expectedResponse) + }) + } +} diff --git a/warehouse/transformer/safe.go b/warehouse/transformer/safe.go new file mode 100644 index 00000000000..9b9807fae85 --- /dev/null +++ b/warehouse/transformer/safe.go @@ -0,0 +1,193 @@ +package transformer + +import ( + "errors" + "fmt" + "regexp" + "strings" + "unicode" + + "github.com/iancoleman/strcase" + + "github.com/rudderlabs/rudder-go-kit/config" + + "github.com/rudderlabs/rudder-server/utils/misc" + "github.com/rudderlabs/rudder-server/warehouse/transformer/internal/snakecase" + "github.com/rudderlabs/rudder-server/warehouse/transformer/internal/utils" + whutils "github.com/rudderlabs/rudder-server/warehouse/utils" +) + +var ( + reLeadingUnderscores = regexp.MustCompile(`^_*`) + reNonAlphanumericOrDollar = regexp.MustCompile(`[^a-zA-Z0-9\\$]`) + reStartsWithLetterOrUnderscore = regexp.MustCompile(`^[a-zA-Z_].*`) + + errEmptyColumnName = 
errors.New("column name cannot be empty") + errEmptyTableName = errors.New("table name cannot be empty") +) + +// SafeNamespace returns a safe namespace for the given destination type and input namespace. +// The namespace is transformed by removing special characters, converting to snake case, +// and ensuring its safe (not starting with a digit, not empty, and not a reserved keyword). +func SafeNamespace(conf *config.Config, destType, input string) string { + namespace := strings.Join(extractAlphanumericValues(input), "_") + + if !shouldSkipSnakeCasing(conf, destType) { + namespace = strcase.ToSnake(namespace) + } + if len(namespace) > 0 && unicode.IsDigit(rune(namespace[0])) { + namespace = "_" + namespace // Handle namespace starting with a digit + } + if namespace == "" { + namespace = "stringempty" // Default to "stringempty" if namespace is empty + } + if utils.IsReservedKeyword(destType, namespace) { + namespace = fmt.Sprintf("_%s", namespace) // Escape reserved keywords + } + return misc.TruncateStr(namespace, 127) // Truncate to 127 characters and return the final result +} + +// extractAlphanumericValues extracts sequences of alphanumeric characters from the input string. 
+func extractAlphanumericValues(input string) []string { + var ( + extractedValues []string + currentValue strings.Builder + ) + + for _, c := range input { + if isAlphaAlphanumeric(c) { + currentValue.WriteRune(c) + } else if currentValue.Len() > 0 { + extractedValues = append(extractedValues, currentValue.String()) + currentValue.Reset() + } + } + if currentValue.Len() > 0 { + extractedValues = append(extractedValues, currentValue.String()) + } + return extractedValues +} + +func isAlphaAlphanumeric(c int32) bool { + return (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z') || (c >= '0' && c <= '9') +} + +func shouldSkipSnakeCasing(conf *config.Config, destType string) bool { + configKey := fmt.Sprintf("Warehouse.%s.skipNamespaceSnakeCasing", whutils.WHDestNameMap[destType]) + return conf.GetBool(configKey, false) +} + +// SafeTableName processes the input table name based on the destination type and integration options. +// It applies case conversion, truncation, reserved keyword escaping, and table name length restrictions. +// For data lake providers, it avoids trimming the table name. 
+func SafeTableName(destType string, options integrationsOptions, tableName string) (string, error) { + if len(tableName) == 0 { + return "", errEmptyTableName + } + + switch destType { + case whutils.SNOWFLAKE: + tableName = strings.ToUpper(tableName) + case whutils.POSTGRES: + tableName = misc.TruncateStr(tableName, 63) + tableName = strings.ToLower(tableName) + default: + tableName = strings.ToLower(tableName) + } + + if !options.skipReservedKeywordsEscaping && utils.IsReservedKeyword(destType, tableName) { + tableName = "_" + tableName // Escape reserved keywords unless skipping is enabled + } + if utils.IsDataLake(destType) { + return tableName, nil // Do not trim the table name for data lake providers + } + return misc.TruncateStr(tableName, 127), nil // Trim table name to a maximum of 127 characters +} + +// SafeColumnName processes the input column name based on the destination type and integration options. +// It applies case conversion, truncation, reserved keyword escaping, and column name length restrictions. +// For data lake providers, it avoids trimming the column name. 
+func SafeColumnName(destType string, options integrationsOptions, columnName string) (string, error) { + if len(columnName) == 0 { + return "", errEmptyColumnName + } + + switch destType { + case whutils.SNOWFLAKE: + columnName = strings.ToUpper(columnName) + case whutils.POSTGRES: + columnName = misc.TruncateStr(columnName, 63) + columnName = strings.ToLower(columnName) + default: + columnName = strings.ToLower(columnName) + } + + if !options.skipReservedKeywordsEscaping && utils.IsReservedKeyword(destType, columnName) { + columnName = "_" + columnName // Escape reserved keywords unless skipping is enabled + } + if utils.IsDataLake(destType) { + return columnName, nil // Do not trim the column name for data lake providers + } + return misc.TruncateStr(columnName, 127), nil // Trim column name to a maximum of 127 characters +} + +// TransformTableName applies transformation to the input table name based on the destination type and configuration options. +// If `useBlendoCasing` is enabled, it converts the table name to lowercase and trims spaces. +// Otherwise, it applies a more general transformation using the `transformName` function. +func TransformTableName(destType string, integrationsOptions integrationsOptions, destConfigOptions destConfigOptions, tableName string) string { + if integrationsOptions.useBlendoCasing { + return strings.TrimSpace(strings.ToLower(tableName)) + } + return transformName(destType, destConfigOptions, tableName) +} + +// TransformColumnName applies transformation to the input column name based on the destination type and configuration options. +// If `useBlendoCasing` is enabled, it transforms the column name into Blendo casing. +// Otherwise, it applies a more general transformation using the `transformName` function. 
+func TransformColumnName(destType string, integrationsOptions integrationsOptions, destConfigOptions destConfigOptions, columnName string) string { + if integrationsOptions.useBlendoCasing { + return transformNameToBlendoCase(destType, columnName) + } + return transformName(destType, destConfigOptions, columnName) +} + +// transformName normalizes the input string by extracting alphanumeric values and converting it into snake case based on the configuration options. +// It handles leading underscores, adds a leading underscore if the first character is a digit, and truncates the name if necessary (e.g., for Postgres). +func transformName(destType string, options destConfigOptions, input string) string { + name := strings.Join(extractAlphanumericValues(input), "_") + + var snakeCaseFn func(s string) string + if options.underscoreDivideNumbers { + snakeCaseFn = snakecase.ToSnakeCase + } else { + snakeCaseFn = snakecase.ToSnakeCaseWithNumbers + } + if strings.HasPrefix(input, "_") { + leadingUnderscores := reLeadingUnderscores.FindString(input) + name = leadingUnderscores + snakeCaseFn(reLeadingUnderscores.ReplaceAllString(name, "")) + } else { + name = snakeCaseFn(name) + } + if len(name) > 0 && (rune(name[0]) >= '0' && rune(name[0]) <= '9') { + name = "_" + name // Add leading underscore if the first character is a digit + } + if destType == whutils.POSTGRES { + name = misc.TruncateStr(name, 63) // Trim key to 63 characters if the provider is Postgres + } + return name +} + +// transformNameToBlendoCase converts the input string into Blendo case format by replacing non-alphanumeric characters with underscores. +// If the name does not start with a letter or underscore, it adds a leading underscore. +// The name is truncated to 63 characters for Postgres, and the result is converted to lowercase. 
+func transformNameToBlendoCase(destType, name string) string { + key := reNonAlphanumericOrDollar.ReplaceAllString(name, "_") + + if !reStartsWithLetterOrUnderscore.MatchString(key) { + key = "_" + key // Ensure the key starts with a letter or underscore + } + if destType == whutils.POSTGRES { + key = misc.TruncateStr(name, 63) // Trim key to 63 characters if the provider is Postgres + } + return strings.ToLower(key) +} diff --git a/warehouse/transformer/safe_test.go b/warehouse/transformer/safe_test.go new file mode 100644 index 00000000000..336912e7380 --- /dev/null +++ b/warehouse/transformer/safe_test.go @@ -0,0 +1,899 @@ +package transformer + +import ( + "strings" + "testing" + + "github.com/stretchr/testify/require" + + "github.com/rudderlabs/rudder-go-kit/config" + + whutils "github.com/rudderlabs/rudder-server/warehouse/utils" +) + +func TestSafeNamespace(t *testing.T) { + testCases := []struct { + destType, namespace, expected string + }{ + {destType: whutils.RS, namespace: "omega", expected: "omega"}, + {destType: whutils.RS, namespace: "omega v2 ", expected: "omega_v_2"}, + {destType: whutils.RS, namespace: "9mega", expected: "_9_mega"}, + {destType: whutils.RS, namespace: "mega&", expected: "mega"}, + {destType: whutils.RS, namespace: "ome$ga", expected: "ome_ga"}, + {destType: whutils.RS, namespace: "omega$", expected: "omega"}, + {destType: whutils.RS, namespace: "ome_ ga", expected: "ome_ga"}, + {destType: whutils.RS, namespace: "9mega________-________90", expected: "_9_mega_90"}, + {destType: whutils.RS, namespace: "Cízǔ", expected: "c_z"}, + {destType: whutils.RS, namespace: "Rudderstack", expected: "rudderstack"}, + {destType: whutils.RS, namespace: "___", expected: "stringempty"}, + {destType: whutils.RS, namespace: "group", expected: "_group"}, + {destType: whutils.RS, namespace: "k3_namespace", expected: "k_3_namespace"}, + {destType: whutils.BQ, namespace: "k3_namespace", expected: "k3_namespace"}, + } + for _, tc := range testCases { + c := 
config.New() + c.Set("Warehouse.bigquery.skipNamespaceSnakeCasing", true) + + require.Equal(t, tc.expected, SafeNamespace(c, tc.destType, tc.namespace)) + } +} + +func TestSafeTableName(t *testing.T) { + testCases := []struct { + name, destType, tableName, expected string + options integrationsOptions + expectError bool + }{ + { + name: "Empty table name", + destType: whutils.SNOWFLAKE, + tableName: "", + expected: "", + expectError: true, // Should return error + }, + { + name: "Snowflake uppercase conversion", + destType: whutils.SNOWFLAKE, + tableName: "myTable", + expected: "MYTABLE", + expectError: false, + }, + { + name: "Postgres truncation and lowercase", + destType: whutils.POSTGRES, + tableName: "ThisIsAReallyLongTableNameThatExceedsThe63CharacterLimitForPostgresTables", + expected: "thisisareallylongtablenamethatexceedsthe63characterlimitforpost", + expectError: false, + }, + { + name: "Lowercase conversion for other destTypes", + destType: whutils.BQ, + tableName: "MyTableName", + expected: "mytablename", + expectError: false, + }, + { + name: "Reserved keyword escaping", + destType: whutils.SNOWFLAKE, + tableName: "SELECT", + expected: "_SELECT", // Should escape reserved keyword + expectError: false, + }, + { + name: "No reserved keyword escaping with skip option", + destType: whutils.SNOWFLAKE, + tableName: "SELECT", + options: integrationsOptions{skipReservedKeywordsEscaping: true}, + expected: "SELECT", // Should not escape reserved keyword + expectError: false, + }, + { + name: "Data lake, no trimming", + destType: whutils.S3Datalake, + tableName: "ThisIsAReallyLongTableNameThatExceedsThe63CharacterLimitForDatalakeTables", + expected: "thisisareallylongtablenamethatexceedsthe63characterlimitfordatalaketables", + expectError: false, + }, + } + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + result, err := SafeTableName(tc.destType, tc.options, tc.tableName) + + if tc.expectError { + require.Error(t, err) + require.Empty(t, 
result) + } else { + require.NoError(t, err) + require.Equal(t, tc.expected, result) + } + }) + } +} + +func TestSafeColumnName(t *testing.T) { + testCases := []struct { + name, destType, columnName, expected string + options integrationsOptions + expectError bool + }{ + { + name: "Empty column name", + destType: whutils.SNOWFLAKE, + columnName: "", + expected: "", + expectError: true, // Should return error + }, + { + name: "Snowflake uppercase conversion", + destType: whutils.SNOWFLAKE, + columnName: "myColumn", + expected: "MYCOLUMN", + expectError: false, + }, + { + name: "Postgres truncation and lowercase", + destType: whutils.POSTGRES, + columnName: "ThisIsAReallyLongColumnNameThatExceedsThe63CharacterLimitForPostgresTables", + expected: "thisisareallylongcolumnnamethatexceedsthe63characterlimitforpos", + expectError: false, + }, + { + name: "Lowercase conversion for other destTypes", + destType: whutils.BQ, + columnName: "MyColumnName", + expected: "mycolumnname", + expectError: false, + }, + { + name: "Reserved keyword escaping", + destType: whutils.SNOWFLAKE, + columnName: "SELECT", + expected: "_SELECT", // Should escape reserved keyword + expectError: false, + }, + { + name: "No reserved keyword escaping with skip option", + destType: whutils.SNOWFLAKE, + columnName: "SELECT", + options: integrationsOptions{skipReservedKeywordsEscaping: true}, + expected: "SELECT", // Should not escape reserved keyword + expectError: false, + }, + { + name: "Data lake, no trimming", + destType: whutils.S3Datalake, + columnName: "ThisIsAReallyLongColumnNameThatExceedsThe63CharacterLimitForDatalakeColumns", + expected: "thisisareallylongcolumnnamethatexceedsthe63characterlimitfordatalakecolumns", + expectError: false, + }, + } + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + result, err := SafeColumnName(tc.destType, tc.options, tc.columnName) + + if tc.expectError { + require.Error(t, err) + require.Empty(t, result) + } else { + 
require.NoError(t, err) + require.Equal(t, tc.expected, result) + } + }) + } +} + +func TestTransformTableName(t *testing.T) { + testCases := []struct { + name string + destType string + integrationsOptions integrationsOptions + destConfigOptions destConfigOptions + tableName string + expected string + }{ + { + name: "Blendo casing - table name trimmed and lowercased", + destType: whutils.SNOWFLAKE, + integrationsOptions: integrationsOptions{useBlendoCasing: true}, + destConfigOptions: destConfigOptions{}, + tableName: " TableName ", + expected: "tablename", + }, + { + name: "Blendo casing - mixedcased to lowercased", + destType: whutils.SNOWFLAKE, + integrationsOptions: integrationsOptions{useBlendoCasing: true}, + destConfigOptions: destConfigOptions{}, + tableName: "CaMeLcAsE", + expected: "camelcase", + }, + { + name: "Blendo casing - mixedcased to lowercased", + destType: whutils.SNOWFLAKE, + integrationsOptions: integrationsOptions{useBlendoCasing: true}, + destConfigOptions: destConfigOptions{}, + tableName: "Table@Name!", + expected: "table@name!", + }, + { + name: "Blendo casing - alphanumeric", + destType: whutils.SNOWFLAKE, + integrationsOptions: integrationsOptions{useBlendoCasing: true}, + destConfigOptions: destConfigOptions{}, + tableName: "TableName123", + expected: "tablename123", + }, + + { + name: "Standard casing - underscoreDivideNumbers(true) - remove symbols and join continuous letters and numbers with a single underscore", + destType: whutils.SNOWFLAKE, + integrationsOptions: integrationsOptions{useBlendoCasing: false}, + destConfigOptions: destConfigOptions{underscoreDivideNumbers: true}, + tableName: "&4yasdfa(84224_fs9##_____*3q", + expected: "_4_yasdfa_84224_fs_9_3_q", + }, + { + name: "Standard casing - underscoreDivideNumbers(true) - omega to omega", + destType: whutils.SNOWFLAKE, + integrationsOptions: integrationsOptions{useBlendoCasing: false}, + destConfigOptions: destConfigOptions{underscoreDivideNumbers: true}, + tableName: 
"omega", + expected: "omega", + }, + { + name: "Standard casing - underscoreDivideNumbers(true) - omega v2 to omega_v_2", + destType: whutils.SNOWFLAKE, + integrationsOptions: integrationsOptions{useBlendoCasing: false}, + destConfigOptions: destConfigOptions{underscoreDivideNumbers: true}, + tableName: "omega v2", + expected: "omega_v_2", + }, + { + name: "Standard casing - underscoreDivideNumbers(true) - prepend underscore if name starts with a number", + destType: whutils.SNOWFLAKE, + integrationsOptions: integrationsOptions{useBlendoCasing: false}, + destConfigOptions: destConfigOptions{underscoreDivideNumbers: true}, + tableName: "9mega", + expected: "_9_mega", + }, + { + name: "Standard casing - underscoreDivideNumbers(true) - remove trailing special characters", + destType: whutils.SNOWFLAKE, + integrationsOptions: integrationsOptions{useBlendoCasing: false}, + destConfigOptions: destConfigOptions{underscoreDivideNumbers: true}, + tableName: "mega&", + expected: "mega", + }, + { + name: "Standard casing - underscoreDivideNumbers(true) - replace special character in the middle with underscore", + destType: whutils.SNOWFLAKE, + integrationsOptions: integrationsOptions{useBlendoCasing: false}, + destConfigOptions: destConfigOptions{underscoreDivideNumbers: true}, + tableName: "ome$ga", + expected: "ome_ga", + }, + { + name: "Standard casing - underscoreDivideNumbers(true) - remove trailing $ character", + destType: whutils.SNOWFLAKE, + integrationsOptions: integrationsOptions{useBlendoCasing: false}, + destConfigOptions: destConfigOptions{underscoreDivideNumbers: true}, + tableName: "omega$", + expected: "omega", + }, + { + name: "Standard casing - underscoreDivideNumbers(true) - spaces and special characters by converting to underscores", + destType: whutils.SNOWFLAKE, + integrationsOptions: integrationsOptions{useBlendoCasing: false}, + destConfigOptions: destConfigOptions{underscoreDivideNumbers: true}, + tableName: "ome_ ga", + expected: "ome_ga", + }, + { 
+ name: "Standard casing - underscoreDivideNumbers(true) - multiple underscores and hyphens by reducing to single underscores", + destType: whutils.SNOWFLAKE, + integrationsOptions: integrationsOptions{useBlendoCasing: false}, + destConfigOptions: destConfigOptions{underscoreDivideNumbers: true}, + tableName: "9mega________-________90", + expected: "_9_mega_90", + }, + { + name: "Standard casing - underscoreDivideNumbers(true) - non-ASCII characters by converting them to underscores", + destType: whutils.SNOWFLAKE, + integrationsOptions: integrationsOptions{useBlendoCasing: false}, + destConfigOptions: destConfigOptions{underscoreDivideNumbers: true}, + tableName: "Cízǔ", + expected: "c_z", + }, + { + name: "Standard casing - underscoreDivideNumbers(true) - CamelCase123Key to camel_case_123_key", + destType: whutils.SNOWFLAKE, + integrationsOptions: integrationsOptions{useBlendoCasing: false}, + destConfigOptions: destConfigOptions{underscoreDivideNumbers: true}, + tableName: "CamelCase123Key", + expected: "camel_case_123_key", + }, + { + name: "Standard casing - underscoreDivideNumbers(true) - numbers and commas", + destType: whutils.SNOWFLAKE, + integrationsOptions: integrationsOptions{useBlendoCasing: false}, + destConfigOptions: destConfigOptions{underscoreDivideNumbers: true}, + tableName: "path to $1,00,000", + expected: "path_to_1_00_000", + }, + { + name: "Standard casing - underscoreDivideNumbers(true) - no valid characters", + destType: whutils.SNOWFLAKE, + integrationsOptions: integrationsOptions{useBlendoCasing: false}, + destConfigOptions: destConfigOptions{underscoreDivideNumbers: true}, + tableName: "@#$%", + expected: "", + }, + { + name: "Standard casing - underscoreDivideNumbers(true) - underscores between letters and numbers", + destType: whutils.SNOWFLAKE, + integrationsOptions: integrationsOptions{useBlendoCasing: false}, + destConfigOptions: destConfigOptions{underscoreDivideNumbers: true}, + tableName: "test123", + expected: "test_123", + }, 
+ { + name: "Standard casing - underscoreDivideNumbers(true) - multiple underscore-number sequences", + destType: whutils.SNOWFLAKE, + integrationsOptions: integrationsOptions{useBlendoCasing: false}, + destConfigOptions: destConfigOptions{underscoreDivideNumbers: true}, + tableName: "abc123def456", + expected: "abc_123_def_456", + }, + { + name: "Standard casing - underscoreDivideNumbers(true) - multiple underscore-number sequences", + destType: whutils.SNOWFLAKE, + integrationsOptions: integrationsOptions{useBlendoCasing: false}, + destConfigOptions: destConfigOptions{underscoreDivideNumbers: true}, + tableName: "abc_123_def_456", + expected: "abc_123_def_456", + }, + { + name: "Standard casing - underscoreDivideNumbers(true) - single underscore", + destType: whutils.SNOWFLAKE, + integrationsOptions: integrationsOptions{useBlendoCasing: false}, + destConfigOptions: destConfigOptions{underscoreDivideNumbers: true}, + tableName: "__abc_123_def_456", + expected: "__abc_123_def_456", + }, + { + name: "Standard casing - underscoreDivideNumbers(true) - multiple underscore", + destType: whutils.SNOWFLAKE, + integrationsOptions: integrationsOptions{useBlendoCasing: false}, + destConfigOptions: destConfigOptions{underscoreDivideNumbers: true}, + tableName: "_abc_123_def_456", + expected: "_abc_123_def_456", + }, + + { + name: "Standard casing - underscoreDivideNumbers(false) - remove symbols and join continuous letters and numbers with a single underscore", + destType: whutils.SNOWFLAKE, + integrationsOptions: integrationsOptions{useBlendoCasing: false}, + destConfigOptions: destConfigOptions{underscoreDivideNumbers: false}, + tableName: "&4yasdfa(84224_fs9##_____*3q", + expected: "_4yasdfa_84224_fs9_3q", + }, + { + name: "Standard casing - underscoreDivideNumbers(false) - omega to omega", + destType: whutils.SNOWFLAKE, + integrationsOptions: integrationsOptions{useBlendoCasing: false}, + destConfigOptions: destConfigOptions{underscoreDivideNumbers: false}, + tableName: 
"omega", + expected: "omega", + }, + { + name: "Standard casing - underscoreDivideNumbers(false) - omega v2 to omega_v_2", + destType: whutils.SNOWFLAKE, + integrationsOptions: integrationsOptions{useBlendoCasing: false}, + destConfigOptions: destConfigOptions{underscoreDivideNumbers: false}, + tableName: "omega v2", + expected: "omega_v2", + }, + { + name: "Standard casing - underscoreDivideNumbers(false) - prepend underscore if name starts with a number", + destType: whutils.SNOWFLAKE, + integrationsOptions: integrationsOptions{useBlendoCasing: false}, + destConfigOptions: destConfigOptions{underscoreDivideNumbers: false}, + tableName: "9mega", + expected: "_9mega", + }, + { + name: "Standard casing - underscoreDivideNumbers(false) - remove trailing special characters", + destType: whutils.SNOWFLAKE, + integrationsOptions: integrationsOptions{useBlendoCasing: false}, + destConfigOptions: destConfigOptions{underscoreDivideNumbers: false}, + tableName: "mega&", + expected: "mega", + }, + { + name: "Standard casing - underscoreDivideNumbers(false) - replace special character in the middle with underscore", + destType: whutils.SNOWFLAKE, + integrationsOptions: integrationsOptions{useBlendoCasing: false}, + destConfigOptions: destConfigOptions{underscoreDivideNumbers: false}, + tableName: "ome$ga", + expected: "ome_ga", + }, + { + name: "Standard casing - underscoreDivideNumbers(false) - remove trailing $ character", + destType: whutils.SNOWFLAKE, + integrationsOptions: integrationsOptions{useBlendoCasing: false}, + destConfigOptions: destConfigOptions{underscoreDivideNumbers: false}, + tableName: "omega$", + expected: "omega", + }, + { + name: "Standard casing - underscoreDivideNumbers(false) - spaces and special characters by converting to underscores", + destType: whutils.SNOWFLAKE, + integrationsOptions: integrationsOptions{useBlendoCasing: false}, + destConfigOptions: destConfigOptions{underscoreDivideNumbers: false}, + tableName: "ome_ ga", + expected: "ome_ga", 
+ }, + { + name: "Standard casing - underscoreDivideNumbers(false) - multiple underscores and hyphens by reducing to single underscores", + destType: whutils.SNOWFLAKE, + integrationsOptions: integrationsOptions{useBlendoCasing: false}, + destConfigOptions: destConfigOptions{underscoreDivideNumbers: false}, + tableName: "9mega________-________90", + expected: "_9mega_90", + }, + { + name: "Standard casing - underscoreDivideNumbers(false) - non-ASCII characters by converting them to underscores", + destType: whutils.SNOWFLAKE, + integrationsOptions: integrationsOptions{useBlendoCasing: false}, + destConfigOptions: destConfigOptions{underscoreDivideNumbers: false}, + tableName: "Cízǔ", + expected: "c_z", + }, + { + name: "Standard casing - underscoreDivideNumbers(false) - CamelCase123Key to camel_case_123_key", + destType: whutils.SNOWFLAKE, + integrationsOptions: integrationsOptions{useBlendoCasing: false}, + destConfigOptions: destConfigOptions{underscoreDivideNumbers: false}, + tableName: "CamelCase123Key", + expected: "camel_case123_key", + }, + { + name: "Standard casing - underscoreDivideNumbers(false) - numbers and commas", + destType: whutils.SNOWFLAKE, + integrationsOptions: integrationsOptions{useBlendoCasing: false}, + destConfigOptions: destConfigOptions{underscoreDivideNumbers: false}, + tableName: "path to $1,00,000", + expected: "path_to_1_00_000", + }, + { + name: "Standard casing - underscoreDivideNumbers(false) - no valid characters", + destType: whutils.SNOWFLAKE, + integrationsOptions: integrationsOptions{useBlendoCasing: false}, + destConfigOptions: destConfigOptions{underscoreDivideNumbers: false}, + tableName: "@#$%", + expected: "", + }, + { + name: "Standard casing - underscoreDivideNumbers(false) - underscores between letters and numbers", + destType: whutils.SNOWFLAKE, + integrationsOptions: integrationsOptions{useBlendoCasing: false}, + destConfigOptions: destConfigOptions{underscoreDivideNumbers: false}, + tableName: "test123", + 
expected: "test123", + }, + { + name: "Standard casing - underscoreDivideNumbers(true) - multiple underscore-number sequences", + destType: whutils.SNOWFLAKE, + integrationsOptions: integrationsOptions{useBlendoCasing: false}, + destConfigOptions: destConfigOptions{underscoreDivideNumbers: false}, + tableName: "abc123def456", + expected: "abc123_def456", + }, + { + name: "Standard casing - underscoreDivideNumbers(false) - multiple underscore-number sequences", + destType: whutils.SNOWFLAKE, + integrationsOptions: integrationsOptions{useBlendoCasing: false}, + destConfigOptions: destConfigOptions{underscoreDivideNumbers: false}, + tableName: "abc_123_def_456", + expected: "abc_123_def_456", + }, + { + name: "Standard casing - underscoreDivideNumbers(false) - single underscore", + destType: whutils.SNOWFLAKE, + integrationsOptions: integrationsOptions{useBlendoCasing: false}, + destConfigOptions: destConfigOptions{underscoreDivideNumbers: false}, + tableName: "__abc_123_def_456", + expected: "__abc_123_def_456", + }, + { + name: "Standard casing - underscoreDivideNumbers(false) - multiple underscore", + destType: whutils.SNOWFLAKE, + integrationsOptions: integrationsOptions{useBlendoCasing: false}, + destConfigOptions: destConfigOptions{underscoreDivideNumbers: false}, + tableName: "_abc_123_def_456", + expected: "_abc_123_def_456", + }, + } + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + tableName := TransformTableName(tc.destType, tc.integrationsOptions, tc.destConfigOptions, tc.tableName) + require.Equal(t, tc.expected, tableName) + }) + } +} + +func TestTransformColumnName(t *testing.T) { + testCases := []struct { + name string + destType string + integrationsOptions integrationsOptions + destConfigOptions destConfigOptions + tableName string + expected string + }{ + { + name: "Blendo casing - special characters other than \\ or $ to underscores", + destType: whutils.SNOWFLAKE, + integrationsOptions: integrationsOptions{useBlendoCasing: 
true}, + destConfigOptions: destConfigOptions{}, + tableName: "column@Name$1", + expected: "column_name$1", + }, + { + name: "Blendo casing - add underscore if name does not start with an alphabet or underscore", + destType: whutils.SNOWFLAKE, + integrationsOptions: integrationsOptions{useBlendoCasing: true}, + destConfigOptions: destConfigOptions{}, + tableName: "1CComega", + expected: "_1ccomega", + }, + { + name: "Blendo casing - non-ASCII characters by converting to underscores", + destType: whutils.SNOWFLAKE, + integrationsOptions: integrationsOptions{useBlendoCasing: true}, + destConfigOptions: destConfigOptions{}, + tableName: "Cízǔ", + expected: "c_z_", + }, + { + name: "Blendo casing - CamelCase123Key to camelcase123key", + destType: whutils.SNOWFLAKE, + integrationsOptions: integrationsOptions{useBlendoCasing: true}, + destConfigOptions: destConfigOptions{}, + tableName: "CamelCase123Key", + expected: "camelcase123key", + }, + { + name: "Blendo casing - preserve \\ and $ characters", + destType: whutils.SNOWFLAKE, + integrationsOptions: integrationsOptions{useBlendoCasing: true}, + destConfigOptions: destConfigOptions{}, + tableName: "path to $1,00,000", + expected: "path_to_$1_00_000", + }, + { + name: "Blendo casing - mix of characters, numbers, and special characters", + destType: whutils.SNOWFLAKE, + integrationsOptions: integrationsOptions{useBlendoCasing: true}, + destConfigOptions: destConfigOptions{}, + tableName: "CamelCase123Key_with$special\\chars", + expected: "camelcase123key_with$special\\chars", + }, + { + name: "Blendo casing - limit length to 63 characters for postgres provider", + destType: whutils.POSTGRES, + integrationsOptions: integrationsOptions{useBlendoCasing: true}, + destConfigOptions: destConfigOptions{}, + tableName: strings.Repeat("a", 70), + expected: strings.Repeat("a", 63), + }, + + { + name: "Standard casing - underscoreDivideNumbers(true) - remove symbols and join continuous letters and numbers with a single underscore", 
+ destType: whutils.SNOWFLAKE, + integrationsOptions: integrationsOptions{useBlendoCasing: false}, + destConfigOptions: destConfigOptions{underscoreDivideNumbers: true}, + tableName: "&4yasdfa(84224_fs9##_____*3q", + expected: "_4_yasdfa_84224_fs_9_3_q", + }, + { + name: "Standard casing - underscoreDivideNumbers(true) - omega to omega", + destType: whutils.SNOWFLAKE, + integrationsOptions: integrationsOptions{useBlendoCasing: false}, + destConfigOptions: destConfigOptions{underscoreDivideNumbers: true}, + tableName: "omega", + expected: "omega", + }, + { + name: "Standard casing - underscoreDivideNumbers(true) - omega v2 to omega_v_2", + destType: whutils.SNOWFLAKE, + integrationsOptions: integrationsOptions{useBlendoCasing: false}, + destConfigOptions: destConfigOptions{underscoreDivideNumbers: true}, + tableName: "omega v2", + expected: "omega_v_2", + }, + { + name: "Standard casing - underscoreDivideNumbers(true) - prepend underscore if name starts with a number", + destType: whutils.SNOWFLAKE, + integrationsOptions: integrationsOptions{useBlendoCasing: false}, + destConfigOptions: destConfigOptions{underscoreDivideNumbers: true}, + tableName: "9mega", + expected: "_9_mega", + }, + { + name: "Standard casing - underscoreDivideNumbers(true) - remove trailing special characters", + destType: whutils.SNOWFLAKE, + integrationsOptions: integrationsOptions{useBlendoCasing: false}, + destConfigOptions: destConfigOptions{underscoreDivideNumbers: true}, + tableName: "mega&", + expected: "mega", + }, + { + name: "Standard casing - underscoreDivideNumbers(true) - replace special character in the middle with underscore", + destType: whutils.SNOWFLAKE, + integrationsOptions: integrationsOptions{useBlendoCasing: false}, + destConfigOptions: destConfigOptions{underscoreDivideNumbers: true}, + tableName: "ome$ga", + expected: "ome_ga", + }, + { + name: "Standard casing - underscoreDivideNumbers(true) - remove trailing $ character", + destType: whutils.SNOWFLAKE, + 
integrationsOptions: integrationsOptions{useBlendoCasing: false}, + destConfigOptions: destConfigOptions{underscoreDivideNumbers: true}, + tableName: "omega$", + expected: "omega", + }, + { + name: "Standard casing - underscoreDivideNumbers(true) - spaces and special characters by converting to underscores", + destType: whutils.SNOWFLAKE, + integrationsOptions: integrationsOptions{useBlendoCasing: false}, + destConfigOptions: destConfigOptions{underscoreDivideNumbers: true}, + tableName: "ome_ ga", + expected: "ome_ga", + }, + { + name: "Standard casing - underscoreDivideNumbers(true) - multiple underscores and hyphens by reducing to single underscores", + destType: whutils.SNOWFLAKE, + integrationsOptions: integrationsOptions{useBlendoCasing: false}, + destConfigOptions: destConfigOptions{underscoreDivideNumbers: true}, + tableName: "9mega________-________90", + expected: "_9_mega_90", + }, + { + name: "Standard casing - underscoreDivideNumbers(true) - non-ASCII characters by converting them to underscores", + destType: whutils.SNOWFLAKE, + integrationsOptions: integrationsOptions{useBlendoCasing: false}, + destConfigOptions: destConfigOptions{underscoreDivideNumbers: true}, + tableName: "Cízǔ", + expected: "c_z", + }, + { + name: "Standard casing - underscoreDivideNumbers(true) - CamelCase123Key to camel_case_123_key", + destType: whutils.SNOWFLAKE, + integrationsOptions: integrationsOptions{useBlendoCasing: false}, + destConfigOptions: destConfigOptions{underscoreDivideNumbers: true}, + tableName: "CamelCase123Key", + expected: "camel_case_123_key", + }, + { + name: "Standard casing - underscoreDivideNumbers(true) - numbers and commas", + destType: whutils.SNOWFLAKE, + integrationsOptions: integrationsOptions{useBlendoCasing: false}, + destConfigOptions: destConfigOptions{underscoreDivideNumbers: true}, + tableName: "path to $1,00,000", + expected: "path_to_1_00_000", + }, + { + name: "Standard casing - underscoreDivideNumbers(true) - no valid characters", + 
destType: whutils.SNOWFLAKE, + integrationsOptions: integrationsOptions{useBlendoCasing: false}, + destConfigOptions: destConfigOptions{underscoreDivideNumbers: true}, + tableName: "@#$%", + expected: "", + }, + { + name: "Standard casing - underscoreDivideNumbers(true) - underscores between letters and numbers", + destType: whutils.SNOWFLAKE, + integrationsOptions: integrationsOptions{useBlendoCasing: false}, + destConfigOptions: destConfigOptions{underscoreDivideNumbers: true}, + tableName: "test123", + expected: "test_123", + }, + { + name: "Standard casing - underscoreDivideNumbers(true) - multiple underscore-number sequences", + destType: whutils.SNOWFLAKE, + integrationsOptions: integrationsOptions{useBlendoCasing: false}, + destConfigOptions: destConfigOptions{underscoreDivideNumbers: true}, + tableName: "abc123def456", + expected: "abc_123_def_456", + }, + { + name: "Standard casing - underscoreDivideNumbers(true) - multiple underscore-number sequences", + destType: whutils.SNOWFLAKE, + integrationsOptions: integrationsOptions{useBlendoCasing: false}, + destConfigOptions: destConfigOptions{underscoreDivideNumbers: true}, + tableName: "abc_123_def_456", + expected: "abc_123_def_456", + }, + { + name: "Standard casing - underscoreDivideNumbers(true) - single underscore", + destType: whutils.SNOWFLAKE, + integrationsOptions: integrationsOptions{useBlendoCasing: false}, + destConfigOptions: destConfigOptions{underscoreDivideNumbers: true}, + tableName: "__abc_123_def_456", + expected: "__abc_123_def_456", + }, + { + name: "Standard casing - underscoreDivideNumbers(true) - multiple underscore", + destType: whutils.SNOWFLAKE, + integrationsOptions: integrationsOptions{useBlendoCasing: false}, + destConfigOptions: destConfigOptions{underscoreDivideNumbers: true}, + tableName: "_abc_123_def_456", + expected: "_abc_123_def_456", + }, + + { + name: "Standard casing - underscoreDivideNumbers(false) - remove symbols and join continuous letters and numbers with a single 
underscore", + destType: whutils.SNOWFLAKE, + integrationsOptions: integrationsOptions{useBlendoCasing: false}, + destConfigOptions: destConfigOptions{underscoreDivideNumbers: false}, + tableName: "&4yasdfa(84224_fs9##_____*3q", + expected: "_4yasdfa_84224_fs9_3q", + }, + { + name: "Standard casing - underscoreDivideNumbers(false) - omega to omega", + destType: whutils.SNOWFLAKE, + integrationsOptions: integrationsOptions{useBlendoCasing: false}, + destConfigOptions: destConfigOptions{underscoreDivideNumbers: false}, + tableName: "omega", + expected: "omega", + }, + { + name: "Standard casing - underscoreDivideNumbers(false) - omega v2 to omega_v_2", + destType: whutils.SNOWFLAKE, + integrationsOptions: integrationsOptions{useBlendoCasing: false}, + destConfigOptions: destConfigOptions{underscoreDivideNumbers: false}, + tableName: "omega v2", + expected: "omega_v2", + }, + { + name: "Standard casing - underscoreDivideNumbers(false) - prepend underscore if name starts with a number", + destType: whutils.SNOWFLAKE, + integrationsOptions: integrationsOptions{useBlendoCasing: false}, + destConfigOptions: destConfigOptions{underscoreDivideNumbers: false}, + tableName: "9mega", + expected: "_9mega", + }, + { + name: "Standard casing - underscoreDivideNumbers(false) - remove trailing special characters", + destType: whutils.SNOWFLAKE, + integrationsOptions: integrationsOptions{useBlendoCasing: false}, + destConfigOptions: destConfigOptions{underscoreDivideNumbers: false}, + tableName: "mega&", + expected: "mega", + }, + { + name: "Standard casing - underscoreDivideNumbers(false) - replace special character in the middle with underscore", + destType: whutils.SNOWFLAKE, + integrationsOptions: integrationsOptions{useBlendoCasing: false}, + destConfigOptions: destConfigOptions{underscoreDivideNumbers: false}, + tableName: "ome$ga", + expected: "ome_ga", + }, + { + name: "Standard casing - underscoreDivideNumbers(false) - remove trailing $ character", + destType: 
whutils.SNOWFLAKE, + integrationsOptions: integrationsOptions{useBlendoCasing: false}, + destConfigOptions: destConfigOptions{underscoreDivideNumbers: false}, + tableName: "omega$", + expected: "omega", + }, + { + name: "Standard casing - underscoreDivideNumbers(false) - spaces and special characters by converting to underscores", + destType: whutils.SNOWFLAKE, + integrationsOptions: integrationsOptions{useBlendoCasing: false}, + destConfigOptions: destConfigOptions{underscoreDivideNumbers: false}, + tableName: "ome_ ga", + expected: "ome_ga", + }, + { + name: "Standard casing - underscoreDivideNumbers(false) - multiple underscores and hyphens by reducing to single underscores", + destType: whutils.SNOWFLAKE, + integrationsOptions: integrationsOptions{useBlendoCasing: false}, + destConfigOptions: destConfigOptions{underscoreDivideNumbers: false}, + tableName: "9mega________-________90", + expected: "_9mega_90", + }, + { + name: "Standard casing - underscoreDivideNumbers(false) - non-ASCII characters by converting them to underscores", + destType: whutils.SNOWFLAKE, + integrationsOptions: integrationsOptions{useBlendoCasing: false}, + destConfigOptions: destConfigOptions{underscoreDivideNumbers: false}, + tableName: "Cízǔ", + expected: "c_z", + }, + { + name: "Standard casing - underscoreDivideNumbers(false) - CamelCase123Key to camel_case_123_key", + destType: whutils.SNOWFLAKE, + integrationsOptions: integrationsOptions{useBlendoCasing: false}, + destConfigOptions: destConfigOptions{underscoreDivideNumbers: false}, + tableName: "CamelCase123Key", + expected: "camel_case123_key", + }, + { + name: "Standard casing - underscoreDivideNumbers(false) - numbers and commas", + destType: whutils.SNOWFLAKE, + integrationsOptions: integrationsOptions{useBlendoCasing: false}, + destConfigOptions: destConfigOptions{underscoreDivideNumbers: false}, + tableName: "path to $1,00,000", + expected: "path_to_1_00_000", + }, + { + name: "Standard casing - 
underscoreDivideNumbers(false) - no valid characters", + destType: whutils.SNOWFLAKE, + integrationsOptions: integrationsOptions{useBlendoCasing: false}, + destConfigOptions: destConfigOptions{underscoreDivideNumbers: false}, + tableName: "@#$%", + expected: "", + }, + { + name: "Standard casing - underscoreDivideNumbers(false) - underscores between letters and numbers", + destType: whutils.SNOWFLAKE, + integrationsOptions: integrationsOptions{useBlendoCasing: false}, + destConfigOptions: destConfigOptions{underscoreDivideNumbers: false}, + tableName: "test123", + expected: "test123", + }, + { + name: "Standard casing - underscoreDivideNumbers(false) - multiple underscore-number sequences", + destType: whutils.SNOWFLAKE, + integrationsOptions: integrationsOptions{useBlendoCasing: false}, + destConfigOptions: destConfigOptions{underscoreDivideNumbers: false}, + tableName: "abc123def456", + expected: "abc123_def456", + }, + { + name: "Standard casing - underscoreDivideNumbers(false) - multiple underscore-number sequences", + destType: whutils.SNOWFLAKE, + integrationsOptions: integrationsOptions{useBlendoCasing: false}, + destConfigOptions: destConfigOptions{underscoreDivideNumbers: false}, + tableName: "abc_123_def_456", + expected: "abc_123_def_456", + }, + { + name: "Standard casing - underscoreDivideNumbers(false) - single underscore", + destType: whutils.SNOWFLAKE, + integrationsOptions: integrationsOptions{useBlendoCasing: false}, + destConfigOptions: destConfigOptions{underscoreDivideNumbers: false}, + tableName: "__abc_123_def_456", + expected: "__abc_123_def_456", + }, + { + name: "Standard casing - underscoreDivideNumbers(false) - multiple underscore", + destType: whutils.SNOWFLAKE, + integrationsOptions: integrationsOptions{useBlendoCasing: false}, + destConfigOptions: destConfigOptions{underscoreDivideNumbers: false}, + tableName: "_abc_123_def_456", + expected: "_abc_123_def_456", + }, + { + name: "Standard casing - underscoreDivideNumbers(false) - 
multiple underscore", + destType: whutils.POSTGRES, + integrationsOptions: integrationsOptions{useBlendoCasing: false}, + destConfigOptions: destConfigOptions{underscoreDivideNumbers: false}, + tableName: strings.Repeat("a", 70), + expected: strings.Repeat("a", 63), + }, + } + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + tableName := TransformColumnName(tc.destType, tc.integrationsOptions, tc.destConfigOptions, tc.tableName) + require.Equal(t, tc.expected, tableName) + }) + } +} diff --git a/warehouse/transformer/screen.go b/warehouse/transformer/screen.go new file mode 100644 index 00000000000..49120c2e191 --- /dev/null +++ b/warehouse/transformer/screen.go @@ -0,0 +1,58 @@ +package transformer + +import ( + "fmt" + + "github.com/samber/lo" + + "github.com/rudderlabs/rudder-server/warehouse/transformer/internal/rules" +) + +func (t *transformer) handleScreenEvent(pi *processingInfo) ([]map[string]any, error) { + screenEvent := make(map[string]any) + columnTypes := make(map[string]string) + + if err := t.setDataAndColumnTypeFromInput(pi, pi.event.Message["properties"], screenEvent, columnTypes, + "screen_properties_", 2, "", 0, + ); err != nil { + return nil, fmt.Errorf("setting data and column types from message: %w", err) + } + if err := t.setDataAndColumnTypeFromInput(pi, pi.event.Message["context"], screenEvent, columnTypes, + "screen_context_", 2, "context_", 0, + ); err != nil { + return nil, fmt.Errorf("setting data and column types from message: %w", err) + } + if err := t.setDataAndColumnTypeFromRules(pi, screenEvent, columnTypes, + rules.DefaultRules, lo.Assign(rules.DefaultFunctionalRules, rules.ScreenFunctionalRules), + ); err != nil { + return nil, fmt.Errorf("setting data and column types from rules: %w", err) + } + if err := storeRudderEvent(pi, screenEvent, columnTypes); err != nil { + return nil, fmt.Errorf("storing rudder event: %w", err) + } + + table, err := SafeTableName(pi.event.Metadata.DestinationType, pi.itrOpts, 
"screens") + if err != nil { + return nil, fmt.Errorf("safe table name: %w", err) + } + columns, err := t.getColumns(pi.event.Metadata.DestinationType, screenEvent, columnTypes) + if err != nil { + return nil, fmt.Errorf("getting columns: %w", err) + } + + mergeEvents, err := t.handleMergeEvent(pi) + if err != nil { + return nil, fmt.Errorf("handling merge event: %w", err) + } + + screenOutput := map[string]any{ + "data": screenEvent, + "metadata": map[string]any{ + "table": table, + "columns": columns, + "receivedAt": pi.event.Metadata.ReceivedAt, + }, + "userId": "", + } + return append([]map[string]any{screenOutput}, mergeEvents...), nil +} diff --git a/warehouse/transformer/screen_test.go b/warehouse/transformer/screen_test.go new file mode 100644 index 00000000000..71b5666597e --- /dev/null +++ b/warehouse/transformer/screen_test.go @@ -0,0 +1,605 @@ +package transformer + +import ( + "net/http" + "testing" + + "github.com/ory/dockertest/v3" + "github.com/stretchr/testify/require" + + "github.com/rudderlabs/rudder-go-kit/config" + "github.com/rudderlabs/rudder-go-kit/logger" + "github.com/rudderlabs/rudder-go-kit/stats" + transformertest "github.com/rudderlabs/rudder-go-kit/testhelper/docker/resource/transformer" + + backendconfig "github.com/rudderlabs/rudder-server/backend-config" + ptrans "github.com/rudderlabs/rudder-server/processor/transformer" +) + +func TestScreen(t *testing.T) { + pool, err := dockertest.NewPool("") + require.NoError(t, err) + + transformerResource, err := transformertest.Setup(pool, t) + require.NoError(t, err) + + testsCases := []struct { + name string + configOverride map[string]any + eventPayload string + metadata ptrans.Metadata + destination backendconfig.DestinationT + expectedResponse ptrans.Response + }{ + { + name: "screen (Postgres)", + eventPayload: 
`{"type":"screen","messageId":"messageId","anonymousId":"anonymousId","userId":"userId","sentAt":"2021-09-01T00:00:00.000Z","timestamp":"2021-09-01T00:00:00.000Z","receivedAt":"2021-09-01T00:00:00.000Z","originalTimestamp":"2021-09-01T00:00:00.000Z","channel":"web","request_ip":"5.6.7.8","properties":{"name":"Main","title":"Home | RudderStack","url":"http://www.rudderstack.com"},"context":{"traits":{"name":"Richard Hendricks","email":"rhedricks@example.com","logins":2},"ip":"1.2.3.4"}}`, + metadata: ptrans.Metadata{ + EventType: "screen", + DestinationType: "POSTGRES", + ReceivedAt: "2021-09-01T00:00:00.000Z", + SourceID: "sourceID", + DestinationID: "destinationID", + SourceType: "sourceType", + }, + destination: backendconfig.DestinationT{ + Name: "POSTGRES", + Config: map[string]any{}, + DestinationDefinition: backendconfig.DestinationDefinitionT{ + Name: "POSTGRES", + }, + }, + expectedResponse: ptrans.Response{ + Events: []ptrans.TransformerResponse{ + { + Output: map[string]any{ + "data": map[string]any{ + "name": "Main", + "anonymous_id": "anonymousId", + "channel": "web", + "context_ip": "1.2.3.4", + "context_passed_ip": "1.2.3.4", + "context_request_ip": "5.6.7.8", + "context_traits_email": "rhedricks@example.com", + "context_traits_logins": float64(2), + "context_traits_name": "Richard Hendricks", + "id": "messageId", + "original_timestamp": "2021-09-01T00:00:00.000Z", + "received_at": "2021-09-01T00:00:00.000Z", + "sent_at": "2021-09-01T00:00:00.000Z", + "timestamp": "2021-09-01T00:00:00.000Z", + "title": "Home | RudderStack", + "url": "http://www.rudderstack.com", + "user_id": "userId", + "context_destination_id": "destinationID", + "context_destination_type": "POSTGRES", + "context_source_id": "sourceID", + "context_source_type": "sourceType", + }, + "metadata": map[string]any{ + "columns": map[string]any{ + "name": "string", + "anonymous_id": "string", + "channel": "string", + "context_destination_id": "string", + "context_destination_type": "string", 
+ "context_source_id": "string", + "context_source_type": "string", + "context_ip": "string", + "context_passed_ip": "string", + "context_request_ip": "string", + "context_traits_email": "string", + "context_traits_logins": "int", + "context_traits_name": "string", + "id": "string", + "original_timestamp": "datetime", + "received_at": "datetime", + "sent_at": "datetime", + "timestamp": "datetime", + "title": "string", + "url": "string", + "user_id": "string", + "uuid_ts": "datetime", + }, + "receivedAt": "2021-09-01T00:00:00.000Z", + "table": "screens", + }, + "userId": "", + }, + Metadata: ptrans.Metadata{ + EventType: "screen", + DestinationType: "POSTGRES", + ReceivedAt: "2021-09-01T00:00:00.000Z", + SourceID: "sourceID", + DestinationID: "destinationID", + SourceType: "sourceType", + }, + StatusCode: http.StatusOK, + }, + }, + }, + }, + { + name: "screen (Postgres) without properties", + eventPayload: `{"type":"screen","name":"Main","messageId":"messageId","anonymousId":"anonymousId","userId":"userId","sentAt":"2021-09-01T00:00:00.000Z","timestamp":"2021-09-01T00:00:00.000Z","receivedAt":"2021-09-01T00:00:00.000Z","originalTimestamp":"2021-09-01T00:00:00.000Z","channel":"web","request_ip":"5.6.7.8","context":{"traits":{"name":"Richard Hendricks","email":"rhedricks@example.com","logins":2},"ip":"1.2.3.4"}}`, + metadata: ptrans.Metadata{ + EventType: "screen", + DestinationType: "POSTGRES", + ReceivedAt: "2021-09-01T00:00:00.000Z", + SourceID: "sourceID", + DestinationID: "destinationID", + SourceType: "sourceType", + }, + destination: backendconfig.DestinationT{ + Name: "POSTGRES", + Config: map[string]any{}, + DestinationDefinition: backendconfig.DestinationDefinitionT{ + Name: "POSTGRES", + }, + }, + expectedResponse: ptrans.Response{ + Events: []ptrans.TransformerResponse{ + { + Output: map[string]any{ + "data": map[string]any{ + "name": "Main", + "anonymous_id": "anonymousId", + "channel": "web", + "context_ip": "1.2.3.4", + "context_passed_ip": "1.2.3.4", + 
"context_request_ip": "5.6.7.8", + "context_traits_email": "rhedricks@example.com", + "context_traits_logins": float64(2), + "context_traits_name": "Richard Hendricks", + "id": "messageId", + "original_timestamp": "2021-09-01T00:00:00.000Z", + "received_at": "2021-09-01T00:00:00.000Z", + "sent_at": "2021-09-01T00:00:00.000Z", + "timestamp": "2021-09-01T00:00:00.000Z", + "user_id": "userId", + "context_destination_id": "destinationID", + "context_destination_type": "POSTGRES", + "context_source_id": "sourceID", + "context_source_type": "sourceType", + }, + "metadata": map[string]any{ + "columns": map[string]any{ + "name": "string", + "anonymous_id": "string", + "channel": "string", + "context_ip": "string", + "context_passed_ip": "string", + "context_request_ip": "string", + "context_traits_email": "string", + "context_traits_logins": "int", + "context_traits_name": "string", + "id": "string", + "original_timestamp": "datetime", + "received_at": "datetime", + "sent_at": "datetime", + "timestamp": "datetime", + "user_id": "string", + "uuid_ts": "datetime", + "context_destination_id": "string", + "context_destination_type": "string", + "context_source_id": "string", + "context_source_type": "string", + }, + "receivedAt": "2021-09-01T00:00:00.000Z", + "table": "screens", + }, + "userId": "", + }, + Metadata: ptrans.Metadata{ + EventType: "screen", + DestinationType: "POSTGRES", + ReceivedAt: "2021-09-01T00:00:00.000Z", + SourceID: "sourceID", + DestinationID: "destinationID", + SourceType: "sourceType", + }, + StatusCode: http.StatusOK, + }, + }, + }, + }, + { + name: "screen (Postgres) without context", + eventPayload: `{"type":"screen","messageId":"messageId","anonymousId":"anonymousId","userId":"userId","sentAt":"2021-09-01T00:00:00.000Z","timestamp":"2021-09-01T00:00:00.000Z","receivedAt":"2021-09-01T00:00:00.000Z","originalTimestamp":"2021-09-01T00:00:00.000Z","channel":"web","request_ip":"5.6.7.8","properties":{"name":"Main","title":"Home | 
RudderStack","url":"http://www.rudderstack.com"}}`, + metadata: ptrans.Metadata{ + EventType: "screen", + DestinationType: "POSTGRES", + ReceivedAt: "2021-09-01T00:00:00.000Z", + SourceID: "sourceID", + DestinationID: "destinationID", + SourceType: "sourceType", + }, + destination: backendconfig.DestinationT{ + Name: "POSTGRES", + Config: map[string]any{}, + DestinationDefinition: backendconfig.DestinationDefinitionT{ + Name: "POSTGRES", + }, + }, + expectedResponse: ptrans.Response{ + Events: []ptrans.TransformerResponse{ + { + Output: map[string]any{ + "data": map[string]any{ + "name": "Main", + "anonymous_id": "anonymousId", + "channel": "web", + "context_ip": "5.6.7.8", + "context_request_ip": "5.6.7.8", + "id": "messageId", + "original_timestamp": "2021-09-01T00:00:00.000Z", + "received_at": "2021-09-01T00:00:00.000Z", + "sent_at": "2021-09-01T00:00:00.000Z", + "timestamp": "2021-09-01T00:00:00.000Z", + "title": "Home | RudderStack", + "url": "http://www.rudderstack.com", + "user_id": "userId", + "context_destination_id": "destinationID", + "context_destination_type": "POSTGRES", + "context_source_id": "sourceID", + "context_source_type": "sourceType", + }, + "metadata": map[string]any{ + "columns": map[string]any{ + "name": "string", + "anonymous_id": "string", + "channel": "string", + "context_ip": "string", + "context_request_ip": "string", + "id": "string", + "original_timestamp": "datetime", + "received_at": "datetime", + "sent_at": "datetime", + "timestamp": "datetime", + "title": "string", + "url": "string", + "user_id": "string", + "uuid_ts": "datetime", + "context_destination_id": "string", + "context_destination_type": "string", + "context_source_id": "string", + "context_source_type": "string", + }, + "receivedAt": "2021-09-01T00:00:00.000Z", + "table": "screens", + }, + "userId": "", + }, + Metadata: ptrans.Metadata{ + EventType: "screen", + DestinationType: "POSTGRES", + ReceivedAt: "2021-09-01T00:00:00.000Z", + SourceID: "sourceID", + 
DestinationID: "destinationID", + SourceType: "sourceType", + }, + StatusCode: http.StatusOK, + }, + }, + }, + }, + { + name: "screen (Postgres) store rudder event", + eventPayload: `{"type":"screen","messageId":"messageId","anonymousId":"anonymousId","userId":"userId","sentAt":"2021-09-01T00:00:00.000Z","timestamp":"2021-09-01T00:00:00.000Z","receivedAt":"2021-09-01T00:00:00.000Z","originalTimestamp":"2021-09-01T00:00:00.000Z","channel":"web","request_ip":"5.6.7.8","properties":{"name":"Main","title":"Home | RudderStack","url":"http://www.rudderstack.com"},"context":{"traits":{"name":"Richard Hendricks","email":"rhedricks@example.com","logins":2},"ip":"1.2.3.4"}}`, + metadata: ptrans.Metadata{ + EventType: "screen", + DestinationType: "POSTGRES", + ReceivedAt: "2021-09-01T00:00:00.000Z", + SourceID: "sourceID", + DestinationID: "destinationID", + SourceType: "sourceType", + }, + destination: backendconfig.DestinationT{ + Name: "POSTGRES", + Config: map[string]any{ + "storeFullEvent": true, + }, + DestinationDefinition: backendconfig.DestinationDefinitionT{ + Name: "POSTGRES", + }, + }, + expectedResponse: ptrans.Response{ + Events: []ptrans.TransformerResponse{ + { + Output: map[string]any{ + "data": map[string]any{ + "name": "Main", + "anonymous_id": "anonymousId", + "channel": "web", + "context_ip": "1.2.3.4", + "context_passed_ip": "1.2.3.4", + "context_request_ip": "5.6.7.8", + "context_traits_email": "rhedricks@example.com", + "context_traits_logins": float64(2), + "context_traits_name": "Richard Hendricks", + "id": "messageId", + "original_timestamp": "2021-09-01T00:00:00.000Z", + "received_at": "2021-09-01T00:00:00.000Z", + "sent_at": "2021-09-01T00:00:00.000Z", + "timestamp": "2021-09-01T00:00:00.000Z", + "title": "Home | RudderStack", + "url": "http://www.rudderstack.com", + "user_id": "userId", + "context_destination_id": "destinationID", + "context_destination_type": "POSTGRES", + "context_source_id": "sourceID", + "context_source_type": "sourceType", + 
"rudder_event": "{\"type\":\"screen\",\"anonymousId\":\"anonymousId\",\"channel\":\"web\",\"context\":{\"destinationId\":\"destinationID\",\"destinationType\":\"POSTGRES\",\"ip\":\"1.2.3.4\",\"sourceId\":\"sourceID\",\"sourceType\":\"sourceType\",\"traits\":{\"email\":\"rhedricks@example.com\",\"logins\":2,\"name\":\"Richard Hendricks\"}},\"messageId\":\"messageId\",\"originalTimestamp\":\"2021-09-01T00:00:00.000Z\",\"properties\":{\"name\":\"Main\",\"title\":\"Home | RudderStack\",\"url\":\"http://www.rudderstack.com\"},\"receivedAt\":\"2021-09-01T00:00:00.000Z\",\"request_ip\":\"5.6.7.8\",\"sentAt\":\"2021-09-01T00:00:00.000Z\",\"timestamp\":\"2021-09-01T00:00:00.000Z\",\"userId\":\"userId\"}", + }, + "metadata": map[string]any{ + "columns": map[string]any{ + "name": "string", + "anonymous_id": "string", + "channel": "string", + "context_ip": "string", + "context_passed_ip": "string", + "context_request_ip": "string", + "context_traits_email": "string", + "context_traits_logins": "int", + "context_traits_name": "string", + "id": "string", + "original_timestamp": "datetime", + "received_at": "datetime", + "sent_at": "datetime", + "timestamp": "datetime", + "title": "string", + "url": "string", + "user_id": "string", + "uuid_ts": "datetime", + "context_destination_id": "string", + "context_destination_type": "string", + "context_source_id": "string", + "context_source_type": "string", + "rudder_event": "json", + }, + "receivedAt": "2021-09-01T00:00:00.000Z", + "table": "screens", + }, + "userId": "", + }, + Metadata: ptrans.Metadata{ + EventType: "screen", + DestinationType: "POSTGRES", + ReceivedAt: "2021-09-01T00:00:00.000Z", + SourceID: "sourceID", + DestinationID: "destinationID", + SourceType: "sourceType", + }, + StatusCode: http.StatusOK, + }, + }, + }, + }, + { + name: "screen (Postgres) partial rules", + eventPayload: 
`{"type":"screen","messageId":"messageId","userId":"userId","sentAt":"2021-09-01T00:00:00.000Z","timestamp":"2021-09-01T00:00:00.000Z","receivedAt":"2021-09-01T00:00:00.000Z","originalTimestamp":"2021-09-01T00:00:00.000Z","properties":{"name":"Main","title":"Home | RudderStack","url":"http://www.rudderstack.com"},"context":{"traits":{"name":"Richard Hendricks","email":"rhedricks@example.com","logins":2},"ip":"1.2.3.4"}}`, + metadata: ptrans.Metadata{ + EventType: "screen", + DestinationType: "POSTGRES", + ReceivedAt: "2021-09-01T00:00:00.000Z", + SourceID: "sourceID", + DestinationID: "destinationID", + SourceType: "sourceType", + }, + destination: backendconfig.DestinationT{ + Name: "POSTGRES", + Config: map[string]any{}, + DestinationDefinition: backendconfig.DestinationDefinitionT{ + Name: "POSTGRES", + }, + }, + expectedResponse: ptrans.Response{ + Events: []ptrans.TransformerResponse{ + { + Output: map[string]any{ + "data": map[string]any{ + "name": "Main", + "context_ip": "1.2.3.4", + "context_passed_ip": "1.2.3.4", + "context_traits_email": "rhedricks@example.com", + "context_traits_logins": float64(2), + "context_traits_name": "Richard Hendricks", + "id": "messageId", + "original_timestamp": "2021-09-01T00:00:00.000Z", + "received_at": "2021-09-01T00:00:00.000Z", + "sent_at": "2021-09-01T00:00:00.000Z", + "timestamp": "2021-09-01T00:00:00.000Z", + "title": "Home | RudderStack", + "url": "http://www.rudderstack.com", + "user_id": "userId", + "context_destination_id": "destinationID", + "context_destination_type": "POSTGRES", + "context_source_id": "sourceID", + "context_source_type": "sourceType", + }, + "metadata": map[string]any{ + "columns": map[string]any{ + "name": "string", + "context_ip": "string", + "context_passed_ip": "string", + "context_traits_email": "string", + "context_traits_logins": "int", + "context_traits_name": "string", + "id": "string", + "original_timestamp": "datetime", + "received_at": "datetime", + "sent_at": "datetime", + 
"timestamp": "datetime", + "title": "string", + "url": "string", + "user_id": "string", + "uuid_ts": "datetime", + "context_destination_id": "string", + "context_destination_type": "string", + "context_source_id": "string", + "context_source_type": "string", + }, + "receivedAt": "2021-09-01T00:00:00.000Z", + "table": "screens", + }, + "userId": "", + }, + Metadata: ptrans.Metadata{ + EventType: "screen", + DestinationType: "POSTGRES", + ReceivedAt: "2021-09-01T00:00:00.000Z", + SourceID: "sourceID", + DestinationID: "destinationID", + SourceType: "sourceType", + }, + StatusCode: http.StatusOK, + }, + }, + }, + }, + { + name: "screen (BQ) merge event", + configOverride: map[string]any{ + "Warehouse.enableIDResolution": true, + }, + eventPayload: `{"type":"screen","messageId":"messageId","anonymousId":"anonymousId","userId":"userId","sentAt":"2021-09-01T00:00:00.000Z","timestamp":"2021-09-01T00:00:00.000Z","receivedAt":"2021-09-01T00:00:00.000Z","originalTimestamp":"2021-09-01T00:00:00.000Z","channel":"web","request_ip":"5.6.7.8","properties":{"name":"Main","title":"Home | RudderStack","url":"http://www.rudderstack.com"},"context":{"traits":{"name":"Richard Hendricks","email":"rhedricks@example.com","logins":2},"ip":"1.2.3.4"}}`, + metadata: ptrans.Metadata{ + EventType: "screen", + DestinationType: "BQ", + ReceivedAt: "2021-09-01T00:00:00.000Z", + SourceID: "sourceID", + DestinationID: "destinationID", + SourceType: "sourceType", + }, + destination: backendconfig.DestinationT{ + Name: "BQ", + Config: map[string]any{}, + DestinationDefinition: backendconfig.DestinationDefinitionT{ + Name: "BQ", + }, + }, + expectedResponse: ptrans.Response{ + Events: []ptrans.TransformerResponse{ + { + Output: map[string]any{ + "data": map[string]any{ + "name": "Main", + "anonymous_id": "anonymousId", + "channel": "web", + "context_ip": "1.2.3.4", + "context_passed_ip": "1.2.3.4", + "context_request_ip": "5.6.7.8", + "context_traits_email": "rhedricks@example.com", + 
"context_traits_logins": float64(2), + "context_traits_name": "Richard Hendricks", + "id": "messageId", + "original_timestamp": "2021-09-01T00:00:00.000Z", + "received_at": "2021-09-01T00:00:00.000Z", + "sent_at": "2021-09-01T00:00:00.000Z", + "timestamp": "2021-09-01T00:00:00.000Z", + "title": "Home | RudderStack", + "url": "http://www.rudderstack.com", + "user_id": "userId", + "context_destination_id": "destinationID", + "context_destination_type": "BQ", + "context_source_id": "sourceID", + "context_source_type": "sourceType", + }, + "metadata": map[string]any{ + "columns": map[string]any{ + "name": "string", + "anonymous_id": "string", + "channel": "string", + "context_ip": "string", + "context_passed_ip": "string", + "context_request_ip": "string", + "context_traits_email": "string", + "context_traits_logins": "int", + "context_traits_name": "string", + "id": "string", + "original_timestamp": "datetime", + "received_at": "datetime", + "sent_at": "datetime", + "timestamp": "datetime", + "title": "string", + "url": "string", + "user_id": "string", + "uuid_ts": "datetime", + "context_destination_id": "string", + "context_destination_type": "string", + "context_source_id": "string", + "context_source_type": "string", + "loaded_at": "datetime", + }, + "receivedAt": "2021-09-01T00:00:00.000Z", + "table": "screens", + }, + "userId": "", + }, + Metadata: ptrans.Metadata{ + EventType: "screen", + DestinationType: "BQ", + ReceivedAt: "2021-09-01T00:00:00.000Z", + SourceID: "sourceID", + DestinationID: "destinationID", + SourceType: "sourceType", + }, + StatusCode: http.StatusOK, + }, + { + Output: map[string]any{ + "data": map[string]any{ + "merge_property_1_type": "anonymous_id", + "merge_property_1_value": "anonymousId", + "merge_property_2_type": "user_id", + "merge_property_2_value": "userId", + }, + "metadata": map[string]any{ + "table": "rudder_identity_merge_rules", + "columns": map[string]any{"merge_property_1_type": "string", "merge_property_1_value": "string", 
"merge_property_2_type": "string", "merge_property_2_value": "string"}, + "isMergeRule": true, + "receivedAt": "2021-09-01T00:00:00.000Z", + "mergePropOne": "anonymousId", + "mergePropTwo": "userId", + }, + "userId": "", + }, + Metadata: ptrans.Metadata{ + EventType: "screen", + DestinationType: "BQ", + ReceivedAt: "2021-09-01T00:00:00.000Z", + SourceID: "sourceID", + DestinationID: "destinationID", + SourceType: "sourceType", + }, + StatusCode: http.StatusOK, + }, + }, + }, + }, + } + + for _, tc := range testsCases { + t.Run(tc.name, func(t *testing.T) { + c := config.New() + c.Set("DEST_TRANSFORM_URL", transformerResource.TransformerURL) + c.Set("USER_TRANSFORM_URL", transformerResource.TransformerURL) + + for k, v := range tc.configOverride { + c.Set(k, v) + } + + eventsInfos := []eventsInfo{ + { + payload: []byte(tc.eventPayload), + metadata: tc.metadata, + destination: tc.destination, + }, + } + destinationTransformer := ptrans.NewTransformer(c, logger.NOP, stats.Default) + warehouseTransformer := New(c, logger.NOP, stats.NOP) + + testEvents(t, eventsInfos, destinationTransformer, warehouseTransformer, tc.expectedResponse) + }) + } +} diff --git a/warehouse/transformer/setdata.go b/warehouse/transformer/setdata.go new file mode 100644 index 00000000000..354def04af3 --- /dev/null +++ b/warehouse/transformer/setdata.go @@ -0,0 +1,163 @@ +package transformer + +import ( + "encoding/json" + "fmt" + "strings" + + "github.com/rudderlabs/rudder-server/utils/misc" + "github.com/rudderlabs/rudder-server/warehouse/transformer/internal/rules" + "github.com/rudderlabs/rudder-server/warehouse/transformer/internal/utils" +) + +func (t *transformer) setDataAndColumnTypeFromInput( + pi *processingInfo, input any, + data map[string]any, columnType map[string]string, + completePrefix string, completeLevel int, + prefix string, level int, +) error { + if !utils.IsObject(input) { + return nil + } + + inputMap := input.(map[string]any) + + if len(inputMap) == 0 { + return nil + } 
+ if (strings.HasSuffix(completePrefix, "context_traits_") || completePrefix == "group_traits_") && utils.IsStringLikeObject(inputMap) { + if prefix == "context_traits_" { + err := t.addColumnTypeAndValue(pi, prefix, utils.StringLikeObjectToString(inputMap), false, data, columnType) + if err != nil { + return fmt.Errorf("adding column type and value: %w", err) + } + return nil + } + return nil + } + for key, val := range inputMap { + if val == nil || utils.IsBlank(val) { + continue + } + + validLegacyJSONPath := isValidLegacyJSONPathKey(pi.event.Metadata.EventType, prefix+key, level, pi.jsonPathsInfo.legacyKeysMap) + validJSONPath := isValidJSONPathKey(completePrefix+key, completeLevel, pi.jsonPathsInfo.keysMap) + if validLegacyJSONPath || validJSONPath { + valJSON, err := json.Marshal(val) + if err != nil { + return fmt.Errorf("marshalling value: %w", err) + } + + err = t.addColumnTypeAndValue(pi, prefix+key, valJSON, true, data, columnType) + if err != nil { + return fmt.Errorf("adding column type and value: %w", err) + } + } else if utils.IsObject(val) && (pi.event.Metadata.SourceCategory != "cloud" || level < 3) { + err := t.setDataAndColumnTypeFromInput(pi, val.(map[string]any), data, columnType, completePrefix+key+"_", completeLevel+1, prefix+key+"_", level+1) + if err != nil { + return fmt.Errorf("setting data and column types from message: %w", err) + } + } else { + tempData := val + if pi.event.Metadata.SourceCategory == "cloud" && level >= 3 && utils.IsObject(val) { + var err error + tempData, err = json.Marshal(val) + if err != nil { + return fmt.Errorf("marshalling value: %w", err) + } + } + err := t.addColumnTypeAndValue(pi, prefix+key, tempData, false, data, columnType) + if err != nil { + return fmt.Errorf("adding column type and value: %w", err) + } + } + } + return nil +} + +func (t *transformer) addColumnTypeAndValue(pi *processingInfo, key string, val any, jsonKey bool, data map[string]any, columnType map[string]string) error { + columnName := 
TransformColumnName(pi.event.Metadata.DestinationType, pi.itrOpts, pi.dstOpts, key) + if len(columnName) == 0 { + return nil + } + + safeName, err := SafeColumnName(pi.event.Metadata.DestinationType, pi.itrOpts, columnName) + if err != nil { + return fmt.Errorf("transforming column name: %w", err) + } + + if rules.IsRudderReservedColumn(pi.event.Metadata.EventType, strings.ToLower(safeName)) { + return nil + } + + data[safeName] = val + columnType[safeName] = t.getDataType(pi.event.Metadata.DestinationType, key, val, jsonKey) + return nil +} + +func (t *transformer) setDataAndColumnTypeFromRules( + pi *processingInfo, + data map[string]any, columnType map[string]string, + rules map[string]string, functionalRules map[string]rules.FunctionalRules, +) error { + if err := t.setFromRules(pi, data, columnType, rules); err != nil { + return fmt.Errorf("setting data and column type from rules: %w", err) + } + if err := t.setFromFunctionalRules(pi, data, columnType, functionalRules); err != nil { + return fmt.Errorf("setting data and column type from functional rules: %w", err) + } + return nil +} + +func (t *transformer) setFromRules( + pi *processingInfo, + data map[string]any, columnType map[string]string, + rules map[string]string, +) error { + for colKey, valKey := range rules { + columnName, err := SafeColumnName(pi.event.Metadata.DestinationType, pi.itrOpts, colKey) + if err != nil { + return fmt.Errorf("safe column name: %w", err) + } + + delete(data, columnName) + delete(columnType, columnName) + + val := misc.MapLookup(pi.event.Message, strings.Split(valKey, ".")...) 
+ if val == nil || utils.IsBlank(val) { + continue + } + + data[columnName] = val + columnType[columnName] = t.getDataType(pi.event.Metadata.DestinationType, colKey, val, false) + } + return nil +} + +func (t *transformer) setFromFunctionalRules( + pi *processingInfo, + data map[string]any, columnType map[string]string, + functionalRules map[string]rules.FunctionalRules, +) error { + for colKey, functionalRule := range functionalRules { + val, err := functionalRule(pi.event) + if err != nil { + return fmt.Errorf("applying functional rule: %w", err) + } + if val == nil || utils.IsBlank(val) { + continue + } + + columnName, err := SafeColumnName(pi.event.Metadata.DestinationType, pi.itrOpts, colKey) + if err != nil { + return fmt.Errorf("safe column name: %w", err) + } + + delete(data, columnName) + delete(columnType, columnName) + + data[columnName] = val + columnType[columnName] = t.getDataType(pi.event.Metadata.DestinationType, colKey, val, false) + } + return nil +} diff --git a/warehouse/transformer/track.go b/warehouse/transformer/track.go new file mode 100644 index 00000000000..3507c343214 --- /dev/null +++ b/warehouse/transformer/track.go @@ -0,0 +1,160 @@ +package transformer + +import ( + "fmt" + "strings" + + "github.com/samber/lo" + + "github.com/rudderlabs/rudder-server/warehouse/transformer/internal/rules" +) + +func (t *transformer) handleTrackEvent(pi *processingInfo) ([]map[string]any, error) { + responses := make([]map[string]any, 0) + + commonProps, commonColumnTypes, transformerEventName, err := t.trackCommonProps(pi) + if err != nil { + return nil, fmt.Errorf("track common properties: %w", err) + } + + tracksResponse, err := t.tracksResponse(pi, commonProps, commonColumnTypes) + if err != nil { + return nil, fmt.Errorf("tracks response: %w", err) + } + + trackEventsResponse, err := t.trackEventsResponse(pi, transformerEventName, commonProps, commonColumnTypes) + if err != nil { + return nil, fmt.Errorf("track events response: %w", err) + } + 
return append(append(responses, tracksResponse...), trackEventsResponse...), nil +} + +func (t *transformer) trackCommonProps(pi *processingInfo) (map[string]any, map[string]string, string, error) { + commonProps := make(map[string]any) + commonColumnTypes := make(map[string]string) + + if err := t.setDataAndColumnTypeFromInput(pi, pi.event.Message["context"], commonProps, commonColumnTypes, + "track_context_", 2, "context_", 0, + ); err != nil { + return nil, nil, "", fmt.Errorf("setting data and column types from message: %w", err) + } + if err := t.setDataAndColumnTypeFromRules(pi, commonProps, commonColumnTypes, + lo.Assign(rules.TrackRules, rules.DefaultRules), rules.DefaultFunctionalRules, + ); err != nil { + return nil, nil, "", fmt.Errorf("setting data and column types from rules: %w", err) + } + + eventColName, err := SafeColumnName(pi.event.Metadata.DestinationType, pi.itrOpts, "event") + if err != nil { + return nil, nil, "", fmt.Errorf("safe column name: %w", err) + } + eventTextColName, err := SafeColumnName(pi.event.Metadata.DestinationType, pi.itrOpts, "event_text") + if err != nil { + return nil, nil, "", fmt.Errorf("safe column name: %w", err) + } + + var eventName, transformerEventName string + if d, dok := commonProps[eventTextColName]; dok { + eventName, _ = d.(string) + } + transformerEventName = TransformTableName(pi.event.Metadata.DestinationType, pi.itrOpts, pi.dstOpts, eventName) + + commonProps[eventColName] = transformerEventName + commonColumnTypes[eventColName] = "string" + return commonProps, commonColumnTypes, transformerEventName, nil +} + +func (t *transformer) tracksResponse(pi *processingInfo, commonProps map[string]any, commonColumnTypes map[string]string) ([]map[string]any, error) { + if pi.itrOpts.skipTracksTable || pi.dstOpts.skipTracksTable { + return nil, nil + } + + trackEvent := make(map[string]any) + trackColumnTypes := make(map[string]string) + + trackEvent = lo.Assign(trackEvent, commonProps) + + if err := 
t.setDataAndColumnTypeFromRules(pi, trackEvent, trackColumnTypes, + nil, rules.TrackTableFunctionalRules, + ); err != nil { + return nil, fmt.Errorf("setting data and column types from rules: %w", err) + } + if err := storeRudderEvent(pi, trackEvent, trackColumnTypes); err != nil { + return nil, fmt.Errorf("storing rudder event: %w", err) + } + + table, err := SafeTableName(pi.event.Metadata.DestinationType, pi.itrOpts, "tracks") + if err != nil { + return nil, fmt.Errorf("safe table name: %w", err) + } + columns, err := t.getColumns(pi.event.Metadata.DestinationType, trackEvent, lo.Assign(trackColumnTypes, commonColumnTypes)) + if err != nil { + return nil, fmt.Errorf("getting columns: %w", err) + } + + output := map[string]any{ + "data": trackEvent, + "metadata": map[string]any{ + "table": table, + "columns": columns, + "receivedAt": pi.event.Metadata.ReceivedAt, + }, + "userId": "", + } + return []map[string]any{output}, nil +} + +func (t *transformer) trackEventsResponse(pi *processingInfo, transformerEventName string, commonProps map[string]any, commonColumnTypes map[string]string) ([]map[string]any, error) { + if len(transformerEventName) == 0 || len(strings.TrimSpace(transformerEventName)) == 0 { + return nil, nil + } + + tracksEvent := make(map[string]any) + tracksColumnTypes := make(map[string]string) + + if err := t.setDataAndColumnTypeFromInput(pi, pi.event.Message["properties"], tracksEvent, tracksColumnTypes, + "track_properties_", 2, "", 0, + ); err != nil { + return nil, fmt.Errorf("setting data and column types from message: %w", err) + } + if err := t.setDataAndColumnTypeFromInput(pi, pi.event.Message["userProperties"], tracksEvent, tracksColumnTypes, + "track_userProperties_", 2, "", 0, + ); err != nil { + return nil, fmt.Errorf("setting data and column types from message: %w", err) + } + if err := t.setDataAndColumnTypeFromRules(pi, commonProps, commonColumnTypes, + nil, rules.TrackEventTableFunctionalRules, + ); err != nil { + return nil, 
fmt.Errorf("setting data and column types from rules: %w", err) + } + + eventTableEvent := lo.Assign(tracksEvent, commonProps) + + columnName := TransformColumnName(pi.event.Metadata.DestinationType, pi.itrOpts, pi.dstOpts, transformerEventName) + table, err := SafeTableName(pi.event.Metadata.DestinationType, pi.itrOpts, columnName) + if err != nil { + return nil, fmt.Errorf("safe table name: %w", err) + } + excludeTable := excludeRudderCreatedTableNames(table, pi.itrOpts.skipReservedKeywordsEscaping) + + columns, err := t.getColumns(pi.event.Metadata.DestinationType, eventTableEvent, lo.Assign(tracksColumnTypes, commonColumnTypes)) + if err != nil { + return nil, fmt.Errorf("getting columns: %w", err) + } + + mergeEvents, err := t.handleMergeEvent(pi) + if err != nil { + return nil, fmt.Errorf("handling merge event: %w", err) + } + + trackOutput := map[string]any{ + "data": eventTableEvent, + "metadata": map[string]any{ + "table": excludeTable, + "columns": columns, + "receivedAt": pi.event.Metadata.ReceivedAt, + }, + "userId": "", + } + return append([]map[string]any{trackOutput}, mergeEvents...), nil +} diff --git a/warehouse/transformer/track_test.go b/warehouse/transformer/track_test.go new file mode 100644 index 00000000000..0871a0f3cdd --- /dev/null +++ b/warehouse/transformer/track_test.go @@ -0,0 +1,2032 @@ +package transformer + +import ( + "net/http" + "testing" + + "github.com/ory/dockertest/v3" + "github.com/stretchr/testify/require" + + "github.com/rudderlabs/rudder-go-kit/config" + "github.com/rudderlabs/rudder-go-kit/logger" + "github.com/rudderlabs/rudder-go-kit/stats" + transformertest "github.com/rudderlabs/rudder-go-kit/testhelper/docker/resource/transformer" + + backendconfig "github.com/rudderlabs/rudder-server/backend-config" + ptrans "github.com/rudderlabs/rudder-server/processor/transformer" +) + +func TestTrack(t *testing.T) { + pool, err := dockertest.NewPool("") + require.NoError(t, err) + + transformerResource, err := 
transformertest.Setup(pool, t) + require.NoError(t, err) + + testsCases := []struct { + name string + configOverride map[string]any + eventPayload string + metadata ptrans.Metadata + destination backendconfig.DestinationT + expectedResponse ptrans.Response + }{ + { + name: "track (POSTGRES)", + eventPayload: `{"type":"track","messageId":"messageId","anonymousId":"anonymousId","userId":"userId","sentAt":"2021-09-01T00:00:00.000Z","timestamp":"2021-09-01T00:00:00.000Z","receivedAt":"2021-09-01T00:00:00.000Z","originalTimestamp":"2021-09-01T00:00:00.000Z","channel":"web","event":"event","request_ip":"5.6.7.8","properties":{"review_id":"86ac1cd43","product_id":"9578257311"},"userProperties":{"rating":3.0,"review_body":"OK for the price. It works but the material feels flimsy."},"context":{"traits":{"name":"Richard Hendricks","email":"rhedricks@example.com","logins":2},"ip":"1.2.3.4"}}`, + metadata: ptrans.Metadata{ + EventType: "track", + DestinationType: "POSTGRES", + ReceivedAt: "2021-09-01T00:00:00.000Z", + SourceID: "sourceID", + DestinationID: "destinationID", + SourceType: "sourceType", + MessageID: "messageId", + }, + destination: backendconfig.DestinationT{ + Name: "POSTGRES", + Config: map[string]any{}, + DestinationDefinition: backendconfig.DestinationDefinitionT{ + Name: "POSTGRES", + }, + }, + expectedResponse: ptrans.Response{ + Events: []ptrans.TransformerResponse{ + { + Output: map[string]any{ + "data": map[string]any{ + "anonymous_id": "anonymousId", + "channel": "web", + "context_destination_id": "destinationID", + "context_destination_type": "POSTGRES", + "context_ip": "1.2.3.4", + "context_passed_ip": "1.2.3.4", + "context_request_ip": "5.6.7.8", + "context_source_id": "sourceID", + "context_source_type": "sourceType", + "context_traits_email": "rhedricks@example.com", + "context_traits_logins": float64(2), + "context_traits_name": "Richard Hendricks", + "event": "event", + "event_text": "event", + "id": "messageId", + "original_timestamp": 
"2021-09-01T00:00:00.000Z", + "received_at": "2021-09-01T00:00:00.000Z", + "sent_at": "2021-09-01T00:00:00.000Z", + "timestamp": "2021-09-01T00:00:00.000Z", + "user_id": "userId", + }, + "metadata": map[string]any{ + "columns": map[string]any{ + "anonymous_id": "string", + "channel": "string", + "context_destination_id": "string", + "context_destination_type": "string", + "context_ip": "string", + "context_passed_ip": "string", + "context_request_ip": "string", + "context_source_id": "string", + "context_source_type": "string", + "context_traits_email": "string", + "context_traits_logins": "int", + "context_traits_name": "string", + "event": "string", + "event_text": "string", + "id": "string", + "original_timestamp": "datetime", + "received_at": "datetime", + "sent_at": "datetime", + "timestamp": "datetime", + "user_id": "string", + "uuid_ts": "datetime", + }, + "receivedAt": "2021-09-01T00:00:00.000Z", + "table": "tracks", + }, + "userId": "", + }, + Metadata: ptrans.Metadata{ + EventType: "track", + DestinationType: "POSTGRES", + ReceivedAt: "2021-09-01T00:00:00.000Z", + SourceID: "sourceID", + DestinationID: "destinationID", + SourceType: "sourceType", + MessageID: "messageId", + }, + StatusCode: http.StatusOK, + }, + { + Output: map[string]any{ + "data": map[string]any{ + "anonymous_id": "anonymousId", + "channel": "web", + "context_destination_id": "destinationID", + "context_destination_type": "POSTGRES", + "context_ip": "1.2.3.4", + "context_passed_ip": "1.2.3.4", + "context_request_ip": "5.6.7.8", + "context_source_id": "sourceID", + "context_source_type": "sourceType", + "context_traits_email": "rhedricks@example.com", + "context_traits_logins": float64(2), + "context_traits_name": "Richard Hendricks", + "event": "event", + "event_text": "event", + "id": "messageId", + "original_timestamp": "2021-09-01T00:00:00.000Z", + "product_id": "9578257311", + "rating": 3.0, + "received_at": "2021-09-01T00:00:00.000Z", + "review_body": "OK for the price. 
It works but the material feels flimsy.", + "review_id": "86ac1cd43", + "sent_at": "2021-09-01T00:00:00.000Z", + "timestamp": "2021-09-01T00:00:00.000Z", + "user_id": "userId", + }, + "metadata": map[string]any{ + "columns": map[string]any{ + "anonymous_id": "string", + "channel": "string", + "context_destination_id": "string", + "context_destination_type": "string", + "context_ip": "string", + "context_passed_ip": "string", + "context_request_ip": "string", + "context_source_id": "string", + "context_source_type": "string", + "context_traits_email": "string", + "context_traits_logins": "int", + "context_traits_name": "string", + "event": "string", + "event_text": "string", + "id": "string", + "original_timestamp": "datetime", + "product_id": "string", + "rating": "int", + "received_at": "datetime", + "review_body": "string", + "review_id": "string", + "sent_at": "datetime", + "timestamp": "datetime", + "user_id": "string", + "uuid_ts": "datetime", + }, + "receivedAt": "2021-09-01T00:00:00.000Z", + "table": "event", + }, + "userId": "", + }, + Metadata: ptrans.Metadata{ + EventType: "track", + DestinationType: "POSTGRES", + ReceivedAt: "2021-09-01T00:00:00.000Z", + SourceID: "sourceID", + DestinationID: "destinationID", + SourceType: "sourceType", + MessageID: "messageId", + }, + StatusCode: http.StatusOK, + }, + }, + }, + }, + { + name: "track (POSTGRES) without properties", + eventPayload: `{"type":"track","messageId":"messageId","anonymousId":"anonymousId","userId":"userId","sentAt":"2021-09-01T00:00:00.000Z","timestamp":"2021-09-01T00:00:00.000Z","receivedAt":"2021-09-01T00:00:00.000Z","originalTimestamp":"2021-09-01T00:00:00.000Z","channel":"web","event":"event","request_ip":"5.6.7.8","userProperties":{"rating":3.0,"review_body":"OK for the price. 
It works but the material feels flimsy."},"context":{"traits":{"name":"Richard Hendricks","email":"rhedricks@example.com","logins":2},"ip":"1.2.3.4"}}`, + metadata: ptrans.Metadata{ + EventType: "track", + DestinationType: "POSTGRES", + ReceivedAt: "2021-09-01T00:00:00.000Z", + SourceID: "sourceID", + DestinationID: "destinationID", + SourceType: "sourceType", + MessageID: "messageId", + }, + destination: backendconfig.DestinationT{ + Name: "POSTGRES", + Config: map[string]any{}, + DestinationDefinition: backendconfig.DestinationDefinitionT{ + Name: "POSTGRES", + }, + }, + expectedResponse: ptrans.Response{ + Events: []ptrans.TransformerResponse{ + { + Output: map[string]any{ + "data": map[string]any{ + "anonymous_id": "anonymousId", + "channel": "web", + "context_destination_id": "destinationID", + "context_destination_type": "POSTGRES", + "context_ip": "1.2.3.4", + "context_passed_ip": "1.2.3.4", + "context_request_ip": "5.6.7.8", + "context_source_id": "sourceID", + "context_source_type": "sourceType", + "context_traits_email": "rhedricks@example.com", + "context_traits_logins": float64(2), + "context_traits_name": "Richard Hendricks", + "event": "event", + "event_text": "event", + "id": "messageId", + "original_timestamp": "2021-09-01T00:00:00.000Z", + "received_at": "2021-09-01T00:00:00.000Z", + "sent_at": "2021-09-01T00:00:00.000Z", + "timestamp": "2021-09-01T00:00:00.000Z", + "user_id": "userId", + }, + "metadata": map[string]any{ + "columns": map[string]any{ + "anonymous_id": "string", + "channel": "string", + "context_destination_id": "string", + "context_destination_type": "string", + "context_ip": "string", + "context_passed_ip": "string", + "context_request_ip": "string", + "context_source_id": "string", + "context_source_type": "string", + "context_traits_email": "string", + "context_traits_logins": "int", + "context_traits_name": "string", + "event": "string", + "event_text": "string", + "id": "string", + "original_timestamp": "datetime", + 
"received_at": "datetime", + "sent_at": "datetime", + "timestamp": "datetime", + "user_id": "string", + "uuid_ts": "datetime", + }, + "receivedAt": "2021-09-01T00:00:00.000Z", + "table": "tracks", + }, + "userId": "", + }, + Metadata: ptrans.Metadata{ + EventType: "track", + DestinationType: "POSTGRES", + ReceivedAt: "2021-09-01T00:00:00.000Z", + SourceID: "sourceID", + DestinationID: "destinationID", + SourceType: "sourceType", + MessageID: "messageId", + }, + StatusCode: http.StatusOK, + }, + { + Output: map[string]any{ + "data": map[string]any{ + "anonymous_id": "anonymousId", + "channel": "web", + "context_destination_id": "destinationID", + "context_destination_type": "POSTGRES", + "context_ip": "1.2.3.4", + "context_passed_ip": "1.2.3.4", + "context_request_ip": "5.6.7.8", + "context_source_id": "sourceID", + "context_source_type": "sourceType", + "context_traits_email": "rhedricks@example.com", + "context_traits_logins": float64(2), + "context_traits_name": "Richard Hendricks", + "event": "event", + "event_text": "event", + "id": "messageId", + "original_timestamp": "2021-09-01T00:00:00.000Z", + "rating": 3.0, + "received_at": "2021-09-01T00:00:00.000Z", + "review_body": "OK for the price. 
It works but the material feels flimsy.", + "sent_at": "2021-09-01T00:00:00.000Z", + "timestamp": "2021-09-01T00:00:00.000Z", + "user_id": "userId", + }, + "metadata": map[string]any{ + "columns": map[string]any{ + "anonymous_id": "string", + "channel": "string", + "context_destination_id": "string", + "context_destination_type": "string", + "context_ip": "string", + "context_passed_ip": "string", + "context_request_ip": "string", + "context_source_id": "string", + "context_source_type": "string", + "context_traits_email": "string", + "context_traits_logins": "int", + "context_traits_name": "string", + "event": "string", + "event_text": "string", + "id": "string", + "original_timestamp": "datetime", + "rating": "int", + "received_at": "datetime", + "review_body": "string", + "sent_at": "datetime", + "timestamp": "datetime", + "user_id": "string", + "uuid_ts": "datetime", + }, + "receivedAt": "2021-09-01T00:00:00.000Z", + "table": "event", + }, + "userId": "", + }, + Metadata: ptrans.Metadata{ + EventType: "track", + DestinationType: "POSTGRES", + ReceivedAt: "2021-09-01T00:00:00.000Z", + SourceID: "sourceID", + DestinationID: "destinationID", + SourceType: "sourceType", + MessageID: "messageId", + }, + StatusCode: http.StatusOK, + }, + }, + }, + }, + { + name: "track (POSTGRES) without userProperties", + eventPayload: `{"type":"track","messageId":"messageId","anonymousId":"anonymousId","userId":"userId","sentAt":"2021-09-01T00:00:00.000Z","timestamp":"2021-09-01T00:00:00.000Z","receivedAt":"2021-09-01T00:00:00.000Z","originalTimestamp":"2021-09-01T00:00:00.000Z","channel":"web","event":"event","request_ip":"5.6.7.8","properties":{"review_id":"86ac1cd43","product_id":"9578257311"},"context":{"traits":{"name":"Richard Hendricks","email":"rhedricks@example.com","logins":2},"ip":"1.2.3.4"}}`, + metadata: ptrans.Metadata{ + EventType: "track", + DestinationType: "POSTGRES", + ReceivedAt: "2021-09-01T00:00:00.000Z", + SourceID: "sourceID", + DestinationID: 
"destinationID", + SourceType: "sourceType", + MessageID: "messageId", + }, + destination: backendconfig.DestinationT{ + Name: "POSTGRES", + Config: map[string]any{}, + DestinationDefinition: backendconfig.DestinationDefinitionT{ + Name: "POSTGRES", + }, + }, + expectedResponse: ptrans.Response{ + Events: []ptrans.TransformerResponse{ + { + Output: map[string]any{ + "data": map[string]any{ + "anonymous_id": "anonymousId", + "channel": "web", + "context_destination_id": "destinationID", + "context_destination_type": "POSTGRES", + "context_ip": "1.2.3.4", + "context_passed_ip": "1.2.3.4", + "context_request_ip": "5.6.7.8", + "context_source_id": "sourceID", + "context_source_type": "sourceType", + "context_traits_email": "rhedricks@example.com", + "context_traits_logins": float64(2), + "context_traits_name": "Richard Hendricks", + "event": "event", + "event_text": "event", + "id": "messageId", + "original_timestamp": "2021-09-01T00:00:00.000Z", + "received_at": "2021-09-01T00:00:00.000Z", + "sent_at": "2021-09-01T00:00:00.000Z", + "timestamp": "2021-09-01T00:00:00.000Z", + "user_id": "userId", + }, + "metadata": map[string]any{ + "columns": map[string]any{ + "anonymous_id": "string", + "channel": "string", + "context_destination_id": "string", + "context_destination_type": "string", + "context_ip": "string", + "context_passed_ip": "string", + "context_request_ip": "string", + "context_source_id": "string", + "context_source_type": "string", + "context_traits_email": "string", + "context_traits_logins": "int", + "context_traits_name": "string", + "event": "string", + "event_text": "string", + "id": "string", + "original_timestamp": "datetime", + "received_at": "datetime", + "sent_at": "datetime", + "timestamp": "datetime", + "user_id": "string", + "uuid_ts": "datetime", + }, + "receivedAt": "2021-09-01T00:00:00.000Z", + "table": "tracks", + }, + "userId": "", + }, + Metadata: ptrans.Metadata{ + EventType: "track", + DestinationType: "POSTGRES", + ReceivedAt: 
"2021-09-01T00:00:00.000Z", + SourceID: "sourceID", + DestinationID: "destinationID", + SourceType: "sourceType", + MessageID: "messageId", + }, + StatusCode: http.StatusOK, + }, + { + Output: map[string]any{ + "data": map[string]any{ + "anonymous_id": "anonymousId", + "channel": "web", + "context_destination_id": "destinationID", + "context_destination_type": "POSTGRES", + "context_ip": "1.2.3.4", + "context_passed_ip": "1.2.3.4", + "context_request_ip": "5.6.7.8", + "context_source_id": "sourceID", + "context_source_type": "sourceType", + "context_traits_email": "rhedricks@example.com", + "context_traits_logins": float64(2), + "context_traits_name": "Richard Hendricks", + "event": "event", + "event_text": "event", + "id": "messageId", + "original_timestamp": "2021-09-01T00:00:00.000Z", + "product_id": "9578257311", + "received_at": "2021-09-01T00:00:00.000Z", + "review_id": "86ac1cd43", + "sent_at": "2021-09-01T00:00:00.000Z", + "timestamp": "2021-09-01T00:00:00.000Z", + "user_id": "userId", + }, + "metadata": map[string]any{ + "columns": map[string]any{ + "anonymous_id": "string", + "channel": "string", + "context_destination_id": "string", + "context_destination_type": "string", + "context_ip": "string", + "context_passed_ip": "string", + "context_request_ip": "string", + "context_source_id": "string", + "context_source_type": "string", + "context_traits_email": "string", + "context_traits_logins": "int", + "context_traits_name": "string", + "event": "string", + "event_text": "string", + "id": "string", + "original_timestamp": "datetime", + "product_id": "string", + "received_at": "datetime", + "review_id": "string", + "sent_at": "datetime", + "timestamp": "datetime", + "user_id": "string", + "uuid_ts": "datetime", + }, + "receivedAt": "2021-09-01T00:00:00.000Z", + "table": "event", + }, + "userId": "", + }, + Metadata: ptrans.Metadata{ + EventType: "track", + DestinationType: "POSTGRES", + ReceivedAt: "2021-09-01T00:00:00.000Z", + SourceID: "sourceID", + 
DestinationID: "destinationID", + SourceType: "sourceType", + MessageID: "messageId", + }, + StatusCode: http.StatusOK, + }, + }, + }, + }, + { + name: "track (POSTGRES) without context", + eventPayload: `{"type":"track","messageId":"messageId","anonymousId":"anonymousId","userId":"userId","sentAt":"2021-09-01T00:00:00.000Z","timestamp":"2021-09-01T00:00:00.000Z","receivedAt":"2021-09-01T00:00:00.000Z","originalTimestamp":"2021-09-01T00:00:00.000Z","channel":"web","event":"event","request_ip":"5.6.7.8","properties":{"review_id":"86ac1cd43","product_id":"9578257311"},"userProperties":{"rating":3.0,"review_body":"OK for the price. It works but the material feels flimsy."}}`, + metadata: ptrans.Metadata{ + EventType: "track", + DestinationType: "POSTGRES", + ReceivedAt: "2021-09-01T00:00:00.000Z", + SourceID: "sourceID", + DestinationID: "destinationID", + SourceType: "sourceType", + MessageID: "messageId", + }, + destination: backendconfig.DestinationT{ + Name: "POSTGRES", + Config: map[string]any{}, + DestinationDefinition: backendconfig.DestinationDefinitionT{ + Name: "POSTGRES", + }, + }, + expectedResponse: ptrans.Response{ + Events: []ptrans.TransformerResponse{ + { + Output: map[string]any{ + "data": map[string]any{ + "anonymous_id": "anonymousId", + "channel": "web", + "context_destination_id": "destinationID", + "context_destination_type": "POSTGRES", + "context_ip": "5.6.7.8", + "context_request_ip": "5.6.7.8", + "context_source_id": "sourceID", + "context_source_type": "sourceType", + "event": "event", + "event_text": "event", + "id": "messageId", + "original_timestamp": "2021-09-01T00:00:00.000Z", + "received_at": "2021-09-01T00:00:00.000Z", + "sent_at": "2021-09-01T00:00:00.000Z", + "timestamp": "2021-09-01T00:00:00.000Z", + "user_id": "userId", + }, + "metadata": map[string]any{ + "columns": map[string]any{ + "anonymous_id": "string", + "channel": "string", + "context_destination_id": "string", + "context_destination_type": "string", + "context_ip": 
"string", + "context_request_ip": "string", + "context_source_id": "string", + "context_source_type": "string", + "event": "string", + "event_text": "string", + "id": "string", + "original_timestamp": "datetime", + "received_at": "datetime", + "sent_at": "datetime", + "timestamp": "datetime", + "user_id": "string", + "uuid_ts": "datetime", + }, + "receivedAt": "2021-09-01T00:00:00.000Z", + "table": "tracks", + }, + "userId": "", + }, + Metadata: ptrans.Metadata{ + EventType: "track", + DestinationType: "POSTGRES", + ReceivedAt: "2021-09-01T00:00:00.000Z", + SourceID: "sourceID", + DestinationID: "destinationID", + SourceType: "sourceType", + MessageID: "messageId", + }, + StatusCode: http.StatusOK, + }, + { + Output: map[string]any{ + "data": map[string]any{ + "anonymous_id": "anonymousId", + "channel": "web", + "context_destination_id": "destinationID", + "context_destination_type": "POSTGRES", + "context_ip": "5.6.7.8", + "context_request_ip": "5.6.7.8", + "context_source_id": "sourceID", + "context_source_type": "sourceType", + "event": "event", + "event_text": "event", + "id": "messageId", + "original_timestamp": "2021-09-01T00:00:00.000Z", + "product_id": "9578257311", + "rating": 3.0, + "received_at": "2021-09-01T00:00:00.000Z", + "review_body": "OK for the price. 
It works but the material feels flimsy.", + "review_id": "86ac1cd43", + "sent_at": "2021-09-01T00:00:00.000Z", + "timestamp": "2021-09-01T00:00:00.000Z", + "user_id": "userId", + }, + "metadata": map[string]any{ + "columns": map[string]any{ + "anonymous_id": "string", + "channel": "string", + "context_destination_id": "string", + "context_destination_type": "string", + "context_ip": "string", + "context_request_ip": "string", + "context_source_id": "string", + "context_source_type": "string", + "event": "string", + "event_text": "string", + "id": "string", + "original_timestamp": "datetime", + "product_id": "string", + "rating": "int", + "received_at": "datetime", + "review_body": "string", + "review_id": "string", + "sent_at": "datetime", + "timestamp": "datetime", + "user_id": "string", + "uuid_ts": "datetime", + }, + "receivedAt": "2021-09-01T00:00:00.000Z", + "table": "event", + }, + "userId": "", + }, + Metadata: ptrans.Metadata{ + EventType: "track", + DestinationType: "POSTGRES", + ReceivedAt: "2021-09-01T00:00:00.000Z", + SourceID: "sourceID", + DestinationID: "destinationID", + SourceType: "sourceType", + MessageID: "messageId", + }, + StatusCode: http.StatusOK, + }, + }, + }, + }, + { + name: "track (POSTGRES) RudderCreatedTable", + eventPayload: `{"type":"track","messageId":"messageId","anonymousId":"anonymousId","userId":"userId","sentAt":"2021-09-01T00:00:00.000Z","timestamp":"2021-09-01T00:00:00.000Z","receivedAt":"2021-09-01T00:00:00.000Z","originalTimestamp":"2021-09-01T00:00:00.000Z","channel":"web","event":"accounts","request_ip":"5.6.7.8","properties":{"review_id":"86ac1cd43","product_id":"9578257311"},"userProperties":{"rating":3.0,"review_body":"OK for the price. 
It works but the material feels flimsy."},"context":{"traits":{"name":"Richard Hendricks","email":"rhedricks@example.com","logins":2},"ip":"1.2.3.4"}}`, + metadata: ptrans.Metadata{ + EventType: "track", + DestinationType: "POSTGRES", + ReceivedAt: "2021-09-01T00:00:00.000Z", + SourceID: "sourceID", + DestinationID: "destinationID", + SourceType: "sourceType", + MessageID: "messageId", + }, + destination: backendconfig.DestinationT{ + Name: "POSTGRES", + Config: map[string]any{}, + DestinationDefinition: backendconfig.DestinationDefinitionT{ + Name: "POSTGRES", + }, + }, + expectedResponse: ptrans.Response{ + Events: []ptrans.TransformerResponse{ + { + Output: map[string]any{ + "data": map[string]any{ + "anonymous_id": "anonymousId", + "channel": "web", + "context_destination_id": "destinationID", + "context_destination_type": "POSTGRES", + "context_ip": "1.2.3.4", + "context_passed_ip": "1.2.3.4", + "context_request_ip": "5.6.7.8", + "context_source_id": "sourceID", + "context_source_type": "sourceType", + "context_traits_email": "rhedricks@example.com", + "context_traits_logins": float64(2), + "context_traits_name": "Richard Hendricks", + "event": "accounts", + "event_text": "accounts", + "id": "messageId", + "original_timestamp": "2021-09-01T00:00:00.000Z", + "received_at": "2021-09-01T00:00:00.000Z", + "sent_at": "2021-09-01T00:00:00.000Z", + "timestamp": "2021-09-01T00:00:00.000Z", + "user_id": "userId", + }, + "metadata": map[string]any{ + "columns": map[string]any{ + "anonymous_id": "string", + "channel": "string", + "context_destination_id": "string", + "context_destination_type": "string", + "context_ip": "string", + "context_passed_ip": "string", + "context_request_ip": "string", + "context_source_id": "string", + "context_source_type": "string", + "context_traits_email": "string", + "context_traits_logins": "int", + "context_traits_name": "string", + "event": "string", + "event_text": "string", + "id": "string", + "original_timestamp": "datetime", + 
"received_at": "datetime", + "sent_at": "datetime", + "timestamp": "datetime", + "user_id": "string", + "uuid_ts": "datetime", + }, + "receivedAt": "2021-09-01T00:00:00.000Z", + "table": "tracks", + }, + "userId": "", + }, + Metadata: ptrans.Metadata{ + EventType: "track", + DestinationType: "POSTGRES", + ReceivedAt: "2021-09-01T00:00:00.000Z", + SourceID: "sourceID", + DestinationID: "destinationID", + SourceType: "sourceType", + MessageID: "messageId", + }, + StatusCode: http.StatusOK, + }, + { + Output: map[string]any{ + "data": map[string]any{ + "anonymous_id": "anonymousId", + "channel": "web", + "context_destination_id": "destinationID", + "context_destination_type": "POSTGRES", + "context_ip": "1.2.3.4", + "context_passed_ip": "1.2.3.4", + "context_request_ip": "5.6.7.8", + "context_source_id": "sourceID", + "context_source_type": "sourceType", + "context_traits_email": "rhedricks@example.com", + "context_traits_logins": float64(2), + "context_traits_name": "Richard Hendricks", + "event": "accounts", + "event_text": "accounts", + "id": "messageId", + "original_timestamp": "2021-09-01T00:00:00.000Z", + "product_id": "9578257311", + "rating": 3.0, + "received_at": "2021-09-01T00:00:00.000Z", + "review_body": "OK for the price. 
It works but the material feels flimsy.", + "review_id": "86ac1cd43", + "sent_at": "2021-09-01T00:00:00.000Z", + "timestamp": "2021-09-01T00:00:00.000Z", + "user_id": "userId", + }, + "metadata": map[string]any{ + "columns": map[string]any{ + "anonymous_id": "string", + "channel": "string", + "context_destination_id": "string", + "context_destination_type": "string", + "context_ip": "string", + "context_passed_ip": "string", + "context_request_ip": "string", + "context_source_id": "string", + "context_source_type": "string", + "context_traits_email": "string", + "context_traits_logins": "int", + "context_traits_name": "string", + "event": "string", + "event_text": "string", + "id": "string", + "original_timestamp": "datetime", + "product_id": "string", + "rating": "int", + "received_at": "datetime", + "review_body": "string", + "review_id": "string", + "sent_at": "datetime", + "timestamp": "datetime", + "user_id": "string", + "uuid_ts": "datetime", + }, + "receivedAt": "2021-09-01T00:00:00.000Z", + "table": "_accounts", + }, + "userId": "", + }, + Metadata: ptrans.Metadata{ + EventType: "track", + DestinationType: "POSTGRES", + ReceivedAt: "2021-09-01T00:00:00.000Z", + SourceID: "sourceID", + DestinationID: "destinationID", + SourceType: "sourceType", + MessageID: "messageId", + }, + StatusCode: http.StatusOK, + }, + }, + }, + }, + { + name: "track (POSTGRES) RudderCreatedTable with skipReservedKeywordsEscaping", + eventPayload: `{"type":"track","messageId":"messageId","anonymousId":"anonymousId","userId":"userId","sentAt":"2021-09-01T00:00:00.000Z","timestamp":"2021-09-01T00:00:00.000Z","receivedAt":"2021-09-01T00:00:00.000Z","originalTimestamp":"2021-09-01T00:00:00.000Z","channel":"web","event":"accounts","request_ip":"5.6.7.8","properties":{"review_id":"86ac1cd43","product_id":"9578257311"},"userProperties":{"rating":3.0,"review_body":"OK for the price. 
It works but the material feels flimsy."},"context":{"traits":{"name":"Richard Hendricks","email":"rhedricks@example.com","logins":2},"ip":"1.2.3.4"},"integrations":{"POSTGRES":{"options":{"skipReservedKeywordsEscaping":true}}}}`, + metadata: ptrans.Metadata{ + EventType: "track", + DestinationType: "POSTGRES", + ReceivedAt: "2021-09-01T00:00:00.000Z", + SourceID: "sourceID", + DestinationID: "destinationID", + SourceType: "sourceType", + MessageID: "messageId", + }, + destination: backendconfig.DestinationT{ + Name: "POSTGRES", + Config: map[string]any{}, + DestinationDefinition: backendconfig.DestinationDefinitionT{ + Name: "POSTGRES", + }, + }, + expectedResponse: ptrans.Response{ + Events: []ptrans.TransformerResponse{ + { + Output: map[string]any{ + "data": map[string]any{ + "anonymous_id": "anonymousId", + "channel": "web", + "context_destination_id": "destinationID", + "context_destination_type": "POSTGRES", + "context_ip": "1.2.3.4", + "context_passed_ip": "1.2.3.4", + "context_request_ip": "5.6.7.8", + "context_source_id": "sourceID", + "context_source_type": "sourceType", + "context_traits_email": "rhedricks@example.com", + "context_traits_logins": float64(2), + "context_traits_name": "Richard Hendricks", + "event": "accounts", + "event_text": "accounts", + "id": "messageId", + "original_timestamp": "2021-09-01T00:00:00.000Z", + "received_at": "2021-09-01T00:00:00.000Z", + "sent_at": "2021-09-01T00:00:00.000Z", + "timestamp": "2021-09-01T00:00:00.000Z", + "user_id": "userId", + }, + "metadata": map[string]any{ + "columns": map[string]any{ + "anonymous_id": "string", + "channel": "string", + "context_destination_id": "string", + "context_destination_type": "string", + "context_ip": "string", + "context_passed_ip": "string", + "context_request_ip": "string", + "context_source_id": "string", + "context_source_type": "string", + "context_traits_email": "string", + "context_traits_logins": "int", + "context_traits_name": "string", + "event": "string", + 
"event_text": "string", + "id": "string", + "original_timestamp": "datetime", + "received_at": "datetime", + "sent_at": "datetime", + "timestamp": "datetime", + "user_id": "string", + "uuid_ts": "datetime", + }, + "receivedAt": "2021-09-01T00:00:00.000Z", + "table": "tracks", + }, + "userId": "", + }, + Metadata: ptrans.Metadata{ + EventType: "track", + DestinationType: "POSTGRES", + ReceivedAt: "2021-09-01T00:00:00.000Z", + SourceID: "sourceID", + DestinationID: "destinationID", + SourceType: "sourceType", + MessageID: "messageId", + }, + StatusCode: http.StatusOK, + }, + { + Output: map[string]any{ + "data": map[string]any{ + "anonymous_id": "anonymousId", + "channel": "web", + "context_destination_id": "destinationID", + "context_destination_type": "POSTGRES", + "context_ip": "1.2.3.4", + "context_passed_ip": "1.2.3.4", + "context_request_ip": "5.6.7.8", + "context_source_id": "sourceID", + "context_source_type": "sourceType", + "context_traits_email": "rhedricks@example.com", + "context_traits_logins": float64(2), + "context_traits_name": "Richard Hendricks", + "event": "accounts", + "event_text": "accounts", + "id": "messageId", + "original_timestamp": "2021-09-01T00:00:00.000Z", + "product_id": "9578257311", + "rating": 3.0, + "received_at": "2021-09-01T00:00:00.000Z", + "review_body": "OK for the price. 
It works but the material feels flimsy.", + "review_id": "86ac1cd43", + "sent_at": "2021-09-01T00:00:00.000Z", + "timestamp": "2021-09-01T00:00:00.000Z", + "user_id": "userId", + }, + "metadata": map[string]any{ + "columns": map[string]any{ + "anonymous_id": "string", + "channel": "string", + "context_destination_id": "string", + "context_destination_type": "string", + "context_ip": "string", + "context_passed_ip": "string", + "context_request_ip": "string", + "context_source_id": "string", + "context_source_type": "string", + "context_traits_email": "string", + "context_traits_logins": "int", + "context_traits_name": "string", + "event": "string", + "event_text": "string", + "id": "string", + "original_timestamp": "datetime", + "product_id": "string", + "rating": "int", + "received_at": "datetime", + "review_body": "string", + "review_id": "string", + "sent_at": "datetime", + "timestamp": "datetime", + "user_id": "string", + "uuid_ts": "datetime", + }, + "receivedAt": "2021-09-01T00:00:00.000Z", + "table": "accounts", + }, + "userId": "", + }, + Metadata: ptrans.Metadata{ + EventType: "track", + DestinationType: "POSTGRES", + ReceivedAt: "2021-09-01T00:00:00.000Z", + SourceID: "sourceID", + DestinationID: "destinationID", + SourceType: "sourceType", + MessageID: "messageId", + }, + StatusCode: http.StatusOK, + }, + }, + }, + }, + { + name: "track (POSTGRES) RudderIsolatedTable", + eventPayload: `{"type":"track","messageId":"messageId","anonymousId":"anonymousId","userId":"userId","sentAt":"2021-09-01T00:00:00.000Z","timestamp":"2021-09-01T00:00:00.000Z","receivedAt":"2021-09-01T00:00:00.000Z","originalTimestamp":"2021-09-01T00:00:00.000Z","channel":"web","event":"users","request_ip":"5.6.7.8","properties":{"review_id":"86ac1cd43","product_id":"9578257311"},"userProperties":{"rating":3.0,"review_body":"OK for the price. 
It works but the material feels flimsy."},"context":{"traits":{"name":"Richard Hendricks","email":"rhedricks@example.com","logins":2},"ip":"1.2.3.4"}}`, + metadata: ptrans.Metadata{ + EventType: "track", + DestinationType: "POSTGRES", + ReceivedAt: "2021-09-01T00:00:00.000Z", + SourceID: "sourceID", + DestinationID: "destinationID", + SourceType: "sourceType", + MessageID: "messageId", + }, + destination: backendconfig.DestinationT{ + Name: "POSTGRES", + Config: map[string]any{}, + DestinationDefinition: backendconfig.DestinationDefinitionT{ + Name: "POSTGRES", + }, + }, + expectedResponse: ptrans.Response{ + Events: []ptrans.TransformerResponse{ + { + Output: map[string]any{ + "data": map[string]any{ + "anonymous_id": "anonymousId", + "channel": "web", + "context_destination_id": "destinationID", + "context_destination_type": "POSTGRES", + "context_ip": "1.2.3.4", + "context_passed_ip": "1.2.3.4", + "context_request_ip": "5.6.7.8", + "context_source_id": "sourceID", + "context_source_type": "sourceType", + "context_traits_email": "rhedricks@example.com", + "context_traits_logins": float64(2), + "context_traits_name": "Richard Hendricks", + "event": "users", + "event_text": "users", + "id": "messageId", + "original_timestamp": "2021-09-01T00:00:00.000Z", + "received_at": "2021-09-01T00:00:00.000Z", + "sent_at": "2021-09-01T00:00:00.000Z", + "timestamp": "2021-09-01T00:00:00.000Z", + "user_id": "userId", + }, + "metadata": map[string]any{ + "columns": map[string]any{ + "anonymous_id": "string", + "channel": "string", + "context_destination_id": "string", + "context_destination_type": "string", + "context_ip": "string", + "context_passed_ip": "string", + "context_request_ip": "string", + "context_source_id": "string", + "context_source_type": "string", + "context_traits_email": "string", + "context_traits_logins": "int", + "context_traits_name": "string", + "event": "string", + "event_text": "string", + "id": "string", + "original_timestamp": "datetime", + 
"received_at": "datetime", + "sent_at": "datetime", + "timestamp": "datetime", + "user_id": "string", + "uuid_ts": "datetime", + }, + "receivedAt": "2021-09-01T00:00:00.000Z", + "table": "tracks", + }, + "userId": "", + }, + Metadata: ptrans.Metadata{ + EventType: "track", + DestinationType: "POSTGRES", + ReceivedAt: "2021-09-01T00:00:00.000Z", + SourceID: "sourceID", + DestinationID: "destinationID", + SourceType: "sourceType", + MessageID: "messageId", + }, + StatusCode: http.StatusOK, + }, + { + Output: map[string]any{ + "data": map[string]any{ + "anonymous_id": "anonymousId", + "channel": "web", + "context_destination_id": "destinationID", + "context_destination_type": "POSTGRES", + "context_ip": "1.2.3.4", + "context_passed_ip": "1.2.3.4", + "context_request_ip": "5.6.7.8", + "context_source_id": "sourceID", + "context_source_type": "sourceType", + "context_traits_email": "rhedricks@example.com", + "context_traits_logins": float64(2), + "context_traits_name": "Richard Hendricks", + "event": "users", + "event_text": "users", + "id": "messageId", + "original_timestamp": "2021-09-01T00:00:00.000Z", + "product_id": "9578257311", + "rating": 3.0, + "received_at": "2021-09-01T00:00:00.000Z", + "review_body": "OK for the price. 
It works but the material feels flimsy.", + "review_id": "86ac1cd43", + "sent_at": "2021-09-01T00:00:00.000Z", + "timestamp": "2021-09-01T00:00:00.000Z", + "user_id": "userId", + }, + "metadata": map[string]any{ + "columns": map[string]any{ + "anonymous_id": "string", + "channel": "string", + "context_destination_id": "string", + "context_destination_type": "string", + "context_ip": "string", + "context_passed_ip": "string", + "context_request_ip": "string", + "context_source_id": "string", + "context_source_type": "string", + "context_traits_email": "string", + "context_traits_logins": "int", + "context_traits_name": "string", + "event": "string", + "event_text": "string", + "id": "string", + "original_timestamp": "datetime", + "product_id": "string", + "rating": "int", + "received_at": "datetime", + "review_body": "string", + "review_id": "string", + "sent_at": "datetime", + "timestamp": "datetime", + "user_id": "string", + "uuid_ts": "datetime", + }, + "receivedAt": "2021-09-01T00:00:00.000Z", + "table": "_users", + }, + "userId": "", + }, + Metadata: ptrans.Metadata{ + EventType: "track", + DestinationType: "POSTGRES", + ReceivedAt: "2021-09-01T00:00:00.000Z", + SourceID: "sourceID", + DestinationID: "destinationID", + SourceType: "sourceType", + MessageID: "messageId", + }, + StatusCode: http.StatusOK, + }, + }, + }, + }, + { + name: "track (POSTGRES) empty event", + eventPayload: `{"type":"track","messageId":"messageId","anonymousId":"anonymousId","userId":"userId","sentAt":"2021-09-01T00:00:00.000Z","timestamp":"2021-09-01T00:00:00.000Z","receivedAt":"2021-09-01T00:00:00.000Z","originalTimestamp":"2021-09-01T00:00:00.000Z","channel":"web","event":"","request_ip":"5.6.7.8","properties":{"review_id":"86ac1cd43","product_id":"9578257311"},"userProperties":{"rating":3.0,"review_body":"OK for the price. 
It works but the material feels flimsy."},"context":{"traits":{"name":"Richard Hendricks","email":"rhedricks@example.com","logins":2},"ip":"1.2.3.4"}}`, + metadata: ptrans.Metadata{ + EventType: "track", + DestinationType: "POSTGRES", + ReceivedAt: "2021-09-01T00:00:00.000Z", + SourceID: "sourceID", + DestinationID: "destinationID", + SourceType: "sourceType", + MessageID: "messageId", + }, + destination: backendconfig.DestinationT{ + Name: "POSTGRES", + Config: map[string]any{}, + DestinationDefinition: backendconfig.DestinationDefinitionT{ + Name: "POSTGRES", + }, + }, + expectedResponse: ptrans.Response{ + Events: []ptrans.TransformerResponse{ + { + Output: map[string]any{ + "data": map[string]any{ + "anonymous_id": "anonymousId", + "channel": "web", + "context_destination_id": "destinationID", + "context_destination_type": "POSTGRES", + "context_ip": "1.2.3.4", + "context_passed_ip": "1.2.3.4", + "context_request_ip": "5.6.7.8", + "context_source_id": "sourceID", + "context_source_type": "sourceType", + "context_traits_email": "rhedricks@example.com", + "context_traits_logins": float64(2), + "context_traits_name": "Richard Hendricks", + "id": "messageId", + "event": "", + "original_timestamp": "2021-09-01T00:00:00.000Z", + "received_at": "2021-09-01T00:00:00.000Z", + "sent_at": "2021-09-01T00:00:00.000Z", + "timestamp": "2021-09-01T00:00:00.000Z", + "user_id": "userId", + }, + "metadata": map[string]any{ + "columns": map[string]any{ + "anonymous_id": "string", + "channel": "string", + "context_destination_id": "string", + "context_destination_type": "string", + "context_ip": "string", + "context_passed_ip": "string", + "context_request_ip": "string", + "context_source_id": "string", + "context_source_type": "string", + "context_traits_email": "string", + "context_traits_logins": "int", + "context_traits_name": "string", + "id": "string", + "original_timestamp": "datetime", + "received_at": "datetime", + "sent_at": "datetime", + "timestamp": "datetime", + 
"user_id": "string", + "event": "string", + "uuid_ts": "datetime", + }, + "receivedAt": "2021-09-01T00:00:00.000Z", + "table": "tracks", + }, + "userId": "", + }, + Metadata: ptrans.Metadata{ + EventType: "track", + DestinationType: "POSTGRES", + ReceivedAt: "2021-09-01T00:00:00.000Z", + SourceID: "sourceID", + DestinationID: "destinationID", + SourceType: "sourceType", + MessageID: "messageId", + }, + StatusCode: http.StatusOK, + }, + }, + }, + }, + { + name: "track (POSTGRES) no event", + eventPayload: `{"type":"track","messageId":"messageId","anonymousId":"anonymousId","userId":"userId","sentAt":"2021-09-01T00:00:00.000Z","timestamp":"2021-09-01T00:00:00.000Z","receivedAt":"2021-09-01T00:00:00.000Z","originalTimestamp":"2021-09-01T00:00:00.000Z","channel":"web","request_ip":"5.6.7.8","properties":{"review_id":"86ac1cd43","product_id":"9578257311"},"userProperties":{"rating":3.0,"review_body":"OK for the price. It works but the material feels flimsy."},"context":{"traits":{"name":"Richard Hendricks","email":"rhedricks@example.com","logins":2},"ip":"1.2.3.4"}}`, + metadata: ptrans.Metadata{ + EventType: "track", + DestinationType: "POSTGRES", + ReceivedAt: "2021-09-01T00:00:00.000Z", + SourceID: "sourceID", + DestinationID: "destinationID", + SourceType: "sourceType", + MessageID: "messageId", + }, + destination: backendconfig.DestinationT{ + Name: "POSTGRES", + Config: map[string]any{}, + DestinationDefinition: backendconfig.DestinationDefinitionT{ + Name: "POSTGRES", + }, + }, + expectedResponse: ptrans.Response{ + Events: []ptrans.TransformerResponse{ + { + Output: map[string]any{ + "data": map[string]any{ + "anonymous_id": "anonymousId", + "channel": "web", + "context_destination_id": "destinationID", + "context_destination_type": "POSTGRES", + "context_ip": "1.2.3.4", + "context_passed_ip": "1.2.3.4", + "context_request_ip": "5.6.7.8", + "context_source_id": "sourceID", + "context_source_type": "sourceType", + "context_traits_email": "rhedricks@example.com", 
+ "context_traits_logins": float64(2), + "context_traits_name": "Richard Hendricks", + "id": "messageId", + "event": "", + "original_timestamp": "2021-09-01T00:00:00.000Z", + "received_at": "2021-09-01T00:00:00.000Z", + "sent_at": "2021-09-01T00:00:00.000Z", + "timestamp": "2021-09-01T00:00:00.000Z", + "user_id": "userId", + }, + "metadata": map[string]any{ + "columns": map[string]any{ + "anonymous_id": "string", + "channel": "string", + "context_destination_id": "string", + "context_destination_type": "string", + "context_ip": "string", + "context_passed_ip": "string", + "context_request_ip": "string", + "context_source_id": "string", + "context_source_type": "string", + "context_traits_email": "string", + "context_traits_logins": "int", + "context_traits_name": "string", + "id": "string", + "event": "string", + "original_timestamp": "datetime", + "received_at": "datetime", + "sent_at": "datetime", + "timestamp": "datetime", + "user_id": "string", + "uuid_ts": "datetime", + }, + "receivedAt": "2021-09-01T00:00:00.000Z", + "table": "tracks", + }, + "userId": "", + }, + Metadata: ptrans.Metadata{ + EventType: "track", + DestinationType: "POSTGRES", + ReceivedAt: "2021-09-01T00:00:00.000Z", + SourceID: "sourceID", + DestinationID: "destinationID", + SourceType: "sourceType", + MessageID: "messageId", + }, + StatusCode: http.StatusOK, + }, + }, + }, + }, + { + name: "track (POSTGRES) store rudder event", + eventPayload: `{"type":"track","messageId":"messageId","anonymousId":"anonymousId","userId":"userId","sentAt":"2021-09-01T00:00:00.000Z","timestamp":"2021-09-01T00:00:00.000Z","receivedAt":"2021-09-01T00:00:00.000Z","originalTimestamp":"2021-09-01T00:00:00.000Z","channel":"web","event":"event","request_ip":"5.6.7.8","properties":{"review_id":"86ac1cd43","product_id":"9578257311"},"userProperties":{"rating":3.0,"review_body":"OK for the price. 
It works but the material feels flimsy."},"context":{"traits":{"name":"Richard Hendricks","email":"rhedricks@example.com","logins":2},"ip":"1.2.3.4"}}`, + metadata: ptrans.Metadata{ + EventType: "track", + DestinationType: "POSTGRES", + ReceivedAt: "2021-09-01T00:00:00.000Z", + SourceID: "sourceID", + DestinationID: "destinationID", + SourceType: "sourceType", + MessageID: "messageId", + }, + destination: backendconfig.DestinationT{ + Name: "POSTGRES", + Config: map[string]any{ + "storeFullEvent": true, + }, + DestinationDefinition: backendconfig.DestinationDefinitionT{ + Name: "POSTGRES", + }, + }, + expectedResponse: ptrans.Response{ + Events: []ptrans.TransformerResponse{ + { + Output: map[string]any{ + "data": map[string]any{ + "anonymous_id": "anonymousId", + "channel": "web", + "context_destination_id": "destinationID", + "context_destination_type": "POSTGRES", + "context_ip": "1.2.3.4", + "context_passed_ip": "1.2.3.4", + "context_request_ip": "5.6.7.8", + "context_source_id": "sourceID", + "context_source_type": "sourceType", + "context_traits_email": "rhedricks@example.com", + "context_traits_logins": float64(2), + "context_traits_name": "Richard Hendricks", + "event": "event", + "event_text": "event", + "id": "messageId", + "original_timestamp": "2021-09-01T00:00:00.000Z", + "received_at": "2021-09-01T00:00:00.000Z", + "sent_at": "2021-09-01T00:00:00.000Z", + "timestamp": "2021-09-01T00:00:00.000Z", + "user_id": "userId", + "rudder_event": "{\"type\":\"track\",\"anonymousId\":\"anonymousId\",\"channel\":\"web\",\"context\":{\"destinationId\":\"destinationID\",\"destinationType\":\"POSTGRES\",\"ip\":\"1.2.3.4\",\"sourceId\":\"sourceID\",\"sourceType\":\"sourceType\",\"traits\":{\"email\":\"rhedricks@example.com\",\"logins\":2,\"name\":\"Richard 
Hendricks\"}},\"event\":\"event\",\"messageId\":\"messageId\",\"originalTimestamp\":\"2021-09-01T00:00:00.000Z\",\"properties\":{\"product_id\":\"9578257311\",\"review_id\":\"86ac1cd43\"},\"receivedAt\":\"2021-09-01T00:00:00.000Z\",\"request_ip\":\"5.6.7.8\",\"sentAt\":\"2021-09-01T00:00:00.000Z\",\"timestamp\":\"2021-09-01T00:00:00.000Z\",\"userId\":\"userId\",\"userProperties\":{\"rating\":3,\"review_body\":\"OK for the price. It works but the material feels flimsy.\"}}", + }, + "metadata": map[string]any{ + "columns": map[string]any{ + "anonymous_id": "string", + "channel": "string", + "context_destination_id": "string", + "context_destination_type": "string", + "context_ip": "string", + "context_passed_ip": "string", + "context_request_ip": "string", + "context_source_id": "string", + "context_source_type": "string", + "context_traits_email": "string", + "context_traits_logins": "int", + "context_traits_name": "string", + "event": "string", + "event_text": "string", + "id": "string", + "original_timestamp": "datetime", + "received_at": "datetime", + "sent_at": "datetime", + "timestamp": "datetime", + "user_id": "string", + "rudder_event": "json", + "uuid_ts": "datetime", + }, + "receivedAt": "2021-09-01T00:00:00.000Z", + "table": "tracks", + }, + "userId": "", + }, + Metadata: ptrans.Metadata{ + EventType: "track", + DestinationType: "POSTGRES", + ReceivedAt: "2021-09-01T00:00:00.000Z", + SourceID: "sourceID", + DestinationID: "destinationID", + SourceType: "sourceType", + MessageID: "messageId", + }, + StatusCode: http.StatusOK, + }, + { + Output: map[string]any{ + "data": map[string]any{ + "anonymous_id": "anonymousId", + "channel": "web", + "context_destination_id": "destinationID", + "context_destination_type": "POSTGRES", + "context_ip": "1.2.3.4", + "context_passed_ip": "1.2.3.4", + "context_request_ip": "5.6.7.8", + "context_source_id": "sourceID", + "context_source_type": "sourceType", + "context_traits_email": "rhedricks@example.com", + 
"context_traits_logins": float64(2), + "context_traits_name": "Richard Hendricks", + "event": "event", + "event_text": "event", + "id": "messageId", + "original_timestamp": "2021-09-01T00:00:00.000Z", + "product_id": "9578257311", + "rating": 3.0, + "received_at": "2021-09-01T00:00:00.000Z", + "review_body": "OK for the price. It works but the material feels flimsy.", + "review_id": "86ac1cd43", + "sent_at": "2021-09-01T00:00:00.000Z", + "timestamp": "2021-09-01T00:00:00.000Z", + "user_id": "userId", + }, + "metadata": map[string]any{ + "columns": map[string]any{ + "anonymous_id": "string", + "channel": "string", + "context_destination_id": "string", + "context_destination_type": "string", + "context_ip": "string", + "context_passed_ip": "string", + "context_request_ip": "string", + "context_source_id": "string", + "context_source_type": "string", + "context_traits_email": "string", + "context_traits_logins": "int", + "context_traits_name": "string", + "event": "string", + "event_text": "string", + "id": "string", + "original_timestamp": "datetime", + "product_id": "string", + "rating": "int", + "received_at": "datetime", + "review_body": "string", + "review_id": "string", + "sent_at": "datetime", + "timestamp": "datetime", + "user_id": "string", + "uuid_ts": "datetime", + }, + "receivedAt": "2021-09-01T00:00:00.000Z", + "table": "event", + }, + "userId": "", + }, + Metadata: ptrans.Metadata{ + EventType: "track", + DestinationType: "POSTGRES", + ReceivedAt: "2021-09-01T00:00:00.000Z", + SourceID: "sourceID", + DestinationID: "destinationID", + SourceType: "sourceType", + MessageID: "messageId", + }, + StatusCode: http.StatusOK, + }, + }, + }, + }, + { + name: "track (POSTGRES) partial rules", + eventPayload: 
`{"type":"track","messageId":"messageId","userId":"userId","sentAt":"2021-09-01T00:00:00.000Z","timestamp":"2021-09-01T00:00:00.000Z","receivedAt":"2021-09-01T00:00:00.000Z","originalTimestamp":"2021-09-01T00:00:00.000Z","event":"event","properties":{"review_id":"86ac1cd43","product_id":"9578257311"},"userProperties":{"rating":3.0,"review_body":"OK for the price. It works but the material feels flimsy."},"context":{"traits":{"name":"Richard Hendricks","email":"rhedricks@example.com","logins":2},"ip":"1.2.3.4"}}`, + metadata: ptrans.Metadata{ + EventType: "track", + DestinationType: "POSTGRES", + ReceivedAt: "2021-09-01T00:00:00.000Z", + SourceID: "sourceID", + DestinationID: "destinationID", + SourceType: "sourceType", + MessageID: "messageId", + }, + destination: backendconfig.DestinationT{ + Name: "POSTGRES", + Config: map[string]any{}, + DestinationDefinition: backendconfig.DestinationDefinitionT{ + Name: "POSTGRES", + }, + }, + expectedResponse: ptrans.Response{ + Events: []ptrans.TransformerResponse{ + { + Output: map[string]any{ + "data": map[string]any{ + "context_destination_id": "destinationID", + "context_destination_type": "POSTGRES", + "context_ip": "1.2.3.4", + "context_passed_ip": "1.2.3.4", + "context_source_id": "sourceID", + "context_source_type": "sourceType", + "context_traits_email": "rhedricks@example.com", + "context_traits_logins": float64(2), + "context_traits_name": "Richard Hendricks", + "event": "event", + "event_text": "event", + "id": "messageId", + "original_timestamp": "2021-09-01T00:00:00.000Z", + "received_at": "2021-09-01T00:00:00.000Z", + "sent_at": "2021-09-01T00:00:00.000Z", + "timestamp": "2021-09-01T00:00:00.000Z", + "user_id": "userId", + }, + "metadata": map[string]any{ + "columns": map[string]any{ + "context_destination_id": "string", + "context_destination_type": "string", + "context_ip": "string", + "context_passed_ip": "string", + "context_source_id": "string", + "context_source_type": "string", + "context_traits_email": 
"string", + "context_traits_logins": "int", + "context_traits_name": "string", + "event": "string", + "event_text": "string", + "id": "string", + "original_timestamp": "datetime", + "received_at": "datetime", + "sent_at": "datetime", + "timestamp": "datetime", + "user_id": "string", + "uuid_ts": "datetime", + }, + "receivedAt": "2021-09-01T00:00:00.000Z", + "table": "tracks", + }, + "userId": "", + }, + Metadata: ptrans.Metadata{ + EventType: "track", + DestinationType: "POSTGRES", + ReceivedAt: "2021-09-01T00:00:00.000Z", + SourceID: "sourceID", + DestinationID: "destinationID", + SourceType: "sourceType", + MessageID: "messageId", + }, + StatusCode: http.StatusOK, + }, + { + Output: map[string]any{ + "data": map[string]any{ + "context_destination_id": "destinationID", + "context_destination_type": "POSTGRES", + "context_ip": "1.2.3.4", + "context_passed_ip": "1.2.3.4", + "context_source_id": "sourceID", + "context_source_type": "sourceType", + "context_traits_email": "rhedricks@example.com", + "context_traits_logins": float64(2), + "context_traits_name": "Richard Hendricks", + "event": "event", + "event_text": "event", + "id": "messageId", + "original_timestamp": "2021-09-01T00:00:00.000Z", + "product_id": "9578257311", + "rating": 3.0, + "received_at": "2021-09-01T00:00:00.000Z", + "review_body": "OK for the price. 
It works but the material feels flimsy.", + "review_id": "86ac1cd43", + "sent_at": "2021-09-01T00:00:00.000Z", + "timestamp": "2021-09-01T00:00:00.000Z", + "user_id": "userId", + }, + "metadata": map[string]any{ + "columns": map[string]any{ + "context_destination_id": "string", + "context_destination_type": "string", + "context_ip": "string", + "context_passed_ip": "string", + "context_source_id": "string", + "context_source_type": "string", + "context_traits_email": "string", + "context_traits_logins": "int", + "context_traits_name": "string", + "event": "string", + "event_text": "string", + "id": "string", + "original_timestamp": "datetime", + "product_id": "string", + "rating": "int", + "received_at": "datetime", + "review_body": "string", + "review_id": "string", + "sent_at": "datetime", + "timestamp": "datetime", + "user_id": "string", + "uuid_ts": "datetime", + }, + "receivedAt": "2021-09-01T00:00:00.000Z", + "table": "event", + }, + "userId": "", + }, + Metadata: ptrans.Metadata{ + EventType: "track", + DestinationType: "POSTGRES", + ReceivedAt: "2021-09-01T00:00:00.000Z", + SourceID: "sourceID", + DestinationID: "destinationID", + SourceType: "sourceType", + MessageID: "messageId", + }, + StatusCode: http.StatusOK, + }, + }, + }, + }, + { + name: "track (POSTGRES) skipTracksTable (dstOpts)", + eventPayload: `{"type":"track","messageId":"messageId","anonymousId":"anonymousId","userId":"userId","sentAt":"2021-09-01T00:00:00.000Z","timestamp":"2021-09-01T00:00:00.000Z","receivedAt":"2021-09-01T00:00:00.000Z","originalTimestamp":"2021-09-01T00:00:00.000Z","channel":"web","event":"event","request_ip":"5.6.7.8","properties":{"review_id":"86ac1cd43","product_id":"9578257311"},"userProperties":{"rating":3.0,"review_body":"OK for the price. 
It works but the material feels flimsy."},"context":{"traits":{"name":"Richard Hendricks","email":"rhedricks@example.com","logins":2},"ip":"1.2.3.4"}}`, + metadata: ptrans.Metadata{ + EventType: "track", + DestinationType: "POSTGRES", + ReceivedAt: "2021-09-01T00:00:00.000Z", + SourceID: "sourceID", + DestinationID: "destinationID", + SourceType: "sourceType", + MessageID: "messageId", + }, + destination: backendconfig.DestinationT{ + Name: "POSTGRES", + Config: map[string]any{ + "skipTracksTable": true, + }, + DestinationDefinition: backendconfig.DestinationDefinitionT{ + Name: "POSTGRES", + }, + }, + expectedResponse: ptrans.Response{ + Events: []ptrans.TransformerResponse{ + { + Output: map[string]any{ + "data": map[string]any{ + "anonymous_id": "anonymousId", + "channel": "web", + "context_destination_id": "destinationID", + "context_destination_type": "POSTGRES", + "context_ip": "1.2.3.4", + "context_passed_ip": "1.2.3.4", + "context_request_ip": "5.6.7.8", + "context_source_id": "sourceID", + "context_source_type": "sourceType", + "context_traits_email": "rhedricks@example.com", + "context_traits_logins": float64(2), + "context_traits_name": "Richard Hendricks", + "event": "event", + "event_text": "event", + "id": "messageId", + "original_timestamp": "2021-09-01T00:00:00.000Z", + "product_id": "9578257311", + "rating": 3.0, + "received_at": "2021-09-01T00:00:00.000Z", + "review_body": "OK for the price. 
It works but the material feels flimsy.", + "review_id": "86ac1cd43", + "sent_at": "2021-09-01T00:00:00.000Z", + "timestamp": "2021-09-01T00:00:00.000Z", + "user_id": "userId", + }, + "metadata": map[string]any{ + "columns": map[string]any{ + "anonymous_id": "string", + "channel": "string", + "context_destination_id": "string", + "context_destination_type": "string", + "context_ip": "string", + "context_passed_ip": "string", + "context_request_ip": "string", + "context_source_id": "string", + "context_source_type": "string", + "context_traits_email": "string", + "context_traits_logins": "int", + "context_traits_name": "string", + "event": "string", + "event_text": "string", + "id": "string", + "original_timestamp": "datetime", + "product_id": "string", + "rating": "int", + "received_at": "datetime", + "review_body": "string", + "review_id": "string", + "sent_at": "datetime", + "timestamp": "datetime", + "user_id": "string", + "uuid_ts": "datetime", + }, + "receivedAt": "2021-09-01T00:00:00.000Z", + "table": "event", + }, + "userId": "", + }, + Metadata: ptrans.Metadata{ + EventType: "track", + DestinationType: "POSTGRES", + ReceivedAt: "2021-09-01T00:00:00.000Z", + SourceID: "sourceID", + DestinationID: "destinationID", + SourceType: "sourceType", + MessageID: "messageId", + }, + StatusCode: http.StatusOK, + }, + }, + }, + }, + { + name: "track (POSTGRES) skipTracksTable (itrOpts)", + eventPayload: `{"type":"track","messageId":"messageId","anonymousId":"anonymousId","userId":"userId","sentAt":"2021-09-01T00:00:00.000Z","timestamp":"2021-09-01T00:00:00.000Z","receivedAt":"2021-09-01T00:00:00.000Z","originalTimestamp":"2021-09-01T00:00:00.000Z","channel":"web","event":"event","request_ip":"5.6.7.8","properties":{"review_id":"86ac1cd43","product_id":"9578257311"},"userProperties":{"rating":3.0,"review_body":"OK for the price. 
It works but the material feels flimsy."},"context":{"traits":{"name":"Richard Hendricks","email":"rhedricks@example.com","logins":2},"ip":"1.2.3.4"},"integrations":{"POSTGRES":{"options":{"skipTracksTable":true}}}}`, + metadata: ptrans.Metadata{ + EventType: "track", + DestinationType: "POSTGRES", + ReceivedAt: "2021-09-01T00:00:00.000Z", + SourceID: "sourceID", + DestinationID: "destinationID", + SourceType: "sourceType", + MessageID: "messageId", + }, + destination: backendconfig.DestinationT{ + Name: "POSTGRES", + Config: map[string]any{}, + DestinationDefinition: backendconfig.DestinationDefinitionT{ + Name: "POSTGRES", + }, + }, + expectedResponse: ptrans.Response{ + Events: []ptrans.TransformerResponse{ + { + Output: map[string]any{ + "data": map[string]any{ + "anonymous_id": "anonymousId", + "channel": "web", + "context_destination_id": "destinationID", + "context_destination_type": "POSTGRES", + "context_ip": "1.2.3.4", + "context_passed_ip": "1.2.3.4", + "context_request_ip": "5.6.7.8", + "context_source_id": "sourceID", + "context_source_type": "sourceType", + "context_traits_email": "rhedricks@example.com", + "context_traits_logins": float64(2), + "context_traits_name": "Richard Hendricks", + "event": "event", + "event_text": "event", + "id": "messageId", + "original_timestamp": "2021-09-01T00:00:00.000Z", + "product_id": "9578257311", + "rating": 3.0, + "received_at": "2021-09-01T00:00:00.000Z", + "review_body": "OK for the price. 
It works but the material feels flimsy.", + "review_id": "86ac1cd43", + "sent_at": "2021-09-01T00:00:00.000Z", + "timestamp": "2021-09-01T00:00:00.000Z", + "user_id": "userId", + }, + "metadata": map[string]any{ + "columns": map[string]any{ + "anonymous_id": "string", + "channel": "string", + "context_destination_id": "string", + "context_destination_type": "string", + "context_ip": "string", + "context_passed_ip": "string", + "context_request_ip": "string", + "context_source_id": "string", + "context_source_type": "string", + "context_traits_email": "string", + "context_traits_logins": "int", + "context_traits_name": "string", + "event": "string", + "event_text": "string", + "id": "string", + "original_timestamp": "datetime", + "product_id": "string", + "rating": "int", + "received_at": "datetime", + "review_body": "string", + "review_id": "string", + "sent_at": "datetime", + "timestamp": "datetime", + "user_id": "string", + "uuid_ts": "datetime", + }, + "receivedAt": "2021-09-01T00:00:00.000Z", + "table": "event", + }, + "userId": "", + }, + Metadata: ptrans.Metadata{ + EventType: "track", + DestinationType: "POSTGRES", + ReceivedAt: "2021-09-01T00:00:00.000Z", + SourceID: "sourceID", + DestinationID: "destinationID", + SourceType: "sourceType", + MessageID: "messageId", + }, + StatusCode: http.StatusOK, + }, + }, + }, + }, + { + name: "track (BQ) merge event", + configOverride: map[string]any{ + "Warehouse.enableIDResolution": true, + }, + eventPayload: `{"type":"track","messageId":"messageId","anonymousId":"anonymousId","userId":"userId","sentAt":"2021-09-01T00:00:00.000Z","timestamp":"2021-09-01T00:00:00.000Z","receivedAt":"2021-09-01T00:00:00.000Z","originalTimestamp":"2021-09-01T00:00:00.000Z","channel":"web","event":"event","request_ip":"5.6.7.8","properties":{"review_id":"86ac1cd43","product_id":"9578257311"},"userProperties":{"rating":3.0,"review_body":"OK for the price. 
It works but the material feels flimsy."},"context":{"traits":{"name":"Richard Hendricks","email":"rhedricks@example.com","logins":2},"ip":"1.2.3.4"}}`, + metadata: ptrans.Metadata{ + EventType: "track", + DestinationType: "BQ", + ReceivedAt: "2021-09-01T00:00:00.000Z", + SourceID: "sourceID", + DestinationID: "destinationID", + SourceType: "sourceType", + MessageID: "messageId", + }, + destination: backendconfig.DestinationT{ + Name: "BQ", + Config: map[string]any{}, + DestinationDefinition: backendconfig.DestinationDefinitionT{ + Name: "BQ", + }, + }, + expectedResponse: ptrans.Response{ + Events: []ptrans.TransformerResponse{ + { + Output: map[string]any{ + "data": map[string]any{ + "anonymous_id": "anonymousId", + "channel": "web", + "context_destination_id": "destinationID", + "context_destination_type": "BQ", + "context_ip": "1.2.3.4", + "context_passed_ip": "1.2.3.4", + "context_request_ip": "5.6.7.8", + "context_source_id": "sourceID", + "context_source_type": "sourceType", + "context_traits_email": "rhedricks@example.com", + "context_traits_logins": float64(2), + "context_traits_name": "Richard Hendricks", + "event": "event", + "event_text": "event", + "id": "messageId", + "original_timestamp": "2021-09-01T00:00:00.000Z", + "received_at": "2021-09-01T00:00:00.000Z", + "sent_at": "2021-09-01T00:00:00.000Z", + "timestamp": "2021-09-01T00:00:00.000Z", + "user_id": "userId", + }, + "metadata": map[string]any{ + "columns": map[string]any{ + "anonymous_id": "string", + "channel": "string", + "context_destination_id": "string", + "context_destination_type": "string", + "context_ip": "string", + "context_passed_ip": "string", + "context_request_ip": "string", + "context_source_id": "string", + "context_source_type": "string", + "context_traits_email": "string", + "context_traits_logins": "int", + "context_traits_name": "string", + "event": "string", + "event_text": "string", + "id": "string", + "loaded_at": "datetime", + "original_timestamp": "datetime", + 
"received_at": "datetime", + "sent_at": "datetime", + "timestamp": "datetime", + "user_id": "string", + "uuid_ts": "datetime", + }, + "receivedAt": "2021-09-01T00:00:00.000Z", + "table": "tracks", + }, + "userId": "", + }, + Metadata: ptrans.Metadata{ + EventType: "track", + DestinationType: "BQ", + ReceivedAt: "2021-09-01T00:00:00.000Z", + SourceID: "sourceID", + DestinationID: "destinationID", + SourceType: "sourceType", + MessageID: "messageId", + }, + StatusCode: http.StatusOK, + }, + { + Output: map[string]any{ + "data": map[string]any{ + "anonymous_id": "anonymousId", + "channel": "web", + "context_destination_id": "destinationID", + "context_destination_type": "BQ", + "context_ip": "1.2.3.4", + "context_passed_ip": "1.2.3.4", + "context_request_ip": "5.6.7.8", + "context_source_id": "sourceID", + "context_source_type": "sourceType", + "context_traits_email": "rhedricks@example.com", + "context_traits_logins": float64(2), + "context_traits_name": "Richard Hendricks", + "event": "event", + "event_text": "event", + "id": "messageId", + "original_timestamp": "2021-09-01T00:00:00.000Z", + "product_id": "9578257311", + "rating": 3.0, + "received_at": "2021-09-01T00:00:00.000Z", + "review_body": "OK for the price. 
It works but the material feels flimsy.", + "review_id": "86ac1cd43", + "sent_at": "2021-09-01T00:00:00.000Z", + "timestamp": "2021-09-01T00:00:00.000Z", + "user_id": "userId", + }, + "metadata": map[string]any{ + "columns": map[string]any{ + "anonymous_id": "string", + "channel": "string", + "context_destination_id": "string", + "context_destination_type": "string", + "context_ip": "string", + "context_passed_ip": "string", + "context_request_ip": "string", + "context_source_id": "string", + "context_source_type": "string", + "context_traits_email": "string", + "context_traits_logins": "int", + "context_traits_name": "string", + "event": "string", + "event_text": "string", + "id": "string", + "loaded_at": "datetime", + "original_timestamp": "datetime", + "product_id": "string", + "rating": "int", + "received_at": "datetime", + "review_body": "string", + "review_id": "string", + "sent_at": "datetime", + "timestamp": "datetime", + "user_id": "string", + "uuid_ts": "datetime", + }, + "receivedAt": "2021-09-01T00:00:00.000Z", + "table": "event", + }, + "userId": "", + }, + Metadata: ptrans.Metadata{ + EventType: "track", + DestinationType: "BQ", + ReceivedAt: "2021-09-01T00:00:00.000Z", + SourceID: "sourceID", + DestinationID: "destinationID", + SourceType: "sourceType", + MessageID: "messageId", + }, + StatusCode: http.StatusOK, + }, + { + Output: map[string]any{ + "data": map[string]any{ + "merge_property_1_type": "anonymous_id", + "merge_property_1_value": "anonymousId", + "merge_property_2_type": "user_id", + "merge_property_2_value": "userId", + }, + "metadata": map[string]any{ + "columns": map[string]any{ + "merge_property_1_type": "string", + "merge_property_1_value": "string", + "merge_property_2_type": "string", + "merge_property_2_value": "string", + }, + "isMergeRule": true, + "mergePropOne": "anonymousId", + "mergePropTwo": "userId", + "receivedAt": "2021-09-01T00:00:00.000Z", + "table": "rudder_identity_merge_rules", + }, + "userId": "", + }, + Metadata: 
ptrans.Metadata{ + EventType: "track", + DestinationType: "BQ", + ReceivedAt: "2021-09-01T00:00:00.000Z", + SourceID: "sourceID", + DestinationID: "destinationID", + SourceType: "sourceType", + MessageID: "messageId", + }, + StatusCode: http.StatusOK, + }, + }, + }, + }, + } + + for _, tc := range testsCases { + t.Run(tc.name, func(t *testing.T) { + c := config.New() + c.Set("DEST_TRANSFORM_URL", transformerResource.TransformerURL) + c.Set("USER_TRANSFORM_URL", transformerResource.TransformerURL) + + for k, v := range tc.configOverride { + c.Set(k, v) + } + + eventsInfos := []eventsInfo{ + { + payload: []byte(tc.eventPayload), + metadata: tc.metadata, + destination: tc.destination, + }, + } + destinationTransformer := ptrans.NewTransformer(c, logger.NOP, stats.Default) + warehouseTransformer := New(c, logger.NOP, stats.NOP) + + testEvents(t, eventsInfos, destinationTransformer, warehouseTransformer, tc.expectedResponse) + }) + } +} diff --git a/warehouse/transformer/transformer.go b/warehouse/transformer/transformer.go new file mode 100644 index 00000000000..1abf21bddda --- /dev/null +++ b/warehouse/transformer/transformer.go @@ -0,0 +1,205 @@ +package transformer + +import ( + "context" + "encoding/json" + "errors" + "fmt" + "net/http" + "strings" + "time" + + "github.com/samber/lo" + + "github.com/rudderlabs/rudder-go-kit/config" + "github.com/rudderlabs/rudder-go-kit/logger" + "github.com/rudderlabs/rudder-go-kit/stats" + + ptrans "github.com/rudderlabs/rudder-server/processor/transformer" + "github.com/rudderlabs/rudder-server/warehouse/transformer/internal/utils" +) + +type ( + transformer struct { + now func() time.Time + + conf *config.Config + logger logger.Logger + statsFactory stats.Stats + + config struct { + enableIDResolution config.ValueLoader[bool] + enableArraySupport config.ValueLoader[bool] + populateSrcDestInfoInContext config.ValueLoader[bool] + maxColumnsInEvent config.ValueLoader[int] + } + } + + processingInfo struct { + event 
ptrans.TransformerEvent + itrOpts integrationsOptions + dstOpts destConfigOptions + jsonPathsInfo jsonPathInfo + } +) + +func New(conf *config.Config, logger logger.Logger, statsFactory stats.Stats) ptrans.DestinationTransformer { + t := &transformer{ + conf: conf, + logger: logger, + statsFactory: statsFactory, + now: time.Now, + } + + t.config.enableIDResolution = conf.GetReloadableBoolVar(false, "Warehouse.enableIDResolution") + t.config.enableArraySupport = conf.GetReloadableBoolVar(false, "Warehouse.clickhouse.enableArraySupport") + t.config.populateSrcDestInfoInContext = conf.GetReloadableBoolVar(true, "Warehouse.populateSrcDestInfoInContext") + t.config.maxColumnsInEvent = conf.GetReloadableIntVar(200, 1, "Warehouse.maxColumnsInEvent") + return t +} + +func (t *transformer) Transform(_ context.Context, clientEvents []ptrans.TransformerEvent, _ int) (res ptrans.Response) { + if len(clientEvents) == 0 { + return + } + + startTime := t.now() + metadata := clientEvents[0].Metadata + + defer func() { + tags := stats.Tags{ + "workspaceId": metadata.WorkspaceID, + "sourceId": metadata.SourceID, + "sourceType": metadata.SourceType, + "destinationId": metadata.DestinationID, + "destinationType": metadata.DestinationType, + } + + t.statsFactory.NewTaggedStat("warehouse_dest_transform_request_latency", stats.TimerType, tags).Since(startTime) + t.statsFactory.NewTaggedStat("warehouse_dest_transform_requests", stats.CountType, tags).Increment() + t.statsFactory.NewTaggedStat("warehouse_dest_transform_input_events", stats.HistogramType, tags).Observe(float64(len(clientEvents))) + t.statsFactory.NewTaggedStat("warehouse_dest_transform_output_events", stats.HistogramType, tags).Observe(float64(len(res.Events))) + t.statsFactory.NewTaggedStat("warehouse_dest_transform_output_failed_events", stats.HistogramType, tags).Observe(float64(len(res.FailedEvents))) + }() + + for _, event := range clientEvents { + r, err := t.processWarehouseMessage(event) + if err != nil { + 
res.FailedEvents = append(res.FailedEvents, t.transformerResponseFromErr(event, err)) + continue + } + + res.Events = append(res.Events, lo.Map(r, func(item map[string]any, index int) ptrans.TransformerResponse { + return ptrans.TransformerResponse{ + Output: item, + Metadata: event.Metadata, + StatusCode: http.StatusOK, + } + })...) + } + return +} + +func (t *transformer) processWarehouseMessage(event ptrans.TransformerEvent) ([]map[string]any, error) { + t.enhanceContextWithSourceDestInfo(event) + return t.handleEvent(event) +} + +func (t *transformer) enhanceContextWithSourceDestInfo(event ptrans.TransformerEvent) { + if !t.config.populateSrcDestInfoInContext.Load() { + return + } + + messageContext, ok := event.Message["context"].(map[string]any) + if !ok || messageContext == nil { + messageContext = map[string]any{} + } + messageContext["sourceId"] = event.Metadata.SourceID + messageContext["sourceType"] = event.Metadata.SourceType + messageContext["destinationId"] = event.Metadata.DestinationID + messageContext["destinationType"] = event.Metadata.DestinationType + + event.Message["context"] = messageContext +} + +func (t *transformer) handleEvent(event ptrans.TransformerEvent) ([]map[string]any, error) { + itrOpts := prepareIntegrationOptions(event) + dstOpts := prepareDestinationOptions(event.Destination.Config) + jsonPathsInfo := extractJSONPathInfo(append(itrOpts.jsonPaths, dstOpts.jsonPaths...)) + eventType := strings.ToLower(event.Metadata.EventType) + + pi := &processingInfo{ + event: event, + itrOpts: itrOpts, + dstOpts: dstOpts, + jsonPathsInfo: jsonPathsInfo, + } + + switch eventType { + case "extract": + return t.handleExtractEvent(pi) + case "track": + return t.handleTrackEvent(pi) + case "identify": + return t.handleIdentifyEvent(pi) + case "page": + return t.handlePageEvent(pi) + case "screen": + return t.handleScreenEvent(pi) + case "group": + return t.handleGroupEvent(pi) + case "alias": + return t.handleAliasEvent(pi) + case "merge": + return 
t.handleMergeEvent(pi) + default: + return nil, newTransErr(fmt.Sprintf("Unknown event type: %q", eventType), http.StatusBadRequest) + } +} + +func (t *transformer) transformerResponseFromErr(event ptrans.TransformerEvent, err error) ptrans.TransformerResponse { + var te *transErr + if ok := errors.As(err, &te); ok { + return ptrans.TransformerResponse{ + Output: nil, + Metadata: event.Metadata, + Error: te.Error(), + StatusCode: te.StatusCode(), + } + } + + return ptrans.TransformerResponse{ + Output: nil, + Metadata: event.Metadata, + Error: errInternalServer.Error(), + StatusCode: errInternalServer.StatusCode(), + } +} + +func storeRudderEvent(pi *processingInfo, data map[string]any, columnType map[string]string) error { + if !pi.dstOpts.storeFullEvent { + return nil + } + + safeName, err := SafeColumnName(pi.event.Metadata.DestinationType, pi.itrOpts, "rudder_event") + if err != nil { + return fmt.Errorf("transforming column name: %w", err) + } + + eventJSON, err := json.Marshal(pi.event.Message) + if err != nil { + return fmt.Errorf("marshalling event: %w", err) + } + + data[safeName] = string(eventJSON) + columnType[safeName] = utils.GetFullEventColumnTypeByDestType(pi.event.Metadata.DestinationType) + return nil +} + +func excludeRudderCreatedTableNames(name string, skipReservedKeywordsEscaping bool) string { + nameLower := strings.ToLower(name) + if utils.IsRudderIsolatedTable(nameLower) || (utils.IsRudderCreatedTable(nameLower) && !skipReservedKeywordsEscaping) { + return "_" + name + } + return name +} diff --git a/warehouse/transformer/transformer_test.go b/warehouse/transformer/transformer_test.go new file mode 100644 index 00000000000..cef2f491414 --- /dev/null +++ b/warehouse/transformer/transformer_test.go @@ -0,0 +1,753 @@ +package transformer + +import ( + "context" + "encoding/json" + "fmt" + "net/http" + "strconv" + "strings" + "testing" + + "github.com/ory/dockertest/v3" + "github.com/samber/lo" + "github.com/stretchr/testify/require" + + 
"github.com/rudderlabs/rudder-go-kit/config" + "github.com/rudderlabs/rudder-go-kit/logger" + "github.com/rudderlabs/rudder-go-kit/stats" + transformertest "github.com/rudderlabs/rudder-go-kit/testhelper/docker/resource/transformer" + + backendconfig "github.com/rudderlabs/rudder-server/backend-config" + ptrans "github.com/rudderlabs/rudder-server/processor/transformer" + "github.com/rudderlabs/rudder-server/utils/types" +) + +type eventsInfo struct { + payload []byte + metadata ptrans.Metadata + destination backendconfig.DestinationT +} + +func testEvents(t *testing.T, infos []eventsInfo, pTransformer, dTransformer ptrans.DestinationTransformer, expectedResponse ptrans.Response) { + t.Helper() + + var events []ptrans.TransformerEvent + for _, info := range infos { + var singularEvent types.SingularEventT + err := json.Unmarshal(info.payload, &singularEvent) + require.NoError(t, err) + + events = append(events, ptrans.TransformerEvent{ + Message: singularEvent, + Metadata: info.metadata, + Destination: info.destination, + }) + } + + ctx := context.Background() + batchSize := 100 + + pResponse := pTransformer.Transform(ctx, events, batchSize) + wResponse := dTransformer.Transform(ctx, events, batchSize) + require.Equal(t, len(expectedResponse.Events), len(pResponse.Events)) + require.Equal(t, len(expectedResponse.Events), len(wResponse.Events)) + require.Equal(t, len(expectedResponse.FailedEvents), len(pResponse.FailedEvents)) + require.Equal(t, len(expectedResponse.FailedEvents), len(wResponse.FailedEvents)) + + for i := range pResponse.Events { + data := expectedResponse.Events[i].Output["data"] + if data != nil && data.(map[string]any)["rudder_event"] != nil { + require.JSONEq(t, expectedResponse.Events[i].Output["data"].(map[string]any)["rudder_event"].(string), pResponse.Events[i].Output["data"].(map[string]any)["rudder_event"].(string)) + require.JSONEq(t, expectedResponse.Events[i].Output["data"].(map[string]any)["rudder_event"].(string), 
wResponse.Events[i].Output["data"].(map[string]any)["rudder_event"].(string)) + require.JSONEq(t, wResponse.Events[i].Output["data"].(map[string]any)["rudder_event"].(string), pResponse.Events[i].Output["data"].(map[string]any)["rudder_event"].(string)) + + delete(pResponse.Events[i].Output["data"].(map[string]any), "rudder_event") + delete(wResponse.Events[i].Output["data"].(map[string]any), "rudder_event") + delete(expectedResponse.Events[i].Output["data"].(map[string]any), "rudder_event") + } + } + for i := range pResponse.Events { + require.EqualValues(t, expectedResponse.Events[i], pResponse.Events[i]) + require.EqualValues(t, expectedResponse.Events[i], wResponse.Events[i]) + require.EqualValues(t, wResponse.Events[i], pResponse.Events[i]) + } + for i := range pResponse.FailedEvents { + require.EqualValues(t, expectedResponse.FailedEvents[i], pResponse.FailedEvents[i]) + require.EqualValues(t, expectedResponse.FailedEvents[i], wResponse.FailedEvents[i]) + require.EqualValues(t, wResponse.FailedEvents[i], pResponse.FailedEvents[i]) + } +} + +func TestTransformer(t *testing.T) { + testsCases := []struct { + name string + configOverride map[string]any + envOverride []string + eventPayload string + metadata ptrans.Metadata + destination backendconfig.DestinationT + expectedResponse ptrans.Response + }{ + { + name: "unknown event (POSTGRES)", + eventPayload: `{"type":"unknown"}`, + metadata: ptrans.Metadata{ + EventType: "unknown", + DestinationType: "POSTGRES", + ReceivedAt: "2021-09-01T00:00:00.000Z", + SourceID: "sourceID", + DestinationID: "destinationID", + SourceType: "sourceType", + MessageID: "messageId", + }, + destination: backendconfig.DestinationT{ + Name: "POSTGRES", + Config: map[string]any{}, + DestinationDefinition: backendconfig.DestinationDefinitionT{ + Name: "POSTGRES", + }, + }, + expectedResponse: ptrans.Response{ + FailedEvents: []ptrans.TransformerResponse{ + { + Error: "Unknown event type: \"unknown\"", + StatusCode: http.StatusBadRequest, 
+ Metadata: ptrans.Metadata{ + EventType: "unknown", + DestinationType: "POSTGRES", + ReceivedAt: "2021-09-01T00:00:00.000Z", + SourceID: "sourceID", + DestinationID: "destinationID", + SourceType: "sourceType", + MessageID: "messageId", + }, + }, + }, + }, + }, + { + name: "track (POSTGRES) not populateSrcDestInfoInContext", + configOverride: map[string]any{ + "Warehouse.populateSrcDestInfoInContext": false, + }, + envOverride: []string{"WH_POPULATE_SRC_DEST_INFO_IN_CONTEXT=false"}, + eventPayload: `{"type":"track","messageId":"messageId","anonymousId":"anonymousId","userId":"userId","sentAt":"2021-09-01T00:00:00.000Z","timestamp":"2021-09-01T00:00:00.000Z","receivedAt":"2021-09-01T00:00:00.000Z","originalTimestamp":"2021-09-01T00:00:00.000Z","channel":"web","event":"event","request_ip":"5.6.7.8","properties":{"review_id":"86ac1cd43","product_id":"9578257311"},"userProperties":{"rating":3.0,"review_body":"OK for the price. It works but the material feels flimsy."},"context":{"traits":{"name":"Richard Hendricks","email":"rhedricks@example.com","logins":2},"ip":"1.2.3.4"}}`, + metadata: ptrans.Metadata{ + EventType: "track", + DestinationType: "POSTGRES", + ReceivedAt: "2021-09-01T00:00:00.000Z", + SourceID: "sourceID", + DestinationID: "destinationID", + SourceType: "sourceType", + MessageID: "messageId", + }, + destination: backendconfig.DestinationT{ + Name: "POSTGRES", + Config: map[string]any{}, + DestinationDefinition: backendconfig.DestinationDefinitionT{ + Name: "POSTGRES", + }, + }, + expectedResponse: ptrans.Response{ + Events: []ptrans.TransformerResponse{ + { + Output: map[string]any{ + "data": map[string]any{ + "anonymous_id": "anonymousId", + "channel": "web", + "context_ip": "1.2.3.4", + "context_passed_ip": "1.2.3.4", + "context_request_ip": "5.6.7.8", + "context_traits_email": "rhedricks@example.com", + "context_traits_logins": float64(2), + "context_traits_name": "Richard Hendricks", + "event": "event", + "event_text": "event", + "id": "messageId", 
+ "original_timestamp": "2021-09-01T00:00:00.000Z", + "received_at": "2021-09-01T00:00:00.000Z", + "sent_at": "2021-09-01T00:00:00.000Z", + "timestamp": "2021-09-01T00:00:00.000Z", + "user_id": "userId", + }, + "metadata": map[string]any{ + "columns": map[string]any{ + "anonymous_id": "string", + "channel": "string", + "context_ip": "string", + "context_passed_ip": "string", + "context_request_ip": "string", + "context_traits_email": "string", + "context_traits_logins": "int", + "context_traits_name": "string", + "event": "string", + "event_text": "string", + "id": "string", + "original_timestamp": "datetime", + "received_at": "datetime", + "sent_at": "datetime", + "timestamp": "datetime", + "user_id": "string", + "uuid_ts": "datetime", + }, + "receivedAt": "2021-09-01T00:00:00.000Z", + "table": "tracks", + }, + "userId": "", + }, + Metadata: ptrans.Metadata{ + EventType: "track", + DestinationType: "POSTGRES", + ReceivedAt: "2021-09-01T00:00:00.000Z", + SourceID: "sourceID", + DestinationID: "destinationID", + SourceType: "sourceType", + MessageID: "messageId", + }, + StatusCode: http.StatusOK, + }, + { + Output: map[string]any{ + "data": map[string]any{ + "anonymous_id": "anonymousId", + "channel": "web", + "context_ip": "1.2.3.4", + "context_passed_ip": "1.2.3.4", + "context_request_ip": "5.6.7.8", + "context_traits_email": "rhedricks@example.com", + "context_traits_logins": float64(2), + "context_traits_name": "Richard Hendricks", + "event": "event", + "event_text": "event", + "id": "messageId", + "original_timestamp": "2021-09-01T00:00:00.000Z", + "product_id": "9578257311", + "rating": 3.0, + "received_at": "2021-09-01T00:00:00.000Z", + "review_body": "OK for the price. 
It works but the material feels flimsy.", + "review_id": "86ac1cd43", + "sent_at": "2021-09-01T00:00:00.000Z", + "timestamp": "2021-09-01T00:00:00.000Z", + "user_id": "userId", + }, + "metadata": map[string]any{ + "columns": map[string]any{ + "anonymous_id": "string", + "channel": "string", + "context_ip": "string", + "context_passed_ip": "string", + "context_request_ip": "string", + "context_traits_email": "string", + "context_traits_logins": "int", + "context_traits_name": "string", + "event": "string", + "event_text": "string", + "id": "string", + "original_timestamp": "datetime", + "product_id": "string", + "rating": "int", + "received_at": "datetime", + "review_body": "string", + "review_id": "string", + "sent_at": "datetime", + "timestamp": "datetime", + "user_id": "string", + "uuid_ts": "datetime", + }, + "receivedAt": "2021-09-01T00:00:00.000Z", + "table": "event", + }, + "userId": "", + }, + Metadata: ptrans.Metadata{ + EventType: "track", + DestinationType: "POSTGRES", + ReceivedAt: "2021-09-01T00:00:00.000Z", + SourceID: "sourceID", + DestinationID: "destinationID", + SourceType: "sourceType", + MessageID: "messageId", + }, + StatusCode: http.StatusOK, + }, + }, + }, + }, + { + name: "track (POSTGRES) too many columns", + eventPayload: fmt.Sprintf(`{"type":"track","messageId":"messageId","anonymousId":"anonymousId","userId":"userId","sentAt":"2021-09-01T00:00:00.000Z","timestamp":"2021-09-01T00:00:00.000Z","receivedAt":"2021-09-01T00:00:00.000Z","originalTimestamp":"2021-09-01T00:00:00.000Z","channel":"web","event":"event","request_ip":"5.6.7.8","context":{%s},"ip":"1.2.3.4"}`, strings.Join( + lo.RepeatBy(500, func(index int) string { + return fmt.Sprintf(`"column_%d": "value_%d"`, index, index) + }), ",", + )), + metadata: ptrans.Metadata{ + EventType: "track", + DestinationType: "POSTGRES", + ReceivedAt: "2021-09-01T00:00:00.000Z", + SourceID: "sourceID", + DestinationID: "destinationID", + SourceType: "sourceType", + MessageID: "messageId", + }, + 
destination: backendconfig.DestinationT{ + Name: "POSTGRES", + Config: map[string]any{}, + DestinationDefinition: backendconfig.DestinationDefinitionT{ + Name: "POSTGRES", + }, + }, + expectedResponse: ptrans.Response{ + FailedEvents: []ptrans.TransformerResponse{ + { + Error: "postgres transformer: Too many columns outputted from the event", + StatusCode: http.StatusBadRequest, + Metadata: ptrans.Metadata{ + EventType: "track", + DestinationType: "POSTGRES", + ReceivedAt: "2021-09-01T00:00:00.000Z", + SourceID: "sourceID", + DestinationID: "destinationID", + SourceType: "sourceType", + MessageID: "messageId", + }, + }, + }, + }, + }, + { + name: "track (GCS_DATALAKE) too many columns", + eventPayload: fmt.Sprintf(`{"type":"track","messageId":"messageId","anonymousId":"anonymousId","userId":"userId","sentAt":"2021-09-01T00:00:00.000Z","timestamp":"2021-09-01T00:00:00.000Z","receivedAt":"2021-09-01T00:00:00.000Z","originalTimestamp":"2021-09-01T00:00:00.000Z","channel":"web","event":"event","request_ip":"5.6.7.8","properties":{"review_id":"86ac1cd43","product_id":"9578257311"},"userProperties":{"rating":3.0,"review_body":"OK for the price. 
It works but the material feels flimsy."},"context":{"traits":{"name":"Richard Hendricks","email":"rhedricks@example.com","logins":2},%s,"ip":"1.2.3.4"}}`, strings.Join( + lo.RepeatBy(500, func(index int) string { + return fmt.Sprintf(`"column_%d": "value_%d"`, index, index) + }), ",", + )), + metadata: ptrans.Metadata{ + EventType: "track", + DestinationType: "GCS_DATALAKE", + ReceivedAt: "2021-09-01T00:00:00.000Z", + SourceID: "sourceID", + DestinationID: "destinationID", + SourceType: "sourceType", + MessageID: "messageId", + }, + destination: backendconfig.DestinationT{ + Name: "GCS_DATALAKE", + Config: map[string]any{}, + DestinationDefinition: backendconfig.DestinationDefinitionT{ + Name: "GCS_DATALAKE", + }, + }, + expectedResponse: ptrans.Response{ + Events: []ptrans.TransformerResponse{ + { + Output: map[string]any{ + "data": lo.Assign( + map[string]any{ + "anonymous_id": "anonymousId", + "channel": "web", + "context_destination_id": "destinationID", + "context_destination_type": "GCS_DATALAKE", + "context_ip": "1.2.3.4", + "context_passed_ip": "1.2.3.4", + "context_request_ip": "5.6.7.8", + "context_source_id": "sourceID", + "context_source_type": "sourceType", + "context_traits_email": "rhedricks@example.com", + "context_traits_logins": float64(2), + "context_traits_name": "Richard Hendricks", + "event": "event", + "event_text": "event", + "id": "messageId", + "original_timestamp": "2021-09-01T00:00:00.000Z", + "received_at": "2021-09-01T00:00:00.000Z", + "sent_at": "2021-09-01T00:00:00.000Z", + "timestamp": "2021-09-01T00:00:00.000Z", + "user_id": "userId", + }, + lo.SliceToMap( + lo.RepeatBy(500, func(index int) string { + return strconv.Itoa(index) + }), func(item string) (string, any) { + return fmt.Sprintf(`context_column_%s`, item), fmt.Sprintf(`value_%s`, item) + }, + ), + ), + "metadata": map[string]any{ + "columns": lo.Assign( + map[string]any{ + "anonymous_id": "string", + "channel": "string", + "context_destination_id": "string", + 
"context_destination_type": "string", + "context_ip": "string", + "context_passed_ip": "string", + "context_request_ip": "string", + "context_source_id": "string", + "context_source_type": "string", + "context_traits_email": "string", + "context_traits_logins": "int", + "context_traits_name": "string", + "event": "string", + "event_text": "string", + "id": "string", + "original_timestamp": "datetime", + "received_at": "datetime", + "sent_at": "datetime", + "timestamp": "datetime", + "user_id": "string", + "uuid_ts": "datetime", + }, + lo.SliceToMap( + lo.RepeatBy(500, func(index int) string { + return strconv.Itoa(index) + }), func(item string) (string, any) { + return fmt.Sprintf(`context_column_%s`, item), "string" + }, + ), + ), + "receivedAt": "2021-09-01T00:00:00.000Z", + "table": "tracks", + }, + "userId": "", + }, + Metadata: ptrans.Metadata{ + EventType: "track", + DestinationType: "GCS_DATALAKE", + ReceivedAt: "2021-09-01T00:00:00.000Z", + SourceID: "sourceID", + DestinationID: "destinationID", + SourceType: "sourceType", + MessageID: "messageId", + }, + StatusCode: http.StatusOK, + }, + { + Output: map[string]any{ + "data": lo.Assign( + map[string]any{ + "anonymous_id": "anonymousId", + "channel": "web", + "context_destination_id": "destinationID", + "context_destination_type": "GCS_DATALAKE", + "context_ip": "1.2.3.4", + "context_passed_ip": "1.2.3.4", + "context_request_ip": "5.6.7.8", + "context_source_id": "sourceID", + "context_source_type": "sourceType", + "context_traits_email": "rhedricks@example.com", + "context_traits_logins": float64(2), + "context_traits_name": "Richard Hendricks", + "event": "event", + "event_text": "event", + "id": "messageId", + "original_timestamp": "2021-09-01T00:00:00.000Z", + "product_id": "9578257311", + "rating": 3.0, + "received_at": "2021-09-01T00:00:00.000Z", + "review_body": "OK for the price. 
It works but the material feels flimsy.", + "review_id": "86ac1cd43", + "sent_at": "2021-09-01T00:00:00.000Z", + "timestamp": "2021-09-01T00:00:00.000Z", + "user_id": "userId", + }, + lo.SliceToMap( + lo.RepeatBy(500, func(index int) string { + return strconv.Itoa(index) + }), func(item string) (string, any) { + return fmt.Sprintf(`context_column_%s`, item), fmt.Sprintf(`value_%s`, item) + }, + ), + ), + "metadata": map[string]any{ + "columns": lo.Assign( + map[string]any{ + "anonymous_id": "string", + "channel": "string", + "context_destination_id": "string", + "context_destination_type": "string", + "context_ip": "string", + "context_passed_ip": "string", + "context_request_ip": "string", + "context_source_id": "string", + "context_source_type": "string", + "context_traits_email": "string", + "context_traits_logins": "int", + "context_traits_name": "string", + "event": "string", + "event_text": "string", + "id": "string", + "original_timestamp": "datetime", + "product_id": "string", + "rating": "int", + "received_at": "datetime", + "review_body": "string", + "review_id": "string", + "sent_at": "datetime", + "timestamp": "datetime", + "user_id": "string", + "uuid_ts": "datetime", + }, + lo.SliceToMap( + lo.RepeatBy(500, func(index int) string { + return strconv.Itoa(index) + }), func(item string) (string, any) { + return fmt.Sprintf(`context_column_%s`, item), "string" + }, + ), + ), + "receivedAt": "2021-09-01T00:00:00.000Z", + "table": "event", + }, + "userId": "", + }, + Metadata: ptrans.Metadata{ + EventType: "track", + DestinationType: "GCS_DATALAKE", + ReceivedAt: "2021-09-01T00:00:00.000Z", + SourceID: "sourceID", + DestinationID: "destinationID", + SourceType: "sourceType", + MessageID: "messageId", + }, + StatusCode: http.StatusOK, + }, + }, + }, + }, + { + name: "track (POSTGRES) with sources channel too many columns", + eventPayload: 
fmt.Sprintf(`{"type":"track","messageId":"messageId","anonymousId":"anonymousId","userId":"userId","sentAt":"2021-09-01T00:00:00.000Z","timestamp":"2021-09-01T00:00:00.000Z","receivedAt":"2021-09-01T00:00:00.000Z","originalTimestamp":"2021-09-01T00:00:00.000Z","channel":"sources","event":"event","request_ip":"5.6.7.8","properties":{"review_id":"86ac1cd43","product_id":"9578257311"},"userProperties":{"rating":3.0,"review_body":"OK for the price. It works but the material feels flimsy."},"context":{"traits":{"name":"Richard Hendricks","email":"rhedricks@example.com","logins":2},%s,"ip":"1.2.3.4"}}`, strings.Join( + lo.RepeatBy(500, func(index int) string { + return fmt.Sprintf(`"column_%d": "value_%d"`, index, index) + }), ",", + )), + metadata: ptrans.Metadata{ + EventType: "track", + DestinationType: "POSTGRES", + ReceivedAt: "2021-09-01T00:00:00.000Z", + SourceID: "sourceID", + DestinationID: "destinationID", + SourceType: "sourceType", + MessageID: "messageId", + }, + destination: backendconfig.DestinationT{ + Name: "POSTGRES", + Config: map[string]any{}, + DestinationDefinition: backendconfig.DestinationDefinitionT{ + Name: "POSTGRES", + }, + }, + expectedResponse: ptrans.Response{ + Events: []ptrans.TransformerResponse{ + { + Output: map[string]any{ + "data": lo.Assign( + map[string]any{ + "anonymous_id": "anonymousId", + "channel": "sources", + "context_destination_id": "destinationID", + "context_destination_type": "POSTGRES", + "context_ip": "1.2.3.4", + "context_passed_ip": "1.2.3.4", + "context_request_ip": "5.6.7.8", + "context_source_id": "sourceID", + "context_source_type": "sourceType", + "context_traits_email": "rhedricks@example.com", + "context_traits_logins": float64(2), + "context_traits_name": "Richard Hendricks", + "event": "event", + "event_text": "event", + "id": "messageId", + "original_timestamp": "2021-09-01T00:00:00.000Z", + "received_at": "2021-09-01T00:00:00.000Z", + "sent_at": "2021-09-01T00:00:00.000Z", + "timestamp": 
"2021-09-01T00:00:00.000Z", + "user_id": "userId", + }, + lo.SliceToMap( + lo.RepeatBy(500, func(index int) string { + return strconv.Itoa(index) + }), func(item string) (string, any) { + return fmt.Sprintf(`context_column_%s`, item), fmt.Sprintf(`value_%s`, item) + }, + ), + ), + "metadata": map[string]any{ + "columns": lo.Assign( + map[string]any{ + "anonymous_id": "string", + "channel": "string", + "context_destination_id": "string", + "context_destination_type": "string", + "context_ip": "string", + "context_passed_ip": "string", + "context_request_ip": "string", + "context_source_id": "string", + "context_source_type": "string", + "context_traits_email": "string", + "context_traits_logins": "int", + "context_traits_name": "string", + "event": "string", + "event_text": "string", + "id": "string", + "original_timestamp": "datetime", + "received_at": "datetime", + "sent_at": "datetime", + "timestamp": "datetime", + "user_id": "string", + "uuid_ts": "datetime", + }, + lo.SliceToMap( + lo.RepeatBy(500, func(index int) string { + return strconv.Itoa(index) + }), func(item string) (string, any) { + return fmt.Sprintf(`context_column_%s`, item), "string" + }, + ), + ), + "receivedAt": "2021-09-01T00:00:00.000Z", + "table": "tracks", + }, + "userId": "", + }, + Metadata: ptrans.Metadata{ + EventType: "track", + DestinationType: "POSTGRES", + ReceivedAt: "2021-09-01T00:00:00.000Z", + SourceID: "sourceID", + DestinationID: "destinationID", + SourceType: "sourceType", + MessageID: "messageId", + }, + StatusCode: http.StatusOK, + }, + { + Output: map[string]any{ + "data": lo.Assign( + map[string]any{ + "anonymous_id": "anonymousId", + "channel": "sources", + "context_destination_id": "destinationID", + "context_destination_type": "POSTGRES", + "context_ip": "1.2.3.4", + "context_passed_ip": "1.2.3.4", + "context_request_ip": "5.6.7.8", + "context_source_id": "sourceID", + "context_source_type": "sourceType", + "context_traits_email": "rhedricks@example.com", + 
"context_traits_logins": float64(2), + "context_traits_name": "Richard Hendricks", + "event": "event", + "event_text": "event", + "id": "messageId", + "original_timestamp": "2021-09-01T00:00:00.000Z", + "product_id": "9578257311", + "rating": 3.0, + "received_at": "2021-09-01T00:00:00.000Z", + "review_body": "OK for the price. It works but the material feels flimsy.", + "review_id": "86ac1cd43", + "sent_at": "2021-09-01T00:00:00.000Z", + "timestamp": "2021-09-01T00:00:00.000Z", + "user_id": "userId", + }, + lo.SliceToMap( + lo.RepeatBy(500, func(index int) string { + return strconv.Itoa(index) + }), func(item string) (string, any) { + return fmt.Sprintf(`context_column_%s`, item), fmt.Sprintf(`value_%s`, item) + }, + ), + ), + "metadata": map[string]any{ + "columns": lo.Assign( + map[string]any{ + "anonymous_id": "string", + "channel": "string", + "context_destination_id": "string", + "context_destination_type": "string", + "context_ip": "string", + "context_passed_ip": "string", + "context_request_ip": "string", + "context_source_id": "string", + "context_source_type": "string", + "context_traits_email": "string", + "context_traits_logins": "int", + "context_traits_name": "string", + "event": "string", + "event_text": "string", + "id": "string", + "original_timestamp": "datetime", + "product_id": "string", + "rating": "int", + "received_at": "datetime", + "review_body": "string", + "review_id": "string", + "sent_at": "datetime", + "timestamp": "datetime", + "user_id": "string", + "uuid_ts": "datetime", + }, + lo.SliceToMap( + lo.RepeatBy(500, func(index int) string { + return strconv.Itoa(index) + }), func(item string) (string, any) { + return fmt.Sprintf(`context_column_%s`, item), "string" + }, + ), + ), + "receivedAt": "2021-09-01T00:00:00.000Z", + "table": "event", + }, + "userId": "", + }, + Metadata: ptrans.Metadata{ + EventType: "track", + DestinationType: "POSTGRES", + ReceivedAt: "2021-09-01T00:00:00.000Z", + SourceID: "sourceID", + DestinationID: 
"destinationID", + SourceType: "sourceType", + MessageID: "messageId", + }, + StatusCode: http.StatusOK, + }, + }, + }, + }, + } + + for _, tc := range testsCases { + t.Run(tc.name, func(t *testing.T) { + pool, err := dockertest.NewPool("") + require.NoError(t, err) + + opts := lo.Map(tc.envOverride, func(item string, index int) transformertest.Option { + return transformertest.WithEnv(item) + }) + opts = append(opts, transformertest.WithRepository("rudderstack/develop-rudder-transformer")) + transformerResource, err := transformertest.Setup(pool, t, opts...) + require.NoError(t, err) + + c := config.New() + c.Set("DEST_TRANSFORM_URL", transformerResource.TransformerURL) + c.Set("USER_TRANSFORM_URL", transformerResource.TransformerURL) + + for k, v := range tc.configOverride { + c.Set(k, v) + } + + eventsInfos := []eventsInfo{ + { + payload: []byte(tc.eventPayload), + metadata: tc.metadata, + destination: tc.destination, + }, + } + destinationTransformer := ptrans.NewTransformer(c, logger.NOP, stats.Default) + warehouseTransformer := New(c, logger.NOP, stats.NOP) + + testEvents(t, eventsInfos, destinationTransformer, warehouseTransformer, tc.expectedResponse) + }) + } +}