-
Notifications
You must be signed in to change notification settings - Fork 12
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #98 from snowplow/feature/web/bigquery/integration…
…_tests Feature/web/bigquery/integration tests
- Loading branch information
Showing
21 changed files
with
283 additions
and
10 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,54 @@ | ||
#!/bin/bash | ||
|
||
# Expected input: | ||
# -b (binary) path to sql-runner binary | ||
# -d (database) target database for expectations | ||
# -a (auth) optional credentials for database target | ||
# -m (model) target model to run i.e. web or mobile | ||
|
||
while getopts 'b:d:a:m:' opt | ||
do | ||
case $opt in | ||
b) SQL_RUNNER_PATH=$OPTARG ;; | ||
d) DATABASE=$OPTARG ;; | ||
a) CREDENTIALS=$OPTARG ;; | ||
m) MODEL=$OPTARG ;; | ||
esac | ||
done | ||
|
||
repo_root_path=$( cd "$(dirname "$(dirname "${BASH_SOURCE[0]}")")" && pwd -P ) | ||
script_path="${repo_root_path}/.scripts" | ||
config_dir="${repo_root_path}/$MODEL/v1/$DATABASE/sql-runner/configs" | ||
|
||
# Set credentials via env vars | ||
export BIGQUERY_CREDS=${BIGQUERY_CREDS:-$CREDENTIALS} | ||
export REDSHIFT_PASSWORD=${REDSHIFT_PASSWORD:-$CREDENTIALS} | ||
export SNOWFLAKE_PASSWORD=${SNOWFLAKE_PASSWORD:-$CREDENTIALS} | ||
|
||
echo "integration_check: Starting 5 runs" | ||
|
||
for i in {1..5}; do | ||
|
||
echo "integration_check: Starting run $i"; | ||
|
||
bash .scripts/run_config.sh -b sql-runner -c $config_dir/pre_test.json -t $script_path/templates/$DATABASE.yml.tmpl -v .test/integration_tests/$MODEL/v1/${DATABASE}_variables.yml.tmpl || exit; | ||
|
||
echo "integration_check: Checking actual vs. expected for the events_staged table"; | ||
|
||
bash $script_path/run_test.sh -m $MODEL -d $DATABASE -c events_staged_integration_test_${i} || exit 1; | ||
|
||
bash .scripts/run_config.sh -b sql-runner -c $config_dir/post_test.json -t $script_path/templates/$DATABASE.yml.tmpl -v .test/integration_tests/$MODEL/v1/${DATABASE}_variables.yml.tmpl || exit; | ||
|
||
echo "integration_check: run $i done"; | ||
|
||
done || exit 1 | ||
|
||
echo "integration_check: Checking actual vs. expected for derived tables"; | ||
|
||
bash $script_path/run_test.sh -m $MODEL -d $DATABASE -c perm_integration_test_tables || exit 1; | ||
|
||
echo "integration_check: Checking standard tests against derived tables"; | ||
|
||
bash $script_path/run_test.sh -m $MODEL -d $DATABASE -c perm_tables || exit 1; | ||
|
||
echo "integration_check: Done" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
69 changes: 69 additions & 0 deletions
69
.test/great_expectations/expectations/web/v1/integration_tests.json
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,69 @@ | ||
{ | ||
"data_asset_type": "Dataset", | ||
"expectation_suite_name": "integration_tests", | ||
"expectations": [ | ||
{ | ||
"expectation_type": "expect_column_values_to_be_null", | ||
"kwargs": { | ||
"column": "long_session" | ||
} | ||
}, | ||
{ | ||
"expectation_type": "expect_column_values_to_be_null", | ||
"kwargs": { | ||
"column": "null_page_view_id" | ||
} | ||
}, | ||
{ | ||
"expectation_type": "expect_column_values_to_be_null", | ||
"kwargs": { | ||
"column": "null_domain_userid" | ||
} | ||
}, | ||
{ | ||
"expectation_type": "expect_column_values_to_be_null", | ||
"kwargs": { | ||
"column": "null_domain_sessionid" | ||
} | ||
}, | ||
{ | ||
"expectation_type": "expect_column_values_to_be_null", | ||
"kwargs": { | ||
"column": "dupe_event_id_same_collector_tstamp" | ||
} | ||
}, | ||
{ | ||
"expectation_type": "expect_column_values_to_be_null", | ||
"kwargs": { | ||
"column": "dupe_event_id_diff_collector_tstamp" | ||
} | ||
}, | ||
{ | ||
"expectation_type": "expect_column_values_to_be_null", | ||
"kwargs": { | ||
"column": "dupe_page_view_id_diff_derived_tstamp" | ||
} | ||
}, | ||
{ | ||
"expectation_type": "expect_column_values_to_be_null", | ||
"kwargs": { | ||
"column": "late_arriving_dvc_created_sent" | ||
} | ||
}, | ||
{ | ||
"expectation_type": "expect_column_values_to_be_null", | ||
"kwargs": { | ||
"column": "clean_session" | ||
} | ||
} | ||
], | ||
"meta": { | ||
"versions": { | ||
"test_suite_version": "1.1.1", | ||
"bigquery_model_version": "1.0.3" | ||
}, | ||
"__comment__": "expect_column_values_to_be_null on column stray_page_ping has been removed as it is a known issue (https://github.com/snowplow/data-models/issues/92)", | ||
"great_expectations.__version__": "0.12.0" | ||
} | ||
} | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
13 changes: 13 additions & 0 deletions
13
...eat_expectations/validation_configs/web/v1/bigquery/events_staged_integration_test_1.json
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,13 @@ | ||
{ | ||
"validation_operator_name": "action_list_operator", | ||
"batches": [ | ||
{ | ||
"batch_kwargs": { | ||
"datasource": "bigquery", | ||
"query": "WITH expected_hashed AS ( SELECT a AS data, FARM_FINGERPRINT(FORMAT( '%%T', a)) AS h FROM dv_test_data.events_staged_run_1 AS a ), actual_hashed AS ( SELECT b AS data, FARM_FINGERPRINT(FORMAT( '%%T', b)) AS h FROM scratch_dev1.events_staged AS b ), equality_check AS ( SELECT IF(l.h IS NULL, 'New on right', 'New on left') AS Change, IF(l.h IS NULL,r.data,l.data).* FROM expected_hashed l FULL OUTER JOIN actual_hashed r ON l.h = r.h WHERE l.h IS NULL OR r.h IS NULL)SELECT SUM(CASE WHEN user_id = 'long session' THEN 1 END) AS long_session, SUM(CASE WHEN user_id = 'NULL page_view_id' THEN 1 END) AS null_page_view_id, SUM(CASE WHEN user_id = 'NULL domain_userid' THEN 1 END) AS null_domain_userid, SUM(CASE WHEN user_id = 'NULL domain_sessionid' THEN 1 END) AS null_domain_sessionid, SUM(CASE WHEN user_id = 'dupe: event_id same collector_tstamp' THEN 1 END) AS dupe_event_id_same_collector_tstamp, SUM(CASE WHEN user_id = 'dupe: event_id different collector_tstamp' THEN 1 END) AS dupe_event_id_diff_collector_tstamp, SUM(CASE WHEN user_id = 'dupe: page_view_id same derived_tstamp' THEN 1 END) AS dupe_page_view_id_same_derived_tstamp, SUM(CASE WHEN user_id = 'dupe: page_view_id different derived_tstamp' THEN 1 END) AS dupe_page_view_id_diff_derived_tstamp, SUM(CASE WHEN user_id = 'late arriving: device created/sent >3 days' THEN 1 END) AS late_arriving_dvc_created_sent, SUM(CASE WHEN user_id = 'stray page ping' THEN 1 END) AS stray_page_ping, SUM(CASE WHEN user_id = 'No edge cases' THEN 1 END) AS clean_session FROM equality_check", | ||
"bigquery_temp_table": "ge_test_derived_events_staged_integration" | ||
}, | ||
"expectation_suite_names": ["web.v1.integration_tests"] | ||
} | ||
] | ||
} |
13 changes: 13 additions & 0 deletions
13
...eat_expectations/validation_configs/web/v1/bigquery/events_staged_integration_test_2.json
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,13 @@ | ||
{ | ||
"validation_operator_name": "action_list_operator", | ||
"batches": [ | ||
{ | ||
"batch_kwargs": { | ||
"datasource": "bigquery", | ||
"query": "WITH expected_hashed AS ( SELECT a AS data, FARM_FINGERPRINT(FORMAT( '%%T', a)) AS h FROM dv_test_data.events_staged_run_2 AS a ), actual_hashed AS ( SELECT b AS data, FARM_FINGERPRINT(FORMAT( '%%T', b)) AS h FROM scratch_dev1.events_staged AS b ), equality_check AS ( SELECT IF(l.h IS NULL, 'New on right', 'New on left') AS Change, IF(l.h IS NULL,r.data,l.data).* FROM expected_hashed l FULL OUTER JOIN actual_hashed r ON l.h = r.h WHERE l.h IS NULL OR r.h IS NULL)SELECT SUM(CASE WHEN user_id = 'long session' THEN 1 END) AS long_session, SUM(CASE WHEN user_id = 'NULL page_view_id' THEN 1 END) AS null_page_view_id, SUM(CASE WHEN user_id = 'NULL domain_userid' THEN 1 END) AS null_domain_userid, SUM(CASE WHEN user_id = 'NULL domain_sessionid' THEN 1 END) AS null_domain_sessionid, SUM(CASE WHEN user_id = 'dupe: event_id same collector_tstamp' THEN 1 END) AS dupe_event_id_same_collector_tstamp, SUM(CASE WHEN user_id = 'dupe: event_id different collector_tstamp' THEN 1 END) AS dupe_event_id_diff_collector_tstamp, SUM(CASE WHEN user_id = 'dupe: page_view_id same derived_tstamp' THEN 1 END) AS dupe_page_view_id_same_derived_tstamp, SUM(CASE WHEN user_id = 'dupe: page_view_id different derived_tstamp' THEN 1 END) AS dupe_page_view_id_diff_derived_tstamp, SUM(CASE WHEN user_id = 'late arriving: device created/sent >3 days' THEN 1 END) AS late_arriving_dvc_created_sent, SUM(CASE WHEN user_id = 'stray page ping' THEN 1 END) AS stray_page_ping, SUM(CASE WHEN user_id = 'No edge cases' THEN 1 END) AS clean_session FROM equality_check", | ||
"bigquery_temp_table": "ge_test_derived_events_staged_integration" | ||
}, | ||
"expectation_suite_names": ["web.v1.integration_tests"] | ||
} | ||
] | ||
} |
13 changes: 13 additions & 0 deletions
13
...eat_expectations/validation_configs/web/v1/bigquery/events_staged_integration_test_3.json
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,13 @@ | ||
{ | ||
"validation_operator_name": "action_list_operator", | ||
"batches": [ | ||
{ | ||
"batch_kwargs": { | ||
"datasource": "bigquery", | ||
"query": "WITH expected_hashed AS ( SELECT a AS data, FARM_FINGERPRINT(FORMAT( '%%T', a)) AS h FROM dv_test_data.events_staged_run_3 AS a ), actual_hashed AS ( SELECT b AS data, FARM_FINGERPRINT(FORMAT( '%%T', b)) AS h FROM scratch_dev1.events_staged AS b ), equality_check AS ( SELECT IF(l.h IS NULL, 'New on right', 'New on left') AS Change, IF(l.h IS NULL,r.data,l.data).* FROM expected_hashed l FULL OUTER JOIN actual_hashed r ON l.h = r.h WHERE l.h IS NULL OR r.h IS NULL)SELECT SUM(CASE WHEN user_id = 'long session' THEN 1 END) AS long_session, SUM(CASE WHEN user_id = 'NULL page_view_id' THEN 1 END) AS null_page_view_id, SUM(CASE WHEN user_id = 'NULL domain_userid' THEN 1 END) AS null_domain_userid, SUM(CASE WHEN user_id = 'NULL domain_sessionid' THEN 1 END) AS null_domain_sessionid, SUM(CASE WHEN user_id = 'dupe: event_id same collector_tstamp' THEN 1 END) AS dupe_event_id_same_collector_tstamp, SUM(CASE WHEN user_id = 'dupe: event_id different collector_tstamp' THEN 1 END) AS dupe_event_id_diff_collector_tstamp, SUM(CASE WHEN user_id = 'dupe: page_view_id same derived_tstamp' THEN 1 END) AS dupe_page_view_id_same_derived_tstamp, SUM(CASE WHEN user_id = 'dupe: page_view_id different derived_tstamp' THEN 1 END) AS dupe_page_view_id_diff_derived_tstamp, SUM(CASE WHEN user_id = 'late arriving: device created/sent >3 days' THEN 1 END) AS late_arriving_dvc_created_sent, SUM(CASE WHEN user_id = 'stray page ping' THEN 1 END) AS stray_page_ping, SUM(CASE WHEN user_id = 'No edge cases' THEN 1 END) AS clean_session FROM equality_check", | ||
"bigquery_temp_table": "ge_test_derived_events_staged_integration" | ||
}, | ||
"expectation_suite_names": ["web.v1.integration_tests"] | ||
} | ||
] | ||
} |
13 changes: 13 additions & 0 deletions
13
...eat_expectations/validation_configs/web/v1/bigquery/events_staged_integration_test_4.json
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,13 @@ | ||
{ | ||
"validation_operator_name": "action_list_operator", | ||
"batches": [ | ||
{ | ||
"batch_kwargs": { | ||
"datasource": "bigquery", | ||
"query": "WITH expected_hashed AS ( SELECT a AS data, FARM_FINGERPRINT(FORMAT( '%%T', a)) AS h FROM dv_test_data.events_staged_run_4 AS a ), actual_hashed AS ( SELECT b AS data, FARM_FINGERPRINT(FORMAT( '%%T', b)) AS h FROM scratch_dev1.events_staged AS b ), equality_check AS ( SELECT IF(l.h IS NULL, 'New on right', 'New on left') AS Change, IF(l.h IS NULL,r.data,l.data).* FROM expected_hashed l FULL OUTER JOIN actual_hashed r ON l.h = r.h WHERE l.h IS NULL OR r.h IS NULL)SELECT SUM(CASE WHEN user_id = 'long session' THEN 1 END) AS long_session, SUM(CASE WHEN user_id = 'NULL page_view_id' THEN 1 END) AS null_page_view_id, SUM(CASE WHEN user_id = 'NULL domain_userid' THEN 1 END) AS null_domain_userid, SUM(CASE WHEN user_id = 'NULL domain_sessionid' THEN 1 END) AS null_domain_sessionid, SUM(CASE WHEN user_id = 'dupe: event_id same collector_tstamp' THEN 1 END) AS dupe_event_id_same_collector_tstamp, SUM(CASE WHEN user_id = 'dupe: event_id different collector_tstamp' THEN 1 END) AS dupe_event_id_diff_collector_tstamp, SUM(CASE WHEN user_id = 'dupe: page_view_id same derived_tstamp' THEN 1 END) AS dupe_page_view_id_same_derived_tstamp, SUM(CASE WHEN user_id = 'dupe: page_view_id different derived_tstamp' THEN 1 END) AS dupe_page_view_id_diff_derived_tstamp, SUM(CASE WHEN user_id = 'late arriving: device created/sent >3 days' THEN 1 END) AS late_arriving_dvc_created_sent, SUM(CASE WHEN user_id = 'stray page ping' THEN 1 END) AS stray_page_ping, SUM(CASE WHEN user_id = 'No edge cases' THEN 1 END) AS clean_session FROM equality_check", | ||
"bigquery_temp_table": "ge_test_derived_events_staged_integration" | ||
}, | ||
"expectation_suite_names": ["web.v1.integration_tests"] | ||
} | ||
] | ||
} |
13 changes: 13 additions & 0 deletions
13
...eat_expectations/validation_configs/web/v1/bigquery/events_staged_integration_test_5.json
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,13 @@ | ||
{ | ||
"validation_operator_name": "action_list_operator", | ||
"batches": [ | ||
{ | ||
"batch_kwargs": { | ||
"datasource": "bigquery", | ||
"query": "WITH expected_hashed AS ( SELECT a AS data, FARM_FINGERPRINT(FORMAT( '%%T', a)) AS h FROM dv_test_data.events_staged_run_5 AS a ), actual_hashed AS ( SELECT b AS data, FARM_FINGERPRINT(FORMAT( '%%T', b)) AS h FROM scratch_dev1.events_staged AS b ), equality_check AS ( SELECT IF(l.h IS NULL, 'New on right', 'New on left') AS Change, IF(l.h IS NULL,r.data,l.data).* FROM expected_hashed l FULL OUTER JOIN actual_hashed r ON l.h = r.h WHERE l.h IS NULL OR r.h IS NULL)SELECT SUM(CASE WHEN user_id = 'long session' THEN 1 END) AS long_session, SUM(CASE WHEN user_id = 'NULL page_view_id' THEN 1 END) AS null_page_view_id, SUM(CASE WHEN user_id = 'NULL domain_userid' THEN 1 END) AS null_domain_userid, SUM(CASE WHEN user_id = 'NULL domain_sessionid' THEN 1 END) AS null_domain_sessionid, SUM(CASE WHEN user_id = 'dupe: event_id same collector_tstamp' THEN 1 END) AS dupe_event_id_same_collector_tstamp, SUM(CASE WHEN user_id = 'dupe: event_id different collector_tstamp' THEN 1 END) AS dupe_event_id_diff_collector_tstamp, SUM(CASE WHEN user_id = 'dupe: page_view_id same derived_tstamp' THEN 1 END) AS dupe_page_view_id_same_derived_tstamp, SUM(CASE WHEN user_id = 'dupe: page_view_id different derived_tstamp' THEN 1 END) AS dupe_page_view_id_diff_derived_tstamp, SUM(CASE WHEN user_id = 'late arriving: device created/sent >3 days' THEN 1 END) AS late_arriving_dvc_created_sent, SUM(CASE WHEN user_id = 'stray page ping' THEN 1 END) AS stray_page_ping, SUM(CASE WHEN user_id = 'No edge cases' THEN 1 END) AS clean_session FROM equality_check", | ||
"bigquery_temp_table": "ge_test_derived_events_staged_integration" | ||
}, | ||
"expectation_suite_names": ["web.v1.integration_tests"] | ||
} | ||
] | ||
} |
Oops, something went wrong.