Skip to content

Commit

Permalink
Add in example validations for YAML
Browse files Browse the repository at this point in the history
  • Loading branch information
pflooky committed Dec 19, 2024
1 parent c04b45c commit c654f30
Show file tree
Hide file tree
Showing 5 changed files with 338 additions and 0 deletions.
13 changes: 13 additions & 0 deletions docker/data/custom/validation/account/json-account-validation.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
---
# Validation suite for the generated account JSON data set.
# Layout: dataSources -> <data source key> -> list of entries, each pairing
# read `options` with the `validations` to run against that data.
# NOTE(review): nesting below was restored from a copy whose indentation had
# been flattened; confirm it against the Data Caterer validation schema.
name: "json_account_checks"
description: "Check account related fields have gone through system correctly"
dataSources:
  json:
    - options:
        path: "app/src/test/resources/sample/json/account-gen"
      validations:
        # Row-level SQL expressions.
        - expr: "year BETWEEN 2021 AND 2022"
        - expr: "amount < 90"
        # Compares a top-level field with a nested field.
        - expr: "updated_time == details.updated_by.time"
        # Aggregate check with no groupByCols.
        - aggType: "count"
          aggExpr: "count == 1000"
81 changes: 81 additions & 0 deletions docker/data/custom/validation/all/all-validation.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
---
# Example suite listing the different validation shapes in one file:
# row expressions, pre-filtered expressions, grouped aggregations,
# column-name checks and checks joined against an upstream data source.
# NOTE(review): nesting below was restored from a copy whose indentation had
# been flattened (which left many duplicate top-level keys); confirm it
# against the Data Caterer validation schema.
name: "account_checks"
description: "Check account related fields have gone through system correctly"
dataSources:
  json:
    - options:
        path: "app/src/test/resources/sample/json/txn-gen"
      waitCondition:
        pauseInSeconds: 1
      validations:
        # --- Row-level expressions ---
        - expr: "amount < 100"
        # NOTE(review): errorThreshold presumably is a fraction of rows when
        # below 1 and an absolute row count otherwise - confirm.
        - expr: "year == 2021"
          errorThreshold: 0.1
        - expr: "regexp_like(name, 'Peter .*')"
          errorThreshold: 200
        # Expression applied together with a pre-filter expression.
        - preFilterExpr: "name == 'peter'"
          expr: "amount > 50"

        # --- Grouped aggregation ---
        - groupByCols: ["account_id"]
          aggType: "count"
          aggExpr: "count == 1"

        # --- Column name / column count checks ---
        - columnNameType: "columnCountEqual"
          count: "3"
        - columnNameType: "columnCountBetween"
          minCount: "1"
          maxCount: "2"
        - columnNameType: "columnNameMatchOrder"
          names: ["account_id", "amount", "name"]
        - columnNameType: "columnNameMatchSet"
          names: ["account_id", "my_name"]

        # --- Checks joined with upstream data source "my_first_json" ---
        # The expressions reference upstream fields with a
        # "my_first_json_" prefix.
        - upstreamDataSource: "my_first_json"
          upstreamReadOptions: {}
          joinColumns: ["account_id"]
          joinType: "outer"
          validation:
            expr: "my_first_json_customer_details.name == name"
        - upstreamDataSource: "my_first_json"
          upstreamReadOptions: {}
          joinColumns: ["account_id"]
          joinType: "outer"
          validation:
            expr: "amount != my_first_json_balance"
        # Join condition given as an expression ("expr:" prefix) instead of
        # a plain column name.
        - upstreamDataSource: "my_first_json"
          upstreamReadOptions: {}
          joinColumns: ["expr:account_id == my_first_json_account_id"]
          joinType: "outer"
          validation:
            groupByCols: ["account_id", "my_first_json_balance"]
            aggExpr: "sum(amount) BETWEEN my_first_json_balance * 0.8 AND my_first_json_balance * 1.2"
        # Same aggregate check, joined on the plain column name.
        - upstreamDataSource: "my_first_json"
          upstreamReadOptions: {}
          joinColumns: ["account_id"]
          joinType: "outer"
          validation:
            groupByCols: ["account_id", "my_first_json_balance"]
            aggExpr: "sum(amount) BETWEEN my_first_json_balance * 0.8 AND my_first_json_balance * 1.2"
        # Anti join whose result is expected to contain zero rows.
        - upstreamDataSource: "my_first_json"
          upstreamReadOptions: {}
          joinColumns: ["account_id"]
          joinType: "anti"
          validation:
            aggType: "count"
            aggExpr: "count == 0"
        # Outer join whose result is expected to contain 30 rows.
        - upstreamDataSource: "my_first_json"
          upstreamReadOptions: {}
          joinColumns: ["account_id"]
          joinType: "outer"
          validation:
            aggType: "count"
            aggExpr: "count == 30"
        # Two-level upstream check: the inner validation is itself an
        # upstream validation against "my_third_json".
        - upstreamDataSource: "my_first_json"
          upstreamReadOptions: {}
          joinColumns: ["account_id"]
          joinType: "outer"
          validation:
            upstreamDataSource: "my_third_json"
            upstreamReadOptions: {}
            joinColumns: ["account_id"]
            joinType: "outer"
            validation:
              aggType: "count"
              aggExpr: "count == 30"
Original file line number Diff line number Diff line change
@@ -0,0 +1,222 @@
{
  "data_asset_type": null,
  "expectation_suite_name": "taxi.demo",
  "expectations": [
    {
      "expectation_type": "expect_table_columns_to_match_ordered_list",
      "kwargs": {
        "column_list": [
          "vendor_id",
          "pickup_datetime",
          "dropoff_datetime",
          "passenger_count",
          "trip_distance",
          "rate_code_id",
          "store_and_fwd_flag",
          "pickup_location_id",
          "dropoff_location_id",
          "payment_type",
          "fare_amount",
          "extra",
          "mta_tax",
          "tip_amount",
          "tolls_amount",
          "improvement_surcharge",
          "total_amount",
          "congestion_surcharge"
        ]
      },
      "meta": {}
    },
    {
      "expectation_type": "expect_table_row_count_to_be_between",
      "kwargs": {"max_value": 10000, "min_value": 10000},
      "meta": {}
    },
    {
      "expectation_type": "expect_column_min_to_be_between",
      "kwargs": {"column": "passenger_count", "max_value": 1, "min_value": 1},
      "meta": {}
    },
    {
      "expectation_type": "expect_column_max_to_be_between",
      "kwargs": {"column": "passenger_count", "max_value": 6, "min_value": 6},
      "meta": {}
    },
    {
      "expectation_type": "expect_column_mean_to_be_between",
      "kwargs": {
        "column": "passenger_count",
        "max_value": 1.5716,
        "min_value": 1.5716
      },
      "meta": {}
    },
    {
      "expectation_type": "expect_column_median_to_be_between",
      "kwargs": {
        "column": "passenger_count",
        "max_value": 1.0,
        "min_value": 1.0
      },
      "meta": {}
    },
    {
      "expectation_type": "expect_column_quantile_values_to_be_between",
      "kwargs": {
        "allow_relative_error": "lower",
        "column": "passenger_count",
        "quantile_ranges": {
          "quantiles": [0.05, 0.25, 0.5, 0.75, 0.95],
          "value_ranges": [[1, 1], [1, 1], [1, 1], [2, 2], [5, 5]]
        }
      },
      "meta": {}
    },
    {
      "expectation_type": "expect_column_values_to_be_in_set",
      "kwargs": {
        "column": "passenger_count",
        "value_set": [1, 2, 3, 4, 5, 6]
      },
      "meta": {}
    },
    {
      "expectation_type": "expect_column_values_to_not_be_null",
      "kwargs": {"column": "passenger_count"},
      "meta": {}
    },
    {
      "expectation_type": "expect_column_proportion_of_unique_values_to_be_between",
      "kwargs": {
        "column": "passenger_count",
        "max_value": 0.0006,
        "min_value": 0.0006
      },
      "meta": {}
    },
    {
      "expectation_type": "expect_column_values_to_be_in_type_list",
      "kwargs": {
        "column": "passenger_count",
        "type_list": [
          "INTEGER",
          "integer",
          "int",
          "int_",
          "int8",
          "int16",
          "int32",
          "int64",
          "uint8",
          "uint16",
          "uint32",
          "uint64",
          "Int8Dtype",
          "Int16Dtype",
          "Int32Dtype",
          "Int64Dtype",
          "UInt8Dtype",
          "UInt16Dtype",
          "UInt32Dtype",
          "UInt64Dtype",
          "INT",
          "INTEGER",
          "INT64",
          "TINYINT",
          "BYTEINT",
          "SMALLINT",
          "BIGINT",
          "IntegerType",
          "LongType"
        ]
      },
      "meta": {}
    }
  ],
  "ge_cloud_id": null,
  "meta": {
    "citations": [
      {
        "batch_request": {
          "data_asset_name": "yellow_tripdata_sample_2019-01.csv",
          "data_connector_name": "default_inferred_data_connector_name",
          "datasource_name": "data__dir",
          "limit": 1000
        },
        "citation_date": "2022-08-23T13:56:02.653975Z",
        "comment": "Created suite added via CLI"
      }
    ],
    "columns": {
      "congestion_surcharge": {"description": ""},
      "dropoff_datetime": {"description": ""},
      "dropoff_location_id": {"description": ""},
      "extra": {"description": ""},
      "fare_amount": {"description": ""},
      "improvement_surcharge": {"description": ""},
      "mta_tax": {"description": ""},
      "passenger_count": {"description": ""},
      "payment_type": {"description": ""},
      "pickup_datetime": {"description": ""},
      "pickup_location_id": {"description": ""},
      "rate_code_id": {"description": ""},
      "store_and_fwd_flag": {"description": ""},
      "tip_amount": {"description": ""},
      "tolls_amount": {"description": ""},
      "total_amount": {"description": ""},
      "trip_distance": {"description": ""},
      "vendor_id": {"description": ""}
    },
    "great_expectations_version": "0.15.19"
  }
}
9 changes: 9 additions & 0 deletions docker/data/custom/validation/json/json-validation.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
---
# Minimal validation suite: a single row-level expression run against JSON
# data read from a /tmp path.
# NOTE(review): nesting below was restored from a copy whose indentation had
# been flattened; confirm it against the Data Caterer validation schema.
name: "account_checks"
description: "Check account related fields have gone through system correctly"
dataSources:
  json:
    - options:
        path: "/tmp/yaml-validation-json-test"
      validations:
        # transaction_id is expected to begin with the literal 'txn'.
        - expr: "STARTSWITH(transaction_id, 'txn')"
13 changes: 13 additions & 0 deletions docker/data/custom/validation/simple-validation.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
---
# Simple validation suite: three row-level expressions over the generated
# transaction JSON data, two of them with an error threshold.
# NOTE(review): nesting below was restored from a copy whose indentation had
# been flattened (which left `errorThreshold` as a duplicate top-level key);
# confirm it against the Data Caterer validation schema.
name: "account_checks"
description: "Check account related fields have gone through system correctly"
dataSources:
  json:
    - options:
        path: "app/src/test/resources/sample/json/txn-gen"
      validations:
        - expr: "amount < 100"
        # NOTE(review): errorThreshold presumably is a fraction of rows when
        # below 1 and an absolute row count otherwise - confirm.
        - expr: "year == 2021"
          errorThreshold: 0.1
        - expr: "regexp_like(name, 'Peter .*')"
          errorThreshold: 200

0 comments on commit c654f30

Please sign in to comment.