Skip to content

Commit

Permalink
refactor: improve the way we build expected output
Browse files Browse the repository at this point in the history
  • Loading branch information
timvw committed Mar 27, 2024
1 parent 97e385f commit ab4cd03
Showing 1 changed file with 124 additions and 55 deletions.
179 changes: 124 additions & 55 deletions tests/integration.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,20 @@ use assert_cmd::cargo::CargoError;
use assert_cmd::prelude::*;
use datafusion::common::DataFusionError;
use predicates::prelude::*;
use predicates::str::RegexPredicate;
use std::env;
use std::process::Command;

/// Point the AWS SDK environment at a local MinIO instance.
///
/// Sets the region, the well-known example credentials, and the
/// endpoint URL (`http://localhost:9000`) that the S3 integration
/// tests expect a MinIO server to be listening on.
fn configure_minio() {
    let settings = [
        ("AWS_REGION", "eu-central-1"),
        ("AWS_ACCESS_KEY_ID", "AKIAIOSFODNN7EXAMPLE"),
        (
            "AWS_SECRET_ACCESS_KEY",
            "wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY",
        ),
        ("AWS_ENDPOINT_URL", "http://localhost:9000"),
    ];
    for &(key, value) in settings.iter() {
        env::set_var(key, value);
    }
}

/// Adapt an `assert_cmd` `CargoError` (e.g. the test binary could not be
/// located) into a `DataFusionError::External`, so test helpers can return
/// `datafusion::common::Result` and use `?` throughout.
fn map_cargo_to_datafusion_error(e: CargoError) -> DataFusionError {
    DataFusionError::External(Box::new(e))
}
Expand All @@ -13,6 +24,16 @@ fn get_qv_cmd() -> datafusion::common::Result<Command> {
Command::cargo_bin(env!("CARGO_PKG_NAME")).map_err(map_cargo_to_datafusion_error)
}

/// Resolve `rel_data_path` against the test-data directory.
///
/// The base directory is taken from the `QV_TESTING_PATH` environment
/// variable, falling back to `./testing` when it is unset or invalid.
fn get_qv_testing_path(rel_data_path: &str) -> String {
    let base = match env::var("QV_TESTING_PATH") {
        Ok(path) => path,
        Err(_) => String::from("./testing"),
    };
    format!("{}/{}", base, rel_data_path)
}

/// Build a regex predicate matching one row of DataFusion's pretty-printed
/// table output (`| v1 | v2 | ... |`), given the expected cell values in
/// column order.
///
/// Callers must pre-escape any regex metacharacters inside the cell values
/// themselves (as the `{reply_id: ...}` expectations do).
fn build_row_regex_predicate(columns: Vec<&str>) -> RegexPredicate {
    // Join cells with an ESCAPED pipe surrounded by optional padding.
    // The previous separator, "\\s*|\\s*", left the `|` unescaped, which
    // regex interprets as alternation: the pattern then matched whenever
    // ANY single cell value appeared anywhere in stdout, instead of
    // requiring the whole row in order.
    let pattern = columns.join("\\s*\\|\\s*");
    predicate::str::is_match(pattern).unwrap()
}

#[tokio::test]
async fn run_without_file_exits_with_usage() -> datafusion::common::Result<()> {
let mut cmd = get_qv_cmd()?;
Expand All @@ -26,15 +47,40 @@ async fn run_without_file_exits_with_usage() -> datafusion::common::Result<()> {
async fn run_with_local_avro_file() -> datafusion::common::Result<()> {
let mut cmd = get_qv_cmd()?;
let cmd = cmd.arg(get_qv_testing_path("data/avro/alltypes_plain.avro"));
cmd.assert().success()
.stdout(predicate::str::contains("| id | bool_col | tinyint_col | smallint_col | int_col | bigint_col | float_col | double_col | date_string_col | string_col | timestamp_col |").trim())
.stdout(predicate::str::contains("| 4 | true | 0 | 0 | 0 | 0 | 0.0 | 0.0 | 30332f30312f3039 | 30 | 2009-03-01T00:00:00 |").trim());
Ok(())
}

fn get_qv_testing_path(rel_data_path: &str) -> String {
let testing_path = env::var("QV_TESTING_PATH").unwrap_or_else(|_| "./testing".to_string());
format!("{}/{}", testing_path, rel_data_path)
let header_predicate = build_row_regex_predicate(vec![
"id",
"bool_col",
"tinyint_col",
"smallint_col",
"int_col",
"bigint_col",
"float_col",
"double_col",
"date_string_col",
"string_col",
"timestamp_col",
]);

let data_predicate = build_row_regex_predicate(vec![
"4",
"true",
"0",
"0",
"0",
"0",
"0.0",
"0.0",
"30332f30312f3039",
"30",
"2009-03-01T00:00:00",
]);

cmd.assert()
.success()
.stdout(header_predicate)
.stdout(data_predicate);
Ok(())
}

#[tokio::test]
Expand All @@ -43,20 +89,18 @@ async fn run_with_local_parquet_file() -> datafusion::common::Result<()> {
let cmd = cmd.arg(get_qv_testing_path(
"data/parquet/generated_simple_numerics/blogs.parquet",
));

let header_predicate = build_row_regex_predicate(vec!["reply", "blog_id"]);

let data_predicate = build_row_regex_predicate(vec![
"\\{reply_id: 332770973, next_id: }",
"-1473106667809783919",
]);

cmd.assert()
.success()
.stdout(
predicate::str::contains(
r#"| reply | blog_id |"#,
)
.trim(),
)
.stdout(
predicate::str::contains(
r#"| {reply_id: 332770973, next_id: } | -1473106667809783919 |"#,
)
.trim(),
);
.stdout(header_predicate)
.stdout(data_predicate);
Ok(())
}

Expand All @@ -66,34 +110,27 @@ async fn run_with_local_parquet_files_in_folder() -> datafusion::common::Result<
let cmd = cmd
.arg(&get_qv_testing_path("data/iceberg/db/COVID-19_NYT/data"))
.arg("-q")
.arg("select * from tbl order by date desc");
.arg("select * from tbl order by date, county, state, fips, cases, deaths");

let header_predicate =
build_row_regex_predicate(vec!["date", "county", "state", "fips", "case", "deaths"]);

let data_predicate = build_row_regex_predicate(vec![
"2020-01-21",
"Snohomish",
"Washington",
"53061",
"1",
"0",
]);

cmd.assert()
.success()
.stdout(
predicate::str::contains(
r#"| date | county | state | fips | cases | deaths |"#,
)
.trim(),
)
.stdout(
predicate::str::contains(
r#"| 2021-03-11 | Bibb | Alabama | 1007 | 2474 | 58 |"#,
)
.trim(),
);
.stdout(header_predicate)
.stdout(data_predicate);
Ok(())
}

fn configure_minio() {
env::set_var("AWS_REGION", "eu-central-1");
env::set_var("AWS_ACCESS_KEY_ID", "AKIAIOSFODNN7EXAMPLE");
env::set_var(
"AWS_SECRET_ACCESS_KEY",
"wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY",
);
env::set_var("AWS_ENDPOINT_URL", "http://localhost:9000");
}

#[tokio::test]
async fn run_with_s3_parquet_file() -> datafusion::common::Result<()> {
configure_minio();
Expand All @@ -103,19 +140,51 @@ async fn run_with_s3_parquet_file() -> datafusion::common::Result<()> {
.arg("s3://data/iceberg/db/COVID-19_NYT/data/00000-2-2d39563f-6901-4e2d-9903-84a8eab8ac3d-00001.parquet")
.arg("-q")
.arg("select * from tbl order by date, county, state, fips, cases, deaths");

let header_predicate =
build_row_regex_predicate(vec!["date", "county", "state", "fips", "case", "deaths"]);

let data_predicate = build_row_regex_predicate(vec![
"2020-01-21",
"Snohomish",
"Washington",
"53061",
"1",
"0",
]);

cmd.assert()
.success()
.stdout(
predicate::str::contains(
r#"| date | county | state | fips | cases | deaths |"#,
)
.trim(),
)
.stdout(
predicate::str::contains(
r#"| 2020-01-21 | Snohomish | Washington | 53061 | 1 | 0 |"#,
)
.trim(),
);
.stdout(header_predicate)
.stdout(data_predicate);
Ok(())
}
/*
#[tokio::test]
async fn run_with_s3_parquet_files_in_folder() -> datafusion::common::Result<()> {
configure_minio();
let mut cmd = get_qv_cmd()?;
let cmd = cmd
.arg(&get_qv_testing_path("s3://data/iceberg/db/COVID-19_NYT/data"))
.arg("-q")
.arg("select * from tbl order by date, county, state, fips, cases, deaths");
let header_predicate =
build_row_regex_predicate(vec!["date", "county", "state", "fips", "case", "deaths"]);
let data_predicate = build_row_regex_predicate(vec![
"2020-01-21",
"Snohomish",
"Washington",
"53061",
"1",
"0",
]);
cmd.assert()
.success()
.stdout(header_predicate)
.stdout(data_predicate);
Ok(())
}*/

0 comments on commit ab4cd03

Please sign in to comment.