diff --git a/src/parquet.rs b/src/parquet.rs index 30ec259..8dcfc40 100644 --- a/src/parquet.rs +++ b/src/parquet.rs @@ -14,7 +14,7 @@ pub struct MetricRequest { fn get_metrics_from_file( file_url: &String, columns: &[String], - geo_ids: Option<&[String]>, + geo_ids: Option<&[&str]>, ) -> Result { let mut cols: Vec = columns.iter().map(|c| col(c)).collect(); cols.push(col(GEO_ID_COL_NAME)); @@ -39,7 +39,7 @@ fn get_metrics_from_file( /// Given a set of metrics and optional geo_ids, this function will /// retrive all the required metrics from the cloud blob storage /// -pub fn get_metrics(metrics: &[MetricRequest], geo_ids: Option<&[String]>) -> Result { +pub fn get_metrics(metrics: &[MetricRequest], geo_ids: Option<&[&str]>) -> Result { let file_list: HashSet = metrics.iter().map(|m| m.file.clone()).collect(); let dfs: Result> = file_list @@ -59,22 +59,22 @@ pub fn get_metrics(metrics: &[MetricRequest], geo_ids: Option<&[String]>) -> Res }) .collect(); - let joined_df = dfs?.iter().fold(None, |acc, df| match acc { - None => Some(df.clone()), - Some(acc_df) => Some( - // This unwrap should be ok because we would expect the - // previous calls to fail if "GEOID" is not present in - // it's columns - acc_df - .join( - df, - vec![GEO_ID_COL_NAME], - vec![GEO_ID_COL_NAME], - JoinArgs::new(JoinType::Inner), - ) - .unwrap(), - ), - }); + let mut joined_df: Option = None; + + // Merge the dataframes from each remove file in to a single + // dataframe + for df in dfs? { + if let Some(prev_dfs) = joined_df { + joined_df = Some(prev_dfs.join( + &df, + vec![GEO_ID_COL_NAME], + vec![GEO_ID_COL_NAME], + JoinArgs::new(JoinType::Inner), + )?); + } else { + joined_df = Some(df.clone()); + } + } joined_df.with_context(|| "Failed to combine data queries") } @@ -91,7 +91,6 @@ mod tests { column:"B17021_E006".into() }]; let df = get_metrics(&metrics, None); - println!("{df:#?}"); assert!(df.is_ok(), "We should get back a result"); let df = df.unwrap(); assert_eq!( @@ -123,10 +122,9 @@ mod tests { }]; let df = get_metrics( &metrics, - Some(&["1400000US01001020100".into(), "1400000US01001020300".into()]), + Some(&["1400000US01001020100", "1400000US01001020300"]), ); - println!("{df:#?}"); assert!(df.is_ok(), "We should get back a result"); let df = df.unwrap(); assert_eq!(