-
Notifications
You must be signed in to change notification settings - Fork 0
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Add caching for CLI #75
Merged
Merged
Changes from all commits
Commits
Show all changes
5 commits
Select commit
Hold shift + click to select a range
96f6b37
Cache implementation
sgreenbury 5a06dca
Add test for caching
sgreenbury c84bbaf
Add cache feature
sgreenbury 56331cc
Add comment and revise doc comment
sgreenbury ef826ee
Add cache and formatters to required-features for binary
sgreenbury File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,32 +1,31 @@ | ||
use std::default::Default; | ||
use std::fmt::Display; | ||
use std::path::Path; | ||
|
||
use anyhow::{anyhow, Result}; | ||
use futures::future::join_all; | ||
use log::debug; | ||
use log::info; | ||
use polars::{ | ||
frame::DataFrame, | ||
lazy::{ | ||
dsl::col, | ||
frame::{IntoLazy, LazyFrame, ScanArgsParquet}, | ||
}, | ||
prelude::{JoinArgs, JoinType, UnionArgs}, | ||
prelude::{DataFrame, JoinArgs, JoinType, ParquetCompression, ParquetWriter, UnionArgs}, | ||
}; | ||
use tokio::try_join; | ||
|
||
use crate::{config::Config, search::MetricId, COL}; | ||
|
||
/// This struct contains the base url and names of the files that contain the metadata. | ||
pub struct PATHS {} | ||
|
||
impl PATHS { | ||
pub const GEOMETRY_METADATA: &'static str = "geometry_metadata.parquet"; | ||
pub const METRIC_METADATA: &'static str = "metric_metadata.parquet"; | ||
pub const COUNTRY: &'static str = "country_metadata.parquet"; | ||
pub const SOURCE: &'static str = "source_data_releases.parquet"; | ||
pub const PUBLISHER: &'static str = "data_publishers.parquet"; | ||
/// File names of the Parquet files that make up the metadata catalogue.
///
/// Each constant is a bare file name (no directory component); callers join
/// it onto a base location such as a cache directory.
pub mod paths {
    /// File backing the `geometries` table of `Metadata`.
    pub const GEOMETRY_METADATA: &str = "geometry_metadata.parquet";

    /// File backing the `metrics` table of `Metadata`.
    pub const METRIC_METADATA: &str = "metric_metadata.parquet";

    /// File backing the `countries` table of `Metadata`.
    pub const COUNTRY: &str = "country_metadata.parquet";

    /// File backing the `source_data_releases` table of `Metadata`.
    pub const SOURCE: &str = "source_data_releases.parquet";

    /// File backing the `data_publishers` table of `Metadata`.
    pub const PUBLISHER: &str = "data_publishers.parquet";
}
use paths as PATHS; | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Refactored as module since no methods expected (see #56 (comment)) |
||
|
||
/// `CountryMetadataLoader` takes a country iso string | ||
/// along with a CountryMetadataPaths and provides methods | ||
|
@@ -50,7 +49,7 @@ impl ExpandedMetadata { | |
/// from a single `CountryMetadataLoader` or for all countries. | ||
/// It also provides the various functions for searching and | ||
/// getting `MetricRequests` from the catalogue. | ||
#[derive(Debug)] | ||
#[derive(Debug, PartialEq)] | ||
pub struct Metadata { | ||
pub metrics: DataFrame, | ||
pub geometries: DataFrame, | ||
|
@@ -59,6 +58,59 @@ pub struct Metadata { | |
pub countries: DataFrame, | ||
} | ||
|
||
/// Reads the Parquet file at `path` into an in-memory `DataFrame`.
///
/// The file is first opened as a lazy scan using the default
/// `ScanArgsParquet`, and the scan is then collected eagerly.
///
/// # Errors
///
/// Returns an error if the scan cannot be set up or if collecting it fails.
#[cfg(feature = "cache")]
fn path_to_df<P: AsRef<Path>>(path: P) -> anyhow::Result<DataFrame> {
    let scan_args = ScanArgsParquet::default();
    let lazy_frame = LazyFrame::scan_parquet(path, scan_args)?;
    let data_frame = lazy_frame.collect()?;
    Ok(data_frame)
}
|
||
/// Writes `df` to `path` as a Zstd-compressed Parquet file.
///
/// The file is created with `std::fs::File::create`, so an existing file at
/// `path` is truncated. The compression level is left unspecified (`None`).
///
/// # Errors
///
/// Returns an error if the file cannot be created or if the Parquet writer
/// fails.
#[cfg(feature = "cache")]
fn df_to_file<P: AsRef<Path>>(path: P, df: &DataFrame) -> anyhow::Result<()> {
    let writer = ParquetWriter::new(std::fs::File::create(path)?)
        .with_compression(ParquetCompression::Zstd(None));
    // The writer is handed a mutable reference while we only hold a shared
    // one, so it operates on a clone of the frame.
    let mut writable = df.clone();
    writer.finish(&mut writable)?;
    Ok(())
}
|
||
#[cfg(feature = "cache")] | ||
/// Builds the path of `file_name` inside the directory `cache_path`.
///
/// This is a plain path join: no file-system access is performed and the
/// result is not checked for existence.
fn prepend<P: AsRef<Path>>(cache_path: P, file_name: &str) -> std::path::PathBuf {
    let mut full_path = cache_path.as_ref().to_path_buf();
    full_path.push(file_name);
    full_path
}
|
||
// Only include methods with "cache" feature since it requires a filesystem
#[cfg(feature = "cache")]
impl Metadata {
    /// Loads all five metadata tables from Parquet files in `cache_dir`.
    ///
    /// The file names are the constants in `PATHS`; each one is joined onto
    /// `cache_dir` and read into the corresponding field of `Metadata`.
    ///
    /// # Errors
    ///
    /// Returns the first error encountered while reading any of the files,
    /// for example when a file is missing or is not valid Parquet.
    pub fn from_cache<P: AsRef<Path>>(cache_dir: P) -> anyhow::Result<Self> {
        let load = |file_name: &str| path_to_df(prepend(&cache_dir, file_name));
        // Struct fields are evaluated in the order written, so the files are
        // read in this order and loading stops at the first failure.
        Ok(Self {
            metrics: load(PATHS::METRIC_METADATA)?,
            geometries: load(PATHS::GEOMETRY_METADATA)?,
            source_data_releases: load(PATHS::SOURCE)?,
            data_publishers: load(PATHS::PUBLISHER)?,
            countries: load(PATHS::COUNTRY)?,
        })
    }

    /// Writes all five metadata tables as Parquet files into `cache_dir`.
    ///
    /// `cache_dir` (including any missing parent directories) is created if
    /// it does not exist yet. Existing files with the same names are
    /// overwritten.
    ///
    /// # Errors
    ///
    /// Returns an error if the directory cannot be created or if any file
    /// cannot be written. Writing stops at the first failure, so files
    /// written before the failure remain on disk.
    pub fn write_cache<P: AsRef<Path>>(&self, cache_dir: P) -> anyhow::Result<()> {
        // `File::create` does not create parent directories, so make sure the
        // target directory exists before writing. This is a no-op when the
        // directory is already present.
        std::fs::create_dir_all(&cache_dir)?;

        let tables = [
            (PATHS::METRIC_METADATA, &self.metrics),
            (PATHS::GEOMETRY_METADATA, &self.geometries),
            (PATHS::SOURCE, &self.source_data_releases),
            (PATHS::PUBLISHER, &self.data_publishers),
            (PATHS::COUNTRY, &self.countries),
        ];
        for (file_name, df) in tables {
            df_to_file(prepend(&cache_dir, file_name), df)?;
        }
        Ok(())
    }
}
|
||
/// Describes a fully specified selection plan. The MetricIds should all | ||
/// be the ID variant. Geometry and years are baked in now. | ||
/// Advice specifies any alternative options that the user should | ||
|
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Consider moving this implementation to be part of the popgetter crate as part of #46 if the same caching approach can be used.