Skip to content

Commit

Permalink
save csv.gz in s3
Browse files Browse the repository at this point in the history
  • Loading branch information
silil committed Feb 1, 2024
1 parent b8f57a8 commit 15d3c01
Showing 1 changed file with 3 additions and 3 deletions.
6 changes: 3 additions & 3 deletions src/triage/component/architect/builders.py
Original file line number Diff line number Diff line change
Expand Up @@ -308,7 +308,7 @@ def build_matrix(
)

feature_queries = self.feature_load_queries(feature_dictionary, entity_date_table_name)
- logger.spam(f"feature queries, number of queries: {len(feature_queries)}")
+ logger.debug(f"feature queries, number of queries: {len(feature_queries)}")

label_query = self.label_load_query(
label_name,
Expand All @@ -322,7 +322,7 @@ def build_matrix(
matrix_store.metadata = matrix_metadata
#labels = output.pop(matrix_store.label_column_name)
matrix_store.matrix_label_tuple = output, labels
- #matrix_store.save()
+ matrix_store.save()
logger.info(f"Saving matrix metadata (yaml) for matrix {matrix_uuid}")
matrix_store.save_matrix_metadata()

Expand Down Expand Up @@ -578,7 +578,7 @@ def stitch_csvs(self, features_queries, label_query, matrix_store, matrix_uuid):
logger.debug(f"Time converting from polars to pandas (sec): {(end-start)/60}")
df.set_index(["entity_id", "as_of_date"], inplace=True)
logger.debug(f"df data types: {df.dtypes}")
- logger.spam(f"Pandas DF memory usage: {df.memory_usage(deep=True).sum()/1000000} MB")
+ logger.debug(f"Pandas DF memory usage: {df.memory_usage(deep=True).sum()/1000000} MB")

logger.debug(f"Generating gzip from full matrix csv")
self.generate_gzip(path_, matrix_uuid)
Expand Down

0 comments on commit 15d3c01

Please sign in to comment.