Skip to content

Commit

Permalink
Merge pull request #487 from basedosdados/br_cgu_dados_abertos
Browse files: browse the repository at this point in the history
fix table-approve with parquet files
  • Loading branch information
laura-l-amaral authored Mar 14, 2024
2 parents 27037f6 + 59bac18 commit 5efe1cc
Show file tree
Hide file tree
Showing 4 changed files with 10 additions and 13 deletions.
20 changes: 10 additions & 10 deletions .github/workflows/scripts/table_approve.py
Original file line number Diff line number Diff line change
Expand Up @@ -111,7 +111,7 @@ def push_table_to_bq(
Dataset(dataset_id).update(mode="prod")
delete_storage_path = file_path.replace("./downloaded_data/", "")
print(
f"DELETE HEADER FILE FROM basedosdados/staing/{dataset_id}_staging/{table_id}/{delete_storage_path}"
f"DELETE HEADER FILE FROM basedosdados/staging/{dataset_id}_staging/{table_id}/{delete_storage_path}"
)
st = Storage(dataset_id=dataset_id, table_id=table_id)
st.delete_file(filename=delete_storage_path, mode="staging")
Expand Down Expand Up @@ -146,27 +146,27 @@ def save_header_files(dataset_id, table_id):
print("Found blob: ", str(blob.name))
print("Renamed blob: ", blob_path)
break
### save table header in storage

print(f"DOWNLOAD HEADER FILE FROM basedosdados-dev.{dataset_id}_staging.{table_id}")
query = f"""
SELECT * FROM `basedosdados-dev.{dataset_id}_staging.{table_id}` LIMIT 1
"""
df = bd.read_sql(query, billing_project_id="basedosdados", from_file=True)
df = df.drop(columns=partitions)

file_name = blob_path.split("/")[-1]
file_type = file_name.split(".")[-1]

path = Path(blob_path.replace(f"/{file_name}", ""))
path.mkdir(parents=True, exist_ok=True)

### save table header in storage
if file_type == "csv":
print(f"DOWNLOAD HEADER FILE FROM basedosdados-dev.{dataset_id}_staging.{table_id}")
query = f"""
SELECT * FROM `basedosdados-dev.{dataset_id}_staging.{table_id}` LIMIT 1
"""
df = bd.read_sql(query, billing_project_id="basedosdados", from_file=True)
df = df.drop(columns=partitions)

file_path = f"./{path}/table_approve_temp_file_271828.csv"
df.to_csv(file_path, index=False)
elif file_type == "parquet":
file_path = f"./{path}/table_approve_temp_file_271828.parquet"
df.to_parquet(file_path)
blob.download_to_filename(file_path)
print("SAVE HEADER FILE: ", file_path)
return file_path

Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
{{ config(alias="conjunto", schema="br_cgu_dados_abertos") }}
--
select
safe_cast(nullif(id, "") as string) id,
safe_cast(nullif(titulo, "") as string) nome,
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
{{ config(alias="organizacao", schema="br_cgu_dados_abertos") }}
--
select
safe_cast(nullif(o.id, "") as string) id,
safe_cast(nullif(o.titulo, "") as string) nome,
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
{{ config(alias="recurso", schema="br_cgu_dados_abertos") }}
--
select
safe_cast(nullif(id, "") as string) id,
safe_cast(nullif(id_conjunto, "") as string) id_conjunto,
Expand Down

0 comments on commit 5efe1cc

Please sign in to comment.