Skip to content

Commit

Permalink
Merge branch 'main' into staging/cgu-licitacao-contrato
Browse files (browse the repository at this point in the history)
  • Loading branch information
mergify[bot] authored Dec 3, 2024
2 parents 7147b8d + 8a49317 commit 0abdbd0
Show file tree
Hide file tree
Showing 3 changed files with 7 additions and 4 deletions.
2 changes: 0 additions & 2 deletions pipelines/datasets/br_ms_sinan/flows.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,7 @@
# -*- coding: utf-8 -*-
from copy import deepcopy

from prefect.run_configs import KubernetesRun
from prefect.storage import GCS

from pipelines.utils.crawler_datasus.flows import flow_sinan
from pipelines.constants import constants
from pipelines.datasets.br_ms_sinan.schedules import (
Expand Down
1 change: 0 additions & 1 deletion pipelines/utils/crawler_datasus/flows.py
Original file line number Diff line number Diff line change
Expand Up @@ -423,7 +423,6 @@
file_list=dbc_files,
dataset_id=dataset_id,
table_id=table_id,
chunk_size = 150000,
upstream_tasks=[dbf_files, dbc_files],
)

Expand Down
8 changes: 7 additions & 1 deletion pipelines/utils/crawler_datasus/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
"""
General purpose functions for the br_ms_cnes project
"""

import gc
import asyncio
from datetime import datetime
from ftplib import FTP
Expand Down Expand Up @@ -88,6 +88,12 @@ def dbf_to_parquet(dbf: str, table_id: str, counter: int, chunk_size:int) -> st

df.to_parquet(parquet_filepath, index=None, compression='gzip')

del df

gc.collect()




except struct.error as err:
# unlink .parquet extension and remove dbf file
Expand Down

0 comments on commit 0abdbd0

Please sign in to comment.