Merge pull request #845 from basedosdados/staging/fix_sia
[fix] br_ms_sia
folhesgabriel authored Sep 11, 2024
2 parents 548602d + cbc58d6 commit cc54479
Showing 3 changed files with 10 additions and 5 deletions.
pipelines/datasets/br_ms_sia/flows.py (4 changes: 2 additions & 2 deletions)
@@ -21,12 +21,12 @@
br_ms_sia_producao_ambulatorial.code_owners = ["Gabriel Pisa"]
br_ms_sia_producao_ambulatorial.storage = GCS(constants.GCS_FLOWS_BUCKET.value)
br_ms_sia_producao_ambulatorial.run_config = KubernetesRun(image=constants.DOCKER_IMAGE.value)
-#br_ms_sia_producao_ambulatorial.schedule = schedule_br_ms_sia_producao_ambulatorial
+br_ms_sia_producao_ambulatorial.schedule = schedule_br_ms_sia_producao_ambulatorial


br_ms_sia_psicossocial = deepcopy(flow_siasus)
br_ms_sia_psicossocial.name = "br_ms_sia.psicossocial"
br_ms_sia_psicossocial.code_owners = ["Gabriel Pisa"]
br_ms_sia_psicossocial.storage = GCS(constants.GCS_FLOWS_BUCKET.value)
br_ms_sia_psicossocial.run_config = KubernetesRun(image=constants.DOCKER_IMAGE.value)
-#br_ms_sia_psicossocial.schedule = schedule_br_ms_sia_psicossocial
+br_ms_sia_psicossocial.schedule = schedule_br_ms_sia_psicossocial
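
The change above re-enables the Prefect schedules for both SIA flows. For context, a minimal sketch of how such a schedule object is typically defined in Prefect 1.x; the cron expression and start date below are assumptions, not the repository's actual values.

# Hypothetical sketch only: the real schedule objects are defined elsewhere in the repo.
from datetime import datetime

from prefect.schedules import Schedule
from prefect.schedules.clocks import CronClock

# Assumed shape of schedule_br_ms_sia_producao_ambulatorial; cron and dates are placeholders.
schedule_br_ms_sia_producao_ambulatorial = Schedule(
    clocks=[
        CronClock(
            cron="0 3 * * *",                # placeholder: run daily at 03:00 UTC
            start_date=datetime(2024, 1, 1),
        )
    ]
)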
pipelines/utils/crawler_datasus/flows.py (7 changes: 6 additions & 1 deletion)
@@ -253,7 +253,12 @@
upstream_tasks=[wait_for_materialization],
)
flow_siasus.storage = GCS(constants.GCS_FLOWS_BUCKET.value)
-flow_siasus.run_config = KubernetesRun(image=constants.DOCKER_IMAGE.value)
+flow_siasus.run_config = KubernetesRun(
+    image=constants.DOCKER_IMAGE.value,
+    memory_limit = '12Gi',
+    memory_request = '4Gi',
+    cpu_limit = 1,
+)



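For reference, a hedged sketch of how the Prefect 1.x KubernetesRun resource keywords map onto the container resources of the generated Kubernetes job; the image name below is a placeholder, not the project's Docker image.

# Illustrative sketch, not the repository's code.
from prefect.run_configs import KubernetesRun

run_config = KubernetesRun(
    image="example/docker-image:latest",  # placeholder image
    memory_request="4Gi",    # scheduler must reserve at least 4Gi for the flow pod
    memory_limit="12Gi",     # pod is OOM-killed if it exceeds 12Gi
    cpu_limit=1,             # container is throttled above 1 CPU
)
# Roughly equivalent container spec in the resulting Kubernetes job:
#   resources:
#     requests: {memory: "4Gi"}
#     limits:   {memory: "12Gi", cpu: "1"}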
pipelines/utils/crawler_datasus/tasks.py (4 changes: 2 additions & 2 deletions)
Expand Up @@ -353,7 +353,7 @@ def is_empty(lista):


@task
-def read_dbf_save_parquet_chunks(file_list: list, table_id: str, dataset_id:str= "br_ms_sia", chunk_size : int = 400000) -> str:
+def read_dbf_save_parquet_chunks(file_list: list, table_id: str, dataset_id:str= "br_ms_sia", chunk_size : int = 100000) -> str:
"""
Convert dbc to parquet
"""
@@ -362,7 +362,7 @@ def read_dbf_save_parquet_chunks(file_list: list, table_id: str, dataset_id:str=

dbf_file_list = [file.replace(".dbc", ".dbf") for file in file_list]
_counter = 0
-log(f'----coutner {_counter}')
+log(f'----counter {_counter}')
for file in tqdm(dbf_file_list):


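Lowering the default chunk_size from 400,000 to 100,000 rows keeps each in-memory batch well under the new 12Gi pod memory limit. A minimal sketch of chunked DBF-to-parquet conversion, assuming dbfread and pandas; the function and file names are illustrative, not the task's actual implementation.

# Illustrative sketch of chunked DBF -> parquet conversion; not the repo's exact task.
from dbfread import DBF   # assumption: dbfread is the DBF reader in use
import pandas as pd


def dbf_to_parquet_chunks(dbf_path: str, output_prefix: str, chunk_size: int = 100_000) -> None:
    """Stream a DBF file and flush it to parquet in fixed-size chunks to bound memory use."""
    records, part = [], 0
    for record in DBF(dbf_path, encoding="iso-8859-1"):  # DATASUS files are typically latin-1
        records.append(record)
        if len(records) >= chunk_size:
            pd.DataFrame(records).to_parquet(f"{output_prefix}_part{part}.parquet", index=False)
            records, part = [], part + 1
    if records:  # flush the final partial chunk
        pd.DataFrame(records).to_parquet(f"{output_prefix}_part{part}.parquet", index=False)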
