Skip to content

Commit

Permalink
Teste Proxy Parameter
Browse files (browse the repository at this point in the history)
  • Loading branch information
Winzen committed Nov 27, 2024
1 parent 70eeaa3 commit a3cbcee
Show file tree
Hide file tree
Showing 3 changed files with 8 additions and 4 deletions.
4 changes: 3 additions & 1 deletion pipelines/utils/crawler_tse_eleicoes/flows.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,8 @@

table_id = Parameter("table_id", required=True)

port_proxy = Parameter("proxy", required=True)

materialization_mode = Parameter(
"materialization_mode", default="dev", required=False
)
Expand All @@ -58,7 +60,7 @@
prefix="Dump: ", dataset_id=dataset_id, table_id=table_id, wait=table_id
)

flow = flows_control(table_id=table_id, mode=materialization_mode, upstream_tasks=[rename_flow_run])
flow = flows_control(table_id=table_id, mode=materialization_mode, proxy=port_proxy, upstream_tasks=[rename_flow_run])

data_source_max_date = get_data_source_max_date(flow_class=flow, upstream_tasks=[flow])

Expand Down
3 changes: 2 additions & 1 deletion pipelines/utils/crawler_tse_eleicoes/tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@

@task
# Classes de formatação
def flows_control(table_id: str, mode: str) -> Type[T]:
def flows_control(table_id: str, proxy: str, mode: str) -> Type[T]:

catalog = flows_catalog()

Expand All @@ -28,6 +28,7 @@ def flows_control(table_id: str, mode: str) -> Type[T]:
source=catalog.get(table_id)["source"],
date_column_name=catalog.get(table_id)["date_column_name"],
date_format=catalog.get(table_id)["date_format"],
proxy=proxy,
mode=mode)

return flow
Expand Down
5 changes: 3 additions & 2 deletions pipelines/utils/crawler_tse_eleicoes/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -84,13 +84,14 @@ def flows_catalog() -> dict:

class BrTseEleicoes:

def __init__(self, urls: list, table_id: str, source: str,
def __init__(self, urls: list, table_id: str, source: str, proxy: str,
date_column_name: str, date_format: str, year: int = 2024, mode: str = "dev"):

self.urls = urls
self.year = year
self.table_id = table_id
self.source = source
self.proxy = proxy
self.date_column_name = date_column_name
self.date_format = date_format
self.billing_project_id = tse_constants.MODE_TO_PROJECT_DICT.value[mode]
Expand Down Expand Up @@ -127,7 +128,7 @@ def download_extract_zip(self, url: str, chunk_size=128) -> None:
"Connection": "keep-alive"
}
proxies = {
"https": tse_constants.PROXY_LINK.value
"https": self.proxy
}

r = requests.get(url, headers=request_headers, proxies=proxies, verify=False, timeout=300)
Expand Down

0 comments on commit a3cbcee

Please sign in to comment.