Skip to content

Commit

Permalink
Adicionar proxy
Browse files: browse the repository at this point in the history
  • Loading branch information
Winzen committed Nov 27, 2024
1 parent 2e82422 commit 662f5b1
Showing 1 changed file with 17 additions and 4 deletions.
21 changes: 17 additions & 4 deletions pipelines/utils/crawler_tse_eleicoes/utils.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -116,17 +116,30 @@ def download_extract_zip(self, url: str, chunk_size=128) -> None:
"""
self.path_input.mkdir(parents=True, exist_ok=True)

# request_headers = {
# "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.149 Safari/537.36",
# }

# r = requests.get(url, headers=request_headers, stream=True, timeout=60)

request_headers = {
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.149 Safari/537.36",
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.149 Safari/537.36",
"Connection": "keep-alive"
}
proxies = {
"https": tse_constants.PROXY_LINK.value
}

r = requests.get(url, headers=request_headers, stream=True, timeout=60)
r = requests.get(url, headers=request_headers, proxies=proxies, verify=False, timeout=300)

save_path = self.path_input / url.split("/")[-1]

with open(save_path, "wb") as fd:
for chunk in r.iter_content(chunk_size=chunk_size):
fd.write(chunk)
fd.write(r.content)

# with open(save_path, "wb") as fd:
# for chunk in r.iter_content(chunk_size=chunk_size):
# fd.write(chunk)

with zipfile.ZipFile(save_path) as z:
z.extractall(self.path_input)
Expand Down

0 comments on commit 662f5b1

Please sign in to comment.