Skip to content

Commit

Permalink
remove logs
Browse files Browse the repository at this point in the history
  • Loading branch information
tricktx committed Sep 4, 2024
1 parent 5d4bfd7 commit b7827b0
Showing 1 changed file with 18 additions and 54 deletions.
72 changes: 18 additions & 54 deletions pipelines/datasets/br_stf_corte_aberta/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,69 +16,33 @@
from selenium.webdriver.firefox.options import Options

def web_scrapping():
    """Download the STF decisions export using a headless Firefox session.

    Makes sure the directory ``stf_constants.STF_INPUT.value`` exists,
    configures Firefox to save CSV downloads there without prompting, opens
    the STF transparency "decisoes" page and clicks its export button.

    Returns:
        None. The only effect is the file the browser saves to the input
        directory.

    Raises:
        Any Selenium error raised while driving the page propagates to the
        caller (for instance when the export button does not become
        clickable within the 60 second wait). The browser is always closed
        first.
    """
    download_dir = stf_constants.STF_INPUT.value
    # exist_ok avoids the check-then-create race of os.path.exists + os.mkdir.
    os.makedirs(download_dir, exist_ok=True)

    options = Options()
    options.add_argument("--headless")
    # NOTE(review): the flags below look Chromium-specific; they are kept
    # as-is from the original Firefox setup - confirm Firefox honours them.
    options.add_argument("--no-sandbox")
    options.add_argument("--disable-dev-shm-usage")
    options.add_argument("--disable-blink-features=AutomationControlled")
    options.add_argument("--disable-extensions")
    options.add_argument("--incognito")

    # Firefox-specific download settings.
    # folderList=2 makes Firefox save into the directory given below.
    options.set_preference("browser.download.folderList", 2)
    options.set_preference("browser.download.dir", download_dir)
    # MIME type that is downloaded automatically, without a save dialog.
    options.set_preference("browser.helperApps.neverAsk.saveToDisk", "text/csv")
    options.set_preference("browser.download.manager.showWhenStarting", False)
    # Disable the built-in PDF viewer.
    options.set_preference("pdfjs.disabled", True)

    driver = webdriver.Firefox(options=options)
    try:
        driver.get("https://transparencia.stf.jus.br/extensions/decisoes/decisoes.html")
        # Fixed pauses are kept from the original flow; presumably they give
        # the dashboard time to render before it is interacted with.
        time.sleep(10)
        driver.maximize_window()
        time.sleep(15)
        export_button = WebDriverWait(driver, 60).until(
            EC.element_to_be_clickable((By.XPATH, '//*[@id="EXPORT-BUTTON-2"]/button'))
        )
        export_button.click()
        # Presumably leaves time for the download to finish before the
        # browser is closed - TODO confirm and replace with a file check.
        time.sleep(15)
    finally:
        # Always release the browser process, even when a step above fails.
        driver.quit()


Expand Down Expand Up @@ -159,7 +123,7 @@ def partition_data(df: pd.DataFrame, column_name: list[str], output_directory: s


def check_for_data():
log("task 1")

web_scrapping()
log("Iniciando o check for data")
arquivos = os.listdir(stf_constants.STF_INPUT.value)
Expand Down

0 comments on commit b7827b0

Please sign in to comment.