-
Notifications
You must be signed in to change notification settings - Fork 4
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Adição da table violencia_escola a br_fbsp_absp
- Loading branch information
Showing
8 changed files
with
234 additions
and
74 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,13 @@ | ||
{{
    config(
        alias='violencia_escola',
        schema='br_fbsp_absp'
    )
}}
-- Typed view over the staging table: every column is SAFE_CAST to its final
-- type, so a value that cannot be converted becomes NULL instead of failing
-- the query.
SELECT
    SAFE_CAST(ano AS INT64) ano,
    SAFE_CAST(uf AS STRING) uf,
    SAFE_CAST(tema AS STRING) tema,
    SAFE_CAST(item AS STRING) item,
    SAFE_CAST(quantidade_escola AS FLOAT64) quantidade_escola
FROM basedosdados-staging.br_fbsp_absp_staging.violencia_escola AS t
File renamed without changes.
File renamed without changes.
148 changes: 74 additions & 74 deletions
148
models/br_fbsp_absp/code/download_data.py → models/br_fbsp_absp/code/uf/download_data.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,74 +1,74 @@ | ||
from selenium import webdriver | ||
from selenium.webdriver.firefox.options import Options | ||
from selenium.webdriver.support.ui import WebDriverWait | ||
from selenium.webdriver.common.by import By | ||
from selenium.webdriver.support import expected_conditions as EC | ||
from time import sleep | ||
import os | ||
|
||
|
||
def select_selection_download(website):
    """Visit every tab of the dashboard and click through its downloads.

    The tab links are read from the page's dropdown menu and paired, by
    position, with the entries of the table below. For each tab, every
    option of the tab's select widget is chosen in turn and the tab's
    download button is clicked after each choice.

    Args:
        website: Selenium driver with the dashboard already loaded.

    Raises:
        IndexError: if the dropdown lists more tabs than the table describes.
    """
    # One row per tab, in dropdown order:
    # (number used in the "bs-select-<n>" element ids,
    #  how many options to iterate,
    #  id of the tab's download button).
    tabs = [
        (2, 18, 'tableau_mvi-downloadData'),
        (7, 2, 'tableau_estupro-downloadData'),
        (12, 5, 'tableau_patrimonial-downloadData'),
        (17, 1, 'tableau_mvi_armas-downloadData'),
        (22, 1, 'tableau_mvi_gastos-downloadData'),
        (27, 1, 'tableau_mvi_desap-downloadData'),
        (32, 1, 'tableau_mvi_pop_pris-downloadData'),
    ]

    click(website, By.CSS_SELECTOR, "a.dropdown-toggle")
    menu = website.find_element(By.CSS_SELECTOR, "ul.dropdown-menu")
    tab_links = menu.find_elements(By.CSS_SELECTOR, "li a")

    for position, tab_link in enumerate(tab_links):
        sleep(2)
        website.execute_script("arguments[0].click();", tab_link)

        select_number, option_count, download_button_id = tabs[position]
        for option_index in range(option_count):
            try:
                # Open the select widget, pick the option, then download.
                click(website, By.CSS_SELECTOR, f'[aria-owns="bs-select-{select_number}"]')
                click(website, By.ID, f'bs-select-{select_number}-{option_index}')
                click(website, By.ID, download_button_id)
            except Exception as Error:
                # Best effort: report the problem and move on to the next tab.
                print(Error)
                break
|
||
|
||
def _download_dir():
    """Return the working directory with its last ``code`` component swapped for ``input``."""
    parts = os.getcwd().split(os.sep)
    # Walk from the right so that a "code" directory higher up the tree
    # (or a name that merely contains "code") is left untouched.
    for position in range(len(parts) - 1, -1, -1):
        if parts[position] == "code":
            parts[position] = "input"
            break
    return os.sep.join(parts)


def create_website():
    """Start a headless Firefox, open the dashboard and wait until it is usable.

    Downloads are directed to the ``input`` counterpart of the current
    ``code`` working directory.

    Returns:
        The Selenium Firefox driver, positioned on the dashboard page.

    Raises:
        Any Selenium exception raised while loading the page or waiting for
        the download button; the browser is closed before it propagates.
    """
    options = Options()
    options.add_argument('-headless')
    options.set_preference('browser.download.folderList', 2)
    options.set_preference('browser.download.manager.showWhenStarting', False)
    options.set_preference('browser.download.dir', _download_dir())
    options.set_preference('browser.helperApps.neverAsk.saveToDisk', "application/x-gzip")

    website = webdriver.Firefox(options=options)
    try:
        website.get("http://forumseguranca.org.br:3838/")
        # The page counts as loaded once the first download button is visible.
        WebDriverWait(website, 10).until(
            EC.visibility_of_element_located((By.ID, 'tableau_mvi-downloadData')))
    except Exception:
        # Do not leave an orphaned browser process behind on failure.
        website.quit()
        raise

    return website
|
||
|
||
def click(website, by, match, time_sleep=2):
    """Locate one element, pause, then click it through JavaScript.

    Args:
        website: driver exposing ``find_element`` and ``execute_script``.
        by: locator strategy passed to ``find_element``.
        match: locator value passed to ``find_element``.
        time_sleep: seconds to wait between locating and clicking.
    """
    target = website.find_element(by, match)
    sleep(time_sleep)
    website.execute_script("arguments[0].click();", target)
|
||
|
||
def download_data():
    """Open the dashboard, trigger every download and always close the browser."""
    website = create_website()
    try:
        select_selection_download(website)
        # Give the last download a few seconds to finish before quitting.
        sleep(5)
    finally:
        # Runs on failure too, so the Firefox process is never left running.
        website.quit()
|
||
|
||
if __name__ == "__main__":
    # Run the scrape only when the file is executed as a script.
    download_data()
from selenium import webdriver | ||
from selenium.webdriver.firefox.options import Options | ||
from selenium.webdriver.support.ui import WebDriverWait | ||
from selenium.webdriver.common.by import By | ||
from selenium.webdriver.support import expected_conditions as EC | ||
from time import sleep | ||
import os | ||
|
||
|
||
def select_selection_download(website):
    """Visit every tab of the dashboard and click through its downloads.

    The tab links are read from the page's dropdown menu and paired, by
    position, with the entries of the table below. For each tab, every
    option of the tab's select widget is chosen in turn and the tab's
    download button is clicked after each choice.

    Args:
        website: Selenium driver with the dashboard already loaded.

    Raises:
        IndexError: if the dropdown lists more tabs than the table describes.
    """
    # One row per tab, in dropdown order:
    # (number used in the "bs-select-<n>" element ids,
    #  how many options to iterate,
    #  id of the tab's download button).
    tabs = [
        (2, 18, 'tableau_mvi-downloadData'),
        (7, 2, 'tableau_estupro-downloadData'),
        (12, 5, 'tableau_patrimonial-downloadData'),
        (17, 1, 'tableau_mvi_armas-downloadData'),
        (22, 1, 'tableau_mvi_gastos-downloadData'),
        (27, 1, 'tableau_mvi_desap-downloadData'),
        (32, 1, 'tableau_mvi_pop_pris-downloadData'),
    ]

    click(website, By.CSS_SELECTOR, "a.dropdown-toggle")
    menu = website.find_element(By.CSS_SELECTOR, "ul.dropdown-menu")
    tab_links = menu.find_elements(By.CSS_SELECTOR, "li a")

    for position, tab_link in enumerate(tab_links):
        sleep(2)
        website.execute_script("arguments[0].click();", tab_link)

        select_number, option_count, download_button_id = tabs[position]
        for option_index in range(option_count):
            try:
                # Open the select widget, pick the option, then download.
                click(website, By.CSS_SELECTOR, f'[aria-owns="bs-select-{select_number}"]')
                click(website, By.ID, f'bs-select-{select_number}-{option_index}')
                click(website, By.ID, download_button_id)
            except Exception as Error:
                # Best effort: report the problem and move on to the next tab.
                print(Error)
                break
|
||
|
||
def _download_dir():
    """Return the working directory with its last ``code`` component swapped for ``input``."""
    parts = os.getcwd().split(os.sep)
    # Walk from the right so that a "code" directory higher up the tree
    # (or a name that merely contains "code") is left untouched.
    for position in range(len(parts) - 1, -1, -1):
        if parts[position] == "code":
            parts[position] = "input"
            break
    return os.sep.join(parts)


def create_website():
    """Start a headless Firefox, open the dashboard and wait until it is usable.

    Downloads are directed to the ``input`` counterpart of the current
    ``code`` working directory.

    Returns:
        The Selenium Firefox driver, positioned on the dashboard page.

    Raises:
        Any Selenium exception raised while loading the page or waiting for
        the download button; the browser is closed before it propagates.
    """
    options = Options()
    options.add_argument('-headless')
    options.set_preference('browser.download.folderList', 2)
    options.set_preference('browser.download.manager.showWhenStarting', False)
    options.set_preference('browser.download.dir', _download_dir())
    options.set_preference('browser.helperApps.neverAsk.saveToDisk', "application/x-gzip")

    website = webdriver.Firefox(options=options)
    try:
        website.get("http://forumseguranca.org.br:3838/")
        # The page counts as loaded once the first download button is visible.
        WebDriverWait(website, 10).until(
            EC.visibility_of_element_located((By.ID, 'tableau_mvi-downloadData')))
    except Exception:
        # Do not leave an orphaned browser process behind on failure.
        website.quit()
        raise

    return website
|
||
|
||
def click(website, by, match, time_sleep=2):
    """Locate one element, pause, then click it through JavaScript.

    Args:
        website: driver exposing ``find_element`` and ``execute_script``.
        by: locator strategy passed to ``find_element``.
        match: locator value passed to ``find_element``.
        time_sleep: seconds to wait between locating and clicking.
    """
    target = website.find_element(by, match)
    sleep(time_sleep)
    website.execute_script("arguments[0].click();", target)
|
||
|
||
def download_data():
    """Open the dashboard, trigger every download and always close the browser."""
    website = create_website()
    try:
        select_selection_download(website)
        # Give the last download a few seconds to finish before quitting.
        sleep(5)
    finally:
        # Runs on failure too, so the Firefox process is never left running.
        website.quit()
|
||
|
||
if __name__ == "__main__":
    # Run the scrape only when the file is executed as a script.
    download_data()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,70 @@ | ||
import pandas as pd | ||
from dictionaries import ufs, temas | ||
|
||
|
||
def addition_df(xls, iloc_slice: list, table_n: int, abas: list, tema: str):
    """Read sheet ``T<table_n>`` and stack one long-format frame per answer.

    Args:
        xls: workbook handed to ``pd.read_excel``.
        iloc_slice: positional column indexes to keep; an empty list keeps
            every column.
        table_n: table number; the sheet read is named ``T<table_n>``.
        abas: answer labels; the label at position ``i`` is paired with the
            column at position ``i + 1`` of the selected columns.
        tema: theme text stored in every output row.

    Returns:
        The concatenation of the per-label frames built by
        ``create_model_dataframe``.
    """
    # Row 7 (0-based) holds the header; rows with fewer than two non-null
    # cells are discarded.
    sheet = pd.read_excel(xls, f'T{table_n}', header=7).dropna(thresh=2)
    selected = sheet.iloc[:, iloc_slice] if len(iloc_slice) > 0 else sheet

    frames = []
    for position, label in enumerate(abas):
        frames.append(create_model_dataframe(selected, position, tema, label))
    return pd.concat(frames)
|
||
|
||
def create_model_dataframe(temp_df, n: int, tema: str, column: str, ano: int = 2021):
    """Build the long-format frame for one answer column of a table.

    Args:
        temp_df: wide table whose first column holds the state and whose
            following columns hold one value column per answer.
        n: zero-based answer position; values come from column ``n + 1``.
        tema: theme text repeated on every row.
        column: answer label repeated on every row as ``item``.
        ano: year repeated on every row.

    Returns:
        DataFrame with columns ``ano``, ``uf``, ``tema``, ``item`` and
        ``quantidade_escola``, indexed like ``temp_df``.
    """
    states = temp_df.iloc[:, 0]
    values = temp_df.iloc[:, n + 1]
    # Scalars are broadcast along the index supplied by the two Series.
    return pd.DataFrame({
        "ano": ano,
        "uf": states,
        "tema": tema,
        "item": column,
        "quantidade_escola": values,
    })
|
||
|
||
def create_temp_dadataframe(xls, number_table: int, tema: str):
    """Pick the column layout for a table number and load it in long format.

    Table 95 has three answers, tables 96-108 four, tables 109-110 five,
    and table 111 uses every column of the sheet with six topic labels.

    Args:
        xls: workbook forwarded to ``addition_df``.
        number_table: table number, expected between 95 and 111.
        tema: theme text forwarded to ``addition_df``.

    Returns:
        The frame produced by ``addition_df`` for that table.

    Raises:
        ValueError: if ``number_table`` is outside the supported range.
    """
    if number_table == 95:
        keep_columns = [0, 1, 3, 5]
        labels = ["Sim", "Não", "Sem Resposta"]
    elif 95 < number_table < 109:
        keep_columns = [0, 1, 3, 5, 7]
        labels = ["Nunca", "Poucas vezes", "Várias vezes", "Sem resposta"]
    elif 109 <= number_table <= 110:
        keep_columns = [0, 1, 3, 5, 7, 9]
        labels = ["Muito adequado", "Adequado", "Inadequado", "Muito inadequado", "Sem resposta"]
    elif number_table == 111:
        # An empty selection tells addition_df to keep all columns.
        keep_columns = []
        labels = ["Violência", "Bullying", "Machismo", "Homofobia", "Uso de drogas",
                  "Relações étnico-raciais/racismo"]
    else:
        raise ValueError("number_table fora do permitido. Apenas entre 95 á 111")

    return addition_df(xls, keep_columns, number_table, labels, tema)
|
||
|
||
def get_clean_data() -> None:
    """Turn the yearbook workbook into the long-format school-violence CSV.

    Reads ``../input/anuario-2023.xlsx``, loads every table listed in
    ``temas``, converts state names to their abbreviations, removes the
    national ("Brasil") rows, renames the last theme and writes
    ``../output/br_fbsp_absp_escola_2021.csv``. Both paths are relative to
    the current working directory.
    """
    # The context manager closes the workbook handle once all sheets are read.
    with pd.ExcelFile('../input/anuario-2023.xlsx') as xls:
        df = pd.concat([create_temp_dadataframe(xls, number_table, tema)
                        for tema, number_table in temas.items()])

    # Map full state names to their two-letter codes.
    df["uf"] = df["uf"].replace(ufs)

    # Filter with a boolean mask: the concatenated frame repeats index labels
    # across tables, so dropping by label could also remove state rows that
    # happen to share a label with a "Brasil" row.
    df = df.loc[df["uf"] != "Brasil"].copy()

    # Literal (non-regex) replacement of the theme text.
    df["tema"] = df["tema"].str.replace(
        "% de preenchimento - Temáticas",
        "Proporção de escolas com projeto no tema",
        regex=False)

    df.to_csv("../output/br_fbsp_absp_escola_2021.csv", index=False)
|
||
|
||
if __name__ == "__main__":
    # Build the CSV only when the file is executed as a script.
    get_clean_data()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,45 @@ | ||
# Full state name -> two-letter state abbreviation.
ufs = {
    "Acre": "AC",
    "Alagoas": "AL",
    "Amapá": "AP",
    "Amazonas": "AM",
    "Bahia": "BA",
    "Ceará": "CE",
    "Distrito Federal": "DF",
    "Espírito Santo": "ES",
    "Goiás": "GO",
    "Maranhão": "MA",
    "Mato Grosso": "MT",
    "Mato Grosso do Sul": "MS",
    "Minas Gerais": "MG",
    "Pará": "PA",
    "Paraíba": "PB",
    "Paraná": "PR",
    "Pernambuco": "PE",
    "Piauí": "PI",
    "Rio de Janeiro": "RJ",
    "Rio Grande do Norte": "RN",
    "Rio Grande do Sul": "RS",
    "Rondônia": "RO",
    "Roraima": "RR",
    "Santa Catarina": "SC",
    "São Paulo": "SP",
    "Sergipe": "SE",
    "Tocantins": "TO",
}
|
||
# Theme text -> number of the workbook table (sheet "T<number>") that holds it.
temas = {
    "O calendário escolar de 2021 foi interrompido durante vários dias por episódios de violência?": 95,
    "Atentado à vida": 96,
    "Lesão corporal": 97,
    "Roubo ou furto": 98,
    "Tráfico de drogas": 99,
    "Permanência de pessoas sob efeito de álcool": 100,
    "Permanência de pessoas sob efeito de drogas": 101,
    "Porte de arma (revólver, faca, canivete etc.)": 102,
    "Assédio sexual": 103,
    "Discriminação": 104,
    "Bullying (ameaças ou ofensas verbais)": 105,
    "Invasão do espaço escolar": 106,
    "Depredação do patrimônio escolar (vandalismo)": 107,
    "Tiroteio ou bala perdida": 108,
    "Condições de segurança na entrada e saída da escola": 109,
    "Muros e/ou grades que isolam a escola do ambiente externo": 110,
    "% de preenchimento - Temáticas": 111,
}
19 changes: 19 additions & 0 deletions
19
models/br_fbsp_absp/code/violencia_escola/download_data.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,19 @@ | ||
import requests | ||
|
||
|
||
def download_data():
    """Download the 2023 yearbook workbook into ``../input/``.

    The destination path is relative to the current working directory.

    Raises:
        requests.HTTPError: if the server answers with an error status.
        requests.Timeout: if the server does not respond within the timeout.
    """
    url = "https://forumseguranca.org.br/wp-content/uploads/2023/07/anuario-2023.xlsx"
    # Without an explicit timeout a stalled server would block this call forever.
    response = requests.get(url, timeout=60)
    response.raise_for_status()

    # TODO fix table names
    with open("../input/anuario-2023.xlsx", "wb") as f:
        # response.content is the raw body as bytes, hence binary mode.
        f.write(response.content)

    print("Successfully downloaded")
|
||
|
||
if __name__ == '__main__':
    # Fetch the workbook only when the file is executed as a script.
    download_data()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters