Skip to content

Commit

Permalink
Adição da table violencia_escola a br_fbsp_absp
Browse files Browse the repository at this point in the history
  • Loading branch information
Winzen committed Jan 4, 2024
1 parent 58e4fe6 commit 7fdb6dd
Show file tree
Hide file tree
Showing 8 changed files with 234 additions and 74 deletions.
13 changes: 13 additions & 0 deletions models/br_fbsp_absp/br_fbsp_absp__violencia_escola.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
{{
    config(
        alias='violencia_escola',
        schema='br_fbsp_absp'
    )
}}
-- Typed view over the staging table: every column is SAFE_CAST so that a
-- value that cannot be converted becomes NULL instead of failing the query.
SELECT
    SAFE_CAST(ano AS INT64) ano,
    SAFE_CAST(uf AS STRING) uf,
    SAFE_CAST(tema AS STRING) tema,
    SAFE_CAST(item AS STRING) item,
    SAFE_CAST(quantidade_escola AS FLOAT64) quantidade_escola
FROM basedosdados-staging.br_fbsp_absp_staging.violencia_escola AS t
File renamed without changes.
File renamed without changes.
Original file line number Diff line number Diff line change
@@ -1,74 +1,74 @@
from selenium import webdriver
from selenium.webdriver.firefox.options import Options
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from time import sleep
import os


def select_selection_download(website):
    """Open each entry of the page's dropdown menu and click through its options.

    Every dropdown link is paired, in order, with one row of ``abas_dic``.
    A row holds: the numeric suffix of the ``bs-select-*`` widget to open,
    how many of that widget's options to iterate, and the id of the element
    clicked after each option is chosen (the ids end in ``downloadData`` --
    presumably the download button; confirm against the page).

    Args:
        website: Selenium WebDriver (or compatible object) already showing
            the page.
    """
    abas_dic = [[2, 18, 'tableau_mvi-downloadData'],
                [7, 2, 'tableau_estupro-downloadData'],
                [12, 5, 'tableau_patrimonial-downloadData'],
                [17, 1, 'tableau_mvi_armas-downloadData'],
                [22, 1, 'tableau_mvi_gastos-downloadData'],
                [27, 1, 'tableau_mvi_desap-downloadData'],
                [32, 1, 'tableau_mvi_pop_pris-downloadData']]

    click(website, By.CSS_SELECTOR, "a.dropdown-toggle")
    menu = website.find_element(By.CSS_SELECTOR, "ul.dropdown-menu")
    tabs = menu.find_elements(By.CSS_SELECTOR, "li a")

    # zip() stops at the shorter sequence, so a dropdown with more links than
    # configured rows no longer fails with IndexError on the extra link.
    for aba, (select_id, n_options, download_id) in zip(tabs, abas_dic):
        sleep(2)
        website.execute_script("arguments[0].click();", aba)

        for n in range(n_options):
            try:
                click(website, By.CSS_SELECTOR, f'[aria-owns="bs-select-{select_id}"]')
                click(website, By.ID, f'bs-select-{select_id}-{n}')
                click(website, By.ID, download_id)
            except Exception as error:
                # Best effort: report the failure and skip the remaining
                # options of this tab, then carry on with the next tab.
                print(error)
                break


def create_website():
    """Start a headless Firefox session and load the dashboard page.

    Firefox download preferences are set so that files go to the current
    working directory with every "code" substring replaced by "input", and
    ``application/x-gzip`` is listed under ``neverAsk.saveToDisk``.

    Returns:
        The WebDriver, once the element with id ``tableau_mvi-downloadData``
        is visible (waits up to 10 seconds).

    Raises:
        Whatever Selenium raises while loading or waiting; the browser is
        closed before the exception propagates.
    """
    options = Options()
    options.add_argument('-headless')
    options.set_preference('browser.download.folderList', 2)
    options.set_preference('browser.download.manager.showWhenStarting', False)
    options.set_preference('browser.download.dir', os.getcwd().replace("code", "input"))
    options.set_preference('browser.helperApps.neverAsk.saveToDisk', "application/x-gzip")

    website = webdriver.Firefox(options=options)
    try:
        website.get("http://forumseguranca.org.br:3838/")
        wait = WebDriverWait(website, 10)
        wait.until(EC.visibility_of_element_located((By.ID, 'tableau_mvi-downloadData')))
    except Exception:
        # The caller never receives the driver on failure, so close it here
        # instead of leaving an orphaned headless browser behind.
        website.quit()
        raise

    return website


def click(website, by, match, time_sleep=2):
    """Locate one element, pause, then click it through JavaScript.

    Args:
        website: object exposing ``find_element`` and ``execute_script``.
        by: locator strategy forwarded to ``find_element``.
        match: locator value forwarded to ``find_element``.
        time_sleep: seconds to wait between locating and clicking.
    """
    target = website.find_element(by, match)
    sleep(time_sleep)
    # The click is dispatched by script rather than through the element API.
    website.execute_script("arguments[0].click();", target)


def download_data():
    """Run the whole scrape: open the browser, click through every tab, close.

    The browser is closed even when the click sequence raises, so a failed
    run does not leave a headless Firefox process behind.
    """
    website = create_website()
    try:
        select_selection_download(website)
        # Presumably gives the last download time to finish before quitting.
        sleep(5)
    finally:
        website.quit()


if __name__ == '__main__':
    download_data()
from selenium import webdriver
from selenium.webdriver.firefox.options import Options
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from time import sleep
import os


def select_selection_download(website):
    """Open each entry of the page's dropdown menu and click through its options.

    Every dropdown link is paired, in order, with one row of ``abas_dic``.
    A row holds: the numeric suffix of the ``bs-select-*`` widget to open,
    how many of that widget's options to iterate, and the id of the element
    clicked after each option is chosen (the ids end in ``downloadData`` --
    presumably the download button; confirm against the page).

    Args:
        website: Selenium WebDriver (or compatible object) already showing
            the page.
    """
    abas_dic = [[2, 18, 'tableau_mvi-downloadData'],
                [7, 2, 'tableau_estupro-downloadData'],
                [12, 5, 'tableau_patrimonial-downloadData'],
                [17, 1, 'tableau_mvi_armas-downloadData'],
                [22, 1, 'tableau_mvi_gastos-downloadData'],
                [27, 1, 'tableau_mvi_desap-downloadData'],
                [32, 1, 'tableau_mvi_pop_pris-downloadData']]

    click(website, By.CSS_SELECTOR, "a.dropdown-toggle")
    menu = website.find_element(By.CSS_SELECTOR, "ul.dropdown-menu")
    tabs = menu.find_elements(By.CSS_SELECTOR, "li a")

    # zip() stops at the shorter sequence, so a dropdown with more links than
    # configured rows no longer fails with IndexError on the extra link.
    for aba, (select_id, n_options, download_id) in zip(tabs, abas_dic):
        sleep(2)
        website.execute_script("arguments[0].click();", aba)

        for n in range(n_options):
            try:
                click(website, By.CSS_SELECTOR, f'[aria-owns="bs-select-{select_id}"]')
                click(website, By.ID, f'bs-select-{select_id}-{n}')
                click(website, By.ID, download_id)
            except Exception as error:
                # Best effort: report the failure and skip the remaining
                # options of this tab, then carry on with the next tab.
                print(error)
                break


def create_website():
    """Start a headless Firefox session and load the dashboard page.

    Firefox download preferences are set so that files go to the current
    working directory with every "code" substring replaced by "input", and
    ``application/x-gzip`` is listed under ``neverAsk.saveToDisk``.

    Returns:
        The WebDriver, once the element with id ``tableau_mvi-downloadData``
        is visible (waits up to 10 seconds).

    Raises:
        Whatever Selenium raises while loading or waiting; the browser is
        closed before the exception propagates.
    """
    options = Options()
    options.add_argument('-headless')
    options.set_preference('browser.download.folderList', 2)
    options.set_preference('browser.download.manager.showWhenStarting', False)
    options.set_preference('browser.download.dir', os.getcwd().replace("code", "input"))
    options.set_preference('browser.helperApps.neverAsk.saveToDisk', "application/x-gzip")

    website = webdriver.Firefox(options=options)
    try:
        website.get("http://forumseguranca.org.br:3838/")
        wait = WebDriverWait(website, 10)
        wait.until(EC.visibility_of_element_located((By.ID, 'tableau_mvi-downloadData')))
    except Exception:
        # The caller never receives the driver on failure, so close it here
        # instead of leaving an orphaned headless browser behind.
        website.quit()
        raise

    return website


def click(website, by, match, time_sleep=2):
    """Locate one element, pause, then click it through JavaScript.

    Args:
        website: object exposing ``find_element`` and ``execute_script``.
        by: locator strategy forwarded to ``find_element``.
        match: locator value forwarded to ``find_element``.
        time_sleep: seconds to wait between locating and clicking.
    """
    target = website.find_element(by, match)
    sleep(time_sleep)
    # The click is dispatched by script rather than through the element API.
    website.execute_script("arguments[0].click();", target)


def download_data():
    """Run the whole scrape: open the browser, click through every tab, close.

    The browser is closed even when the click sequence raises, so a failed
    run does not leave a headless Firefox process behind.
    """
    website = create_website()
    try:
        select_selection_download(website)
        # Presumably gives the last download time to finish before quitting.
        sleep(5)
    finally:
        website.quit()


if __name__ == '__main__':
    download_data()
70 changes: 70 additions & 0 deletions models/br_fbsp_absp/code/violencia_escola/clean_data.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
import pandas as pd
from dictionaries import ufs, temas


def addition_df(xls, iloc_slice: list, table_n: int, abas: list, tema: str):
    """Read sheet ``T<table_n>`` and stack one long-format frame per label.

    The sheet is parsed with row 7 (0-based) as header, and rows holding
    fewer than two non-missing cells are discarded.

    Args:
        xls: workbook handle or path accepted by ``pd.read_excel``.
        iloc_slice: positional column indexes to keep; an empty list keeps
            every column.
        table_n: number that completes the sheet name ``T<table_n>``.
        abas: answer labels; each label and its position in this list are
            handed to ``create_model_dataframe``.
        tema: topic label written on every output row.

    Returns:
        The per-label frames concatenated in ``abas`` order.
    """
    sheet = pd.read_excel(xls, f'T{table_n}', header=7).dropna(thresh=2)
    if iloc_slice:
        sheet = sheet.iloc[:, iloc_slice]

    frames = []
    for position, label in enumerate(abas):
        frames.append(create_model_dataframe(sheet, position, tema, label))
    return pd.concat(frames)


def create_model_dataframe(temp_df, n: int, tema: str, column: str, ano: int = 2021):
    """Build the long-format rows for one answer column of a sheet.

    Args:
        temp_df: parsed sheet; its first column supplies ``uf``.
        n: zero-based position of the answer; the values are read from
            column ``n + 1`` of ``temp_df``.
        tema: topic label repeated on every row.
        column: answer label repeated on every row as ``item``.
        ano: year repeated on every row.

    Returns:
        DataFrame with columns ``ano, uf, tema, item, quantidade_escola``
        that keeps the index of ``temp_df``.
    """
    layout = ["ano", "uf", "tema", "item", "quantidade_escola"]
    # Scalars are broadcast over the index shared by the two Series.
    contents = {
        "ano": ano,
        "uf": temp_df.iloc[:, 0],
        "tema": tema,
        "item": column,
        "quantidade_escola": temp_df.iloc[:, n + 1],
    }
    return pd.DataFrame(contents, columns=layout)


def create_temp_dadataframe(xls, number_table: int, tema: str):
    """Pick the column layout for sheet ``number_table`` and parse it.

    Each supported range of sheet numbers has its own set of positional
    columns to keep and its own list of answer labels; both are forwarded
    to ``addition_df``.

    Args:
        xls: workbook handle forwarded to ``addition_df``.
        number_table: sheet number, from 95 to 111.
        tema: topic label forwarded to ``addition_df``.

    Raises:
        ValueError: if ``number_table`` is outside the supported numbers.
    """
    if number_table == 95:
        keep = [0, 1, 3, 5]
        # NOTE(review): "Sem Resposta" is capitalised here but written
        # "Sem resposta" for the other sheets -- confirm this is intended.
        labels = ["Sim", "Não", "Sem Resposta"]
    elif 95 < number_table < 109:
        keep = [0, 1, 3, 5, 7]
        labels = ["Nunca", "Poucas vezes", "Várias vezes", "Sem resposta"]
    elif 109 <= number_table <= 110:
        keep = [0, 1, 3, 5, 7, 9]
        labels = ["Muito adequado", "Adequado", "Inadequado", "Muito inadequado", "Sem resposta"]
    elif number_table == 111:
        # Sheet 111 is used whole: no column selection.
        keep = []
        labels = ["Violência", "Bullying", "Machismo", "Homofobia", "Uso de drogas",
                  "Relações étnico-raciais/racismo"]
    else:
        raise ValueError("number_table fora do permitido. Apenas entre 95 á 111")

    return addition_df(xls, keep, number_table, labels, tema)


def get_clean_data() -> None:
    """Build the long-format table from the workbook and save it as CSV.

    Reads ``../input/anuario-2023.xlsx``, parses one sheet per entry of
    ``temas``, maps state names to their abbreviations via ``ufs``, removes
    the "Brasil" rows, renames the last topic label and writes
    ``../output/br_fbsp_absp_escola_2021.csv`` without the index.
    """
    # The context manager closes the workbook handle once parsing is done.
    with pd.ExcelFile('../input/anuario-2023.xlsx') as xls:
        df = pd.concat([create_temp_dadataframe(xls, number_table, tema)
                        for tema, number_table in temas.items()])

    # Only the state column holds state names; leave the other columns alone.
    df["uf"] = df["uf"].replace(ufs)

    # Filter with a boolean mask: the concatenated frame repeats index labels
    # across sheets, so dropping by label would also remove every other row
    # that happens to share a label with a "Brasil" row.
    df = df[df["uf"] != "Brasil"]

    # regex=False: the label is literal text, and the default for `regex`
    # differs between pandas versions.
    df["tema"] = df["tema"].str.replace(
        "% de preenchimento - Temáticas", "Proporção de escolas com projeto no tema",
        regex=False)

    df.to_csv("../output/br_fbsp_absp_escola_2021.csv", index=False)


if __name__ == '__main__':
    get_clean_data()
45 changes: 45 additions & 0 deletions models/br_fbsp_absp/code/violencia_escola/dictionaries.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
# State name as spelled in the source workbook -> two-letter abbreviation.
ufs = {
    'Acre': 'AC',
    'Alagoas': 'AL',
    'Amapá': 'AP',
    'Amazonas': 'AM',
    'Bahia': 'BA',
    'Ceará': 'CE',
    'Distrito Federal': 'DF',
    'Espírito Santo': 'ES',
    'Goiás': 'GO',
    'Maranhão': 'MA',
    'Mato Grosso': 'MT',
    'Mato Grosso do Sul': 'MS',
    'Minas Gerais': 'MG',
    'Pará': 'PA',
    'Paraíba': 'PB',
    'Paraná': 'PR',
    'Pernambuco': 'PE',
    'Piauí': 'PI',
    'Rio de Janeiro': 'RJ',
    'Rio Grande do Norte': 'RN',
    'Rio Grande do Sul': 'RS',
    'Rondônia': 'RO',
    'Roraima': 'RR',
    'Santa Catarina': 'SC',
    'São Paulo': 'SP',
    'Sergipe': 'SE',
    'Tocantins': 'TO',
}

# Topic labels in sheet order: the first label belongs to sheet number 95 and
# every following label to the next consecutive number, ending at 111.
_TEMA_LABELS = (
    'O calendário escolar de 2021 foi interrompido durante vários dias por episódios de violência?',
    'Atentado à vida',
    'Lesão corporal',
    'Roubo ou furto',
    'Tráfico de drogas',
    'Permanência de pessoas sob efeito de álcool',
    'Permanência de pessoas sob efeito de drogas',
    'Porte de arma (revólver, faca, canivete etc.)',
    'Assédio sexual',
    'Discriminação',
    'Bullying (ameaças ou ofensas verbais)',
    'Invasão do espaço escolar',
    'Depredação do patrimônio escolar (vandalismo)',
    'Tiroteio ou bala perdida',
    'Condições de segurança na entrada e saída da escola',
    'Muros e/ou grades que isolam a escola do ambiente externo',
    '% de preenchimento - Temáticas',
)

# Topic label -> sheet number.
temas = {label: number for number, label in enumerate(_TEMA_LABELS, start=95)}
19 changes: 19 additions & 0 deletions models/br_fbsp_absp/code/violencia_escola/download_data.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
import requests


def download_data():
    """Download the 2023 yearbook workbook to ``../input/anuario-2023.xlsx``.

    Raises:
        requests.HTTPError: if the server answers with an error status.
        requests.Timeout: if the server does not respond within the timeout.
    """
    url = "https://forumseguranca.org.br/wp-content/uploads/2023/07/anuario-2023.xlsx"
    # Without a timeout requests waits indefinitely on a stalled server.
    response = requests.get(url, timeout=60)
    response.raise_for_status()

    # TODO fix table names
    with open("../input/anuario-2023.xlsx", "wb") as f:
        # response.content is the raw body as bytes, matching the "wb" mode.
        f.write(response.content)

    print("Successfully downloaded")


if __name__ == "__main__":
    download_data()
13 changes: 13 additions & 0 deletions models/br_fbsp_absp/schema.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -68,3 +68,16 @@ models:
description: Quantidade de População do sistema penitenciário
- name: despesa_empenhada_seguranca_publica
description: Quantidade de Despesas empenhadas na Função Segurança Pública
- name: br_fbsp_absp__violencia_escola
description: Tabela com indicadores que auxiliam a entender o cenário de violência nas escolas
columns:
- name: ano
description: Ano
- name: uf
description: Sigla da Unidade da Federação
- name: tema
description: Tema da pesquisa
- name: item
description: Item referente ao tema da pesquisa
- name: quantidade_escola
description: Quantidade de escolas que aderiram à resposta

0 comments on commit 7fdb6dd

Please sign in to comment.