-
Notifications
You must be signed in to change notification settings - Fork 4
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
update and fix data about electrical energy consumption
- Loading branch information
Showing
3 changed files
with
258 additions
and
0 deletions.
There are no files selected for viewing
15 changes: 15 additions & 0 deletions
15
models/br_mme_consumo_energia_eletrica/br_mme_consumo_energia_eletrica__uf.sql
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,15 @@ | ||
{{ | ||
config( | ||
alias="uf", | ||
schema="br_mme_consumo_energia_eletrica", | ||
materialized="table", | ||
) | ||
}} | ||
select | ||
safe_cast(ano as int64) ano, | ||
safe_cast(mes as int64) mes, | ||
safe_cast(sigla_uf as string) sigla_uf, | ||
safe_cast(tipo_consumo as string) tipo_consumo, | ||
safe_cast(numero_consumidores as float64) numero_consumidores, | ||
safe_cast(consumo as float64) consumo | ||
from `basedosdados-staging.br_mme_consumo_energia_eletrica_staging.uf` as t |
218 changes: 218 additions & 0 deletions
218
models/br_mme_consumo_energia_eletrica/code/energia_eletrica.ipynb
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,218 @@ | ||
{ | ||
"cells": [ | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 41, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"import pandas as pd\n", | ||
"import warnings\n", | ||
"warnings.filterwarnings('ignore')" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 16, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"def tratamento_consumo(sheet_name):\n", | ||
" meses = {\n", | ||
" \"JAN\": 1,\n", | ||
" \"FEV\": 2,\n", | ||
" \"MAR\": 3,\n", | ||
" \"ABR\": 4,\n", | ||
" \"MAI\": 5,\n", | ||
" \"JUN\": 6,\n", | ||
" \"JUL\": 7,\n", | ||
" \"AGO\": 8,\n", | ||
" \"SET\": 9,\n", | ||
" \"OUT\": 10,\n", | ||
" \"NOV\": 11,\n", | ||
" \"DEZ\": 12\n", | ||
" }\n", | ||
" ufs = {'Acre':'AC', 'Alagoas':'AL', 'Amazonas':'AM', 'Amapá':'AP', 'Bahia':'BA', 'Ceará':'CE', 'Distrito Federal':'DF', 'Espírito Santo':'ES', 'Goiás':'GO', \n", | ||
" 'Maranhão':'MA', 'Minas Gerais':'MG', 'Mato Grosso do Sul':'MS', 'Mato Grosso':'MT', 'Pará':'PA', 'Paraíba':'PB', 'Pernambuco':'PE', 'Piauí':'PI', \n", | ||
" 'Paraná':'PR', 'Rio de Janeiro':'RJ', 'Rio Grande do Norte':'RN', 'Rondônia':'RO', 'Roraima':'RR', 'Rio Grande do Sul':'RS', \n", | ||
" 'Santa Catarina':'SC', 'Sergipe':'SE', 'São Paulo':'SP', 'Tocantins':'TO'}\n", | ||
" \n", | ||
" df = pd.read_excel('/mnt/x/dados/consumo_energia_eletrica/dados.xls', sheet_name, skiprows=4, skipfooter=1, usecols='A:IG')\n", | ||
" df_transposta = df.T\n", | ||
" df_transposta.reset_index(inplace=True)\n", | ||
" df_transposta.drop(df_transposta[['index', 2]], inplace=True, axis=1)\n", | ||
" df_transposta.columns = df_transposta.iloc[0]\n", | ||
" df_transposta = df_transposta[1:]\n", | ||
" df_transposta.columns = ['ano', 'mes', 'Rondônia', 'Acre', 'Amazonas', 'Roraima', 'Pará',\n", | ||
" 'Amapá', 'Tocantins', 'Maranhão', 'Piauí', 'Ceará',\n", | ||
" 'Rio Grande do Norte', 'Paraíba', 'Pernambuco', 'Alagoas', 'Sergipe',\n", | ||
" 'Bahia', 'Minas Gerais', 'Espírito Santo', 'Rio de Janeiro',\n", | ||
" 'São Paulo', 'Paraná', 'Santa Catarina', 'Rio Grande do Sul',\n", | ||
" 'Mato Grosso do Sul', 'Mato Grosso', 'Goiás', 'Distrito Federal']\n", | ||
" df_transposta_melted = pd.melt(df_transposta, id_vars=[\"ano\", 'mes'], var_name=\"Estado\", value_name=\"Valor\")\n", | ||
" df_transposta_melted['ano'].ffill(inplace=True)\n", | ||
" df_transposta_melted['mes'] = df_transposta_melted['mes'].map(meses)\n", | ||
"\n", | ||
" df_transposta_melted.rename(columns={'Estado' : 'sigla_uf', 'Valor':'consumo'}, inplace=True) \n", | ||
" df_transposta_melted['sigla_uf'] = df_transposta_melted['sigla_uf'].map(ufs)\n", | ||
" return df_transposta_melted\n", | ||
"\n", | ||
"tipos_consumo = [\"Total\", \"Cativo\", \"Residencial\", \"Industrial\", \"Comercial\", \"Outros\"]\n", | ||
"dfs_consumo = []\n", | ||
"\n", | ||
"# Realizando o tratamento para cada tipo de consumo\n", | ||
"for i, tipo in enumerate(tipos_consumo):\n", | ||
" if i < 6:\n", | ||
" df = tratamento_consumo(i + 9)\n", | ||
" df['tipo_consumo'] = tipo\n", | ||
" dfs_consumo.append(df)\n", | ||
"\n", | ||
"# Concatenando todos os DataFrames\n", | ||
"df_consumo = pd.concat(dfs_consumo)\n", | ||
"df_consumo = df_consumo[['ano', 'mes', 'sigla_uf','tipo_consumo', 'consumo']]" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 13, | ||
"metadata": {}, | ||
"outputs": [ | ||
{ | ||
"data": { | ||
"text/plain": [ | ||
"(38880, 5)" | ||
] | ||
}, | ||
"execution_count": 13, | ||
"metadata": {}, | ||
"output_type": "execute_result" | ||
} | ||
], | ||
"source": [ | ||
"df_consumo.shape" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 17, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"def tratamento_consumidores(sheet_name):\n", | ||
" meses = {\n", | ||
" \"JAN\": 1,\n", | ||
" \"FEV\": 2,\n", | ||
" \"MAR\": 3,\n", | ||
" \"ABR\": 4,\n", | ||
" \"MAI\": 5,\n", | ||
" \"JUN\": 6,\n", | ||
" \"JUL\": 7,\n", | ||
" \"AGO\": 8,\n", | ||
" \"SET\": 9,\n", | ||
" \"OUT\": 10,\n", | ||
" \"NOV\": 11,\n", | ||
" \"DEZ\": 12\n", | ||
" }\n", | ||
" ufs = {'Acre':'AC', 'Alagoas':'AL', 'Amazonas':'AM', 'Amapá':'AP', 'Bahia':'BA', 'Ceará':'CE', 'Distrito Federal':'DF', 'Espírito Santo':'ES', 'Goiás':'GO', \n", | ||
" 'Maranhão':'MA', 'Minas Gerais':'MG', 'Mato Grosso do Sul':'MS', 'Mato Grosso':'MT', 'Pará':'PA', 'Paraíba':'PB', 'Pernambuco':'PE', 'Piauí':'PI', \n", | ||
" 'Paraná':'PR', 'Rio de Janeiro':'RJ', 'Rio Grande do Norte':'RN', 'Rondônia':'RO', 'Roraima':'RR', 'Rio Grande do Sul':'RS', \n", | ||
" 'Santa Catarina':'SC', 'Sergipe':'SE', 'São Paulo':'SP', 'Tocantins':'TO'}\n", | ||
" \n", | ||
" df = pd.read_excel('/mnt/x/dados/consumo_energia_eletrica/dados.xls', sheet_name, skiprows=4, skipfooter=1, usecols='A:IG')\n", | ||
" df_transposta = df.T\n", | ||
" df_transposta.reset_index(inplace=True)\n", | ||
" df_transposta.drop(df_transposta[['index', 2]], inplace=True, axis=1)\n", | ||
" df_transposta.columns = df_transposta.iloc[0]\n", | ||
" df_transposta = df_transposta[1:]\n", | ||
" df_transposta.columns = ['ano', 'mes', 'Rondônia', 'Acre', 'Amazonas', 'Roraima', 'Pará',\n", | ||
" 'Amapá', 'Tocantins', 'Maranhão', 'Piauí', 'Ceará',\n", | ||
" 'Rio Grande do Norte', 'Paraíba', 'Pernambuco', 'Alagoas', 'Sergipe',\n", | ||
" 'Bahia', 'Minas Gerais', 'Espírito Santo', 'Rio de Janeiro',\n", | ||
" 'São Paulo', 'Paraná', 'Santa Catarina', 'Rio Grande do Sul',\n", | ||
" 'Mato Grosso do Sul', 'Mato Grosso', 'Goiás', 'Distrito Federal']\n", | ||
" df_transposta_melted = pd.melt(df_transposta, id_vars=[\"ano\", 'mes'], var_name=\"Estado\", value_name=\"Valor\")\n", | ||
" df_transposta_melted['ano'].ffill(inplace=True)\n", | ||
" df_transposta_melted['mes'] = df_transposta_melted['mes'].map(meses)\n", | ||
"\n", | ||
" df_transposta_melted.rename(columns={'Estado' : 'sigla_uf', 'Valor':'numero_consumidores'}, inplace=True) \n", | ||
" df_transposta_melted['sigla_uf'] = df_transposta_melted['sigla_uf'].map(ufs)\n", | ||
" return df_transposta_melted\n", | ||
"\n", | ||
"tipos_consumidores = ['Residencial', 'Industrial', 'Comercial', 'Outros']\n", | ||
"dfs_consumidores = []\n", | ||
"\n", | ||
"# Realizando o tratamento para cada tipo de consumidores\n", | ||
"for i, tipo in enumerate(tipos_consumidores):\n", | ||
" if i < 6:\n", | ||
" df = tratamento_consumidores(i + 15)\n", | ||
" df['tipo_consumo'] = tipo\n", | ||
" dfs_consumidores.append(df)\n", | ||
"\n", | ||
"# Concatenando todos os DataFrames\n", | ||
"df_consumidores = pd.concat(dfs_consumidores)\n", | ||
"df_consumidores = df_consumidores[['ano', 'mes', 'sigla_uf','tipo_consumo', 'numero_consumidores']]" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 18, | ||
"metadata": {}, | ||
"outputs": [ | ||
{ | ||
"data": { | ||
"text/plain": [ | ||
"(25920, 5)" | ||
] | ||
}, | ||
"execution_count": 18, | ||
"metadata": {}, | ||
"output_type": "execute_result" | ||
} | ||
], | ||
"source": [ | ||
"df_consumidores.shape" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 38, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"df_total = pd.merge(df_consumo, df_consumidores, how= 'left', on=['ano', 'mes', 'sigla_uf', 'tipo_consumo'])\n", | ||
"df_total = df_total[['ano', 'mes', 'sigla_uf', 'tipo_consumo', 'numero_consumidores', 'consumo']]\n", | ||
"df_total['consumo'] = df_total['consumo'].astype(int)" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 46, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"df_total.to_csv(\"/mnt/x/dados/consumo_energia_eletrica/consumo_energia.csv\", sep=',', index=False, encoding='utf-8')" | ||
] | ||
} | ||
], | ||
"metadata": { | ||
"kernelspec": { | ||
"display_name": "Python 3", | ||
"language": "python", | ||
"name": "python3" | ||
}, | ||
"language_info": { | ||
"codemirror_mode": { | ||
"name": "ipython", | ||
"version": 3 | ||
}, | ||
"file_extension": ".py", | ||
"mimetype": "text/x-python", | ||
"name": "python", | ||
"nbconvert_exporter": "python", | ||
"pygments_lexer": "ipython3", | ||
"version": "3.10.13" | ||
} | ||
}, | ||
"nbformat": 4, | ||
"nbformat_minor": 2 | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,25 @@ | ||
--- | ||
version: 2 | ||
models: | ||
- name: br_mme_consumo_energia_eletrica__uf | ||
description: Consumo de energia elétrica em nível nacional e segmentado pelas | ||
classes residencial, industrial, comercial e outros (rural, serviço público | ||
e iluminação pública). | ||
tests: | ||
- not_null_proportion_multiple_columns: | ||
at_least: 0.05 | ||
- dbt_utils.unique_combination_of_columns: | ||
combination_of_columns: [ano, mes, sigla_uf, tipo_consumo] | ||
columns: | ||
- name: ano | ||
description: Ano | ||
- name: mes | ||
description: Mês | ||
- name: sigla_uf | ||
description: Sigla da Unidade da Federação | ||
- name: tipo_consumo | ||
description: Tipo de Consumo | ||
- name: numero_consumidores | ||
description: Número de consumidores de energia elétrica atendidos pela rede | ||
- name: consumo | ||
description: Consumo de energia elétrica na rede (MWh) |