Skip to content

Commit

Permalink
update and fix data about electrical energy consumption
Browse files Browse the repository at this point in the history
  • Loading branch information
tricktx committed Mar 21, 2024
1 parent 38496f3 commit 2eb4094
Show file tree
Hide file tree
Showing 3 changed files with 258 additions and 0 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
{{
config(
alias="uf",
schema="br_mme_consumo_energia_eletrica",
materialized="table",
)
}}
select
safe_cast(ano as int64) ano,
safe_cast(mes as int64) mes,
safe_cast(sigla_uf as string) sigla_uf,
safe_cast(tipo_consumo as string) tipo_consumo,
safe_cast(numero_consumidores as float64) numero_consumidores,
safe_cast(consumo as float64) consumo
from `basedosdados-staging.br_mme_consumo_energia_eletrica_staging.uf` as t
218 changes: 218 additions & 0 deletions models/br_mme_consumo_energia_eletrica/code/energia_eletrica.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,218 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 41,
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"import warnings\n",
"warnings.filterwarnings('ignore')"
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {},
"outputs": [],
"source": [
"def tratamento_consumo(sheet_name):\n",
" meses = {\n",
" \"JAN\": 1,\n",
" \"FEV\": 2,\n",
" \"MAR\": 3,\n",
" \"ABR\": 4,\n",
" \"MAI\": 5,\n",
" \"JUN\": 6,\n",
" \"JUL\": 7,\n",
" \"AGO\": 8,\n",
" \"SET\": 9,\n",
" \"OUT\": 10,\n",
" \"NOV\": 11,\n",
" \"DEZ\": 12\n",
" }\n",
" ufs = {'Acre':'AC', 'Alagoas':'AL', 'Amazonas':'AM', 'Amapá':'AP', 'Bahia':'BA', 'Ceará':'CE', 'Distrito Federal':'DF', 'Espírito Santo':'ES', 'Goiás':'GO', \n",
" 'Maranhão':'MA', 'Minas Gerais':'MG', 'Mato Grosso do Sul':'MS', 'Mato Grosso':'MT', 'Pará':'PA', 'Paraíba':'PB', 'Pernambuco':'PE', 'Piauí':'PI', \n",
" 'Paraná':'PR', 'Rio de Janeiro':'RJ', 'Rio Grande do Norte':'RN', 'Rondônia':'RO', 'Roraima':'RR', 'Rio Grande do Sul':'RS', \n",
" 'Santa Catarina':'SC', 'Sergipe':'SE', 'São Paulo':'SP', 'Tocantins':'TO'}\n",
" \n",
" df = pd.read_excel('/mnt/x/dados/consumo_energia_eletrica/dados.xls', sheet_name, skiprows=4, skipfooter=1, usecols='A:IG')\n",
" df_transposta = df.T\n",
" df_transposta.reset_index(inplace=True)\n",
" df_transposta.drop(df_transposta[['index', 2]], inplace=True, axis=1)\n",
" df_transposta.columns = df_transposta.iloc[0]\n",
" df_transposta = df_transposta[1:]\n",
" df_transposta.columns = ['ano', 'mes', 'Rondônia', 'Acre', 'Amazonas', 'Roraima', 'Pará',\n",
" 'Amapá', 'Tocantins', 'Maranhão', 'Piauí', 'Ceará',\n",
" 'Rio Grande do Norte', 'Paraíba', 'Pernambuco', 'Alagoas', 'Sergipe',\n",
" 'Bahia', 'Minas Gerais', 'Espírito Santo', 'Rio de Janeiro',\n",
" 'São Paulo', 'Paraná', 'Santa Catarina', 'Rio Grande do Sul',\n",
" 'Mato Grosso do Sul', 'Mato Grosso', 'Goiás', 'Distrito Federal']\n",
" df_transposta_melted = pd.melt(df_transposta, id_vars=[\"ano\", 'mes'], var_name=\"Estado\", value_name=\"Valor\")\n",
" df_transposta_melted['ano'].ffill(inplace=True)\n",
" df_transposta_melted['mes'] = df_transposta_melted['mes'].map(meses)\n",
"\n",
" df_transposta_melted.rename(columns={'Estado' : 'sigla_uf', 'Valor':'consumo'}, inplace=True) \n",
" df_transposta_melted['sigla_uf'] = df_transposta_melted['sigla_uf'].map(ufs)\n",
" return df_transposta_melted\n",
"\n",
"tipos_consumo = [\"Total\", \"Cativo\", \"Residencial\", \"Industrial\", \"Comercial\", \"Outros\"]\n",
"dfs_consumo = []\n",
"\n",
"# Realizando o tratamento para cada tipo de consumo\n",
"for i, tipo in enumerate(tipos_consumo):\n",
" if i < 6:\n",
" df = tratamento_consumo(i + 9)\n",
" df['tipo_consumo'] = tipo\n",
" dfs_consumo.append(df)\n",
"\n",
"# Concatenando todos os DataFrames\n",
"df_consumo = pd.concat(dfs_consumo)\n",
"df_consumo = df_consumo[['ano', 'mes', 'sigla_uf','tipo_consumo', 'consumo']]"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(38880, 5)"
]
},
"execution_count": 13,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df_consumo.shape"
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {},
"outputs": [],
"source": [
"def tratamento_consumidores(sheet_name):\n",
" meses = {\n",
" \"JAN\": 1,\n",
" \"FEV\": 2,\n",
" \"MAR\": 3,\n",
" \"ABR\": 4,\n",
" \"MAI\": 5,\n",
" \"JUN\": 6,\n",
" \"JUL\": 7,\n",
" \"AGO\": 8,\n",
" \"SET\": 9,\n",
" \"OUT\": 10,\n",
" \"NOV\": 11,\n",
" \"DEZ\": 12\n",
" }\n",
" ufs = {'Acre':'AC', 'Alagoas':'AL', 'Amazonas':'AM', 'Amapá':'AP', 'Bahia':'BA', 'Ceará':'CE', 'Distrito Federal':'DF', 'Espírito Santo':'ES', 'Goiás':'GO', \n",
" 'Maranhão':'MA', 'Minas Gerais':'MG', 'Mato Grosso do Sul':'MS', 'Mato Grosso':'MT', 'Pará':'PA', 'Paraíba':'PB', 'Pernambuco':'PE', 'Piauí':'PI', \n",
" 'Paraná':'PR', 'Rio de Janeiro':'RJ', 'Rio Grande do Norte':'RN', 'Rondônia':'RO', 'Roraima':'RR', 'Rio Grande do Sul':'RS', \n",
" 'Santa Catarina':'SC', 'Sergipe':'SE', 'São Paulo':'SP', 'Tocantins':'TO'}\n",
" \n",
" df = pd.read_excel('/mnt/x/dados/consumo_energia_eletrica/dados.xls', sheet_name, skiprows=4, skipfooter=1, usecols='A:IG')\n",
" df_transposta = df.T\n",
" df_transposta.reset_index(inplace=True)\n",
" df_transposta.drop(df_transposta[['index', 2]], inplace=True, axis=1)\n",
" df_transposta.columns = df_transposta.iloc[0]\n",
" df_transposta = df_transposta[1:]\n",
" df_transposta.columns = ['ano', 'mes', 'Rondônia', 'Acre', 'Amazonas', 'Roraima', 'Pará',\n",
" 'Amapá', 'Tocantins', 'Maranhão', 'Piauí', 'Ceará',\n",
" 'Rio Grande do Norte', 'Paraíba', 'Pernambuco', 'Alagoas', 'Sergipe',\n",
" 'Bahia', 'Minas Gerais', 'Espírito Santo', 'Rio de Janeiro',\n",
" 'São Paulo', 'Paraná', 'Santa Catarina', 'Rio Grande do Sul',\n",
" 'Mato Grosso do Sul', 'Mato Grosso', 'Goiás', 'Distrito Federal']\n",
" df_transposta_melted = pd.melt(df_transposta, id_vars=[\"ano\", 'mes'], var_name=\"Estado\", value_name=\"Valor\")\n",
" df_transposta_melted['ano'].ffill(inplace=True)\n",
" df_transposta_melted['mes'] = df_transposta_melted['mes'].map(meses)\n",
"\n",
" df_transposta_melted.rename(columns={'Estado' : 'sigla_uf', 'Valor':'numero_consumidores'}, inplace=True) \n",
" df_transposta_melted['sigla_uf'] = df_transposta_melted['sigla_uf'].map(ufs)\n",
" return df_transposta_melted\n",
"\n",
"tipos_consumidores = ['Residencial', 'Industrial', 'Comercial', 'Outros']\n",
"dfs_consumidores = []\n",
"\n",
"# Realizando o tratamento para cada tipo de consumidores\n",
"for i, tipo in enumerate(tipos_consumidores):\n",
" if i < 6:\n",
" df = tratamento_consumidores(i + 15)\n",
" df['tipo_consumo'] = tipo\n",
" dfs_consumidores.append(df)\n",
"\n",
"# Concatenando todos os DataFrames\n",
"df_consumidores = pd.concat(dfs_consumidores)\n",
"df_consumidores = df_consumidores[['ano', 'mes', 'sigla_uf','tipo_consumo', 'numero_consumidores']]"
]
},
{
"cell_type": "code",
"execution_count": 18,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(25920, 5)"
]
},
"execution_count": 18,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df_consumidores.shape"
]
},
{
"cell_type": "code",
"execution_count": 38,
"metadata": {},
"outputs": [],
"source": [
"df_total = pd.merge(df_consumo, df_consumidores, how= 'left', on=['ano', 'mes', 'sigla_uf', 'tipo_consumo'])\n",
"df_total = df_total[['ano', 'mes', 'sigla_uf', 'tipo_consumo', 'numero_consumidores', 'consumo']]\n",
"df_total['consumo'] = df_total['consumo'].astype(int)"
]
},
{
"cell_type": "code",
"execution_count": 46,
"metadata": {},
"outputs": [],
"source": [
"df_total.to_csv(\"/mnt/x/dados/consumo_energia_eletrica/consumo_energia.csv\", sep=',', index=False, encoding='utf-8')"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.13"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
25 changes: 25 additions & 0 deletions models/br_mme_consumo_energia_eletrica/schema.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
---
version: 2
models:
- name: br_mme_consumo_energia_eletrica__uf
description: Consumo de energia elétrica em nível nacional e segmentado pelas
classes residencial, industrial, comercial e outros (rural, serviço público
e iluminação pública).
tests:
- not_null_proportion_multiple_columns:
at_least: 0.05
- dbt_utils.unique_combination_of_columns:
combination_of_columns: [ano, mes, sigla_uf, tipo_consumo]
columns:
- name: ano
description: Ano
- name: mes
description: Mês
- name: sigla_uf
description: Sigla da Unidade da Federação
- name: tipo_consumo
description: Tipo de Consumo
- name: numero_consumidores
description: Número de consumidores de energia elétrica atendidos pela rede
- name: consumo
description: Consumo de energia elétrica na rede (MWh)

0 comments on commit 2eb4094

Please sign in to comment.