From d866591718b0782b30cbd21e2e84068294398284 Mon Sep 17 00:00:00 2001 From: Pedro Castro Date: Thu, 14 Sep 2023 19:45:51 -0300 Subject: [PATCH 01/15] draf dicionario questionarios enem --- models/br_inep_enem/code/main.py | 121 +++++++++++++++++++++++++++++++ 1 file changed, 121 insertions(+) create mode 100644 models/br_inep_enem/code/main.py diff --git a/models/br_inep_enem/code/main.py b/models/br_inep_enem/code/main.py new file mode 100644 index 00000000..50a14486 --- /dev/null +++ b/models/br_inep_enem/code/main.py @@ -0,0 +1,121 @@ +import pandas as pd +import requests +import zipfile +import io +import os + +BASE_URL = "https://download.inep.gov.br/microdados" +YEARS = range(1998, 2022 + 1) + +# CWD = os.path.dirname(os.getcwd()) +CWD = os.getcwd() +INPUT = os.path.join(CWD, "input") +TMP = os.path.join(CWD, "tmp") +OUTPUT = os.path.join(CWD, "output") + +if not os.path.exists(INPUT): + os.mkdir(INPUT) + +if not os.path.exists(TMP): + os.mkdir(TMP) + +if not os.path.exists(OUTPUT): + os.mkdir(OUTPUT) + + +def make_url(year: int) -> str: + return f"{BASE_URL}/microdados_enem_{year}.zip" + + +def download_file(year: int) -> None: + if os.path.exists(f"{INPUT}/{year}"): + print(f"Data for {year} already exists") + return None + url = make_url(year) + r = requests.get(url, verify=False) + z = zipfile.ZipFile(io.BytesIO(r.content)) + z.extractall(f"{INPUT}/{year}") + return None + + +for year in YEARS: + download_file(year) + + +def extract_dicts() -> tuple[str, str]: + z = zipfile.ZipFile(f"{INPUT}/dicionarios-20230914T203333Z-001.zip") + z.extractall(TMP) + return (f"{TMP}/dicionarios", "Dicionário_Microdados_ENEM_") + + +dir_dicts, template_file = extract_dicts() + +def build_dictionary(year: int, path: str) -> pd.DataFrame: + # dict_folder_name = [ + # folder for folder in os.listdir(f"{INPUT}/{year}") if folder.startswith("DICIO") + # ] + + # folder = dict_folder_name[0] + + df = pd.read_excel(path) + + first_col = df.columns[0] + assert isinstance(first_col, str) and first_col.startswith("DICIONÁRIO") + + line_separator = ( + f"QUESTIONÁRIO SOCIOECONÔMICO DO ENEM" + if year < 2010 + else "DADOS DO QUESTIONÁRIO SOCIOECONÔMICO" + ) + + # print(f"{first_col=}, {line_separator=}") + + start_line = df[df[first_col].str.contains(line_separator, na=False)].index[0] + + # Drop last 6 lines + df = df[df.index > start_line] + + assert isinstance(df, pd.DataFrame) + + columns = { + "Unnamed: 1": "descricao", + "Unnamed: 2": "chave", + "Unnamed: 3": "valor", + "Unnamed: 4": "tamanho", + "Unnamed: 5": "tipo", + } + + columns[first_col] = "coluna" + + df = df.rename(columns=columns, errors="raise") + + # Drop lines here "chave" is empty + df = df[df["chave"].notna()] + + assert isinstance(df, pd.DataFrame) + + cols = df["coluna"].to_list() + + for index in range(0, len(cols) + 1): + next_index = index + 1 + if next_index < len(cols) and pd.isna(cols[next_index]): + cols[next_index] = cols[index] + + df["coluna"] = cols + df["cobertura_temporal"] = str(year) + df["id_tabela"] = f"questionario_socioeconomico_{year}" + + df = df[["id_tabela", "coluna", "chave", "cobertura_temporal", "valor"]] + df = df[df["coluna"] != "IN_QSE"] + + # Some records contains multiple values + df["chave"] = df["chave"].apply(lambda value: value.split("\n") if isinstance(value, str) and "\n" in value else value) + + assert isinstance(df, pd.DataFrame) + return df.explode("chave") + +dict_by_table = [ + build_dictionary(year, f"{dir_dicts}/{template_file}{year}.xlsx") for year in YEARS +] + 
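+# Concatenate the per-year questionnaire dictionaries and export them as a single CSV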
+pd.concat(dict_by_table).to_csv(f"{OUTPUT}/dicionario_questionarios.csv", index = False) From 58440362bb196ce2a31c879f60560c8886c8bcc6 Mon Sep 17 00:00:00 2001 From: Pedro Castro Date: Fri, 15 Sep 2023 09:46:05 -0300 Subject: [PATCH 02/15] fix empty records --- models/br_inep_enem/code/main.py | 20 ++++++++++++++++---- 1 file changed, 16 insertions(+), 4 deletions(-) diff --git a/models/br_inep_enem/code/main.py b/models/br_inep_enem/code/main.py index 50a14486..9598c174 100644 --- a/models/br_inep_enem/code/main.py +++ b/models/br_inep_enem/code/main.py @@ -50,6 +50,7 @@ def extract_dicts() -> tuple[str, str]: dir_dicts, template_file = extract_dicts() + def build_dictionary(year: int, path: str) -> pd.DataFrame: # dict_folder_name = [ # folder for folder in os.listdir(f"{INPUT}/{year}") if folder.startswith("DICIO") @@ -72,7 +73,6 @@ def build_dictionary(year: int, path: str) -> pd.DataFrame: start_line = df[df[first_col].str.contains(line_separator, na=False)].index[0] - # Drop last 6 lines df = df[df.index > start_line] assert isinstance(df, pd.DataFrame) @@ -109,13 +109,25 @@ def build_dictionary(year: int, path: str) -> pd.DataFrame: df = df[df["coluna"] != "IN_QSE"] # Some records contains multiple values - df["chave"] = df["chave"].apply(lambda value: value.split("\n") if isinstance(value, str) and "\n" in value else value) + df["chave"] = df["chave"].apply(lambda value: value.split("\n") if isinstance(value, str) and "\n" in value else value) # type: ignore assert isinstance(df, pd.DataFrame) - return df.explode("chave") + df = df.explode("chave") + + cols_with_empty_value = df[df["valor"].isna()]["coluna"].unique() # type: ignore + + for col in cols_with_empty_value: + valid_value = df.loc[ + (df["coluna"] == col) & (df["valor"].notna()), "valor" + ].values + assert len(valid_value) == 1 + df.loc[df["coluna"] == col, "valor"] = valid_value[0] + + return df + dict_by_table = [ build_dictionary(year, f"{dir_dicts}/{template_file}{year}.xlsx") for year in YEARS ] -pd.concat(dict_by_table).to_csv(f"{OUTPUT}/dicionario_questionarios.csv", index = False) +pd.concat(dict_by_table).to_csv(f"{OUTPUT}/dicionario_questionarios.csv", index=False) From c5b2b7e987138f9fd91fb8eaa21752a084191258 Mon Sep 17 00:00:00 2001 From: Pedro Castro Date: Fri, 15 Sep 2023 15:03:39 -0300 Subject: [PATCH 03/15] cont --- models/br_inep_enem/code/main.py | 67 ++++++++++++++++++++++++++++++++ 1 file changed, 67 insertions(+) diff --git a/models/br_inep_enem/code/main.py b/models/br_inep_enem/code/main.py index 9598c174..38e9800b 100644 --- a/models/br_inep_enem/code/main.py +++ b/models/br_inep_enem/code/main.py @@ -131,3 +131,70 @@ def build_dictionary(year: int, path: str) -> pd.DataFrame: ] pd.concat(dict_by_table).to_csv(f"{OUTPUT}/dicionario_questionarios.csv", index=False) + + +def read_remote_sheet(url): + url = url.replace("edit#gid=", "export?format=csv&gid=") + return pd.read_csv( + io.StringIO(requests.get(url, timeout=10).content.decode("utf-8")) + ) + + +microdados_arch = read_remote_sheet( + "https://docs.google.com/spreadsheets/d/1EUhqjdB6BDGlksgy4UY8cwTF7pQBavP7Mrhgi-y3GRI/edit#gid=0" +) +microdados_arch = microdados_arch[microdados_arch["covered_by_dictionary"] == "yes"] + + +def build_dictionary_microdados(year: int, path: str, cols_filled: list[str]): + df = pd.read_excel(path) + + first_col = df.columns[0] + assert isinstance(first_col, str) and first_col.startswith("DICIONÁRIO") + + line_end_separator = ( + f"QUESTIONÁRIO SOCIOECONÔMICO DO ENEM" + if year < 2010 + else "DADOS DO 
QUESTIONÁRIO SOCIOECONÔMICO" + ) + + start_line = df[df[first_col].str.contains("NU_INSCRICAO", na=False)].index[0] + end_line = df[df[first_col].str.contains(line_end_separator, na=False)].index[0] + + df = df[(df.index >= start_line) & (df.index < end_line)] + + columns = { + "Unnamed: 1": "descricao", + "Unnamed: 2": "chave", + "Unnamed: 3": "valor", + "Unnamed: 4": "tamanho", + "Unnamed: 5": "tipo", + } + columns[first_col] = "variavel" + + df = df.rename(columns=columns, errors="raise") # type: ignore + + cols_filled = df["variavel"].to_list() + + for index in range(0, len(cols_filled) + 1): + next_index = index + 1 + if next_index < len(cols_filled) and pd.isna(cols_filled[next_index]): + cols_filled[next_index] = cols_filled[index] + + df["variavel"] = cols_filled + + assert isinstance(df, pd.DataFrame) + return df + + +a = build_dictionary_microdados( + 1998, f"{dir_dicts}/{template_file}{1998}.xlsx", microdados_arch["name"].to_list() +) +a["variavel"].to_list() + +a.tail() +a.columns + +a[a["variavel"] == "TP_FAIXA_ETARIA"] + +microdados_arch["name"].to_list() From f2df7dffa4cb13c1f783a0ba29e54ab901fd67c8 Mon Sep 17 00:00:00 2001 From: Pedro Castro Date: Tue, 19 Sep 2023 09:51:42 -0300 Subject: [PATCH 04/15] add microdados --- models/br_inep_enem/code/main.py | 108 +++++++++++++++++++++++++++---- 1 file changed, 95 insertions(+), 13 deletions(-) diff --git a/models/br_inep_enem/code/main.py b/models/br_inep_enem/code/main.py index 38e9800b..9ced38e7 100644 --- a/models/br_inep_enem/code/main.py +++ b/models/br_inep_enem/code/main.py @@ -1,4 +1,5 @@ import pandas as pd +import numpy as np import requests import zipfile import io @@ -38,8 +39,8 @@ def download_file(year: int) -> None: return None -for year in YEARS: - download_file(year) +# for year in YEARS: +# download_file(year) def extract_dicts() -> tuple[str, str]: @@ -146,11 +147,32 @@ def read_remote_sheet(url): microdados_arch = microdados_arch[microdados_arch["covered_by_dictionary"] == "yes"] -def build_dictionary_microdados(year: int, path: str, cols_filled: list[str]): +def get_original_name(col_name: str, year: int) -> str: + target_col_year = f"original_name_{year}" + values = microdados_arch.loc[ + microdados_arch["name"] == col_name, target_col_year + ].values + assert len(values) == 1 + return values[0] + + +def get_value_and_keys(df: pd.DataFrame, col_name: str, year: int) -> pd.DataFrame: + original_col_name = get_original_name(col_name, year) + df = df.loc[df["variavel"] == original_col_name][["chave", "valor"]] + df["nome_coluna"] = col_name + df["ano"] = str(year) + return df + + +def build_dictionary_microdados( + year: int, path: str, cols_covered_by_dictionary: list[str] +): df = pd.read_excel(path) first_col = df.columns[0] - assert isinstance(first_col, str) and first_col.startswith("DICIONÁRIO") + assert isinstance(first_col, str) and first_col.startswith( + "DICIONÁRIO" + ), f"First column should be a string, {path}=" line_end_separator = ( f"QUESTIONÁRIO SOCIOECONÔMICO DO ENEM" @@ -183,18 +205,78 @@ def build_dictionary_microdados(year: int, path: str, cols_filled: list[str]): df["variavel"] = cols_filled - assert isinstance(df, pd.DataFrame) - return df + result = [ + get_value_and_keys(df, col_name, year) + for col_name in cols_covered_by_dictionary + ] + + return pd.concat(result).map(lambda x: x.strip() if isinstance(x, str) else x) -a = build_dictionary_microdados( - 1998, f"{dir_dicts}/{template_file}{1998}.xlsx", microdados_arch["name"].to_list() +dict_microdados_by_year = pd.concat( + [ + 
build_dictionary_microdados(year, f"{dir_dicts}/{template_file}{year}.xlsx", microdados_arch["name"].to_list()) for year in YEARS # type: ignore + ] ) -a["variavel"].to_list() -a.tail() -a.columns -a[a["variavel"] == "TP_FAIXA_ETARIA"] +# Para cada coluna vamos verificar se o par chave/valor são iguais entre todos os anos +def gen_unique_key_value(col_name: str, df: pd.DataFrame): + def create_intervals(years): + if len(years) == 1: + return [years] + + intervals = [] + current_interval = [years[0]] + + for i in range(1, len(years)): + if years[i] - years[i - 1] != 1: + current_interval.append(years[i - 1]) + intervals.append(current_interval) + current_interval = [years[i]] + + current_interval.append(years[-1]) + intervals.append(current_interval) + + return intervals + + def make_ranges(key, value): + values_by_key = df.loc[ + (df["chave"] == key) & (df["valor"] == value), "valor" + ].values + assert len(set(values_by_key)) == 1, f"{col_name=}, {values_by_key=}" + + years = df.loc[ + (df["chave"] == key) & (df["valor"] == value), "ano" + ].values.astype(int) + + intervals = [list(set(interval)) for interval in create_intervals(years)] + + cobertura_temporal = [ + "(1)".join(map(str, np.sort(interval))) for interval in intervals + ] + + return (key, values_by_key[0], ",".join(cobertura_temporal)) + + ranges = [ + make_ranges(key, value) for (key, value), _ in df.groupby(["chave", "valor"]) # type: ignore + ] + + basic_cols = ["chave", "cobertura_temporal", "valor"] + + dict_df = pd.DataFrame(ranges, columns=basic_cols) + + dict_df["nome_coluna"] = col_name + dict_df["id_tabela"] = "microdados" + + all_cols = [*["id_tabela", "nome_coluna"], *basic_cols] + + return dict_df[all_cols] + -microdados_arch["name"].to_list() +pd.concat( + [ + gen_unique_key_value(col_name, df) # type: ignore + for col_name, df in dict_microdados_by_year.groupby("nome_coluna") + ] +).to_csv(f"{OUTPUT}/dicionario_microdados.csv", index=False) From fd8fa85d82cea1d3e46f5d9e0ca7f628c96eb60c Mon Sep 17 00:00:00 2001 From: Pedro Castro Date: Thu, 21 Sep 2023 23:08:23 -0300 Subject: [PATCH 05/15] update script --- models/br_inep_enem/code/main.py | 101 +++++++++++++++++-------------- 1 file changed, 54 insertions(+), 47 deletions(-) diff --git a/models/br_inep_enem/code/main.py b/models/br_inep_enem/code/main.py index 9ced38e7..2b3cbe80 100644 --- a/models/br_inep_enem/code/main.py +++ b/models/br_inep_enem/code/main.py @@ -5,10 +5,8 @@ import io import os -BASE_URL = "https://download.inep.gov.br/microdados" YEARS = range(1998, 2022 + 1) -# CWD = os.path.dirname(os.getcwd()) CWD = os.getcwd() INPUT = os.path.join(CWD, "input") TMP = os.path.join(CWD, "tmp") @@ -24,27 +22,9 @@ os.mkdir(OUTPUT) -def make_url(year: int) -> str: - return f"{BASE_URL}/microdados_enem_{year}.zip" - - -def download_file(year: int) -> None: - if os.path.exists(f"{INPUT}/{year}"): - print(f"Data for {year} already exists") - return None - url = make_url(year) - r = requests.get(url, verify=False) - z = zipfile.ZipFile(io.BytesIO(r.content)) - z.extractall(f"{INPUT}/{year}") - return None - - -# for year in YEARS: -# download_file(year) - - def extract_dicts() -> tuple[str, str]: - z = zipfile.ZipFile(f"{INPUT}/dicionarios-20230914T203333Z-001.zip") + zip_file = [file for file in os.listdir(INPUT) if file.endswith(".zip")][0] + z = zipfile.ZipFile(f"{INPUT}/{zip_file}") z.extractall(TMP) return (f"{TMP}/dicionarios", "Dicionário_Microdados_ENEM_") @@ -53,12 +33,6 @@ def extract_dicts() -> tuple[str, str]: def build_dictionary(year: int, path: 
str) -> pd.DataFrame: - # dict_folder_name = [ - # folder for folder in os.listdir(f"{INPUT}/{year}") if folder.startswith("DICIO") - # ] - - # folder = dict_folder_name[0] - df = pd.read_excel(path) first_col = df.columns[0] @@ -70,8 +44,6 @@ def build_dictionary(year: int, path: str) -> pd.DataFrame: else "DADOS DO QUESTIONÁRIO SOCIOECONÔMICO" ) - # print(f"{first_col=}, {line_separator=}") - start_line = df[df[first_col].str.contains(line_separator, na=False)].index[0] df = df[df.index > start_line] @@ -102,12 +74,12 @@ def build_dictionary(year: int, path: str) -> pd.DataFrame: if next_index < len(cols) and pd.isna(cols[next_index]): cols[next_index] = cols[index] - df["coluna"] = cols - df["cobertura_temporal"] = str(year) + df["nome_coluna"] = cols + df["cobertura_temporal"] = None df["id_tabela"] = f"questionario_socioeconomico_{year}" - df = df[["id_tabela", "coluna", "chave", "cobertura_temporal", "valor"]] - df = df[df["coluna"] != "IN_QSE"] + df = df[["id_tabela", "nome_coluna", "chave", "cobertura_temporal", "valor"]] + df = df[df["nome_coluna"] != "IN_QSE"] # Some records contains multiple values df["chave"] = df["chave"].apply(lambda value: value.split("\n") if isinstance(value, str) and "\n" in value else value) # type: ignore @@ -115,23 +87,26 @@ def build_dictionary(year: int, path: str) -> pd.DataFrame: assert isinstance(df, pd.DataFrame) df = df.explode("chave") - cols_with_empty_value = df[df["valor"].isna()]["coluna"].unique() # type: ignore + cols_with_empty_value = df[df["valor"].isna()]["nome_coluna"].unique() # type: ignore for col in cols_with_empty_value: valid_value = df.loc[ - (df["coluna"] == col) & (df["valor"].notna()), "valor" + (df["nome_coluna"] == col) & (df["valor"].notna()), "valor" ].values assert len(valid_value) == 1 - df.loc[df["coluna"] == col, "valor"] = valid_value[0] + df.loc[df["nome_coluna"] == col, "valor"] = valid_value[0] return df -dict_by_table = [ - build_dictionary(year, f"{dir_dicts}/{template_file}{year}.xlsx") for year in YEARS -] +dict_by_table = pd.concat( + [ + build_dictionary(year, f"{dir_dicts}/{template_file}{year}.xlsx") + for year in YEARS + ] +) -pd.concat(dict_by_table).to_csv(f"{OUTPUT}/dicionario_questionarios.csv", index=False) +dict_by_table.to_csv(f"{OUTPUT}/dicionario_questionarios.csv", index=False) def read_remote_sheet(url): @@ -252,11 +227,20 @@ def make_ranges(key, value): intervals = [list(set(interval)) for interval in create_intervals(years)] - cobertura_temporal = [ - "(1)".join(map(str, np.sort(interval))) for interval in intervals - ] + def make_temporal_cov(interval): + interval_sort = list( + map( + lambda year: "" + if year == max(YEARS) or year == min(YEARS) + else str(year), + np.sort(interval), + ), + ) + return "(1)".join(interval_sort) + + cobertura_temporal = [make_temporal_cov(interval) for interval in intervals] - return (key, values_by_key[0], ",".join(cobertura_temporal)) + return (key, ",".join(cobertura_temporal), values_by_key[0]) ranges = [ make_ranges(key, value) for (key, value), _ in df.groupby(["chave", "valor"]) # type: ignore @@ -266,6 +250,24 @@ def make_ranges(key, value): dict_df = pd.DataFrame(ranges, columns=basic_cols) + unique_keys = [i for (i, v) in dict_df["chave"].value_counts().items() if v == 1] + + # Drop temporal coverage if key is unique + def drop_temporal_cov(key, temporal_cov): + return None if key in unique_keys else temporal_cov + + dict_df["cobertura_temporal"] = dict_df[["chave", "cobertura_temporal"]].apply( + lambda values: drop_temporal_cov(*values), axis=1 
+ ) + + # Last edits + if col_name in [ + "tipo_prova_matematica", + "tipo_prova_ciencias_natureza", + "tipo_prova_ciencias_humanas", + ]: + dict_df["valor"] = dict_df["valor"].apply(lambda value: value.title()) + dict_df["nome_coluna"] = col_name dict_df["id_tabela"] = "microdados" @@ -274,9 +276,14 @@ def make_ranges(key, value): return dict_df[all_cols] -pd.concat( +dict_microdados = pd.concat( [ gen_unique_key_value(col_name, df) # type: ignore for col_name, df in dict_microdados_by_year.groupby("nome_coluna") ] -).to_csv(f"{OUTPUT}/dicionario_microdados.csv", index=False) +) + +dict_microdados.to_excel(f"{OUTPUT}/dicionario_microdados.xlsx", index=False) + + +pd.concat([dict_microdados, dict_by_table]).to_excel(f"{OUTPUT}/dicionario.xlsx", index=False) # type: ignore From 89ca1ac5e662764920ebca9a7caf8c809d989224 Mon Sep 17 00:00:00 2001 From: Pedro Castro Date: Thu, 21 Sep 2023 23:12:39 -0300 Subject: [PATCH 06/15] br_inep_enem: add dicionario.sql --- models/br_inep_enem/dicionario.sql | 14 ++++++++++++++ 1 file changed, 14 insertions(+) create mode 100644 models/br_inep_enem/dicionario.sql diff --git a/models/br_inep_enem/dicionario.sql b/models/br_inep_enem/dicionario.sql new file mode 100644 index 00000000..1a8ee86b --- /dev/null +++ b/models/br_inep_enem/dicionario.sql @@ -0,0 +1,14 @@ +{{ + config( + alias='dicionario', + schema='br_inep_enem' + ) +}} + +SELECT +SAFE_CAST(id_tabela AS STRING) id_tabela, +SAFE_CAST(coluna AS STRING) coluna, +SAFE_CAST(chave AS STRING) chave, +SAFE_CAST(cobertura_temporal AS STRING) cobertura_temporal, +SAFE_CAST(valor AS STRING) valor +FROM basedosdados-staging.br_inep_enem.dicionario AS t From 2ba572ba14a26a6ea51ef6e66ab3b2c11d8feb20 Mon Sep 17 00:00:00 2001 From: folhesgabriel Date: Fri, 22 Sep 2023 09:01:31 -0300 Subject: [PATCH 07/15] insere modelos de br_ibge_ipca, br_ibge_ipca15, br_ibge_inpc --- .../br_ibge_inpc/br_ibge_inpc__mes_brasil.sql | 28 ++++++++++++++++- .../br_ibge_inpc__mes_categoria_brasil.sql | 28 ++++++++++++++++- .../br_ibge_inpc__mes_categoria_municipio.sql | 30 ++++++++++++++++-- .../br_ibge_inpc__mes_categoria_rm.sql | 30 ++++++++++++++++-- .../br_ibge_ipca/br_ibge_ipca__mes_brasil.sql | 29 ++++++++++++++++- .../br_ibge_ipca__mes_categoria_brasil.sql | 30 ++++++++++++++++-- .../br_ibge_ipca__mes_categoria_municipio.sql | 30 ++++++++++++++++-- .../br_ibge_ipca__mes_categoria_rm.sql | 31 +++++++++++++++++-- .../br_ibge_ipca15__mes_brasil.sql | 28 ++++++++++++++++- .../br_ibge_ipca15__mes_categoria_brasil.sql | 30 ++++++++++++++++-- ...r_ibge_ipca15__mes_categoria_municipio.sql | 30 ++++++++++++++++-- .../br_ibge_ipca15__mes_categoria_rm.sql | 30 ++++++++++++++++-- 12 files changed, 334 insertions(+), 20 deletions(-) diff --git a/models/br_ibge_inpc/br_ibge_inpc__mes_brasil.sql b/models/br_ibge_inpc/br_ibge_inpc__mes_brasil.sql index d1586eee..0ed5d32b 100644 --- a/models/br_ibge_inpc/br_ibge_inpc__mes_brasil.sql +++ b/models/br_ibge_inpc/br_ibge_inpc__mes_brasil.sql @@ -1,4 +1,27 @@ -{{ config(alias='mes_brasil', schema='br_ibge_inpc') }} +{{ + config( + alias='mes_brasil', + schema='br_ibge_inpc', + materialized='incremental', + partition_by = { + "field": "ano", + "data_type": "int64", + "range": { + "start": 1979, + "end": 2024, + "interval": 1} + }, + pre_hook = "DROP ALL ROW ACCESS POLICIES ON {{ this }}", + post_hook=['CREATE OR REPLACE ROW ACCESS POLICY allusers_filter + ON {{this}} + GRANT TO ("allUsers") + FILTER USING (DATE_DIFF(DATE("{{ run_started_at.strftime("%Y-%m-%d") }}"),DATE(CAST(ano AS INT64),CAST(mes AS 
INT64),1), MONTH) > 6)', + 'CREATE OR REPLACE ROW ACCESS POLICY bdpro_filter + ON {{this}} + GRANT TO ("group:bd-pro@basedosdados.org", "group:sudo@basedosdados.org") + FILTER USING (DATE_DIFF(DATE("{{ run_started_at.strftime("%Y-%m-%d") }}"),DATE(CAST(ano AS INT64),CAST(mes AS INT64),1), MONTH) <= 6)'] + ) +}} SELECT SAFE_CAST(ano AS INT64) ano, SAFE_CAST(mes AS INT64) mes, @@ -9,3 +32,6 @@ SAFE_CAST(variacao_semestral AS FLOAT64) variacao_semestral, SAFE_CAST(variacao_anual AS FLOAT64) variacao_anual, SAFE_CAST(variacao_doze_meses AS FLOAT64) variacao_doze_meses FROM basedosdados-staging.br_ibge_inpc_staging.mes_brasil AS t +{% if is_incremental() %} +WHERE DATE(CAST(ano AS INT64),CAST(mes AS INT64),1) > (SELECT MAX(DATE(CAST(ano AS INT64),CAST(mes AS INT64),1)) FROM {{ this }} ) +{% endif %} \ No newline at end of file diff --git a/models/br_ibge_inpc/br_ibge_inpc__mes_categoria_brasil.sql b/models/br_ibge_inpc/br_ibge_inpc__mes_categoria_brasil.sql index 9c1eaa4c..c498a646 100644 --- a/models/br_ibge_inpc/br_ibge_inpc__mes_categoria_brasil.sql +++ b/models/br_ibge_inpc/br_ibge_inpc__mes_categoria_brasil.sql @@ -1,4 +1,27 @@ -{{ config(alias='mes_categoria_brasil', schema='br_ibge_inpc') }} +{{ + config( + alias='mes_categoria_brasil', + schema='br_ibge_inpc', + materialized='incremental', + partition_by = { + "field": "ano", + "data_type": "int64", + "range": { + "start": 2000, + "end": 2024, + "interval": 1} + }, + pre_hook = "DROP ALL ROW ACCESS POLICIES ON {{ this }}", + post_hook=['CREATE OR REPLACE ROW ACCESS POLICY allusers_filter + ON {{this}} + GRANT TO ("allUsers") + FILTER USING (DATE_DIFF(DATE("{{ run_started_at.strftime("%Y-%m-%d") }}"),DATE(CAST(ano AS INT64),CAST(mes AS INT64),1), MONTH) > 6)', + 'CREATE OR REPLACE ROW ACCESS POLICY bdpro_filter + ON {{this}} + GRANT TO ("group:bd-pro@basedosdados.org", "group:sudo@basedosdados.org") + FILTER USING (DATE_DIFF(DATE("{{ run_started_at.strftime("%Y-%m-%d") }}"),DATE(CAST(ano AS INT64),CAST(mes AS INT64),1), MONTH) <= 6)'] + ) +}} SELECT SAFE_CAST(ano AS INT64) ano, SAFE_CAST(mes AS INT64) mes, @@ -10,3 +33,6 @@ SAFE_CAST(variacao_mensal AS FLOAT64) variacao_mensal, SAFE_CAST(variacao_anual AS FLOAT64) variacao_anual, SAFE_CAST(variacao_doze_meses AS FLOAT64) variacao_doze_meses FROM basedosdados-staging.br_ibge_inpc_staging.mes_categoria_brasil AS t +{% if is_incremental() %} +WHERE DATE(CAST(ano AS INT64),CAST(mes AS INT64),1) > (SELECT MAX(DATE(CAST(ano AS INT64),CAST(mes AS INT64),1)) FROM {{ this }} ) +{% endif %} \ No newline at end of file diff --git a/models/br_ibge_inpc/br_ibge_inpc__mes_categoria_municipio.sql b/models/br_ibge_inpc/br_ibge_inpc__mes_categoria_municipio.sql index a318509b..28ec8515 100644 --- a/models/br_ibge_inpc/br_ibge_inpc__mes_categoria_municipio.sql +++ b/models/br_ibge_inpc/br_ibge_inpc__mes_categoria_municipio.sql @@ -1,4 +1,27 @@ -{{ config(alias='mes_categoria_municipio', schema='br_ibge_inpc') }} +{{ + config( + alias='mes_categoria_municipio', + schema='br_ibge_inpc', + materialized='incremental', + partition_by = { + "field": "ano", + "data_type": "int64", + "range": { + "start": 2000, + "end": 2024, + "interval": 1} + }, + pre_hook = "DROP ALL ROW ACCESS POLICIES ON {{ this }}", + post_hook=['CREATE OR REPLACE ROW ACCESS POLICY allusers_filter + ON {{this}} + GRANT TO ("allUsers") + FILTER USING (DATE_DIFF(DATE("{{ run_started_at.strftime("%Y-%m-%d") }}"),DATE(CAST(ano AS INT64),CAST(mes AS INT64),1), MONTH) > 6)', + 'CREATE OR REPLACE ROW ACCESS POLICY bdpro_filter + ON {{this}} + 
GRANT TO ("group:bd-pro@basedosdados.org", "group:sudo@basedosdados.org") + FILTER USING (DATE_DIFF(DATE("{{ run_started_at.strftime("%Y-%m-%d") }}"),DATE(CAST(ano AS INT64),CAST(mes AS INT64),1), MONTH) <= 6)'] + ) +}} SELECT SAFE_CAST(ano AS INT64) ano, SAFE_CAST(mes AS INT64) mes, @@ -10,4 +33,7 @@ SAFE_CAST(peso_mensal AS FLOAT64) peso_mensal, SAFE_CAST(variacao_mensal AS FLOAT64) variacao_mensal, SAFE_CAST(variacao_anual AS FLOAT64) variacao_anual, SAFE_CAST(variacao_doze_meses AS FLOAT64) variacao_doze_meses -FROM basedosdados-staging.br_ibge_inpc_staging.mes_categoria_municipio AS t \ No newline at end of file +FROM basedosdados-staging.br_ibge_inpc_staging.mes_categoria_municipio AS t +{% if is_incremental() %} +WHERE DATE(CAST(ano AS INT64),CAST(mes AS INT64),1) > (SELECT MAX(DATE(CAST(ano AS INT64),CAST(mes AS INT64),1)) FROM {{ this }} ) +{% endif %} \ No newline at end of file diff --git a/models/br_ibge_inpc/br_ibge_inpc__mes_categoria_rm.sql b/models/br_ibge_inpc/br_ibge_inpc__mes_categoria_rm.sql index 1cad03c2..571a8f5e 100644 --- a/models/br_ibge_inpc/br_ibge_inpc__mes_categoria_rm.sql +++ b/models/br_ibge_inpc/br_ibge_inpc__mes_categoria_rm.sql @@ -1,4 +1,27 @@ -{{ config(alias='mes_categoria_rm', schema='br_ibge_inpc') }} +{{ + config( + alias='mes_categoria_rm', + schema='br_ibge_inpc', + materialized='incremental', + partition_by = { + "field": "ano", + "data_type": "int64", + "range": { + "start": 2000, + "end": 2024, + "interval": 1} + }, + pre_hook = "DROP ALL ROW ACCESS POLICIES ON {{ this }}", + post_hook=['CREATE OR REPLACE ROW ACCESS POLICY allusers_filter + ON {{this}} + GRANT TO ("allUsers") + FILTER USING (DATE_DIFF(DATE("{{ run_started_at.strftime("%Y-%m-%d") }}"),DATE(CAST(ano AS INT64),CAST(mes AS INT64),1), MONTH) > 6)', + 'CREATE OR REPLACE ROW ACCESS POLICY bdpro_filter + ON {{this}} + GRANT TO ("group:bd-pro@basedosdados.org", "group:sudo@basedosdados.org") + FILTER USING (DATE_DIFF(DATE("{{ run_started_at.strftime("%Y-%m-%d") }}"),DATE(CAST(ano AS INT64),CAST(mes AS INT64),1), MONTH) <= 6)'] + ) +}} SELECT SAFE_CAST(ano AS INT64) ano, SAFE_CAST(mes AS INT64) mes, @@ -10,4 +33,7 @@ SAFE_CAST(peso_mensal AS FLOAT64) peso_mensal, SAFE_CAST(variacao_mensal AS FLOAT64) variacao_mensal, SAFE_CAST(variacao_anual AS FLOAT64) variacao_anual, SAFE_CAST(variacao_doze_meses AS FLOAT64) variacao_doze_meses -FROM basedosdados-staging.br_ibge_inpc_staging.mes_categoria_rm AS t \ No newline at end of file +FROM basedosdados-staging.br_ibge_inpc_staging.mes_categoria_rm AS t +{% if is_incremental() %} +WHERE DATE(CAST(ano AS INT64),CAST(mes AS INT64),1) > (SELECT MAX(DATE(CAST(ano AS INT64),CAST(mes AS INT64),1)) FROM {{ this }} ) +{% endif %} \ No newline at end of file diff --git a/models/br_ibge_ipca/br_ibge_ipca__mes_brasil.sql b/models/br_ibge_ipca/br_ibge_ipca__mes_brasil.sql index 491b37d4..942aecce 100644 --- a/models/br_ibge_ipca/br_ibge_ipca__mes_brasil.sql +++ b/models/br_ibge_ipca/br_ibge_ipca__mes_brasil.sql @@ -1,4 +1,27 @@ -{{ config(alias='mes_brasil', schema='br_ibge_ipca') }} +{{ + config( + alias='mes_brasil', + schema='br_ibge_ipca', + materialized='incremental', + partition_by = { + "field": "ano", + "data_type": "int64", + "range": { + "start": 1979, + "end": 2024, + "interval": 1} + }, + pre_hook = "DROP ALL ROW ACCESS POLICIES ON {{ this }}", + post_hook=['CREATE OR REPLACE ROW ACCESS POLICY allusers_filter + ON {{this}} + GRANT TO ("allUsers") + FILTER USING (DATE_DIFF(DATE("{{ run_started_at.strftime("%Y-%m-%d") }}"),DATE(CAST(ano AS 
INT64),CAST(mes AS INT64),1), MONTH) > 6)', + 'CREATE OR REPLACE ROW ACCESS POLICY bdpro_filter + ON {{this}} + GRANT TO ("group:bd-pro@basedosdados.org", "group:sudo@basedosdados.org") + FILTER USING (DATE_DIFF(DATE("{{ run_started_at.strftime("%Y-%m-%d") }}"),DATE(CAST(ano AS INT64),CAST(mes AS INT64),1), MONTH) <= 6)'] + ) +}} SELECT SAFE_CAST(ano AS INT64) ano, SAFE_CAST(mes AS INT64) mes, @@ -9,3 +32,7 @@ SAFE_CAST(variacao_semestral AS FLOAT64) variacao_semestral, SAFE_CAST(variacao_anual AS FLOAT64) variacao_anual, SAFE_CAST(variacao_doze_meses AS FLOAT64) variacao_doze_meses FROM basedosdados-staging.br_ibge_ipca_staging.mes_brasil AS t + +{% if is_incremental() %} +WHERE DATE(CAST(ano AS INT64),CAST(mes AS INT64),1) > (SELECT MAX(DATE(CAST(ano AS INT64),CAST(mes AS INT64),1)) FROM {{ this }} ) +{% endif %} \ No newline at end of file diff --git a/models/br_ibge_ipca/br_ibge_ipca__mes_categoria_brasil.sql b/models/br_ibge_ipca/br_ibge_ipca__mes_categoria_brasil.sql index 22f2044c..5df9a92d 100644 --- a/models/br_ibge_ipca/br_ibge_ipca__mes_categoria_brasil.sql +++ b/models/br_ibge_ipca/br_ibge_ipca__mes_categoria_brasil.sql @@ -1,4 +1,27 @@ -{{ config(alias='mes_categoria_brasil', schema='br_ibge_ipca') }} +{{ + config( + alias='mes_categoria_brasil', + schema='br_ibge_ipca', + materialized='incremental', + partition_by = { + "field": "ano", + "data_type": "int64", + "range": { + "start": 2020, + "end": 2024, + "interval": 1} + }, + pre_hook = "DROP ALL ROW ACCESS POLICIES ON {{ this }}", + post_hook=['CREATE OR REPLACE ROW ACCESS POLICY allusers_filter + ON {{this}} + GRANT TO ("allUsers") + FILTER USING (DATE_DIFF(DATE("{{ run_started_at.strftime("%Y-%m-%d") }}"),DATE(CAST(ano AS INT64),CAST(mes AS INT64),1), MONTH) > 6)', + 'CREATE OR REPLACE ROW ACCESS POLICY bdpro_filter + ON {{this}} + GRANT TO ("group:bd-pro@basedosdados.org", "group:sudo@basedosdados.org") + FILTER USING (DATE_DIFF(DATE("{{ run_started_at.strftime("%Y-%m-%d") }}"),DATE(CAST(ano AS INT64),CAST(mes AS INT64),1), MONTH) <= 6)'] + ) +}} SELECT SAFE_CAST(ano AS INT64) ano, SAFE_CAST(mes AS INT64) mes, @@ -9,4 +32,7 @@ SAFE_CAST(peso_mensal AS FLOAT64) peso_mensal, SAFE_CAST(variacao_mensal AS FLOAT64) variacao_mensal, SAFE_CAST(variacao_anual AS FLOAT64) variacao_anual, SAFE_CAST(variacao_doze_meses AS FLOAT64) variacao_doze_meses -FROM basedosdados-staging.br_ibge_ipca_staging.mes_categoria_brasil AS t \ No newline at end of file +FROM basedosdados-staging.br_ibge_ipca_staging.mes_categoria_brasil AS t +{% if is_incremental() %} +WHERE DATE(CAST(ano AS INT64),CAST(mes AS INT64),1) > (SELECT MAX(DATE(CAST(ano AS INT64),CAST(mes AS INT64),1)) FROM {{ this }} ) +{% endif %} \ No newline at end of file diff --git a/models/br_ibge_ipca/br_ibge_ipca__mes_categoria_municipio.sql b/models/br_ibge_ipca/br_ibge_ipca__mes_categoria_municipio.sql index 3189ecdd..6ee74cf9 100644 --- a/models/br_ibge_ipca/br_ibge_ipca__mes_categoria_municipio.sql +++ b/models/br_ibge_ipca/br_ibge_ipca__mes_categoria_municipio.sql @@ -1,4 +1,27 @@ -{{ config(alias='mes_categoria_municipio', schema='br_ibge_ipca') }} +{{ + config( + alias='mes_categoria_municipio', + schema='br_ibge_ipca', + materialized='incremental', + partition_by = { + "field": "ano", + "data_type": "int64", + "range": { + "start": 2020, + "end": 2024, + "interval": 1} + }, + pre_hook = "DROP ALL ROW ACCESS POLICIES ON {{ this }}", + post_hook=['CREATE OR REPLACE ROW ACCESS POLICY allusers_filter + ON {{this}} + GRANT TO ("allUsers") + FILTER USING (DATE_DIFF(DATE("{{ 
run_started_at.strftime("%Y-%m-%d") }}"),DATE(CAST(ano AS INT64),CAST(mes AS INT64),1), MONTH) > 6)', + 'CREATE OR REPLACE ROW ACCESS POLICY bdpro_filter + ON {{this}} + GRANT TO ("group:bd-pro@basedosdados.org", "group:sudo@basedosdados.org") + FILTER USING (DATE_DIFF(DATE("{{ run_started_at.strftime("%Y-%m-%d") }}"),DATE(CAST(ano AS INT64),CAST(mes AS INT64),1), MONTH) <= 6)'] + ) +}} SELECT SAFE_CAST(ano AS INT64) ano, SAFE_CAST(mes AS INT64) mes, @@ -10,4 +33,7 @@ SAFE_CAST(peso_mensal AS FLOAT64) peso_mensal, SAFE_CAST(variacao_mensal AS FLOAT64) variacao_mensal, SAFE_CAST(variacao_anual AS FLOAT64) variacao_anual, SAFE_CAST(variacao_doze_meses AS FLOAT64) variacao_doze_meses -FROM basedosdados-staging.br_ibge_ipca_staging.mes_categoria_municipio AS t \ No newline at end of file +FROM basedosdados-staging.br_ibge_ipca_staging.mes_categoria_municipio AS t +{% if is_incremental() %} +WHERE DATE(CAST(ano AS INT64),CAST(mes AS INT64),1) > (SELECT MAX(DATE(CAST(ano AS INT64),CAST(mes AS INT64),1)) FROM {{ this }} ) +{% endif %} diff --git a/models/br_ibge_ipca/br_ibge_ipca__mes_categoria_rm.sql b/models/br_ibge_ipca/br_ibge_ipca__mes_categoria_rm.sql index 7be742b6..366e0b44 100644 --- a/models/br_ibge_ipca/br_ibge_ipca__mes_categoria_rm.sql +++ b/models/br_ibge_ipca/br_ibge_ipca__mes_categoria_rm.sql @@ -1,4 +1,28 @@ -{{ config(alias='mes_categoria_rm', schema='br_ibge_ipca') }} +{{ + config( + alias='mes_categoria_rm', + schema='br_ibge_ipca', + materialized='incremental', + partition_by = { + "field": "ano", + "data_type": "int64", + "range": { + "start": 2020, + "end": 2024, + "interval": 1} + }, + pre_hook = "DROP ALL ROW ACCESS POLICIES ON {{ this }}", + post_hook=['CREATE OR REPLACE ROW ACCESS POLICY allusers_filter + ON {{this}} + GRANT TO ("allUsers") + FILTER USING (DATE_DIFF(DATE("{{ run_started_at.strftime("%Y-%m-%d") }}"),DATE(CAST(ano AS INT64),CAST(mes AS INT64),1), MONTH) > 6)', + 'CREATE OR REPLACE ROW ACCESS POLICY bdpro_filter + ON {{this}} + GRANT TO ("group:bd-pro@basedosdados.org", "group:sudo@basedosdados.org") + FILTER USING (DATE_DIFF(DATE("{{ run_started_at.strftime("%Y-%m-%d") }}"),DATE(CAST(ano AS INT64),CAST(mes AS INT64),1), MONTH) <= 6)'] + ) +}} + SELECT SAFE_CAST(ano AS INT64) ano, SAFE_CAST(mes AS INT64) mes, @@ -10,4 +34,7 @@ SAFE_CAST(peso_mensal AS FLOAT64) peso_mensal, SAFE_CAST(variacao_mensal AS FLOAT64) variacao_mensal, SAFE_CAST(variacao_anual AS FLOAT64) variacao_anual, SAFE_CAST(variacao_doze_meses AS FLOAT64) variacao_doze_meses -FROM basedosdados-staging.br_ibge_ipca_staging.mes_categoria_rm AS t \ No newline at end of file +FROM basedosdados-staging.br_ibge_ipca_staging.mes_categoria_rm AS t +{% if is_incremental() %} +WHERE DATE(CAST(ano AS INT64),CAST(mes AS INT64),1) > (SELECT MAX(DATE(CAST(ano AS INT64),CAST(mes AS INT64),1)) FROM {{ this }} ) +{% endif %} \ No newline at end of file diff --git a/models/br_ibge_ipca15/br_ibge_ipca15__mes_brasil.sql b/models/br_ibge_ipca15/br_ibge_ipca15__mes_brasil.sql index 4929816e..25d9d477 100644 --- a/models/br_ibge_ipca15/br_ibge_ipca15__mes_brasil.sql +++ b/models/br_ibge_ipca15/br_ibge_ipca15__mes_brasil.sql @@ -1,4 +1,27 @@ -{{ config(alias='mes_brasil', schema='br_ibge_ipca15') }} +{{ + config( + alias='mes_brasil', + schema='br_ibge_ipca15', + materialized='incremental', + partition_by = { + "field": "ano", + "data_type": "int64", + "range": { + "start": 2000, + "end": 2024, + "interval": 1} + }, + pre_hook = "DROP ALL ROW ACCESS POLICIES ON {{ this }}", + post_hook=['CREATE OR REPLACE ROW 
ACCESS POLICY allusers_filter + ON {{this}} + GRANT TO ("allUsers") + FILTER USING (DATE_DIFF(DATE("{{ run_started_at.strftime("%Y-%m-%d") }}"),DATE(CAST(ano AS INT64),CAST(mes AS INT64),1), MONTH) > 6)', + 'CREATE OR REPLACE ROW ACCESS POLICY bdpro_filter + ON {{this}} + GRANT TO ("group:bd-pro@basedosdados.org", "group:sudo@basedosdados.org") + FILTER USING (DATE_DIFF(DATE("{{ run_started_at.strftime("%Y-%m-%d") }}"),DATE(CAST(ano AS INT64),CAST(mes AS INT64),1), MONTH) <= 6)'] + ) +}} SELECT SAFE_CAST(ano AS INT64) ano, SAFE_CAST(mes AS INT64) mes, @@ -9,3 +32,6 @@ SAFE_CAST(variacao_semestral AS FLOAT64) variacao_semestral, SAFE_CAST(variacao_anual AS FLOAT64) variacao_anual, SAFE_CAST(variacao_doze_meses AS FLOAT64) variacao_doze_meses FROM basedosdados-staging.br_ibge_ipca15_staging.mes_brasil AS t +{% if is_incremental() %} +WHERE DATE(CAST(ano AS INT64),CAST(mes AS INT64),1) > (SELECT MAX(DATE(CAST(ano AS INT64),CAST(mes AS INT64),1)) FROM {{ this }} ) +{% endif %} \ No newline at end of file diff --git a/models/br_ibge_ipca15/br_ibge_ipca15__mes_categoria_brasil.sql b/models/br_ibge_ipca15/br_ibge_ipca15__mes_categoria_brasil.sql index 34620f30..a04dae50 100644 --- a/models/br_ibge_ipca15/br_ibge_ipca15__mes_categoria_brasil.sql +++ b/models/br_ibge_ipca15/br_ibge_ipca15__mes_categoria_brasil.sql @@ -1,4 +1,27 @@ -{{ config(alias='mes_categoria_brasil', schema='br_ibge_ipca15') }} +{{ + config( + alias='mes_categoria_brasil', + schema='br_ibge_ipca15', + materialized='incremental', + partition_by = { + "field": "ano", + "data_type": "int64", + "range": { + "start": 2020, + "end": 2024, + "interval": 1} + }, + pre_hook = "DROP ALL ROW ACCESS POLICIES ON {{ this }}", + post_hook=['CREATE OR REPLACE ROW ACCESS POLICY allusers_filter + ON {{this}} + GRANT TO ("allUsers") + FILTER USING (DATE_DIFF(DATE("{{ run_started_at.strftime("%Y-%m-%d") }}"),DATE(CAST(ano AS INT64),CAST(mes AS INT64),1), MONTH) > 6)', + 'CREATE OR REPLACE ROW ACCESS POLICY bdpro_filter + ON {{this}} + GRANT TO ("group:bd-pro@basedosdados.org", "group:sudo@basedosdados.org") + FILTER USING (DATE_DIFF(DATE("{{ run_started_at.strftime("%Y-%m-%d") }}"),DATE(CAST(ano AS INT64),CAST(mes AS INT64),1), MONTH) <= 6)'] + ) +}} SELECT SAFE_CAST(ano AS INT64) ano, SAFE_CAST(mes AS INT64) mes, @@ -9,4 +32,7 @@ SAFE_CAST(peso_mensal AS FLOAT64) peso_mensal, SAFE_CAST(variacao_mensal AS FLOAT64) variacao_mensal, SAFE_CAST(variacao_anual AS FLOAT64) variacao_anual, SAFE_CAST(variacao_doze_meses AS FLOAT64) variacao_doze_meses -FROM basedosdados-staging.br_ibge_ipca15_staging.mes_categoria_brasil AS t \ No newline at end of file +FROM basedosdados-staging.br_ibge_ipca15_staging.mes_categoria_brasil AS t +{% if is_incremental() %} +WHERE DATE(CAST(ano AS INT64),CAST(mes AS INT64),1) > (SELECT MAX(DATE(CAST(ano AS INT64),CAST(mes AS INT64),1)) FROM {{ this }} ) +{% endif %} \ No newline at end of file diff --git a/models/br_ibge_ipca15/br_ibge_ipca15__mes_categoria_municipio.sql b/models/br_ibge_ipca15/br_ibge_ipca15__mes_categoria_municipio.sql index 93cab180..a4400dca 100644 --- a/models/br_ibge_ipca15/br_ibge_ipca15__mes_categoria_municipio.sql +++ b/models/br_ibge_ipca15/br_ibge_ipca15__mes_categoria_municipio.sql @@ -1,4 +1,27 @@ -{{ config(alias='mes_categoria_municipio', schema='br_ibge_ipca15') }} +{{ + config( + alias='mes_categoria_municipio', + schema='br_ibge_ipca15', + materialized='incremental', + partition_by = { + "field": "ano", + "data_type": "int64", + "range": { + "start": 2020, + "end": 2024, + "interval": 1} + 
}, + pre_hook = "DROP ALL ROW ACCESS POLICIES ON {{ this }}", + post_hook=['CREATE OR REPLACE ROW ACCESS POLICY allusers_filter + ON {{this}} + GRANT TO ("allUsers") + FILTER USING (DATE_DIFF(DATE("{{ run_started_at.strftime("%Y-%m-%d") }}"),DATE(CAST(ano AS INT64),CAST(mes AS INT64),1), MONTH) > 6)', + 'CREATE OR REPLACE ROW ACCESS POLICY bdpro_filter + ON {{this}} + GRANT TO ("group:bd-pro@basedosdados.org", "group:sudo@basedosdados.org") + FILTER USING (DATE_DIFF(DATE("{{ run_started_at.strftime("%Y-%m-%d") }}"),DATE(CAST(ano AS INT64),CAST(mes AS INT64),1), MONTH) <= 6)'] + ) +}} SELECT SAFE_CAST(ano AS INT64) ano, SAFE_CAST(mes AS INT64) mes, @@ -10,4 +33,7 @@ SAFE_CAST(peso_mensal AS FLOAT64) peso_mensal, SAFE_CAST(variacao_mensal AS FLOAT64) variacao_mensal, SAFE_CAST(variacao_anual AS FLOAT64) variacao_anual, SAFE_CAST(variacao_doze_meses AS FLOAT64) variacao_doze_meses -FROM basedosdados-staging.br_ibge_ipca15_staging.mes_categoria_municipio AS t \ No newline at end of file +FROM basedosdados-staging.br_ibge_ipca15_staging.mes_categoria_municipio AS t +{% if is_incremental() %} +WHERE DATE(CAST(ano AS INT64),CAST(mes AS INT64),1) > (SELECT MAX(DATE(CAST(ano AS INT64),CAST(mes AS INT64),1)) FROM {{ this }} ) +{% endif %} \ No newline at end of file diff --git a/models/br_ibge_ipca15/br_ibge_ipca15__mes_categoria_rm.sql b/models/br_ibge_ipca15/br_ibge_ipca15__mes_categoria_rm.sql index db03b31e..98324365 100644 --- a/models/br_ibge_ipca15/br_ibge_ipca15__mes_categoria_rm.sql +++ b/models/br_ibge_ipca15/br_ibge_ipca15__mes_categoria_rm.sql @@ -1,4 +1,27 @@ -{{ config(alias='mes_categoria_rm', schema='br_ibge_ipca15') }} +{{ + config( + alias='mes_categoria_rm', + schema='br_ibge_ipca15', + materialized='incremental', + partition_by = { + "field": "ano", + "data_type": "int64", + "range": { + "start": 2020, + "end": 2024, + "interval": 1} + }, + pre_hook = "DROP ALL ROW ACCESS POLICIES ON {{ this }}", + post_hook=['CREATE OR REPLACE ROW ACCESS POLICY allusers_filter + ON {{this}} + GRANT TO ("allUsers") + FILTER USING (DATE_DIFF(DATE("{{ run_started_at.strftime("%Y-%m-%d") }}"),DATE(CAST(ano AS INT64),CAST(mes AS INT64),1), MONTH) > 6)', + 'CREATE OR REPLACE ROW ACCESS POLICY bdpro_filter + ON {{this}} + GRANT TO ("group:bd-pro@basedosdados.org", "group:sudo@basedosdados.org") + FILTER USING (DATE_DIFF(DATE("{{ run_started_at.strftime("%Y-%m-%d") }}"),DATE(CAST(ano AS INT64),CAST(mes AS INT64),1), MONTH) <= 6)'] + ) +}} SELECT SAFE_CAST(ano AS INT64) ano, SAFE_CAST(mes AS INT64) mes, @@ -10,4 +33,7 @@ SAFE_CAST(peso_mensal AS FLOAT64) peso_mensal, SAFE_CAST(variacao_mensal AS FLOAT64) variacao_mensal, SAFE_CAST(variacao_anual AS FLOAT64) variacao_anual, SAFE_CAST(variacao_doze_meses AS FLOAT64) variacao_doze_meses -FROM basedosdados-staging.br_ibge_ipca15_staging.mes_categoria_rm AS t \ No newline at end of file +FROM basedosdados-staging.br_ibge_ipca15_staging.mes_categoria_rm AS t +{% if is_incremental() %} +WHERE DATE(CAST(ano AS INT64),CAST(mes AS INT64),1) > (SELECT MAX(DATE(CAST(ano AS INT64),CAST(mes AS INT64),1)) FROM {{ this }} ) +{% endif %} \ No newline at end of file From f06a4f2e4c9a2ecaa874684a272ef4cc82496a7d Mon Sep 17 00:00:00 2001 From: folhesgabriel Date: Fri, 22 Sep 2023 09:24:33 -0300 Subject: [PATCH 08/15] =?UTF-8?q?retira=20parti=C3=A7=C3=A3o?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- models/br_ibge_inpc/br_ibge_inpc__mes_brasil.sql | 8 -------- .../br_ibge_inpc/br_ibge_inpc__mes_categoria_brasil.sql | 8 
-------- .../br_ibge_inpc__mes_categoria_municipio.sql | 8 -------- models/br_ibge_inpc/br_ibge_inpc__mes_categoria_rm.sql | 8 -------- models/br_ibge_ipca/br_ibge_ipca__mes_brasil.sql | 8 -------- .../br_ibge_ipca/br_ibge_ipca__mes_categoria_brasil.sql | 8 -------- .../br_ibge_ipca__mes_categoria_municipio.sql | 8 -------- models/br_ibge_ipca/br_ibge_ipca__mes_categoria_rm.sql | 8 -------- models/br_ibge_ipca15/br_ibge_ipca15__mes_brasil.sql | 8 -------- .../br_ibge_ipca15__mes_categoria_brasil.sql | 8 -------- .../br_ibge_ipca15__mes_categoria_municipio.sql | 8 -------- .../br_ibge_ipca15/br_ibge_ipca15__mes_categoria_rm.sql | 8 -------- 12 files changed, 96 deletions(-) diff --git a/models/br_ibge_inpc/br_ibge_inpc__mes_brasil.sql b/models/br_ibge_inpc/br_ibge_inpc__mes_brasil.sql index 0ed5d32b..d7912e4b 100644 --- a/models/br_ibge_inpc/br_ibge_inpc__mes_brasil.sql +++ b/models/br_ibge_inpc/br_ibge_inpc__mes_brasil.sql @@ -3,14 +3,6 @@ alias='mes_brasil', schema='br_ibge_inpc', materialized='incremental', - partition_by = { - "field": "ano", - "data_type": "int64", - "range": { - "start": 1979, - "end": 2024, - "interval": 1} - }, pre_hook = "DROP ALL ROW ACCESS POLICIES ON {{ this }}", post_hook=['CREATE OR REPLACE ROW ACCESS POLICY allusers_filter ON {{this}} diff --git a/models/br_ibge_inpc/br_ibge_inpc__mes_categoria_brasil.sql b/models/br_ibge_inpc/br_ibge_inpc__mes_categoria_brasil.sql index c498a646..2b9094b8 100644 --- a/models/br_ibge_inpc/br_ibge_inpc__mes_categoria_brasil.sql +++ b/models/br_ibge_inpc/br_ibge_inpc__mes_categoria_brasil.sql @@ -3,14 +3,6 @@ alias='mes_categoria_brasil', schema='br_ibge_inpc', materialized='incremental', - partition_by = { - "field": "ano", - "data_type": "int64", - "range": { - "start": 2000, - "end": 2024, - "interval": 1} - }, pre_hook = "DROP ALL ROW ACCESS POLICIES ON {{ this }}", post_hook=['CREATE OR REPLACE ROW ACCESS POLICY allusers_filter ON {{this}} diff --git a/models/br_ibge_inpc/br_ibge_inpc__mes_categoria_municipio.sql b/models/br_ibge_inpc/br_ibge_inpc__mes_categoria_municipio.sql index 28ec8515..51477a72 100644 --- a/models/br_ibge_inpc/br_ibge_inpc__mes_categoria_municipio.sql +++ b/models/br_ibge_inpc/br_ibge_inpc__mes_categoria_municipio.sql @@ -3,14 +3,6 @@ alias='mes_categoria_municipio', schema='br_ibge_inpc', materialized='incremental', - partition_by = { - "field": "ano", - "data_type": "int64", - "range": { - "start": 2000, - "end": 2024, - "interval": 1} - }, pre_hook = "DROP ALL ROW ACCESS POLICIES ON {{ this }}", post_hook=['CREATE OR REPLACE ROW ACCESS POLICY allusers_filter ON {{this}} diff --git a/models/br_ibge_inpc/br_ibge_inpc__mes_categoria_rm.sql b/models/br_ibge_inpc/br_ibge_inpc__mes_categoria_rm.sql index 571a8f5e..a5a90b87 100644 --- a/models/br_ibge_inpc/br_ibge_inpc__mes_categoria_rm.sql +++ b/models/br_ibge_inpc/br_ibge_inpc__mes_categoria_rm.sql @@ -3,14 +3,6 @@ alias='mes_categoria_rm', schema='br_ibge_inpc', materialized='incremental', - partition_by = { - "field": "ano", - "data_type": "int64", - "range": { - "start": 2000, - "end": 2024, - "interval": 1} - }, pre_hook = "DROP ALL ROW ACCESS POLICIES ON {{ this }}", post_hook=['CREATE OR REPLACE ROW ACCESS POLICY allusers_filter ON {{this}} diff --git a/models/br_ibge_ipca/br_ibge_ipca__mes_brasil.sql b/models/br_ibge_ipca/br_ibge_ipca__mes_brasil.sql index 942aecce..cbd0cebd 100644 --- a/models/br_ibge_ipca/br_ibge_ipca__mes_brasil.sql +++ b/models/br_ibge_ipca/br_ibge_ipca__mes_brasil.sql @@ -3,14 +3,6 @@ alias='mes_brasil', 
schema='br_ibge_ipca', materialized='incremental', - partition_by = { - "field": "ano", - "data_type": "int64", - "range": { - "start": 1979, - "end": 2024, - "interval": 1} - }, pre_hook = "DROP ALL ROW ACCESS POLICIES ON {{ this }}", post_hook=['CREATE OR REPLACE ROW ACCESS POLICY allusers_filter ON {{this}} diff --git a/models/br_ibge_ipca/br_ibge_ipca__mes_categoria_brasil.sql b/models/br_ibge_ipca/br_ibge_ipca__mes_categoria_brasil.sql index 5df9a92d..8248ebec 100644 --- a/models/br_ibge_ipca/br_ibge_ipca__mes_categoria_brasil.sql +++ b/models/br_ibge_ipca/br_ibge_ipca__mes_categoria_brasil.sql @@ -3,14 +3,6 @@ alias='mes_categoria_brasil', schema='br_ibge_ipca', materialized='incremental', - partition_by = { - "field": "ano", - "data_type": "int64", - "range": { - "start": 2020, - "end": 2024, - "interval": 1} - }, pre_hook = "DROP ALL ROW ACCESS POLICIES ON {{ this }}", post_hook=['CREATE OR REPLACE ROW ACCESS POLICY allusers_filter ON {{this}} diff --git a/models/br_ibge_ipca/br_ibge_ipca__mes_categoria_municipio.sql b/models/br_ibge_ipca/br_ibge_ipca__mes_categoria_municipio.sql index 6ee74cf9..0da922b2 100644 --- a/models/br_ibge_ipca/br_ibge_ipca__mes_categoria_municipio.sql +++ b/models/br_ibge_ipca/br_ibge_ipca__mes_categoria_municipio.sql @@ -3,14 +3,6 @@ alias='mes_categoria_municipio', schema='br_ibge_ipca', materialized='incremental', - partition_by = { - "field": "ano", - "data_type": "int64", - "range": { - "start": 2020, - "end": 2024, - "interval": 1} - }, pre_hook = "DROP ALL ROW ACCESS POLICIES ON {{ this }}", post_hook=['CREATE OR REPLACE ROW ACCESS POLICY allusers_filter ON {{this}} diff --git a/models/br_ibge_ipca/br_ibge_ipca__mes_categoria_rm.sql b/models/br_ibge_ipca/br_ibge_ipca__mes_categoria_rm.sql index 366e0b44..371c762f 100644 --- a/models/br_ibge_ipca/br_ibge_ipca__mes_categoria_rm.sql +++ b/models/br_ibge_ipca/br_ibge_ipca__mes_categoria_rm.sql @@ -3,14 +3,6 @@ alias='mes_categoria_rm', schema='br_ibge_ipca', materialized='incremental', - partition_by = { - "field": "ano", - "data_type": "int64", - "range": { - "start": 2020, - "end": 2024, - "interval": 1} - }, pre_hook = "DROP ALL ROW ACCESS POLICIES ON {{ this }}", post_hook=['CREATE OR REPLACE ROW ACCESS POLICY allusers_filter ON {{this}} diff --git a/models/br_ibge_ipca15/br_ibge_ipca15__mes_brasil.sql b/models/br_ibge_ipca15/br_ibge_ipca15__mes_brasil.sql index 25d9d477..05f5361b 100644 --- a/models/br_ibge_ipca15/br_ibge_ipca15__mes_brasil.sql +++ b/models/br_ibge_ipca15/br_ibge_ipca15__mes_brasil.sql @@ -3,14 +3,6 @@ alias='mes_brasil', schema='br_ibge_ipca15', materialized='incremental', - partition_by = { - "field": "ano", - "data_type": "int64", - "range": { - "start": 2000, - "end": 2024, - "interval": 1} - }, pre_hook = "DROP ALL ROW ACCESS POLICIES ON {{ this }}", post_hook=['CREATE OR REPLACE ROW ACCESS POLICY allusers_filter ON {{this}} diff --git a/models/br_ibge_ipca15/br_ibge_ipca15__mes_categoria_brasil.sql b/models/br_ibge_ipca15/br_ibge_ipca15__mes_categoria_brasil.sql index a04dae50..dd83d513 100644 --- a/models/br_ibge_ipca15/br_ibge_ipca15__mes_categoria_brasil.sql +++ b/models/br_ibge_ipca15/br_ibge_ipca15__mes_categoria_brasil.sql @@ -3,14 +3,6 @@ alias='mes_categoria_brasil', schema='br_ibge_ipca15', materialized='incremental', - partition_by = { - "field": "ano", - "data_type": "int64", - "range": { - "start": 2020, - "end": 2024, - "interval": 1} - }, pre_hook = "DROP ALL ROW ACCESS POLICIES ON {{ this }}", post_hook=['CREATE OR REPLACE ROW ACCESS POLICY allusers_filter ON 
{{this}} diff --git a/models/br_ibge_ipca15/br_ibge_ipca15__mes_categoria_municipio.sql b/models/br_ibge_ipca15/br_ibge_ipca15__mes_categoria_municipio.sql index a4400dca..f6746760 100644 --- a/models/br_ibge_ipca15/br_ibge_ipca15__mes_categoria_municipio.sql +++ b/models/br_ibge_ipca15/br_ibge_ipca15__mes_categoria_municipio.sql @@ -3,14 +3,6 @@ alias='mes_categoria_municipio', schema='br_ibge_ipca15', materialized='incremental', - partition_by = { - "field": "ano", - "data_type": "int64", - "range": { - "start": 2020, - "end": 2024, - "interval": 1} - }, pre_hook = "DROP ALL ROW ACCESS POLICIES ON {{ this }}", post_hook=['CREATE OR REPLACE ROW ACCESS POLICY allusers_filter ON {{this}} diff --git a/models/br_ibge_ipca15/br_ibge_ipca15__mes_categoria_rm.sql b/models/br_ibge_ipca15/br_ibge_ipca15__mes_categoria_rm.sql index 98324365..d800cce9 100644 --- a/models/br_ibge_ipca15/br_ibge_ipca15__mes_categoria_rm.sql +++ b/models/br_ibge_ipca15/br_ibge_ipca15__mes_categoria_rm.sql @@ -3,14 +3,6 @@ alias='mes_categoria_rm', schema='br_ibge_ipca15', materialized='incremental', - partition_by = { - "field": "ano", - "data_type": "int64", - "range": { - "start": 2020, - "end": 2024, - "interval": 1} - }, pre_hook = "DROP ALL ROW ACCESS POLICIES ON {{ this }}", post_hook=['CREATE OR REPLACE ROW ACCESS POLICY allusers_filter ON {{this}} From d239f753761234e48ab686a7709cd5e1ae523391 Mon Sep 17 00:00:00 2001 From: Pedro Castro Date: Fri, 22 Sep 2023 11:16:18 -0300 Subject: [PATCH 09/15] fix caps lock --- models/br_inep_enem/code/main.py | 1 + 1 file changed, 1 insertion(+) diff --git a/models/br_inep_enem/code/main.py b/models/br_inep_enem/code/main.py index 2b3cbe80..2ccd54f4 100644 --- a/models/br_inep_enem/code/main.py +++ b/models/br_inep_enem/code/main.py @@ -265,6 +265,7 @@ def drop_temporal_cov(key, temporal_cov): "tipo_prova_matematica", "tipo_prova_ciencias_natureza", "tipo_prova_ciencias_humanas", + "tipo_prova_linguagens_codigos" ]: dict_df["valor"] = dict_df["valor"].apply(lambda value: value.title()) From cfeebb9a58c20b7496ee890c508a647747242f45 Mon Sep 17 00:00:00 2001 From: Pedro Castro Date: Fri, 22 Sep 2023 11:46:34 -0300 Subject: [PATCH 10/15] rename file --- ...icionario.sql => br_inep_enem__dicionario.sql} | 0 models/br_inep_enem/code/main.py | 15 ++++++++++++++- 2 files changed, 14 insertions(+), 1 deletion(-) rename models/br_inep_enem/{dicionario.sql => br_inep_enem__dicionario.sql} (100%) diff --git a/models/br_inep_enem/dicionario.sql b/models/br_inep_enem/br_inep_enem__dicionario.sql similarity index 100% rename from models/br_inep_enem/dicionario.sql rename to models/br_inep_enem/br_inep_enem__dicionario.sql diff --git a/models/br_inep_enem/code/main.py b/models/br_inep_enem/code/main.py index 2ccd54f4..0a9e617b 100644 --- a/models/br_inep_enem/code/main.py +++ b/models/br_inep_enem/code/main.py @@ -4,6 +4,7 @@ import zipfile import io import os +import basedosdados as bd YEARS = range(1998, 2022 + 1) @@ -265,7 +266,7 @@ def drop_temporal_cov(key, temporal_cov): "tipo_prova_matematica", "tipo_prova_ciencias_natureza", "tipo_prova_ciencias_humanas", - "tipo_prova_linguagens_codigos" + "tipo_prova_linguagens_codigos", ]: dict_df["valor"] = dict_df["valor"].apply(lambda value: value.title()) @@ -288,3 +289,15 @@ def drop_temporal_cov(key, temporal_cov): pd.concat([dict_microdados, dict_by_table]).to_excel(f"{OUTPUT}/dicionario.xlsx", index=False) # type: ignore + +pd.concat([dict_microdados, dict_by_table]).to_excel(f"{OUTPUT}/dicionario.parquet", index=False) # type: ignore + 
+# Upload dictionary +# tb = bd.Table(dataset_id="br_inep_enem", table_id="dicionario") + +# tb.create( +# path=f"{OUTPUT}/dicionario.parquet", +# if_table_exists="replace", +# if_storage_data_exists="replace", +# source_format="parquet", +# ) From c3c25d827bbbc33da2c3cd4b5c37c7cb86a9cd2d Mon Sep 17 00:00:00 2001 From: Pedro Castro Date: Fri, 22 Sep 2023 14:12:43 -0300 Subject: [PATCH 11/15] upload table --- models/br_inep_enem/code/main.py | 27 ++++++++++++--------------- 1 file changed, 12 insertions(+), 15 deletions(-) diff --git a/models/br_inep_enem/code/main.py b/models/br_inep_enem/code/main.py index 0a9e617b..3175c867 100644 --- a/models/br_inep_enem/code/main.py +++ b/models/br_inep_enem/code/main.py @@ -97,6 +97,8 @@ def build_dictionary(year: int, path: str) -> pd.DataFrame: assert len(valid_value) == 1 df.loc[df["nome_coluna"] == col, "valor"] = valid_value[0] + df["chave"] = df["chave"].astype(str) + df["valor"] = df["valor"].astype(str) return df @@ -241,7 +243,7 @@ def make_temporal_cov(interval): cobertura_temporal = [make_temporal_cov(interval) for interval in intervals] - return (key, ",".join(cobertura_temporal), values_by_key[0]) + return (str(key), ",".join(cobertura_temporal), str(values_by_key[0])) ranges = [ make_ranges(key, value) for (key, value), _ in df.groupby(["chave", "valor"]) # type: ignore @@ -285,19 +287,14 @@ def drop_temporal_cov(key, temporal_cov): ] ) -dict_microdados.to_excel(f"{OUTPUT}/dicionario_microdados.xlsx", index=False) - - -pd.concat([dict_microdados, dict_by_table]).to_excel(f"{OUTPUT}/dicionario.xlsx", index=False) # type: ignore - -pd.concat([dict_microdados, dict_by_table]).to_excel(f"{OUTPUT}/dicionario.parquet", index=False) # type: ignore +pd.concat([dict_microdados, dict_by_table]).to_parquet(f"{OUTPUT}/dicionario.parquet", index=False) # type: ignore # Upload dictionary -# tb = bd.Table(dataset_id="br_inep_enem", table_id="dicionario") - -# tb.create( -# path=f"{OUTPUT}/dicionario.parquet", -# if_table_exists="replace", -# if_storage_data_exists="replace", -# source_format="parquet", -# ) +tb = bd.Table(dataset_id="br_inep_enem", table_id="dicionario") + +tb.create( + path=f"{OUTPUT}/dicionario.parquet", + if_table_exists="replace", + if_storage_data_exists="replace", + source_format="parquet", +) From b71afae4302317b1afd8bbd1ee68c0077402a05b Mon Sep 17 00:00:00 2001 From: Pedro Castro Date: Fri, 22 Sep 2023 14:13:44 -0300 Subject: [PATCH 12/15] add comment --- models/br_inep_enem/code/main.py | 1 + 1 file changed, 1 insertion(+) diff --git a/models/br_inep_enem/code/main.py b/models/br_inep_enem/code/main.py index 3175c867..e12fcba4 100644 --- a/models/br_inep_enem/code/main.py +++ b/models/br_inep_enem/code/main.py @@ -1,3 +1,4 @@ +# Script para criar dicionario dos microdados e questionarios import pandas as pd import numpy as np import requests From 03aef937476b92720106632687a9616cf9555e4a Mon Sep 17 00:00:00 2001 From: Pedro Castro Date: Mon, 25 Sep 2023 18:29:20 -0300 Subject: [PATCH 13/15] br_inep_enem: dicionario fix typo --- models/br_inep_enem/br_inep_enem__dicionario.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/models/br_inep_enem/br_inep_enem__dicionario.sql b/models/br_inep_enem/br_inep_enem__dicionario.sql index 1a8ee86b..e7d8d95d 100644 --- a/models/br_inep_enem/br_inep_enem__dicionario.sql +++ b/models/br_inep_enem/br_inep_enem__dicionario.sql @@ -7,7 +7,7 @@ SELECT SAFE_CAST(id_tabela AS STRING) id_tabela, -SAFE_CAST(coluna AS STRING) coluna, +SAFE_CAST(nome_coluna AS STRING) nome_coluna, 
SAFE_CAST(chave AS STRING) chave, SAFE_CAST(cobertura_temporal AS STRING) cobertura_temporal, SAFE_CAST(valor AS STRING) valor From fda10aaea52ede51f9dc92a308e945ea96928a0a Mon Sep 17 00:00:00 2001 From: Pedro Castro Date: Mon, 25 Sep 2023 19:08:46 -0300 Subject: [PATCH 14/15] br_inep_enem: microdados fix table path --- models/br_inep_enem/br_inep_enem__dicionario.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/models/br_inep_enem/br_inep_enem__dicionario.sql b/models/br_inep_enem/br_inep_enem__dicionario.sql index e7d8d95d..378c67c7 100644 --- a/models/br_inep_enem/br_inep_enem__dicionario.sql +++ b/models/br_inep_enem/br_inep_enem__dicionario.sql @@ -11,4 +11,4 @@ SAFE_CAST(nome_coluna AS STRING) nome_coluna, SAFE_CAST(chave AS STRING) chave, SAFE_CAST(cobertura_temporal AS STRING) cobertura_temporal, SAFE_CAST(valor AS STRING) valor -FROM basedosdados-staging.br_inep_enem.dicionario AS t +FROM basedosdados-staging.br_inep_enem_staging.dicionario AS t From cf983e084402e75c99bb4a0a54fa08d5c7493274 Mon Sep 17 00:00:00 2001 From: Pedro Castro Date: Tue, 26 Sep 2023 11:00:17 -0300 Subject: [PATCH 15/15] [fix]: br_inep_enem: dicionario csv format --- models/br_inep_enem/code/main.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/models/br_inep_enem/code/main.py b/models/br_inep_enem/code/main.py index e12fcba4..ed2d1f52 100644 --- a/models/br_inep_enem/code/main.py +++ b/models/br_inep_enem/code/main.py @@ -288,14 +288,13 @@ def drop_temporal_cov(key, temporal_cov): ] ) -pd.concat([dict_microdados, dict_by_table]).to_parquet(f"{OUTPUT}/dicionario.parquet", index=False) # type: ignore +pd.concat([dict_microdados, dict_by_table]).to_csv(f"{OUTPUT}/dicionario.csv", index=False) # type: ignore # Upload dictionary tb = bd.Table(dataset_id="br_inep_enem", table_id="dicionario") tb.create( - path=f"{OUTPUT}/dicionario.parquet", + path=f"{OUTPUT}/dicionario.csv", if_table_exists="replace", if_storage_data_exists="replace", - source_format="parquet", )
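
A minimal usage sketch for the dictionary produced by this series, once the dbt model above is materialized. The production table path `basedosdados.br_inep_enem.dicionario` is an assumption (the model reads from `basedosdados-staging.br_inep_enem_staging.dicionario`); the column names and the example values come from br_inep_enem__dicionario.sql and code/main.py.

    -- Look up the labels of one microdados column; table path is assumed, columns come from the model above
    SELECT nome_coluna, chave, valor, cobertura_temporal
    FROM `basedosdados.br_inep_enem.dicionario`
    WHERE id_tabela = 'microdados'
      AND nome_coluna = 'tipo_prova_matematica'
    ORDER BY chave;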