Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[dbt] fix br_inep_enem.dicionario #392

Merged
merged 2 commits into from
Jan 9, 2024
Merged
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
32 changes: 13 additions & 19 deletions models/br_inep_enem/code/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -219,6 +219,17 @@ def create_intervals(years):

return intervals

def make_temporal_cov(interval):
    """Render one interval of years as a temporal-coverage string.

    The years are sorted in ascending order and joined with the "(1)"
    separator. A year equal to the smallest or the largest entry of the
    module-level YEARS is rendered as an empty string, so an interval
    that touches either end of YEARS is left open on that side, e.g.
    "(1)2010", "2005(1)" or just "(1)".

    Args:
        interval: Collection of years. Presumably the one or two
            endpoints produced by create_intervals -- confirm against
            the caller.

    Returns:
        The joined coverage string.
    """
    # The bounds of YEARS do not change while formatting a single
    # interval, so look them up once instead of once per year.
    first_year = min(YEARS)
    last_year = max(YEARS)

    return "(1)".join(
        "" if year == last_year or year == first_year else str(year)
        for year in np.sort(interval)
    )

def make_ranges(key, value):
values_by_key = df.loc[
(df["chave"] == key) & (df["valor"] == value), "valor"
Expand All @@ -231,17 +242,6 @@ def make_ranges(key, value):

intervals = [list(set(interval)) for interval in create_intervals(years)]

def make_temporal_cov(interval):
    """Render one interval of years as a temporal-coverage string.

    The years are sorted in ascending order and joined with the "(1)"
    separator. A year equal to the smallest or the largest entry of the
    module-level YEARS is rendered as an empty string, so an interval
    that touches either end of YEARS is left open on that side, e.g.
    "(1)2010", "2005(1)" or just "(1)".

    Args:
        interval: Collection of years. Presumably the one or two
            endpoints produced by create_intervals -- confirm against
            the caller.

    Returns:
        The joined coverage string.
    """
    # The bounds of YEARS do not change while formatting a single
    # interval, so look them up once instead of once per year.
    first_year = min(YEARS)
    last_year = max(YEARS)

    return "(1)".join(
        "" if year == last_year or year == first_year else str(year)
        for year in np.sort(interval)
    )

cobertura_temporal = [make_temporal_cov(interval) for interval in intervals]

return (str(key), ",".join(cobertura_temporal), str(values_by_key[0]))
Expand All @@ -254,14 +254,8 @@ def make_temporal_cov(interval):

dict_df = pd.DataFrame(ranges, columns=basic_cols)

unique_keys = [i for (i, v) in dict_df["chave"].value_counts().items() if v == 1]

# Drop temporal coverage if key is unique
def drop_temporal_cov(key, temporal_cov):
return None if key in unique_keys else temporal_cov

dict_df["cobertura_temporal"] = dict_df[["chave", "cobertura_temporal"]].apply(
lambda values: drop_temporal_cov(*values), axis=1
dict_df["cobertura_temporal"] = dict_df["cobertura_temporal"].apply(
lambda value: None if value == "(1)" else value
)

# Last edits
Expand Down
Loading