Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: Added null check for partitions batches #392

Merged
merged 7 commits into from
Sep 6, 2023
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,13 @@
{%- set single_partition = [] -%}
{%- for col in row -%}


{%- set column_type = adapter.convert_type(table, loop.index0) -%}
{%- if column_type == 'integer' -%}
{%- set comp_func = '=' -%}
{%- if col is none -%}
{%- set value = 'null' -%}
{%- set comp_func = ' is ' -%}
{%- elif column_type == 'integer' -%}
{%- set value = col | string -%}
{%- elif column_type == 'string' -%}
{%- set value = "'" + col + "'" -%}
Expand All @@ -31,7 +36,7 @@
{%- do exceptions.raise_compiler_error('Need to add support for column type ' + column_type) -%}
{%- endif -%}
{%- set partition_key = adapter.format_one_partition_key(partitioned_by[loop.index0]) -%}
{%- do single_partition.append(partition_key + '=' + value) -%}
{%- do single_partition.append(partition_key + comp_func + value) -%}
{%- endfor -%}

{%- set single_partition_expression = single_partition | join(' and ') -%}
Expand Down
50 changes: 50 additions & 0 deletions tests/functional/adapter/test_partitions.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,24 @@
cross join unnest(date_array) as t2(date_column)
"""

# Model SQL used to verify that partition batching handles NULL partition
# values: 200 rows with sequential ids, plus two rows whose id is NULL.
# (Scrape artifacts — GitHub review-thread text that had been captured inside
# this string literal — have been removed.)
test_null_valued_partitions_model_sql = """
with data as (
    select
        random() as col_1,
        row_number() over() as id
    from
        unnest(sequence(1, 200))
)

select
    col_1, id
from data
union all
select random() as col_1, NULL as id
union all
select random() as col_1, NULL as id
"""


class TestHiveTablePartitions:
@pytest.fixture(scope="class")
Expand Down Expand Up @@ -125,3 +143,35 @@ def test__check_incremental_run_with_partitions(self, project):
incremental_records_count = project.run_sql(model_run_result_row_count_query, fetch="all")[0][0]

assert incremental_records_count == 212


class TestHiveNullValuedPartitions:
    """Regression test: a hive table partitioned on a nullable column must
    build successfully when some partition values are NULL (exercises the
    `is null` comparison added to the batched partition predicates)."""

    @pytest.fixture(scope="class")
    def project_config_update(self):
        # Partition on "id": it is the last column the model selects and the
        # one left NULL for two rows. (The previous value
        # ["date_column", "doy", "group_guid"] referenced columns the model
        # does not produce, so the run could never succeed.)
        return {
            "models": {
                "+table_type": "hive",
                "+materialized": "table",
                "+partitioned_by": ["id"],
            }
        }

    @pytest.fixture(scope="class")
    def models(self):
        # Map the model file name to the SQL defined at module level.
        return {
            "test_hive_partitions_null_values.sql": test_null_valued_partitions_model_sql,
        }

    def test__check_run_with_partitions(self, project):
        relation_name = "test_hive_partitions_null_values"
        model_run_result_row_count_query = (
            f"select count(*) as records from {project.test_schema}.{relation_name}"
        )

        first_model_run = run_dbt(["run", "--select", relation_name])
        first_model_run_result = first_model_run.results[0]

        # Check that the model ran successfully.
        assert first_model_run_result.status == RunStatus.Success

        records_count_first_run = project.run_sql(model_run_result_row_count_query, fetch="all")[0][0]

        # 200 sequential-id rows plus the 2 NULL-id rows from the unions.
        assert records_count_first_run == 202
Loading