From c9582c23237b0c42db6468b0ffb70497ea4d57f4 Mon Sep 17 00:00:00 2001 From: Quigley Malcolm Date: Tue, 10 Dec 2024 09:28:07 -0600 Subject: [PATCH] Fix erroneous additional batch execution (#11113) * Update single batch test case to check for generic exceptions * Explicitly skip last final batch execution when there is only one batch Previously, if there was only one batch, we would try to execute _two_ batches: the first batch and a nonexistent "last" batch. This would result in an unhandled exception. * Changie doc --- .../unreleased/Fixes-20241209-133317.yaml | 6 +++++ core/dbt/task/run.py | 27 ++++++++++--------- .../functional/microbatch/test_microbatch.py | 16 +++++++++-- 3 files changed, 35 insertions(+), 14 deletions(-) create mode 100644 .changes/unreleased/Fixes-20241209-133317.yaml diff --git a/.changes/unreleased/Fixes-20241209-133317.yaml b/.changes/unreleased/Fixes-20241209-133317.yaml new file mode 100644 index 00000000000..4cac533662b --- /dev/null +++ b/.changes/unreleased/Fixes-20241209-133317.yaml @@ -0,0 +1,6 @@ +kind: Fixes +body: Fix running of extra "last" batch when there is only one batch +time: 2024-12-09T13:33:17.253326-06:00 +custom: + Author: QMalcolm + Issue: "11112" diff --git a/core/dbt/task/run.py b/core/dbt/task/run.py index 649c58a1468..33da3dbf4e7 100644 --- a/core/dbt/task/run.py +++ b/core/dbt/task/run.py @@ -768,18 +768,21 @@ def handle_microbatch_model( # Wait until all submitted batches have completed while len(batch_results) != batch_idx: pass - # Final batch runs once all others complete to ensure post_hook runs at the end - self._submit_batch( - node=node, - adapter=runner.adapter, - relation_exists=relation_exists, - batches=batches, - batch_idx=batch_idx, - batch_results=batch_results, - pool=pool, - force_sequential_run=True, - skip=skip_batches, - ) + + # Only run "last" batch if there is more than one batch + if len(batches) != 1: + # Final batch runs once all others complete to ensure post_hook runs at the end + 
self._submit_batch( + node=node, + adapter=runner.adapter, + relation_exists=relation_exists, + batches=batches, + batch_idx=batch_idx, + batch_results=batch_results, + pool=pool, + force_sequential_run=True, + skip=skip_batches, + ) # Finalize run: merge results, track model run, and print final result line runner.merge_batch_results(result, batch_results) diff --git a/tests/functional/microbatch/test_microbatch.py b/tests/functional/microbatch/test_microbatch.py index 6d3eb960b76..8567dbe67a4 100644 --- a/tests/functional/microbatch/test_microbatch.py +++ b/tests/functional/microbatch/test_microbatch.py @@ -1057,11 +1057,20 @@ def pre_or_post_hook(event) -> bool: return EventCatcher(event_to_catch=JinjaLogDebug, predicate=pre_or_post_hook) # type: ignore + @pytest.fixture + def generic_exception_catcher(self) -> EventCatcher: + return EventCatcher(event_to_catch=GenericExceptionOnRun) # type: ignore + def test_microbatch( - self, mocker: MockerFixture, project, batch_log_catcher: EventCatcher + self, + project, + batch_log_catcher: EventCatcher, + generic_exception_catcher: EventCatcher, ) -> None: with patch_microbatch_end_time("2020-01-01 13:57:00"): - _ = run_dbt(["run"], callbacks=[batch_log_catcher.catch]) + _ = run_dbt( + ["run"], callbacks=[batch_log_catcher.catch, generic_exception_catcher.catch] + ) # There should be two logs as the pre-hook and post-hook should # both only be run once @@ -1071,3 +1080,6 @@ def test_microbatch( assert "pre-hook" in batch_log_catcher.caught_events[0].data.msg # type: ignore assert "20200101" in batch_log_catcher.caught_events[1].data.msg # type: ignore assert "post-hook" in batch_log_catcher.caught_events[1].data.msg # type: ignore + + # we had a bug where having only one batch caused a generic exception + assert len(generic_exception_catcher.caught_events) == 0