Merge branch 'main' into er/remove-fire_event_if_test

dbt-labs · Feb 7, 2024 · 7e02a6b · 7e02a6b
2 parents c26758a + 3f3c128
commit 7e02a6b
Show file tree

Hide file tree

Showing 7 changed files with 231 additions and 17 deletions.
diff --git a/.changes/unreleased/Fixes-20240206-160231.yaml b/.changes/unreleased/Fixes-20240206-160231.yaml
@@ -0,0 +1,6 @@
+kind: Fixes
+body: Make invocation contexts more reliable in testing scenarios.
+time: 2024-02-06T16:02:31.81842-05:00
+custom:
+  Author: peterallenwebb
+  Issue: "52"
diff --git a/.github/workflows/scheduled_testing.yml b/.github/workflows/scheduled_testing.yml
@@ -12,11 +12,11 @@
 # Mainly on a schedule of 9:00, 13:00, 18:00 UTC every day.
 # Manual trigger can also test on demand
 
-name: Scheduled Testing [ placeholders]
+name: Scheduled Testing
 
 on:
-  # schedule:
-  #   - cron: '0 9,13,18 * * *' # 9:00, 13:00, 18:00 UTC
+  schedule:
+    - cron: '0 9,13,18 * * *' # 9:00, 13:00, 18:00 UTC
   workflow_dispatch: # for manual triggering
 
 
@@ -25,12 +25,7 @@ permissions: read-all
 
 jobs:
   run_tests:
-    # uses: dbt-labs/actions/.github/workflows/release-branch-tests.yml@main
-    # with:
-    #   workflows_to_run: '["ci_tests.yml", "build.yml", "ci_dbt_core_testing.yml"]'
-    # secrets: inherit
-    runs-on: ubuntu-latest
-    steps:
-
-      - name: Placeholder
-        run: echo "This is a placeholder job"
+    uses: dbt-labs/actions/.github/workflows/release-branch-tests.yml@main
+    with:
+      workflows_to_run: '["ci_tests.yml", "build.yml", "ci_dbt_core_testing.yml"]'
+    secrets: inherit
diff --git a/dbt_common/__about__.py b/dbt_common/__about__.py
@@ -1 +1 @@
-version = "0.1.3"
+version = "0.1.4"
diff --git a/dbt_common/context.py b/dbt_common/context.py
@@ -1,4 +1,4 @@
-from contextvars import ContextVar
+from contextvars import ContextVar, copy_context
 from typing import List, Mapping, Optional
 
 from dbt_common.constants import SECRET_ENV_PREFIX
@@ -26,10 +26,23 @@ def env_secrets(self) -> List[str]:
 _INVOCATION_CONTEXT_VAR: ContextVar[InvocationContext] = ContextVar("DBT_INVOCATION_CONTEXT_VAR")
 
 
+def _reliably_get_invocation_var() -> ContextVar:
+    invocation_var: Optional[ContextVar] = next(
+        (cv for cv in copy_context() if cv.name == _INVOCATION_CONTEXT_VAR.name), None
+    )
+
+    if invocation_var is None:
+        invocation_var = _INVOCATION_CONTEXT_VAR
+
+    return invocation_var
+
+
 def set_invocation_context(env: Mapping[str, str]) -> None:
-    _INVOCATION_CONTEXT_VAR.set(InvocationContext(env))
+    invocation_var = _reliably_get_invocation_var()
+    invocation_var.set(InvocationContext(env))
 
 
 def get_invocation_context() -> InvocationContext:
-    ctx = _INVOCATION_CONTEXT_VAR.get()
+    invocation_var = _reliably_get_invocation_var()
+    ctx = invocation_var.get()
     return ctx
diff --git a/pyproject.toml b/pyproject.toml
@@ -94,7 +94,6 @@ features = ["test"]
 [tool.hatch.envs.test.scripts]
 unit = "python -m pytest --cov=dbt_common --cov-report=xml {args:tests/unit}"
 
-
 ### Linting settings, envs & scripts
 
 [tool.hatch.envs.lint]

diff --git a/tests/unit/test_events.py b/tests/unit/test_events.py
@@ -0,0 +1,127 @@
+import re
+
+import pytest
+
+from dbt_common.events import types
+from dbt_common.events.base_types import msg_from_base_event
+from dbt_common.events.base_types import (
+    BaseEvent,
+    DebugLevel,
+    DynamicLevel,
+    ErrorLevel,
+    InfoLevel,
+    TestLevel,
+    WarnLevel,
+)
+from dbt_common.events.functions import msg_to_dict, msg_to_json
+
+
+# recursively collects every subclass of `cls`; the *Level classes listed below are skipped, but their subclasses are kept
+def get_all_subclasses(cls):
+    all_subclasses = []
+    for subclass in cls.__subclasses__():
+        if subclass not in [TestLevel, DebugLevel, WarnLevel, InfoLevel, ErrorLevel, DynamicLevel]:
+            all_subclasses.append(subclass)
+        all_subclasses.extend(get_all_subclasses(subclass))
+    return set(all_subclasses)
+
+
+class TestEventCodes:
+    # checks that event codes are not duplicated, to keep codes singular and clear.
+    # also checks that event codes follow the correct naming convention, e.g. E001
+    def test_event_codes(self):
+        all_concrete = get_all_subclasses(BaseEvent)
+        all_codes = set()
+
+        for event_cls in all_concrete:
+            code = event_cls.code(event_cls)
+            # must be in the form 1 capital letter, 3 digits
+            assert re.match("^[A-Z][0-9]{3}", code)
+            # cannot have been used already
+            assert (
+                code not in all_codes
+            ), f"{code} is assigned more than once. Check types.py for duplicates."
+            all_codes.add(code)
+
+
+class TestEventJSONSerialization:
+    """Attempts to test that every event is serializable to json.
+
+    Event types that take `Any` cannot be tested this way, since some values will serialize
+    just fine and others won't.
+    """
+
+    SAMPLE_VALUES = [
+        # N.B. Events instantiated here include the module prefix in order to
+        # avoid having the entire list twice in the code.
+        # M - Deps generation ======================
+        types.RetryExternalCall(attempt=0, max=0),
+        types.RecordRetryException(exc=""),
+        # Z - misc ======================
+        types.SystemCouldNotWrite(path="", reason="", exc=""),
+        types.SystemExecutingCmd(cmd=[""]),
+        types.SystemStdOut(bmsg=str(b"")),
+        types.SystemStdErr(bmsg=str(b"")),
+        types.SystemReportReturnCode(returncode=0),
+        types.Formatting(),
+        types.Note(msg="This is a note."),
+    ]
+
+    def test_all_serializable(self):
+        all_non_abstract_events = set(
+            get_all_subclasses(BaseEvent),
+        )
+        all_event_values_list = list(map(lambda x: x.__class__, self.SAMPLE_VALUES))
+        diff = all_non_abstract_events.difference(set(all_event_values_list))
+        assert (
+            not diff
+        ), f"{diff}test is missing concrete values in `SAMPLE_VALUES`. Please add the values for the aforementioned event classes"
+
+        # make sure everything in the list is a value not a type
+        for event in self.SAMPLE_VALUES:
+            assert not isinstance(event, type)
+
+        # if we have everything we need to test, try to serialize everything
+        count = 0
+        for event in self.SAMPLE_VALUES:
+            msg = msg_from_base_event(event)
+            print(f"--- msg: {msg.info.name}")
+            # Serialize to dictionary
+            try:
+                msg_to_dict(msg)
+            except Exception as e:
+                raise Exception(
+                    f"{event} can not be converted to a dict. Originating exception: {e}"
+                )
+            # Serialize to json
+            try:
+                msg_to_json(msg)
+            except Exception as e:
+                raise Exception(f"{event} is not serializable to json. Originating exception: {e}")
+            # Serialize to binary
+            try:
+                msg.SerializeToString()
+            except Exception as e:
+                raise Exception(
+                    f"{event} is not serializable to binary protobuf. Originating exception: {e}"
+                )
+            count += 1
+        print(f"--- Found {count} events")
+
+
+def test_bad_serialization():
+    """Tests that bad serialization enters the proper exception handling
+
+    When pytest is in use the exception handling of `BaseEvent` raises an
+    exception. When pytest isn't present, it fires a Note event. Thus to test
+    that bad serializations are properly handled, the best we can do is test
+    that the exception handling path is used.
+    """
+
+    with pytest.raises(Exception) as excinfo:
+        types.Note(param_event_doesnt_have="This should break")
+
+    assert (
+        str(excinfo.value)
+        == "[Note]: Unable to parse dict {'param_event_doesnt_have': 'This should break'}"
+    )
diff --git a/tests/unit/test_proto_events.py b/tests/unit/test_proto_events.py
@@ -0,0 +1,74 @@
+from dbt_common.events.functions import msg_to_dict, msg_to_json, reset_metadata_vars
+from dbt_common.events import types_pb2
+from dbt_common.events.base_types import msg_from_base_event
+from dbt_common.events.types import RetryExternalCall
+from google.protobuf.json_format import MessageToDict
+
+info_keys = {
+    "name",
+    "code",
+    "msg",
+    "level",
+    "invocation_id",
+    "pid",
+    "thread",
+    "ts",
+    "extra",
+    "category",
+}
+
+
+def test_events():
+    # M020 event
+    event_code = "M020"
+    event = RetryExternalCall(attempt=3, max=5)
+    msg = msg_from_base_event(event)
+    msg_dict = msg_to_dict(msg)
+    msg_json = msg_to_json(msg)
+    serialized = msg.SerializeToString()
+    assert "Retrying external call. Attempt: 3" in str(serialized)
+    assert set(msg_dict.keys()) == {"info", "data"}
+    assert set(msg_dict["data"].keys()) == {"attempt", "max"}
+    assert set(msg_dict["info"].keys()) == info_keys
+    assert msg_json
+    assert msg.info.code == event_code
+
+    # Extract EventInfo from serialized message
+    generic_msg = types_pb2.GenericMessage()
+    generic_msg.ParseFromString(serialized)
+    assert generic_msg.info.code == event_code
+    # get the message class for the real message from the generic message
+    message_class = getattr(types_pb2, f"{generic_msg.info.name}Msg")
+    new_msg = message_class()
+    new_msg.ParseFromString(serialized)
+    assert new_msg.info.code == msg.info.code
+    assert new_msg.data.attempt == msg.data.attempt
+
+
+def test_extra_dict_on_event(monkeypatch):
+    monkeypatch.setenv("DBT_ENV_CUSTOM_ENV_env_key", "env_value")
+
+    reset_metadata_vars()
+
+    event_code = "M020"
+    event = RetryExternalCall(attempt=3, max=5)
+    msg = msg_from_base_event(event)
+    msg_dict = msg_to_dict(msg)
+    assert set(msg_dict["info"].keys()) == info_keys
+    extra_dict = {"env_key": "env_value"}
+    assert msg.info.extra == extra_dict
+    serialized = msg.SerializeToString()
+
+    # Extract EventInfo from serialized message
+    generic_msg = types_pb2.GenericMessage()
+    generic_msg.ParseFromString(serialized)
+    assert generic_msg.info.code == event_code
+    # get the message class for the real message from the generic message
+    message_class = getattr(types_pb2, f"{generic_msg.info.name}Msg")
+    new_msg = message_class()
+    new_msg.ParseFromString(serialized)
+    new_msg_dict = MessageToDict(new_msg)
+    assert new_msg_dict["info"]["extra"] == msg.info.extra
+
+    # clean up
+    reset_metadata_vars()