diff --git a/CHANGELOG.md b/CHANGELOG.md
index f3fe99fe2..a965a303a 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -12,6 +12,7 @@
 - feature: Added `Runner.failed_runs(..)` and `Evaluator.failed_evaluations(..)` to retrieve all failed run / evaluation lineages
 - feature: Added `.successful_example_outputs(..)` and `.failed_example_outputs(..)` to `RunRepository` to match the evaluation repository
 - feature: Added optional argument to set an id when creating a `Dataset` via `DatasetRepository.create_dataset(..)`
+- feature: Traces now log exceptions using the `ErrorValue` type.

 ### Fixes

diff --git a/src/intelligence_layer/core/tracer/tracer.py b/src/intelligence_layer/core/tracer/tracer.py
index c473539d7..82e5f64bf 100644
--- a/src/intelligence_layer/core/tracer/tracer.py
+++ b/src/intelligence_layer/core/tracer/tracer.py
@@ -1,3 +1,4 @@
+import traceback
 from abc import ABC, abstractmethod
 from contextlib import AbstractContextManager
 from datetime import datetime, timezone
@@ -126,6 +127,20 @@ def ensure_id(self, id: Optional[str]) -> str:
         return id if id is not None else str(uuid4())


+class ErrorValue(BaseModel):
+    """Describes an exception that escaped a `Span`, as logged by `Span.__exit__`.
+
+    Attributes:
+        error_type: Qualified name of the exception class.
+        message: `str()` of the exception instance.
+        stack_trace: The formatted traceback of the exception.
+    """
+
+    error_type: str
+    message: str
+    stack_trace: str
+
+
 class Span(Tracer, AbstractContextManager["Span"]):
     """Captures a logical step within the overall workflow

@@ -187,8 +202,20 @@ def __exit__(
         self,
         exc_type: Optional[type[BaseException]],
         exc_value: Optional[BaseException],
-        traceback: Optional[TracebackType],
+        _traceback: Optional[TracebackType],
     ) -> None:
+        if exc_type is not None and exc_value is not None and _traceback is not None:
+            # Format from the arguments instead of `traceback.format_exc()`: the
+            # latter reads the interpreter's "current exception" and yields
+            # "NoneType: None" when `__exit__` runs outside an active handler.
+            error_value = ErrorValue(
+                error_type=exc_type.__qualname__,
+                message=str(exc_value),
+                stack_trace="".join(
+                    traceback.format_exception(exc_type, exc_value, _traceback)
+                ),
+            )
+            self.log(error_value.message, error_value)
         self.end()


diff --git a/src/intelligence_layer/evaluation/run/domain.py b/src/intelligence_layer/evaluation/run/domain.py
index 5fbcbb60b..fa67879dd 100644
--- a/src/intelligence_layer/evaluation/run/domain.py
+++ b/src/intelligence_layer/evaluation/run/domain.py
@@ -20,7 +20,7 @@ class FailedExampleRun(BaseModel):
     @staticmethod
     def from_exception(exception: Exception) -> "FailedExampleRun":
         return FailedExampleRun(
-            error_message=f"{type(exception)}: {str(exception)}\n{traceback.format_exc()}"
+            error_message=f"{type(exception).__qualname__}: {str(exception)}\n{traceback.format_exc()}"
         )


diff --git a/tests/core/test_tracer.py b/tests/core/test_tracer.py
index a5f6e8dd9..6fdad21f4 100644
--- a/tests/core/test_tracer.py
+++ b/tests/core/test_tracer.py
@@ -32,6 +32,7 @@
     utc_now,
 )
 from intelligence_layer.core.tracer.persistent_tracer import TracerLogEntryFailed
+from intelligence_layer.core.tracer.tracer import ErrorValue


 @fixture
@@ -113,6 +114,22 @@ def test_can_add_parent_and_child_entries() -> None:
     assert isinstance(parent.entries[0].entries[0], LogEntry)


+def test_task_logs_error_value() -> None:
+    tracer = InMemoryTracer()
+
+    with pytest.raises(ValueError):
+        with tracer.task_span("failing task", None):
+            raise ValueError("my bad, sorry")
+
+    assert isinstance(tracer.entries[0], InMemoryTaskSpan)
+    assert isinstance(tracer.entries[0].entries[0], LogEntry)
+    error = tracer.entries[0].entries[0].value
+    assert isinstance(error, ErrorValue)
+    assert error.message == "my bad, sorry"
+    assert error.error_type == "ValueError"
+    assert error.stack_trace.startswith("Traceback")
+
+
 def test_task_automatically_logs_input_and_output(
     complete: Task[CompleteInput, CompleteOutput],
 ) -> None: