# Reusable workflow that runs the Opik SDK <-> Guardrails integration tests.
#
# It is triggered only through `workflow_call` and declares no inputs; the
# API keys below are read from the secrets made available by the caller.
#
name: SDK Lib Guardrails Tests
run-name: "SDK Lib Guardrails Tests ${{ github.ref_name }} by @${{ github.actor }}"
env:
  OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
  OPENAI_ORG_ID: ${{ secrets.OPENAI_ORG_ID }}
  GUARDRAILS_API_KEY: ${{ secrets.GUARDRAILS_API_KEY }}
on:
  workflow_call:

jobs:
  tests:
    name: Guardrails Python ${{matrix.python_version}}
    runs-on: ubuntu-latest
    defaults:
      run:
        # Every `run` step starts from the Python SDK directory.
        working-directory: sdks/python

    strategy:
      fail-fast: true
      matrix:
        python_version: ["3.10", "3.11", "3.12"]

    steps:
      - name: Check out code
        uses: actions/checkout@v4

      - name: Setup Python ${{matrix.python_version}}
        uses: actions/setup-python@v5
        with:
          python-version: ${{matrix.python_version}}

      - name: Install opik
        run: pip install .

      - name: Install test tools
        run: |
          cd ./tests
          pip install --no-cache-dir --disable-pip-version-check -r test_requirements.txt

      - name: Install lib
        run: |
          cd ./tests
          pip install --no-cache-dir --disable-pip-version-check -r library_integration/guardrails/requirements.txt

      - name: Install checks from guardrails hub
        run: |
          # Quote the token so the shell never word-splits or globs it.
          guardrails configure --token "$GUARDRAILS_API_KEY" --disable-metrics --enable-remote-inferencing
          guardrails hub install hub://guardrails/politeness_check

      - name: Run tests
        run: |
          cd ./tests/library_integration/guardrails/
          python -m pytest -vv .
class GuardrailsValidatorValidateDecorator(base_track_decorator.BaseTrackDecorator):
    """
    Track decorator for the validation method of a guardrails validator.

    The start hook records the call inputs and, when the validator exposes an
    ``llm_callable`` attribute, the model it uses. The end hook records the
    returned ``ValidationResult`` together with the "guardrails" tag and the
    validation outcome.
    """

    def _start_span_inputs_preprocessor(
        self,
        func: Callable,
        track_options: arguments_helpers.TrackOptions,
        args: Tuple,
        kwargs: Dict[str, Any],
    ) -> arguments_helpers.StartSpanParameters:
        """
        Build the parameters used to open a span for a validator call.

        Args:
            func: The decorated callable; it must be a bound method because
                the validator instance is read from ``func.__self__``.
            track_options: Options passed to ``track``.
            args: Positional arguments of the call.
            kwargs: Keyword arguments of the call.

        Returns:
            The span start parameters (name, input, type, metadata,
            project name and model).
        """
        name = track_options.name if track_options.name is not None else func.__name__

        # Work on a copy: the TrackOptions object is shared by every call of
        # the decorated function, so its metadata dict must not be mutated.
        metadata: Dict[str, Any] = (
            dict(track_options.metadata) if track_options.metadata is not None else {}
        )
        metadata["created_from"] = "guardrails"

        span_input = (
            inspect_helpers.extract_inputs(func, args, kwargs)
            if track_options.capture_input
            else None
        )

        validator_instance = func.__self__  # type: ignore
        # Validators without an `llm_callable` attribute have no model to report.
        model = getattr(validator_instance, "llm_callable", None)
        if model is not None:
            metadata["model"] = model

        return arguments_helpers.StartSpanParameters(
            name=name,
            input=span_input,
            type=track_options.type,
            metadata=metadata,
            project_name=track_options.project_name,
            model=model,
        )

    def _end_span_inputs_preprocessor(
        self, output: Any, capture_output: bool
    ) -> arguments_helpers.EndSpanParameters:
        """
        Build the parameters used to close a span for a validator call.

        Args:
            output: The value returned by the validator; it is expected to be
                a ``validators.ValidationResult``.
            capture_output: When False the result object is not logged as the
                span output; tags and metadata are still recorded.

        Returns:
            The span end parameters (output, metadata and tags).
        """
        assert isinstance(
            output,
            validators.ValidationResult,
        )
        tags = ["guardrails", output.outcome]

        return arguments_helpers.EndSpanParameters(
            # Honour capture_output the same way capture_input is honoured
            # when the span is started.
            output=output if capture_output else None,
            metadata=output.metadata,
            tags=tags,
        )

    def _generators_handler(
        self,
        output: Any,
        capture_output: bool,
        generations_aggregator: Optional[Callable[[List[Any]], str]],
    ) -> Optional[Union[Generator, AsyncGenerator]]:
        """Delegate generator handling to the base track decorator unchanged."""
        return super()._generators_handler(
            output, capture_output, generations_aggregator
        )
def track_guardrails(
    guard: guardrails.Guard, project_name: Optional[str] = None
) -> guardrails.Guard:
    """
    Enable Opik tracking for every validator attached to a guardrails Guard.

    Each validator's ``async_validate`` method is wrapped with the Opik
    guardrails track decorator, so every validation step is logged as a trace.
    Validators whose ``async_validate`` already carries an ``opik_tracked``
    attribute are left untouched.

    Args:
        guard: An instance of Guard object.
        project_name: The name of the project to log data.

    Returns:
        The same Guard instance, with tracking enabled for its validators.
    """
    guard_validators = guard._validators
    tracker = guardrails_decorator.GuardrailsValidatorValidateDecorator()

    for current in guard_validators:
        already_tracked = hasattr(current.async_validate, "opik_tracked")
        if not already_tracked:
            span_name = f"{current.rail_alias}.validate"
            span_type = "llm" if hasattr(current, "llm_callable") else "general"
            wrap = tracker.track(
                name=span_name,
                project_name=project_name,
                type=span_type,
            )
            # The async version is wrapped because it is the one the
            # guardrails engine calls under the hood.
            current.async_validate = wrap(current.async_validate)

    return guard
@pytest.mark.parametrize(
    "project_name, expected_project_name",
    [
        (None, OPIK_PROJECT_DEFAULT_NAME),
        ("guardrails-integration-test", "guardrails-integration-test"),
    ],
)
def test_guardrails__trace_and_span_per_one_validation_check(
    fake_backend, ensure_openai_configured, project_name, expected_project_name
):
    """
    A tracked Guard with one validator logs one trace holding one "llm" span.

    The test runs with the default project and with an explicit project name.
    """
    politeness_check = PolitenessCheck(
        llm_callable="gpt-3.5-turbo", on_fail=OnFailAction.NOOP
    )

    guard: Guard = Guard().use_many(politeness_check)
    guard = track_guardrails(guard, project_name=project_name)

    text = "Would you be so kind to pass me a cup of tea?"
    result = guard.validate(text)
    # The expected tag is derived from the actual validation result rather
    # than hard-coded.
    expected_result_tag = "pass" if result.validation_passed else "fail"
    opik.flush_tracker()

    EXPECTED_TRACE_TREE = TraceModel(
        id=ANY_BUT_NONE,
        name="guardrails/politeness_check.validate",
        input={
            "value": text,
            "metadata": ANY_DICT,
        },
        output=ANY_BUT_NONE,
        tags=["guardrails", expected_result_tag],
        metadata={"created_from": "guardrails", "model": "gpt-3.5-turbo"},
        start_time=ANY_BUT_NONE,
        end_time=ANY_BUT_NONE,
        project_name=expected_project_name,
        spans=[
            SpanModel(
                id=ANY_BUT_NONE,
                type="llm",
                name="guardrails/politeness_check.validate",
                input={
                    "value": text,
                    "metadata": ANY_DICT,
                },
                output=ANY_BUT_NONE,
                tags=["guardrails", expected_result_tag],
                metadata={"created_from": "guardrails", "model": "gpt-3.5-turbo"},
                start_time=ANY_BUT_NONE,
                end_time=ANY_BUT_NONE,
                project_name=expected_project_name,
                model="gpt-3.5-turbo",
                spans=[],
            )
        ],
    )

    # Exactly one trace is expected for one validation check; asserting the
    # count gives a clearer failure than an IndexError on an empty list.
    assert len(fake_backend.trace_trees) == 1
    assert_equal(EXPECTED_TRACE_TREE, fake_backend.trace_trees[0])
def assert_equal(expected: Any, actual: Any) -> None:
    """
    Assert that `expected` equals `actual`; the failure message includes a
    difference report.

    `expected` MUST be the left operand of `==` so that the __eq__ operators
    of our ANY* comparison helpers are called instead of the __eq__ operators
    of the actual object.
    """
    # NOTE(review): prepare_difference_report is declared as (expected, actual)
    # but is called here with (actual, expected) - confirm the swap is intended.
    assert expected == actual, f"Details: {prepare_difference_report(actual, expected)}"