[OPIK-641] create guardrails integration #954

Merged: 20 commits, Dec 24, 2024
57 changes: 57 additions & 0 deletions .github/workflows/lib-guardrails-tests.yml
@@ -0,0 +1,57 @@
# Workflow to run Guardrails integration tests
#
# Please read the inputs to provide correct values.
#
name: SDK Lib Guardrails Tests
run-name: "SDK Lib Guardrails Tests ${{ github.ref_name }} by @${{ github.actor }}"
env:
  OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
  OPENAI_ORG_ID: ${{ secrets.OPENAI_ORG_ID }}
  GUARDRAILS_API_KEY: ${{ secrets.GUARDRAILS_API_KEY }}
on:
  workflow_call:

jobs:
  tests:
    name: Guardrails Python ${{matrix.python_version}}
    runs-on: ubuntu-latest
    defaults:
      run:
        working-directory: sdks/python

    strategy:
      fail-fast: true
      matrix:
        python_version: ["3.10", "3.11", "3.12"]

    steps:
      - name: Check out code
        uses: actions/checkout@v4

      - name: Setup Python ${{matrix.python_version}}
        uses: actions/setup-python@v5
        with:
          python-version: ${{matrix.python_version}}

      - name: Install opik
        run: pip install .

      - name: Install test tools
        run: |
          cd ./tests
          pip install --no-cache-dir --disable-pip-version-check -r test_requirements.txt

      - name: Install lib
        run: |
          cd ./tests
          pip install --no-cache-dir --disable-pip-version-check -r library_integration/guardrails/requirements.txt

      - name: Install checks from guardrails hub
        run: |
          guardrails configure --token $GUARDRAILS_API_KEY --disable-metrics --enable-remote-inferencing;
          guardrails hub install hub://guardrails/politeness_check

      - name: Run tests
        run: |
          cd ./tests/library_integration/guardrails/
          python -m pytest -vv .
7 changes: 7 additions & 0 deletions .github/workflows/lib-integration-tests-runner.yml
@@ -17,6 +17,7 @@ on:
          - anthropic
          - aisuite
          - haystack
          - guardrails
  schedule:
    - cron: "0 0 */1 * *"
  pull_request:
@@ -80,3 +81,9 @@ jobs:
    if: contains(fromJSON('["haystack", "all"]'), needs.init_environment.outputs.LIBS)
    uses: ./.github/workflows/lib-haystack-tests.yml
    secrets: inherit

  guardrails_tests:
    needs: [init_environment]
    if: contains(fromJSON('["guardrails", "all"]'), needs.init_environment.outputs.LIBS)
    uses: ./.github/workflows/lib-guardrails-tests.yml
    secrets: inherit
3 changes: 3 additions & 0 deletions sdks/python/src/opik/integrations/guardrails/__init__.py
@@ -0,0 +1,3 @@
from .guardrails_tracker import track_guardrails

__all__ = ["track_guardrails"]
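
For reference, the package root re-exports the tracker, so callers can rely on the shorter import path. A one-line sketch of the intended import:

# Public entry point exposed by this __init__.py.
from opik.integrations.guardrails import track_guardrails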
86 changes: 86 additions & 0 deletions sdks/python/src/opik/integrations/guardrails/guardrails_decorator.py
@@ -0,0 +1,86 @@
import logging
from typing import (
    Any,
    AsyncGenerator,
    Callable,
    Dict,
    Generator,
    List,
    Optional,
    Tuple,
    Union,
)

from guardrails import validators

from opik.decorator import arguments_helpers, base_track_decorator, inspect_helpers

LOGGER = logging.getLogger(__name__)

KWARGS_KEYS_TO_LOG_AS_INPUTS = ["value"]
RESPONSE_KEYS_TO_LOG_AS_OUTPUT = ["output"]


class GuardrailsValidatorValidateDecorator(base_track_decorator.BaseTrackDecorator):
    def _start_span_inputs_preprocessor(
        self,
        func: Callable,
        track_options: arguments_helpers.TrackOptions,
        args: Tuple,
        kwargs: Dict[str, Any],
    ) -> arguments_helpers.StartSpanParameters:
        name = track_options.name if track_options.name is not None else func.__name__
        metadata = track_options.metadata if track_options.metadata is not None else {}
        metadata.update({"created_from": "guardrails"})
        input = (
            inspect_helpers.extract_inputs(func, args, kwargs)
            if track_options.capture_input
            else None
        )

        validator_instance = func.__self__  # type: ignore
        model = (
            validator_instance.llm_callable
            if hasattr(validator_instance, "llm_callable")
            else None
        )
        if model is not None:
            metadata["model"] = model

        result = arguments_helpers.StartSpanParameters(
            name=name,
            input=input,
            type=track_options.type,
            metadata=metadata,
            project_name=track_options.project_name,
            model=model,
        )

        return result

    def _end_span_inputs_preprocessor(
        self, output: Any, capture_output: bool
    ) -> arguments_helpers.EndSpanParameters:
        assert isinstance(
            output,
            validators.ValidationResult,
        )
        tags = ["guardrails", output.outcome]

        result = arguments_helpers.EndSpanParameters(
            output=output,
            metadata=output.metadata,
            tags=tags,
        )

        return result

    def _generators_handler(
        self,
        output: Any,
        capture_output: bool,
        generations_aggregator: Optional[Callable[[List[Any]], str]],
    ) -> Optional[Union[Generator, AsyncGenerator]]:
        return super()._generators_handler(
            output, capture_output, generations_aggregator
        )
39 changes: 39 additions & 0 deletions sdks/python/src/opik/integrations/guardrails/guardrails_tracker.py
@@ -0,0 +1,39 @@
from typing import Optional

import guardrails

from . import guardrails_decorator


def track_guardrails(
    guard: guardrails.Guard, project_name: Optional[str] = None
) -> guardrails.Guard:
    """
    Adds Opik tracking to a guardrails Guard instance.

    Every validation step will be logged as a trace.

    Args:
        guard: An instance of a Guard object.
        project_name: The name of the project to log data to.

    Returns:
        The modified Guard instance with Opik tracking enabled for its validators.
    """
    validators = guard._validators
    decorator_factory = guardrails_decorator.GuardrailsValidatorValidateDecorator()

    for validator in validators:
        if hasattr(validator.async_validate, "opik_tracked"):
            continue

        validate_decorator = decorator_factory.track(
            name=f"{validator.rail_alias}.validate",
            project_name=project_name,
            type="llm" if hasattr(validator, "llm_callable") else "general",
        )
        setattr(
            validator, "async_validate", validate_decorator(validator.async_validate)
        )  # decorate the async version because it is called under the hood by the guardrails engine

    return guard
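
For context, a minimal usage sketch of the tracker, mirroring the integration test added later in this PR; the project name is illustrative, and the politeness check validator is assumed to have been installed from the guardrails hub:

# Minimal sketch, assuming guardrails-ai is installed and
# hub://guardrails/politeness_check has been pulled via `guardrails hub install`.
from guardrails import Guard, OnFailAction
from guardrails.hub import PolitenessCheck

import opik
from opik.integrations.guardrails import track_guardrails

politeness_check = PolitenessCheck(
    llm_callable="gpt-3.5-turbo", on_fail=OnFailAction.NOOP
)
guard = Guard().use_many(politeness_check)
guard = track_guardrails(guard, project_name="my-project")  # "my-project" is illustrative

# Each validator call is logged to Opik as a trace named "<rail_alias>.validate",
# with an "llm" span when the validator exposes an llm_callable.
result = guard.validate("Would you be so kind to pass me a cup of tea?")
opik.flush_tracker()

Because the tracker decorates each validator's async_validate in place, the same Guard instance keeps working as before; only the logging is added.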
Empty file.
1 change: 1 addition & 0 deletions sdks/python/tests/library_integration/guardrails/requirements.txt
@@ -0,0 +1 @@
guardrails-ai
69 changes: 69 additions & 0 deletions sdks/python/tests/library_integration/guardrails/ (new test module)
@@ -0,0 +1,69 @@
import pytest
from guardrails import Guard, OnFailAction
from guardrails.hub import PolitenessCheck

import opik
from opik.config import OPIK_PROJECT_DEFAULT_NAME
from opik.integrations.guardrails.guardrails_tracker import track_guardrails

from ...testlib import ANY_BUT_NONE, ANY_DICT, SpanModel, TraceModel, assert_equal


@pytest.mark.parametrize(
    "project_name, expected_project_name",
    [
        (None, OPIK_PROJECT_DEFAULT_NAME),
        ("guardrails-integration-test", "guardrails-integration-test"),
    ],
)
def test_guardrails__trace_and_span_per_one_validation_check(
    fake_backend, ensure_openai_configured, project_name, expected_project_name
):
    politeness_check = PolitenessCheck(
        llm_callable="gpt-3.5-turbo", on_fail=OnFailAction.NOOP
    )

    guard: Guard = Guard().use_many(politeness_check)
    guard = track_guardrails(guard, project_name=project_name)

    result = guard.validate(
        "Would you be so kind to pass me a cup of tea?",
    )  # the politeness guardrail is expected to pass
    expected_result_tag = "pass" if result.validation_passed else "fail"
    opik.flush_tracker()

    EXPECTED_TRACE_TREE = TraceModel(
        id=ANY_BUT_NONE,
        name="guardrails/politeness_check.validate",
        input={
            "value": "Would you be so kind to pass me a cup of tea?",
            "metadata": ANY_DICT,
        },
        output=ANY_BUT_NONE,
        tags=["guardrails", expected_result_tag],
        metadata={"created_from": "guardrails", "model": "gpt-3.5-turbo"},
        start_time=ANY_BUT_NONE,
        end_time=ANY_BUT_NONE,
        project_name=expected_project_name,
        spans=[
            SpanModel(
                id=ANY_BUT_NONE,
                type="llm",
                name="guardrails/politeness_check.validate",
                input={
                    "value": "Would you be so kind to pass me a cup of tea?",
                    "metadata": ANY_DICT,
                },
                output=ANY_BUT_NONE,
                tags=["guardrails", expected_result_tag],
                metadata={"created_from": "guardrails", "model": "gpt-3.5-turbo"},
                start_time=ANY_BUT_NONE,
                end_time=ANY_BUT_NONE,
                project_name=expected_project_name,
                model="gpt-3.5-turbo",
                spans=[],
            )
        ],
    )

    assert_equal(EXPECTED_TRACE_TREE, fake_backend.trace_trees[0])
8 changes: 5 additions & 3 deletions sdks/python/tests/testlib/assert_helpers.py
@@ -40,9 +40,11 @@ def prepare_difference_report(expected: Any, actual: Any) -> str:


 def assert_equal(expected, actual):
-    # expected MUST be left argument so that __eq__ operators
-    # from our ANY* comparison helpers were called instead of __eq__ operators
-    # of the actual object
+    """
+    expected MUST be the left argument so that the __eq__ operators
+    from our ANY* comparison helpers are called instead of the __eq__ operators
+    of the actual object.
+    """
     assert expected == actual, f"Details: {prepare_difference_report(actual, expected)}"
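
A self-contained illustration of why the argument order matters (these classes are hypothetical stand-ins, not the real testlib helpers): Python evaluates the left operand's __eq__ first, so an actual object whose __eq__ returns False for foreign types would decide the comparison before the ANY* helper ever ran.

# Hypothetical stand-ins for an ANY* helper and a strict actual object.
class AnyButNone:
    def __eq__(self, other):
        return other is not None


class StrictValue:
    def __init__(self, value):
        self.value = value

    def __eq__(self, other):
        # Returns False (not NotImplemented) for foreign types, so Python
        # never falls back to the reflected AnyButNone.__eq__.
        return isinstance(other, StrictValue) and self.value == other.value


expected = AnyButNone()
actual = StrictValue(42)

assert (expected == actual) is True   # AnyButNone.__eq__ decides the result
assert (actual == expected) is False  # StrictValue.__eq__ short-circuits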

