From 5dfe192f9af588d33b67053d9a5d85f2f175f5ae Mon Sep 17 00:00:00 2001
From: Aaron Abbott <aaronabbott@google.com>
Date: Fri, 6 Dec 2024 04:52:35 +0000
Subject: [PATCH] Pretty format yaml cassettes

---
 .../test_vertexai_generate_content.yaml       | 98 ++++++++++++++-----
 .../tests/conftest.py                         | 69 +++++++++++++
 .../tests/test_gemini.py                      |  2 +-
 3 files changed, 143 insertions(+), 26 deletions(-)

diff --git a/instrumentation-genai/opentelemetry-instrumentation-vertexai-v2/tests/cassettes/test_vertexai_generate_content.yaml b/instrumentation-genai/opentelemetry-instrumentation-vertexai-v2/tests/cassettes/test_vertexai_generate_content.yaml
index 2759f5730b..48cf3524e1 100644
--- a/instrumentation-genai/opentelemetry-instrumentation-vertexai-v2/tests/cassettes/test_vertexai_generate_content.yaml
+++ b/instrumentation-genai/opentelemetry-instrumentation-vertexai-v2/tests/cassettes/test_vertexai_generate_content.yaml
@@ -1,10 +1,24 @@
 interactions:
 - request:
-    body: "{\n  \"contents\": [\n    {\n      \"role\": \"user\",\n      \"parts\":
-      [\n        {\n          \"fileData\": {\n            \"mimeType\": \"image/jpeg\",\n
-      \           \"fileUri\": \"gs://generativeai-downloads/images/scones.jpg\"\n
-      \         }\n        },\n        {\n          \"text\": \"what is shown in this
-      image?\"\n        }\n      ]\n    }\n  ]\n}"
+    body: |-
+      {
+        "contents": [
+          {
+            "role": "user",
+            "parts": [
+              {
+                "fileData": {
+                  "mimeType": "image/jpeg",
+                  "fileUri": "gs://generativeai-downloads/images/scones.jpg"
+                }
+              },
+              {
+                "text": "what is shown in this image?"
+              }
+            ]
+          }
+        ]
+      }
     headers:
       Accept:
       - '*/*'
@@ -22,26 +36,60 @@ interactions:
     uri: https://us-central1-aiplatform.googleapis.com/v1beta1/projects/fake-project/locations/us-central1/publishers/google/models/gemini-pro-vision:generateContent?%24alt=json%3Benum-encoding%3Dint
   response:
     body:
-      string: "{\n  \"candidates\": [\n    {\n      \"content\": {\n        \"role\":
-        \"model\",\n        \"parts\": [\n          {\n            \"text\": \" The
-        image shows a table with a cup of coffee, a bowl of blueberries, and a plate
-        of scones. There are also some flowers on the table.\"\n          }\n        ]\n
-        \     },\n      \"finishReason\": 1,\n      \"safetyRatings\": [\n        {\n
-        \         \"category\": 1,\n          \"probability\": 1,\n          \"probabilityScore\":
-        0.02331543,\n          \"severity\": 1,\n          \"severityScore\": 0.05493164\n
-        \       },\n        {\n          \"category\": 2,\n          \"probability\":
-        1,\n          \"probabilityScore\": 0.026367188,\n          \"severity\":
-        1,\n          \"severityScore\": 0.05493164\n        },\n        {\n          \"category\":
-        3,\n          \"probability\": 1,\n          \"probabilityScore\": 0.046142578,\n
-        \         \"severity\": 1,\n          \"severityScore\": 0.030639648\n        },\n
-        \       {\n          \"category\": 4,\n          \"probability\": 1,\n          \"probabilityScore\":
-        0.080566406,\n          \"severity\": 1,\n          \"severityScore\": 0.095214844\n
-        \       }\n      ],\n      \"avgLogprobs\": -0.11595650642148909\n    }\n
-        \ ],\n  \"usageMetadata\": {\n    \"promptTokenCount\": 265,\n    \"candidatesTokenCount\":
-        31,\n    \"totalTokenCount\": 296\n  },\n  \"modelVersion\": \"gemini-pro-vision\"\n}\n"
+      string: |-
+        {
+          "candidates": [
+            {
+              "content": {
+                "role": "model",
+                "parts": [
+                  {
+                    "text": " The image shows a table with a cup of coffee, a bowl of blueberries, and a plate of scones with blueberries on top. There are also pink flowers on the table."
+                  }
+                ]
+              },
+              "finishReason": 1,
+              "safetyRatings": [
+                {
+                  "category": 1,
+                  "probability": 1,
+                  "probabilityScore": 0.025512695,
+                  "severity": 1,
+                  "severityScore": 0.06933594
+                },
+                {
+                  "category": 2,
+                  "probability": 1,
+                  "probabilityScore": 0.026367188,
+                  "severity": 1,
+                  "severityScore": 0.07080078
+                },
+                {
+                  "category": 3,
+                  "probability": 1,
+                  "probabilityScore": 0.041503906,
+                  "severity": 1,
+                  "severityScore": 0.03466797
+                },
+                {
+                  "category": 4,
+                  "probability": 1,
+                  "probabilityScore": 0.091308594,
+                  "severity": 1,
+                  "severityScore": 0.09033203
+                }
+              ],
+              "avgLogprobs": -0.09557106835501535
+            }
+          ],
+          "usageMetadata": {
+            "promptTokenCount": 265,
+            "candidatesTokenCount": 35,
+            "totalTokenCount": 300
+          },
+          "modelVersion": "gemini-pro-vision"
+        }
     headers:
-      Cache-Control:
-      - private
       Content-Type:
       - application/json; charset=UTF-8
       Transfer-Encoding:
@@ -51,7 +99,7 @@ interactions:
       - X-Origin
       - Referer
       content-length:
-      - '1275'
+      - '1299'
     status:
       code: 200
       message: OK
diff --git a/instrumentation-genai/opentelemetry-instrumentation-vertexai-v2/tests/conftest.py b/instrumentation-genai/opentelemetry-instrumentation-vertexai-v2/tests/conftest.py
index 183320cc68..32fc19a333 100644
--- a/instrumentation-genai/opentelemetry-instrumentation-vertexai-v2/tests/conftest.py
+++ b/instrumentation-genai/opentelemetry-instrumentation-vertexai-v2/tests/conftest.py
@@ -1,10 +1,12 @@
 """Unit tests configuration module."""
 
+import json
 import re
 from typing import Any, Mapping, MutableMapping
 
 import pytest
 import vertexai
+import yaml
 from google.auth.credentials import AnonymousCredentials
 from vcr import VCR
 from vcr.record_mode import RecordMode
@@ -97,3 +99,70 @@ def before_response_cb(response: MutableMapping[str, Any]):
         "before_record_response": before_response_cb,
         "ignore_hosts": ["oauth2.googleapis.com"],
     }
+
+
+class LiteralBlockScalar(str):
+    """Marker str subclass that the representer registered below emits as a
+    literal block scalar ('|'), preserving whitespace without escape sequences"""
+
+
+def literal_block_scalar_presenter(dumper, data):
+    """YAML representer: dumps data as a str scalar in literal ('|') style"""
+    return dumper.represent_scalar("tag:yaml.org,2002:str", data, style="|")
+
+
+yaml.add_representer(LiteralBlockScalar, literal_block_scalar_presenter)
+
+
+def process_string_value(string_value):
+    """Pretty-prints JSON; wraps other long str values in LiteralBlockScalar"""
+    try:
+        pretty = json.dumps(json.loads(string_value), indent=2)
+    except (ValueError, TypeError):
+        # str(bytes) is its repr and len(None) raises, so only wrap real str
+        wrap = isinstance(string_value, str) and len(string_value) > 80
+        return LiteralBlockScalar(string_value) if wrap else string_value
+    return LiteralBlockScalar(pretty)
+
+
+def convert_body_to_literal(data):
+    """Reformats every "body" value inside nested dicts/lists, in place"""
+    if isinstance(data, list):
+        data[:] = [convert_body_to_literal(item) for item in data]
+        return data
+    if not isinstance(data, dict):
+        return data
+
+    for name, entry in data.items():
+        if name != "body" or not isinstance(entry, (dict, str)):
+            convert_body_to_literal(entry)
+        elif isinstance(entry, str):
+            # Request side: the text sits directly under "body"
+            data[name] = process_string_value(entry)
+        elif "string" in entry:
+            # Response side: the text sits under body["string"]
+            entry["string"] = process_string_value(entry["string"])
+        else:
+            convert_body_to_literal(entry)
+    return data
+
+
+class PrettyPrintJSONBody:
+    """VCR YAML serializer that keeps recorded bodies human-readable."""
+
+    @staticmethod
+    def serialize(cassette_dict):
+        pretty = convert_body_to_literal(cassette_dict)
+        return yaml.dump(pretty, default_flow_style=False, allow_unicode=True)
+
+    @staticmethod
+    def deserialize(cassette_string):
+        # NOTE(review): full yaml.Loader; only load trusted cassette files
+        return yaml.load(cassette_string, Loader=yaml.Loader)
+
+
+@pytest.fixture(scope="module", autouse=True)
+def fixture_vcr(vcr):
+    """Registers PrettyPrintJSONBody under the "yaml" serializer name"""
+    vcr.register_serializer("yaml", PrettyPrintJSONBody)
+    return vcr
diff --git a/instrumentation-genai/opentelemetry-instrumentation-vertexai-v2/tests/test_gemini.py b/instrumentation-genai/opentelemetry-instrumentation-vertexai-v2/tests/test_gemini.py
index 87a85e4961..59ae91cccf 100644
--- a/instrumentation-genai/opentelemetry-instrumentation-vertexai-v2/tests/test_gemini.py
+++ b/instrumentation-genai/opentelemetry-instrumentation-vertexai-v2/tests/test_gemini.py
@@ -30,7 +30,7 @@ def test_vertexai_generate_content(exporter):
         "gen_ai.operation.name": "text_completion",
         "gen_ai.request.model": "gemini-pro-vision",
         "gen_ai.response.model": "gemini-pro-vision",
-        "gen_ai.usage.output_tokens": 31,
+        "gen_ai.usage.output_tokens": 35,
         "gen_ai.usage.input_tokens": 265,
     }