Skip to content

Commit

Permalink
Add Contract Tests for LLM Attributes and Models (#952)
Browse files Browse the repository at this point in the history
*Description of changes:*
Add new contract tests for Gen AI attributes and models.

*Test Plan:*
<img width="1511" alt="contract-tests-pr"
src="https://github.com/user-attachments/assets/78c861af-a2f4-48e6-96e7-5463b16ec69b">

By submitting this pull request, I confirm that my contribution is made
under the terms of the Apache 2.0 license.
  • Loading branch information
yiyuan-he authored Nov 21, 2024
1 parent b58b94f commit 8469983
Show file tree
Hide file tree
Showing 8 changed files with 1,750 additions and 2,048 deletions.
2,947 changes: 917 additions & 2,030 deletions .github/patches/opentelemetry-java-instrumentation.patch

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
Expand Up @@ -1798,8 +1798,8 @@ protected void doTestBedrockAgentDataSourceId() {
0.0);
}

protected void doTestBedrockRuntimeModelId() {
var response = appClient.get("/bedrockruntime/invokeModel").aggregate().join();
protected void doTestBedrockRuntimeAi21Jamba() {
var response = appClient.get("/bedrockruntime/invokeModel/ai21Jamba").aggregate().join();
var traces = mockCollectorClient.getTraces();
var metrics =
mockCollectorClient.getMetrics(
Expand All @@ -1809,9 +1809,9 @@ protected void doTestBedrockRuntimeModelId() {
AppSignalsConstants.LATENCY_METRIC));

var localService = getApplicationOtelServiceName();
var localOperation = "GET /bedrockruntime/invokeModel";
var localOperation = "GET /bedrockruntime/invokeModel/ai21Jamba";
String type = "AWS::Bedrock::Model";
String identifier = "anthropic.claude-v2";
String identifier = "ai21.jamba-1-5-mini-v1:0";
assertSpanClientAttributes(
traces,
bedrockRuntimeSpanName("InvokeModel"),
Expand All @@ -1828,7 +1828,371 @@ protected void doTestBedrockRuntimeModelId() {
200,
List.of(
assertAttribute(
SemanticConventionsConstants.GEN_AI_REQUEST_MODEL, "anthropic.claude-v2")));
SemanticConventionsConstants.GEN_AI_REQUEST_MODEL, "ai21.jamba-1-5-mini-v1:0"),
assertAttribute(SemanticConventionsConstants.GEN_AI_REQUEST_TEMPERATURE, "0.7"),
assertAttribute(SemanticConventionsConstants.GEN_AI_REQUEST_TOP_P, "0.8"),
assertAttribute(SemanticConventionsConstants.GEN_AI_RESPONSE_FINISH_REASONS, "[stop]"),
assertAttribute(SemanticConventionsConstants.GEN_AI_USAGE_INPUT_TOKENS, "5"),
assertAttribute(SemanticConventionsConstants.GEN_AI_USAGE_OUTPUT_TOKENS, "42")));
assertMetricClientAttributes(
metrics,
AppSignalsConstants.LATENCY_METRIC,
localService,
localOperation,
getBedrockRuntimeServiceName(),
"InvokeModel",
type,
identifier,
5000.0);
assertMetricClientAttributes(
metrics,
AppSignalsConstants.FAULT_METRIC,
localService,
localOperation,
getBedrockRuntimeServiceName(),
"InvokeModel",
type,
identifier,
0.0);
assertMetricClientAttributes(
metrics,
AppSignalsConstants.ERROR_METRIC,
localService,
localOperation,
getBedrockRuntimeServiceName(),
"InvokeModel",
type,
identifier,
0.0);
}

/**
 * Contract test for the Amazon Titan model ({@code amazon.titan-text-premier-v1:0}) invoked
 * through the Bedrock Runtime {@code InvokeModel} API.
 *
 * <p>Hits the sample app's {@code /bedrockruntime/invokeModel/amazonTitan} route, then asserts
 * that the client span carries the expected Gen AI semantic-convention attributes (request model,
 * max tokens, temperature, top_p, finish reasons, token usage) and that the App Signals
 * latency/fault/error metrics are recorded with the Bedrock model as the remote resource.
 */
protected void doTestBedrockRuntimeAmazonTitan() {
  // The HTTP response body is never inspected (the original kept an unused local for it);
  // join() just blocks until the call completes so telemetry has been emitted before we
  // query the mock collector.
  appClient.get("/bedrockruntime/invokeModel/amazonTitan").aggregate().join();
  var traces = mockCollectorClient.getTraces();
  var metrics =
      mockCollectorClient.getMetrics(
          Set.of(
              AppSignalsConstants.ERROR_METRIC,
              AppSignalsConstants.FAULT_METRIC,
              AppSignalsConstants.LATENCY_METRIC));

  var localService = getApplicationOtelServiceName();
  var localOperation = "GET /bedrockruntime/invokeModel/amazonTitan";
  // Remote resource identity: Bedrock model type plus the concrete model id under test.
  String type = "AWS::Bedrock::Model";
  String identifier = "amazon.titan-text-premier-v1:0";
  assertSpanClientAttributes(
      traces,
      bedrockRuntimeSpanName("InvokeModel"),
      getBedrockRuntimeRpcServiceName(),
      localService,
      localOperation,
      getBedrockRuntimeServiceName(),
      "InvokeModel",
      type,
      identifier,
      "bedrock.test",
      8080,
      "http://bedrock.test:8080",
      200,
      List.of(
          assertAttribute(
              SemanticConventionsConstants.GEN_AI_REQUEST_MODEL,
              "amazon.titan-text-premier-v1:0"),
          assertAttribute(SemanticConventionsConstants.GEN_AI_REQUEST_MAX_TOKENS, "100"),
          assertAttribute(SemanticConventionsConstants.GEN_AI_REQUEST_TEMPERATURE, "0.7"),
          assertAttribute(SemanticConventionsConstants.GEN_AI_REQUEST_TOP_P, "0.9"),
          assertAttribute(
              SemanticConventionsConstants.GEN_AI_RESPONSE_FINISH_REASONS, "[FINISHED]"),
          assertAttribute(SemanticConventionsConstants.GEN_AI_USAGE_INPUT_TOKENS, "10"),
          assertAttribute(SemanticConventionsConstants.GEN_AI_USAGE_OUTPUT_TOKENS, "15")));
  // Expected metric values: the canned 5000.0 latency, and zero fault/error counts.
  assertMetricClientAttributes(
      metrics,
      AppSignalsConstants.LATENCY_METRIC,
      localService,
      localOperation,
      getBedrockRuntimeServiceName(),
      "InvokeModel",
      type,
      identifier,
      5000.0);
  assertMetricClientAttributes(
      metrics,
      AppSignalsConstants.FAULT_METRIC,
      localService,
      localOperation,
      getBedrockRuntimeServiceName(),
      "InvokeModel",
      type,
      identifier,
      0.0);
  assertMetricClientAttributes(
      metrics,
      AppSignalsConstants.ERROR_METRIC,
      localService,
      localOperation,
      getBedrockRuntimeServiceName(),
      "InvokeModel",
      type,
      identifier,
      0.0);
}

/**
 * Verifies the span and metric attributes emitted when the sample app invokes the Anthropic
 * Claude 3 Haiku model via the Bedrock Runtime {@code InvokeModel} API.
 */
protected void doTestBedrockRuntimeAnthropicClaude() {
  // Block until the sample app has handled the request and emitted its telemetry.
  appClient.get("/bedrockruntime/invokeModel/anthropicClaude").aggregate().join();

  var traces = mockCollectorClient.getTraces();
  var metrics =
      mockCollectorClient.getMetrics(
          Set.of(
              AppSignalsConstants.ERROR_METRIC,
              AppSignalsConstants.FAULT_METRIC,
              AppSignalsConstants.LATENCY_METRIC));

  String modelId = "anthropic.claude-3-haiku-20240307-v1:0";
  String remoteResourceType = "AWS::Bedrock::Model";
  var service = getApplicationOtelServiceName();
  var operation = "GET /bedrockruntime/invokeModel/anthropicClaude";

  assertSpanClientAttributes(
      traces,
      bedrockRuntimeSpanName("InvokeModel"),
      getBedrockRuntimeRpcServiceName(),
      service,
      operation,
      getBedrockRuntimeServiceName(),
      "InvokeModel",
      remoteResourceType,
      modelId,
      "bedrock.test",
      8080,
      "http://bedrock.test:8080",
      200,
      List.of(
          assertAttribute(
              SemanticConventionsConstants.GEN_AI_REQUEST_MODEL,
              "anthropic.claude-3-haiku-20240307-v1:0"),
          assertAttribute(SemanticConventionsConstants.GEN_AI_REQUEST_MAX_TOKENS, "512"),
          assertAttribute(SemanticConventionsConstants.GEN_AI_REQUEST_TEMPERATURE, "0.6"),
          assertAttribute(SemanticConventionsConstants.GEN_AI_REQUEST_TOP_P, "0.53"),
          assertAttribute(
              SemanticConventionsConstants.GEN_AI_RESPONSE_FINISH_REASONS, "[end_turn]"),
          assertAttribute(SemanticConventionsConstants.GEN_AI_USAGE_INPUT_TOKENS, "2095"),
          assertAttribute(SemanticConventionsConstants.GEN_AI_USAGE_OUTPUT_TOKENS, "503")));
  // Latency is checked against its expected 5000.0 value; fault and error must both be zero.
  assertMetricClientAttributes(
      metrics,
      AppSignalsConstants.LATENCY_METRIC,
      service,
      operation,
      getBedrockRuntimeServiceName(),
      "InvokeModel",
      remoteResourceType,
      modelId,
      5000.0);
  for (var zeroValuedMetric :
      List.of(AppSignalsConstants.FAULT_METRIC, AppSignalsConstants.ERROR_METRIC)) {
    assertMetricClientAttributes(
        metrics,
        zeroValuedMetric,
        service,
        operation,
        getBedrockRuntimeServiceName(),
        "InvokeModel",
        remoteResourceType,
        modelId,
        0.0);
  }
}

/**
 * Verifies the span and metric attributes emitted when the sample app invokes the Cohere
 * Command R model via the Bedrock Runtime {@code InvokeModel} API.
 */
protected void doTestBedrockRuntimeCohereCommandR() {
  // Block until the sample app has handled the request and emitted its telemetry.
  appClient.get("/bedrockruntime/invokeModel/cohereCommandR").aggregate().join();

  var traces = mockCollectorClient.getTraces();
  var metrics =
      mockCollectorClient.getMetrics(
          Set.of(
              AppSignalsConstants.ERROR_METRIC,
              AppSignalsConstants.FAULT_METRIC,
              AppSignalsConstants.LATENCY_METRIC));

  String modelId = "cohere.command-r-v1:0";
  String remoteResourceType = "AWS::Bedrock::Model";
  var service = getApplicationOtelServiceName();
  var operation = "GET /bedrockruntime/invokeModel/cohereCommandR";

  assertSpanClientAttributes(
      traces,
      bedrockRuntimeSpanName("InvokeModel"),
      getBedrockRuntimeRpcServiceName(),
      service,
      operation,
      getBedrockRuntimeServiceName(),
      "InvokeModel",
      remoteResourceType,
      modelId,
      "bedrock.test",
      8080,
      "http://bedrock.test:8080",
      200,
      List.of(
          assertAttribute(
              SemanticConventionsConstants.GEN_AI_REQUEST_MODEL, "cohere.command-r-v1:0"),
          assertAttribute(SemanticConventionsConstants.GEN_AI_REQUEST_MAX_TOKENS, "4096"),
          assertAttribute(SemanticConventionsConstants.GEN_AI_REQUEST_TEMPERATURE, "0.8"),
          assertAttribute(SemanticConventionsConstants.GEN_AI_REQUEST_TOP_P, "0.45"),
          assertAttribute(
              SemanticConventionsConstants.GEN_AI_RESPONSE_FINISH_REASONS, "[COMPLETE]"),
          assertAttribute(SemanticConventionsConstants.GEN_AI_USAGE_INPUT_TOKENS, "9"),
          assertAttribute(SemanticConventionsConstants.GEN_AI_USAGE_OUTPUT_TOKENS, "16")));
  // Latency is checked against its expected 5000.0 value; fault and error must both be zero.
  assertMetricClientAttributes(
      metrics,
      AppSignalsConstants.LATENCY_METRIC,
      service,
      operation,
      getBedrockRuntimeServiceName(),
      "InvokeModel",
      remoteResourceType,
      modelId,
      5000.0);
  for (var zeroValuedMetric :
      List.of(AppSignalsConstants.FAULT_METRIC, AppSignalsConstants.ERROR_METRIC)) {
    assertMetricClientAttributes(
        metrics,
        zeroValuedMetric,
        service,
        operation,
        getBedrockRuntimeServiceName(),
        "InvokeModel",
        remoteResourceType,
        modelId,
        0.0);
  }
}

/**
 * Verifies the span and metric attributes emitted when the sample app invokes the Meta Llama 3
 * model via the Bedrock Runtime {@code InvokeModel} API.
 */
protected void doTestBedrockRuntimeMetaLlama() {
  // Block until the sample app has handled the request and emitted its telemetry.
  appClient.get("/bedrockruntime/invokeModel/metaLlama").aggregate().join();

  var traces = mockCollectorClient.getTraces();
  var metrics =
      mockCollectorClient.getMetrics(
          Set.of(
              AppSignalsConstants.ERROR_METRIC,
              AppSignalsConstants.FAULT_METRIC,
              AppSignalsConstants.LATENCY_METRIC));

  String modelId = "meta.llama3-70b-instruct-v1:0";
  String remoteResourceType = "AWS::Bedrock::Model";
  var service = getApplicationOtelServiceName();
  var operation = "GET /bedrockruntime/invokeModel/metaLlama";

  assertSpanClientAttributes(
      traces,
      bedrockRuntimeSpanName("InvokeModel"),
      getBedrockRuntimeRpcServiceName(),
      service,
      operation,
      getBedrockRuntimeServiceName(),
      "InvokeModel",
      remoteResourceType,
      modelId,
      "bedrock.test",
      8080,
      "http://bedrock.test:8080",
      200,
      List.of(
          assertAttribute(
              SemanticConventionsConstants.GEN_AI_REQUEST_MODEL, "meta.llama3-70b-instruct-v1:0"),
          assertAttribute(SemanticConventionsConstants.GEN_AI_REQUEST_MAX_TOKENS, "128"),
          assertAttribute(SemanticConventionsConstants.GEN_AI_REQUEST_TEMPERATURE, "0.1"),
          assertAttribute(SemanticConventionsConstants.GEN_AI_REQUEST_TOP_P, "0.9"),
          assertAttribute(SemanticConventionsConstants.GEN_AI_RESPONSE_FINISH_REASONS, "[stop]"),
          assertAttribute(SemanticConventionsConstants.GEN_AI_USAGE_INPUT_TOKENS, "2095"),
          assertAttribute(SemanticConventionsConstants.GEN_AI_USAGE_OUTPUT_TOKENS, "503")));
  // Latency is checked against its expected 5000.0 value; fault and error must both be zero.
  assertMetricClientAttributes(
      metrics,
      AppSignalsConstants.LATENCY_METRIC,
      service,
      operation,
      getBedrockRuntimeServiceName(),
      "InvokeModel",
      remoteResourceType,
      modelId,
      5000.0);
  for (var zeroValuedMetric :
      List.of(AppSignalsConstants.FAULT_METRIC, AppSignalsConstants.ERROR_METRIC)) {
    assertMetricClientAttributes(
        metrics,
        zeroValuedMetric,
        service,
        operation,
        getBedrockRuntimeServiceName(),
        "InvokeModel",
        remoteResourceType,
        modelId,
        0.0);
  }
}

protected void doTestBedrockRuntimeMistral() {
var response = appClient.get("/bedrockruntime/invokeModel/mistralAi").aggregate().join();

var traces = mockCollectorClient.getTraces();
var metrics =
mockCollectorClient.getMetrics(
Set.of(
AppSignalsConstants.ERROR_METRIC,
AppSignalsConstants.FAULT_METRIC,
AppSignalsConstants.LATENCY_METRIC));

var localService = getApplicationOtelServiceName();
var localOperation = "GET /bedrockruntime/invokeModel/mistralAi";
String type = "AWS::Bedrock::Model";
String identifier = "mistral.mistral-large-2402-v1:0";

assertSpanClientAttributes(
traces,
bedrockRuntimeSpanName("InvokeModel"),
getBedrockRuntimeRpcServiceName(),
localService,
localOperation,
getBedrockRuntimeServiceName(),
"InvokeModel",
type,
identifier,
"bedrock.test",
8080,
"http://bedrock.test:8080",
200,
List.of(
assertAttribute(
SemanticConventionsConstants.GEN_AI_REQUEST_MODEL,
"mistral.mistral-large-2402-v1:0"),
assertAttribute(SemanticConventionsConstants.GEN_AI_REQUEST_MAX_TOKENS, "4096"),
assertAttribute(SemanticConventionsConstants.GEN_AI_REQUEST_TEMPERATURE, "0.75"),
assertAttribute(SemanticConventionsConstants.GEN_AI_REQUEST_TOP_P, "0.25"),
assertAttribute(SemanticConventionsConstants.GEN_AI_RESPONSE_FINISH_REASONS, "[stop]"),
assertAttribute(SemanticConventionsConstants.GEN_AI_USAGE_INPUT_TOKENS, "15"),
assertAttribute(SemanticConventionsConstants.GEN_AI_USAGE_OUTPUT_TOKENS, "24")));
assertMetricClientAttributes(
metrics,
AppSignalsConstants.LATENCY_METRIC,
Expand Down
Loading

0 comments on commit 8469983

Please sign in to comment.