diff --git a/common-controller/internal/operator/controllers/dp/airatelimitpolicy_controller.go b/common-controller/internal/operator/controllers/dp/airatelimitpolicy_controller.go index 4791376a3..98789f89a 100644 --- a/common-controller/internal/operator/controllers/dp/airatelimitpolicy_controller.go +++ b/common-controller/internal/operator/controllers/dp/airatelimitpolicy_controller.go @@ -105,6 +105,9 @@ func (r *AIRateLimitPolicyReconciler) Reconcile(ctx context.Context, req ctrl.Re xds.UpdateRateLimiterPolicies(conf.CommonController.Server.Label) } else { loggers.LoggerAPKOperator.Infof("ratelimits found") + if ratelimitPolicy.Spec.Override == nil { + ratelimitPolicy.Spec.Override = ratelimitPolicy.Spec.Default + } if ratelimitPolicy.Spec.TargetRef.Name != "" { r.ods.AddorUpdateAIRatelimitToStore(ratelimitKey, ratelimitPolicy.Spec) xds.UpdateRateLimitXDSCacheForAIRatelimitPolicies(r.ods.GetAIRatelimitPolicySpecs()) diff --git a/runtime/config-deployer-service/ballerina/APIClient.bal b/runtime/config-deployer-service/ballerina/APIClient.bal index ec7ea4012..a5ecaa447 100644 --- a/runtime/config-deployer-service/ballerina/APIClient.bal +++ b/runtime/config-deployer-service/ballerina/APIClient.bal @@ -232,6 +232,11 @@ public class APIClient { serviceEntry: false, url: self.constructURlFromService(sandboxEndpointConfig.endpoint) }; + AIRatelimit? aiRatelimit = sandboxEndpointConfig.aiRatelimit; + if aiRatelimit is AIRatelimit && aiRatelimit.enabled { + model:AIRateLimitPolicy airl = self.generateAIRateLimitPolicyCR(apkConf, aiRatelimit.token, aiRatelimit.request, backendService.metadata.name, organization); + apiArtifact.aiRatelimitPolicies[airl.metadata.name] = airl; + } } } if (endpointType == () || endpointType == PRODUCTION_TYPE) { @@ -246,6 +251,11 @@ public class APIClient { serviceEntry: false, url: self.constructURlFromService(productionEndpointConfig.endpoint) }; + AIRatelimit? 
aiRatelimit = productionEndpointConfig.aiRatelimit; + if aiRatelimit is AIRatelimit && aiRatelimit.enabled { + model:AIRateLimitPolicy airl = self.generateAIRateLimitPolicyCR(apkConf, aiRatelimit.token, aiRatelimit.request, backendService.metadata.name, organization); + apiArtifact.aiRatelimitPolicies[airl.metadata.name] = airl; + } } } return endpointIdMap; @@ -1506,6 +1516,29 @@ public class APIClient { return rateLimitPolicyCR; } + public isolated function generateAIRateLimitPolicyCR(APKConf apkConf, TokenAIRL tokenAIRL, RequestAIRL requestAIRL, string targetRefName, commons:Organization organization) returns model:AIRateLimitPolicy { + string apiIdentifierHash = crypto:hashSha1((apkConf.name + apkConf.version).toBytes()).toBase16(); + model:AIRateLimitPolicy aiRateLimitPolicyCR = { + metadata: { + name: self.retrieveAIRateLimitPolicyName(apiIdentifierHash, targetRefName), + labels: self.getLabels(apkConf, organization) + }, + spec: { + default: { + organization: organization.name, + requestCount: {unit: requestAIRL.unit, requestsPerUnit: requestAIRL.requestLimit}, + tokenCount: {unit: tokenAIRL.unit, requestTokenCount: tokenAIRL.promptLimit, responseTokenCount: tokenAIRL.completionLimit, totalTokenCount: tokenAIRL.totalLimit} + }, + targetRef: { + group: "dp.wso2.com", + kind: "Backend", + name: targetRefName + } + } + }; + return aiRateLimitPolicyCR; + } + isolated function retrieveRateLimitData(RateLimit rateLimit, commons:Organization organization) returns model:RateLimitData { model:RateLimitData rateLimitData = { api: { @@ -1933,6 +1966,10 @@ public class APIClient { } } + public isolated function retrieveAIRateLimitPolicyName(string apiID, string targetRef) returns string { + return "airl-" + apiID + "-" + targetRef; + } + private isolated function validateAndRetrieveAPKConfiguration(json apkconfJson) returns APKConf|commons:APKError? 
{ do { runtimeapi:APKConfValidationResponse validationResponse = check apkConfValidator.validate(apkconfJson.toJsonString()); diff --git a/runtime/config-deployer-service/ballerina/ConfigGenreatorClient.bal b/runtime/config-deployer-service/ballerina/ConfigGenreatorClient.bal index 108e05829..8df53c519 100644 --- a/runtime/config-deployer-service/ballerina/ConfigGenreatorClient.bal +++ b/runtime/config-deployer-service/ballerina/ConfigGenreatorClient.bal @@ -234,6 +234,10 @@ on fail var e { string yamlString = check self.convertJsonToYaml(rateLimitPolicy.toJsonString()); _ = check self.storeFile(yamlString, rateLimitPolicy.metadata.name, zipDir); } + foreach model:AIRateLimitPolicy airateLimitPolicy in apiArtifact.aiRatelimitPolicies { + string yamlString = check self.convertJsonToYaml(airateLimitPolicy.toJsonString()); + _ = check self.storeFile(yamlString, airateLimitPolicy.metadata.name, zipDir); + } foreach model:APIPolicy apiPolicy in apiArtifact.apiPolicies { string yamlString = check self.convertJsonToYaml(apiPolicy.toJsonString()); _ = check self.storeFile(yamlString, apiPolicy.metadata.name, zipDir); diff --git a/runtime/config-deployer-service/ballerina/DeployerClient.bal b/runtime/config-deployer-service/ballerina/DeployerClient.bal index 802083886..27fb38cd2 100644 --- a/runtime/config-deployer-service/ballerina/DeployerClient.bal +++ b/runtime/config-deployer-service/ballerina/DeployerClient.bal @@ -102,6 +102,7 @@ public class DeployerClient { _ = check self.deleteScopeCrsForAPI(existingAPI, apiArtifact?.organization); check self.deleteBackends(existingAPI, apiArtifact?.organization); check self.deleteRateLimitPolicyCRs(existingAPI, apiArtifact?.organization); + check self.deleteAIRateLimitPolicyCRs(existingAPI, apiArtifact?.organization); check self.deleteAPIPolicyCRs(existingAPI, apiArtifact?.organization); check self.deleteInterceptorServiceCRs(existingAPI, apiArtifact?.organization); check self.deleteBackendJWTConfig(existingAPI, 
apiArtifact?.organization); @@ -121,6 +122,7 @@ public class DeployerClient { check self.deployBackendServices(apiArtifact, ownerReference); check self.deployAuthenticationCRs(apiArtifact, ownerReference); check self.deployRateLimitPolicyCRs(apiArtifact, ownerReference); + check self.deployAIRateLimitPolicyCRs(apiArtifact, ownerReference); check self.deployInterceptorServiceCRs(apiArtifact, ownerReference); check self.deployBackendJWTConfigs(apiArtifact, ownerReference); check self.deployAPIPolicyCRs(apiArtifact, ownerReference); @@ -660,6 +662,30 @@ public class DeployerClient { } } + private isolated function deployAIRateLimitPolicyCRs(model:APIArtifact apiArtifact, model:OwnerReference ownerReference) returns error? { + foreach model:AIRateLimitPolicy rateLimitPolicy in apiArtifact.aiRatelimitPolicies { + rateLimitPolicy.metadata.ownerReferences = [ownerReference]; + http:Response deployRateLimitPolicyResult = check deployAIRateLimitPolicyCR(rateLimitPolicy, apiArtifact?.namespace); + if deployRateLimitPolicyResult.statusCode == http:STATUS_CREATED { + log:printDebug("Deployed AIRateLimitPolicy Successfully" + rateLimitPolicy.toString()); + } else if deployRateLimitPolicyResult.statusCode == http:STATUS_CONFLICT { + log:printDebug("AIRateLimitPolicy already exists" + rateLimitPolicy.toString()); + model:AIRateLimitPolicy rateLimitPolicyFromK8s = check getAIRateLimitPolicyCR(rateLimitPolicy.metadata.name, apiArtifact?.namespace); + rateLimitPolicy.metadata.resourceVersion = rateLimitPolicyFromK8s.metadata.resourceVersion; + http:Response rateLimitPolicyCR = check updateAIRateLimitPolicyCR(rateLimitPolicy, apiArtifact?.namespace); + if rateLimitPolicyCR.statusCode != http:STATUS_OK { + json responsePayLoad = check rateLimitPolicyCR.getJsonPayload(); + model:Status statusResponse = check responsePayLoad.cloneWithType(model:Status); + check self.handleK8sTimeout(statusResponse); + } + } else { + json responsePayLoad = check 
deployRateLimitPolicyResult.getJsonPayload(); + model:Status statusResponse = check responsePayLoad.cloneWithType(model:Status); + check self.handleK8sTimeout(statusResponse); + } + } + } + private isolated function deleteRateLimitPolicyCRs(model:API api, string organization) returns commons:APKError? { do { model:RateLimitPolicyList|http:ClientError rateLimitPolicyCrListResponse = check getRateLimitPolicyCRsForAPI(api.spec.apiName, api.spec.apiVersion, api.metadata?.namespace, organization); @@ -684,6 +710,29 @@ public class DeployerClient { } } + private isolated function deleteAIRateLimitPolicyCRs(model:API api, string organization) returns commons:APKError? { + do { + model:AIRateLimitPolicyList|http:ClientError aiRateLimitPolicyCrListResponse = check getAIRateLimitPolicyCRsForAPI(api.spec.apiName, api.spec.apiVersion, api.metadata?.namespace, organization); + if aiRateLimitPolicyCrListResponse is model:AIRateLimitPolicyList { + foreach model:AIRateLimitPolicy item in aiRateLimitPolicyCrListResponse.items { + http:Response|http:ClientError rateLimitPolicyCRDeletionResponse = deleteAIRateLimitPolicyCR(item.metadata.name, item.metadata?.namespace); + if rateLimitPolicyCRDeletionResponse is http:Response { + if rateLimitPolicyCRDeletionResponse.statusCode != http:STATUS_OK { + json responsePayLoad = check rateLimitPolicyCRDeletionResponse.getJsonPayload(); + model:Status statusResponse = check responsePayLoad.cloneWithType(model:Status); + check self.handleK8sTimeout(statusResponse); + } + } else { + log:printError("Error occured while deleting AI rate limit policy"); + } + } + return; + } + } on fail var e { + log:printError("Error occured deleting AI rate limit policy", e); + return e909022("Error occured deleting AI rate limit policy", e); + } + } private isolated function deleteAPIPolicyCRs(model:API api, string organization) returns commons:APKError? 
{ do { model:APIPolicyList|http:ClientError apiPolicyCrListResponse = check getAPIPolicyCRsForAPI(api.spec.apiName, api.spec.apiVersion, api.metadata?.namespace, organization); diff --git a/runtime/config-deployer-service/ballerina/K8sClient.bal b/runtime/config-deployer-service/ballerina/K8sClient.bal index ed1219c0d..e97309129 100644 --- a/runtime/config-deployer-service/ballerina/K8sClient.bal +++ b/runtime/config-deployer-service/ballerina/K8sClient.bal @@ -251,26 +251,51 @@ isolated function deployRateLimitPolicyCR(model:RateLimitPolicy rateLimitPolicy, return k8sApiServerEp->post(endpoint, rateLimitPolicy, targetType = http:Response); } +isolated function deployAIRateLimitPolicyCR(model:AIRateLimitPolicy rateLimitPolicy, string namespace) returns http:Response|http:ClientError { + string endpoint = "/apis/dp.wso2.com/v1alpha3/namespaces/" + namespace + "/airatelimitpolicies"; + return k8sApiServerEp->post(endpoint, rateLimitPolicy, targetType = http:Response); +} + isolated function updateRateLimitPolicyCR(model:RateLimitPolicy rateLimitPolicy, string namespace) returns http:Response|http:ClientError { string endpoint = "/apis/dp.wso2.com/v1alpha1/namespaces/" + namespace + "/ratelimitpolicies/" + rateLimitPolicy.metadata.name; return k8sApiServerEp->put(endpoint, rateLimitPolicy, targetType = http:Response); } +isolated function updateAIRateLimitPolicyCR(model:AIRateLimitPolicy rateLimitPolicy, string namespace) returns http:Response|http:ClientError { + string endpoint = "/apis/dp.wso2.com/v1alpha3/namespaces/" + namespace + "/airatelimitpolicies/" + rateLimitPolicy.metadata.name; + return k8sApiServerEp->put(endpoint, rateLimitPolicy, targetType = http:Response); +} + isolated function getRateLimitPolicyCR(string name, string namespace) returns model:RateLimitPolicy|http:ClientError { string endpoint = "/apis/dp.wso2.com/v1alpha1/namespaces/" + namespace + "/ratelimitpolicies/" + name; return k8sApiServerEp->get(endpoint, targetType = 
model:RateLimitPolicy); } +isolated function getAIRateLimitPolicyCR(string name, string namespace) returns model:AIRateLimitPolicy|http:ClientError { + string endpoint = "/apis/dp.wso2.com/v1alpha3/namespaces/" + namespace + "/airatelimitpolicies/" + name; + return k8sApiServerEp->get(endpoint, targetType = model:AIRateLimitPolicy); +} + isolated function deleteRateLimitPolicyCR(string name, string namespace) returns http:Response|http:ClientError { string endpoint = "/apis/dp.wso2.com/v1alpha1/namespaces/" + namespace + "/ratelimitpolicies/" + name; return k8sApiServerEp->delete(endpoint, targetType = http:Response); } +isolated function deleteAIRateLimitPolicyCR(string name, string namespace) returns http:Response|http:ClientError { + string endpoint = "/apis/dp.wso2.com/v1alpha3/namespaces/" + namespace + "/airatelimitpolicies/" + name; + return k8sApiServerEp->delete(endpoint, targetType = http:Response); +} + isolated function getRateLimitPolicyCRsForAPI(string apiName, string apiVersion, string namespace, string organization) returns model:RateLimitPolicyList|http:ClientError|error { string endpoint = "/apis/dp.wso2.com/v1alpha1/namespaces/" + namespace + "/ratelimitpolicies?labelSelector=" + check generateUrlEncodedLabelSelector(apiName, apiVersion, organization); return k8sApiServerEp->get(endpoint, targetType = model:RateLimitPolicyList); } +isolated function getAIRateLimitPolicyCRsForAPI(string apiName, string apiVersion, string namespace, string organization) returns model:AIRateLimitPolicyList|http:ClientError|error { + string endpoint = "/apis/dp.wso2.com/v1alpha3/namespaces/" + namespace + "/airatelimitpolicies?labelSelector=" + check generateUrlEncodedLabelSelector(apiName, apiVersion, organization); + return k8sApiServerEp->get(endpoint, targetType = model:AIRateLimitPolicyList); +} + isolated function deployAPIPolicyCR(model:APIPolicy apiPolicy, string namespace) returns http:Response|http:ClientError { string endpoint = 
"/apis/dp.wso2.com/v1alpha3/namespaces/" + namespace + "/apipolicies"; return k8sApiServerEp->post(endpoint, apiPolicy, targetType = http:Response); diff --git a/runtime/config-deployer-service/ballerina/modules/model/AIRatelimitPolicy.bal b/runtime/config-deployer-service/ballerina/modules/model/AIRatelimitPolicy.bal new file mode 100644 index 000000000..6c7a0f8f4 --- /dev/null +++ b/runtime/config-deployer-service/ballerina/modules/model/AIRatelimitPolicy.bal @@ -0,0 +1,47 @@ +// +// Copyright (c) 2024, WSO2 LLC. (http://www.wso2.com). +// +// WSO2 LLC. licenses this file to you under the Apache License, +// Version 2.0 (the "License"); you may not use this file except +// in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+// +public type AIRateLimitPolicy record { + string apiVersion = "dp.wso2.com/v1alpha3"; + string kind = "AIRateLimitPolicy"; + Metadata metadata; + AIRateLimitPolicySpec spec; +}; + +public type AIRateLimitPolicySpec record {| + AIRateLimitPolicyData override?; + AIRateLimitPolicyData default?; + TargetRef targetRef; +|}; + +public type AIRateLimitPolicyData record { + string organization; + TokenAIRL tokenCount; + RequestAIRL requestCount; +}; + +public type TokenAIRL record { + string unit; + int requestTokenCount; + int responseTokenCount; + int totalTokenCount; +}; + +public type RequestAIRL record { + string unit; + int requestsPerUnit; +}; diff --git a/runtime/config-deployer-service/ballerina/modules/model/APIArtifact.bal b/runtime/config-deployer-service/ballerina/modules/model/APIArtifact.bal index b40f95448..2e18b2add 100644 --- a/runtime/config-deployer-service/ballerina/modules/model/APIArtifact.bal +++ b/runtime/config-deployer-service/ballerina/modules/model/APIArtifact.bal @@ -15,6 +15,7 @@ public type APIArtifact record {| map authenticationMap = {}; map scopes = {}; map rateLimitPolicies = {}; + map aiRatelimitPolicies = {}; map apiPolicies = {}; map interceptorServices = {}; boolean sandboxEndpointAvailable = false; diff --git a/runtime/config-deployer-service/ballerina/modules/model/RateLimit.bal b/runtime/config-deployer-service/ballerina/modules/model/RateLimit.bal index a24cffbf2..8cf969c21 100644 --- a/runtime/config-deployer-service/ballerina/modules/model/RateLimit.bal +++ b/runtime/config-deployer-service/ballerina/modules/model/RateLimit.bal @@ -43,3 +43,10 @@ public type RateLimitPolicyList record { ListMeta metadata; RateLimitPolicy[] items; }; + +public type AIRateLimitPolicyList record { + string apiVersion = "dp.wso2.com/v1alpha3"; + string kind = "AIRateLimitPolicyList"; + ListMeta metadata; + AIRateLimitPolicy[] items; +}; diff --git a/runtime/config-deployer-service/ballerina/resources/apk-conf-schema.yaml 
b/runtime/config-deployer-service/ballerina/resources/apk-conf-schema.yaml index afe8fc75d..248af0f7c 100644 --- a/runtime/config-deployer-service/ballerina/resources/apk-conf-schema.yaml +++ b/runtime/config-deployer-service/ballerina/resources/apk-conf-schema.yaml @@ -360,6 +360,8 @@ components: $ref: "#/components/schemas/Certificate" resiliency: $ref: "#/components/schemas/Resiliency" + aiRatelimit: + $ref: "#/components/schemas/AIRatelimit" additionalProperties: false Certificate: type: object @@ -599,3 +601,57 @@ components: type: string default: string additionalProperties: false + AIRatelimit: + type: object + required: + - enabled + - token + - request + properties: + enabled: + type: boolean + default: true + token: + $ref: "#/components/schemas/TokenAIRL" + request: + $ref: "#/components/schemas/RequestAIRL" + TokenAIRL: + type: object + required: + - promptLimit + - completionLimit + - totalLimit + - unit + properties: + promptLimit: + type: integer + default: 0 + completionLimit: + type: integer + default: 0 + totalLimit: + type: integer + default: 0 + unit: + type: string + default: Minute + enum: + - Minute + - Hour + - Day + RequestAIRL: + type: object + required: + - requestLimit + - unit + properties: + requestLimit: + type: integer + default: 0 + unit: + type: string + default: Minute + enum: + - Minute + - Hour + - Day diff --git a/runtime/config-deployer-service/ballerina/types.bal b/runtime/config-deployer-service/ballerina/types.bal index dee63aa89..ed2abbccd 100644 --- a/runtime/config-deployer-service/ballerina/types.bal +++ b/runtime/config-deployer-service/ballerina/types.bal @@ -234,6 +234,38 @@ public type Resiliency record { RetryPolicy retryPolicy?; }; +# Configuration of AIRatelimit settings. +# +# + token - Configuration for the token-based limits. +# + request - Configuration for the request-count limit. 
+public type AIRatelimit record { + boolean enabled; + TokenAIRL token; + RequestAIRL request; +}; + +# Configuration for Token AI rate limit settings. +# +# + promptLimit - Limit for prompts within the specified unit. +# + completionLimit - Limit for completions within the specified unit. +# + totalLimit - Total limit combining prompt and completion counts. +# + unit - The time unit for the rate limits (Minute, Hour, Day). +public type TokenAIRL record { + int promptLimit; + int completionLimit; + int totalLimit; + string unit; +}; + +# Configuration for Request AI rate limit settings. +# +# + requestLimit - Limit for requests within the specified unit. +# + unit - The time unit for the request limits (Minute, Hour, Day). +public type RequestAIRL record { + int requestLimit; + string unit; +}; + # Configuration of CircuitBreaker settings. # # + maxConnectionPools - The maximum number of connection pools allowed. @@ -267,11 +299,13 @@ public type EndpointConfigurations record { # + endpointSecurity - The security configuration for the endpoint. # + certificate - The certificate configuration for the endpoint. # + resiliency - The resiliency configuration for the endpoint. +# + aiRatelimit - The AI rate limit configuration for the endpoint. public type EndpointConfiguration record { string|K8sService endpoint; EndpointSecurity endpointSecurity?; Certificate certificate?; Resiliency resiliency?; + AIRatelimit aiRatelimit?; }; # Configuration of OAuth2 Authentication type. diff --git a/runtime/config-deployer-service/docker/config-deployer/conf/apk-schema.json b/runtime/config-deployer-service/docker/config-deployer/conf/apk-schema.json index 19f723ee2..9ab31107d 100644 --- a/runtime/config-deployer-service/docker/config-deployer/conf/apk-schema.json +++ b/runtime/config-deployer-service/docker/config-deployer/conf/apk-schema.json @@ -532,6 +532,10 @@ "resiliency": { "$ref": "#/schemas/Resiliency", "description": "Resiliency configuration for the API endpoint." 
+ }, + "aiRatelimit": { + "$ref": "#/schemas/AIRatelimit", + "description": "AI ratelimit configuration for the API endpoint." } }, "additionalProperties": false @@ -632,6 +636,93 @@ }, "additionalProperties": false }, + "AIRatelimit": { + "type": "object", + "required": [ + "enabled", + "token", + "request" + ], + "description": "Endpoint AI ratelimit related configurations of the API", + "properties": { + "enabled" : { + "type" : "boolean", + "default": true, + "description": "States whether the AI ratelimit is turned on or not" + }, + "token": { + "$ref": "#/schemas/TokenAIRL" + }, + "request": { + "$ref": "#/schemas/RequestAIRL" + } + }, + "additionalProperties": false + }, + "TokenAIRL": { + "type": "object", + "required": [ + "promptLimit", + "completionLimit", + "totalLimit", + "unit" + ], + "description": "Token limits configuration for AI rate limiting", + "properties": { + "promptLimit": { + "type": "integer", + "default": 0, + "description": "Limit for prompts within the specified unit" + }, + "completionLimit": { + "type": "integer", + "default": 0, + "description": "Limit for completions within the specified unit" + }, + "totalLimit": { + "type": "integer", + "default": 0, + "description": "Total limit combining prompt and completion counts" + }, + "unit": { + "type": "string", + "default": "Minute", + "enum": [ + "Minute", + "Hour", + "Day" + ], + "description": "The time unit for the rate limits" + } + }, + "additionalProperties": false + }, + "RequestAIRL": { + "type": "object", + "required": [ + "requestLimit", + "unit" + ], + "description": "Request limits configuration for AI rate limiting", + "properties": { + "requestLimit": { + "type": "integer", + "default": 0, + "description": "Limit for requests within the specified unit" + }, + "unit": { + "type": "string", + "default": "Minute", + "enum": [ + "Minute", + "Hour", + "Day" + ], + "description": "The time unit for the request limits" + } + }, + "additionalProperties": false + }, 
"CircuitBreaker": { "type": "object", "properties": { diff --git a/test/cucumber-tests/CRs/artifacts.yaml b/test/cucumber-tests/CRs/artifacts.yaml index 56e41d317..2bbeb801a 100644 --- a/test/cucumber-tests/CRs/artifacts.yaml +++ b/test/cucumber-tests/CRs/artifacts.yaml @@ -1777,3 +1777,25 @@ spec: cpu: 10m --- +apiVersion: dp.wso2.com/v1alpha3 +kind: AIProvider +metadata: + name: llm-provider-1 + namespace: apk-integration-test +spec: + providerName : "AzureAI" + providerAPIVersion : "2024-06-01" + organization : "default" + model: + in: "Body" + value: "model" + rateLimitFields: + promptTokens: + in: "Body" + value: "usage.prompt_tokens" + completionToken: + in: "Body" + value: "usage.completion_tokens" + totalToken: + in: "Body" + value: "usage.total_tokens" diff --git a/test/cucumber-tests/src/test/resources/artifacts/apk-confs/backend_based_airl_conf.yaml b/test/cucumber-tests/src/test/resources/artifacts/apk-confs/backend_based_airl_conf.yaml new file mode 100644 index 000000000..ffa4a2078 --- /dev/null +++ b/test/cucumber-tests/src/test/resources/artifacts/apk-confs/backend_based_airl_conf.yaml @@ -0,0 +1,41 @@ +--- +name: "BackendBasedAIRL" +basePath: "/backend-based-airl" +id: "backend-based-airl" +version: "1.0.0" +type: "REST" +defaultVersion: false +subscriptionValidation: false +aiProvider: + name: llm-provider-1 + apiVersion: “2024.06.01” +endpointConfigurations: + production: + endpoint: "http://llm-service:80" + aiRatelimit: + enabled: true + token: + promptLimit: 5000 + completionLimit: 10000 + totalLimit: 15000 + unit: Minute + request: + requestLimit: 6000 + unit: Minute +operations: +- target: "/employee" + verb: "GET" + secured: true + scopes: [] +- target: "/employee" + verb: "POST" + secured: true + scopes: [] +- target: "/employee/{employeeId}" + verb: "PUT" + secured: true + scopes: [] +- target: "/employee/{employeeId}" + verb: "DELETE" + secured: true + scopes: [] diff --git 
a/test/cucumber-tests/src/test/resources/tests/api/APIBackendBasedAIRatelimit.feature b/test/cucumber-tests/src/test/resources/tests/api/APIBackendBasedAIRatelimit.feature index 2ad7bebe3..13fb3c024 100644 --- a/test/cucumber-tests/src/test/resources/tests/api/APIBackendBasedAIRatelimit.feature +++ b/test/cucumber-tests/src/test/resources/tests/api/APIBackendBasedAIRatelimit.feature @@ -1,24 +1,25 @@ Feature: API backend based AI ratelimit Feature + Scenario: backend based AI ratelimit token detail comes in the body. Given The system is ready And I have a valid subscription Then I set headers - |Authorization|bearer ${accessToken}| + | Authorization | bearer ${accessToken} | And I wait for next minute strictly And I send "GET" request to "https://default.gw.wso2.com:9095/llm-api/v1.0.0/3.14/employee?send=body" with body "" Then the response status code should be 200 And the response headers should contain - | x-ratelimit-remaining | 4999 | + | x-ratelimit-remaining | 4999 | And I wait for 3 seconds And I send "GET" request to "https://default.gw.wso2.com:9095/llm-api/v1.0.0/3.14/employee?send=body" with body "" Then the response status code should be 200 And the response headers should contain - | x-ratelimit-remaining | 4699 | + | x-ratelimit-remaining | 4699 | And I wait for 3 seconds And I send "GET" request to "https://default.gw.wso2.com:9095/llm-api/v1.0.0/3.14/employee?send=body&prompt_tokens=40000" with body "" Then the response status code should be 200 And the response headers should contain - | x-ratelimit-remaining | 4399 | + | x-ratelimit-remaining | 4399 | And I wait for 3 seconds And I send "GET" request to "https://default.gw.wso2.com:9095/llm-api/v1.0.0/3.14/employee?send=body&prompt_tokens=40000" with body "" Then the response status code should be 429 @@ -34,26 +35,27 @@ Feature: API backend based AI ratelimit Feature And I wait for 3 seconds And I send "GET" request to "https://default.gw.wso2.com:9095/llm-api/v1.0.0/3.14/employee?send=body" 
with body "" Then the response status code should be 429 + Scenario: backend based AI ratelimit token detail comes in the header. Given The system is ready And I have a valid subscription Then I set headers - |Authorization|bearer ${accessToken}| + | Authorization | bearer ${accessToken} | And I wait for next minute strictly And I send "GET" request to "https://default.gw.wso2.com:9095/llm-api-header/v1.0.0/3.14/employee?send=header" with body "" Then the response status code should be 200 And the response headers should contain - | x-ratelimit-remaining | 4999 | + | x-ratelimit-remaining | 4999 | And I wait for 3 seconds And I send "GET" request to "https://default.gw.wso2.com:9095/llm-api-header/v1.0.0/3.14/employee?send=header" with body "" Then the response status code should be 200 And the response headers should contain - | x-ratelimit-remaining | 4699 | + | x-ratelimit-remaining | 4699 | And I wait for 3 seconds And I send "GET" request to "https://default.gw.wso2.com:9095/llm-api-header/v1.0.0/3.14/employee?send=header&prompt_tokens=40000" with body "" Then the response status code should be 200 And the response headers should contain - | x-ratelimit-remaining | 4399 | + | x-ratelimit-remaining | 4399 | And I wait for 3 seconds And I send "GET" request to "https://default.gw.wso2.com:9095/llm-api-header/v1.0.0/3.14/employee?send=header&prompt_tokens=40000" with body "" Then the response status code should be 429 @@ -69,18 +71,69 @@ Feature: API backend based AI ratelimit Feature And I wait for 3 seconds And I send "GET" request to "https://default.gw.wso2.com:9095/llm-api-header/v1.0.0/3.14/employee?send=header" with body "" Then the response status code should be 429 + Scenario: backend based AI ratelimit token detail comes in the header but a body configured api checked. 
Given The system is ready And I have a valid subscription Then I set headers - |Authorization|bearer ${accessToken}| + | Authorization | bearer ${accessToken} | And I wait for next minute strictly And I send "GET" request to "https://default.gw.wso2.com:9095/llm-api/v1.0.0/3.14/employee?send=header" with body "" Then the response status code should be 200 And the response headers should contain - | x-ratelimit-remaining | 4999 | + | x-ratelimit-remaining | 4999 | And I wait for 3 seconds And I send "GET" request to "https://default.gw.wso2.com:9095/llm-api/v1.0.0/3.14/employee?send=header" with body "" Then the response status code should be 200 And the response headers should contain - | x-ratelimit-remaining | 4998 | \ No newline at end of file + | x-ratelimit-remaining | 4998 | + + Scenario: apk conf backend based AI ratelimit token detail comes in the body. + Given The system is ready + And I have a valid subscription + When I use the APK Conf file "artifacts/apk-confs/backend_based_airl_conf.yaml" + And the definition file "artifacts/definitions/employees_api.json" + And make the API deployment request + Then the response status code should be 200 + Then I set headers + | Authorization | bearer ${accessToken} | + And I wait for next minute strictly + And I send "GET" request to "https://default.gw.wso2.com:9095/backend-based-airl/1.0.0/employee?send=body" with body "" + Then the response status code should be 200 + And the response headers should contain + | x-ratelimit-remaining | 4999 | + And I wait for 3 seconds + And I send "GET" request to "https://default.gw.wso2.com:9095/backend-based-airl/1.0.0/employee?send=body" with body "" + Then the response status code should be 200 + And the response headers should contain + | x-ratelimit-remaining | 4699 | + And I wait for 3 seconds + And I send "GET" request to "https://default.gw.wso2.com:9095/backend-based-airl/1.0.0/employee?send=body&prompt_tokens=40000" with body "" + Then the response status code should 
be 200 + And the response headers should contain + | x-ratelimit-remaining | 4399 | + And I wait for 3 seconds + And I send "GET" request to "https://default.gw.wso2.com:9095/backend-based-airl/1.0.0/employee?send=body&prompt_tokens=40000" with body "" + Then the response status code should be 429 + And I wait for next minute strictly + And I send "GET" request to "https://default.gw.wso2.com:9095/backend-based-airl/1.0.0/employee?send=body&completion_tokens=40000" with body "" + Then the response status code should be 200 + And I wait for 3 seconds + And I send "GET" request to "https://default.gw.wso2.com:9095/backend-based-airl/1.0.0/employee?send=body" with body "" + Then the response status code should be 429 + And I wait for next minute strictly + And I send "GET" request to "https://default.gw.wso2.com:9095/backend-based-airl/1.0.0/employee?send=body&total_tokens=40000" with body "" + Then the response status code should be 200 + And I wait for 3 seconds + And I send "GET" request to "https://default.gw.wso2.com:9095/backend-based-airl/1.0.0/employee?send=body" with body "" + Then the response status code should be 429 + + Scenario Outline: Undeploy API + Given The system is ready + And I have a valid subscription + When I undeploy the API whose ID is "<apiID>" + Then the response status code should be <expectedStatusCode> + + Examples: + | apiID | expectedStatusCode | + | backend-based-airl | 202 |