Skip to content

Commit

Permalink
Fix intermittent ratelimit timeout in envoy
Browse files: browse the repository at this point in the history
  • Loading branch information
Tharsanan1 committed Sep 11, 2024
1 parent 1ff618b commit afc47b2
Show file tree
Hide file tree
Showing 3 changed files with 7 additions and 2 deletions.
4 changes: 4 additions & 0 deletions adapter/internal/oasparser/envoyconf/http_filters.go
Original file line number Diff line number Diff line change
Expand Up @@ -169,6 +169,10 @@ func getRateLimitFilter() *hcmv3.HttpFilter {
Domain: RateLimiterDomain,
FailureModeDeny: conf.Envoy.RateLimit.FailureModeDeny,
EnableXRatelimitHeaders: enableXRatelimitHeaders,
Timeout: &durationpb.Duration{
Nanos: (int32(conf.Envoy.RateLimit.RequestTimeoutInMillis) % 1000) * 1000000,
Seconds: conf.Envoy.RateLimit.RequestTimeoutInMillis / 1000,
},
RateLimitService: &envoy_config_ratelimit_v3.RateLimitServiceConfig{
TransportApiVersion: corev3.ApiVersion_V3,
GrpcService: &corev3.GrpcService{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -114,8 +114,8 @@ public void onNext(ProcessingRequest request) {
String orgAndAIRLPolicyValue = filterMetadataFromAuthZ.getFieldsMap().get(DYNAMIC_METADATA_KEY_FOR_ORGANIZATION_AND_AIRL_POLICY).getStringValue();
String aiRLSubsValue = filterMetadataFromAuthZ.getFieldsMap().get(DYNAMIC_METADATA_KEY_FOR_SUBSCRIPTION).getStringValue();
configs.add(new RatelimitClient.KeyValueHitsAddend(DESCRIPTOR_KEY_FOR_SUBSCRIPTION_BASED_AI_REQUEST_TOKEN_COUNT, orgAndAIRLPolicyValue, new RatelimitClient.KeyValueHitsAddend(DESCRIPTOR_KEY_FOR_AI_SUBSCRIPTION, aiRLSubsValue, usage.getPrompt_tokens())));
configs.add(new RatelimitClient.KeyValueHitsAddend(DESCRIPTOR_KEY_FOR_SUBSCRIPTION_BASED_AI_RESPONSE_TOKEN_COUNT, orgAndAIRLPolicyValue, new RatelimitClient.KeyValueHitsAddend(DESCRIPTOR_KEY_FOR_AI_SUBSCRIPTION, aiRLSubsValue, usage.getPrompt_tokens())));
configs.add(new RatelimitClient.KeyValueHitsAddend(DESCRIPTOR_KEY_FOR_SUBSCRIPTION_BASED_AI_TOTAL_TOKEN_COUNT, orgAndAIRLPolicyValue, new RatelimitClient.KeyValueHitsAddend(DESCRIPTOR_KEY_FOR_AI_SUBSCRIPTION, aiRLSubsValue, usage.getPrompt_tokens())));
configs.add(new RatelimitClient.KeyValueHitsAddend(DESCRIPTOR_KEY_FOR_SUBSCRIPTION_BASED_AI_RESPONSE_TOKEN_COUNT, orgAndAIRLPolicyValue, new RatelimitClient.KeyValueHitsAddend(DESCRIPTOR_KEY_FOR_AI_SUBSCRIPTION, aiRLSubsValue, usage.getCompletion_tokens())));
configs.add(new RatelimitClient.KeyValueHitsAddend(DESCRIPTOR_KEY_FOR_SUBSCRIPTION_BASED_AI_TOTAL_TOKEN_COUNT, orgAndAIRLPolicyValue, new RatelimitClient.KeyValueHitsAddend(DESCRIPTOR_KEY_FOR_AI_SUBSCRIPTION, aiRLSubsValue, usage.getTotal_tokens())));
}
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,7 @@ public void shouldRatelimit(List<KeyValueHitsAddend> configs) {
.setHitsAddend(hitsAddend)
.build();
RateLimitResponse rateLimitResponse = stub.shouldRateLimit(rateLimitRequest);
System.out.println("ratelimit response: " + rateLimitResponse.getStatuses(0).getCurrentLimit() + " " + rateLimitResponse.getStatuses(0).getLimitRemaining());
System.out.println(rateLimitResponse.getOverallCode());
}
});
Expand Down

0 comments on commit afc47b2

Please sign in to comment.