Skip to content

Commit

Permalink
Bug fixes for x-ratelimit remaining-requests/remaining-tokens header handling
Browse files Browse the repository at this point in the history
  • Loading branch information
anevjes committed Jul 22, 2024
1 parent ab45a97 commit e022dd8
Show file tree
Hide file tree
Showing 2 changed files with 21 additions and 3 deletions.
2 changes: 1 addition & 1 deletion aisentry/facade/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -251,7 +251,7 @@ async def stream_response(response):
if proxy_streaming_response.headers.get("x-ratelimit-remaining-requests") is not None:
endpoint_info["x-ratelimit-remaining-requests"]=response.headers["x-ratelimit-remaining-requests"]
else:
endpoint_info["x-ratelimit-remaining-tokens"]=0
endpoint_info["x-ratelimit-remaining-requests"]=0

utc_now = datetime.now(timezone.utc).isoformat().replace('+00:00', 'Z')
request_body = json.loads(body)
Expand Down
22 changes: 20 additions & 2 deletions aisentry/utils/ai_sentry_helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,8 @@ def get_endpoints_from_poolname(poolname, json_data):

def num_tokens_from_string(string: str, encoding_name: str) -> int:
    """Return the number of tokens *string* encodes to under the given model's tokenizer.

    Args:
        string: The text to tokenize.
        encoding_name: Passed to ``tiktoken.encoding_for_model`` — despite the
            parameter name, tiktoken expects a *model* name here (e.g.
            ``"gpt-4"``), not an encoding name such as ``"cl100k_base"``.
            NOTE(review): if callers really pass encoding names, this should
            use ``tiktoken.get_encoding`` instead — confirm against callers.

    Returns:
        The token count of ``string``.

    Raises:
        KeyError: If tiktoken does not recognize the model name.
    """
    # Lazy %-style args avoid building the message when INFO is disabled.
    logger.info("encoding_name: %s", encoding_name)

    encoding = tiktoken.encoding_for_model(encoding_name)
    return len(encoding.encode(string))
Expand Down Expand Up @@ -48,9 +50,25 @@ def select_pool(pool_endpoints, pool_name):

async def getNextAvailableEndpointInfo(open_ai_endpoint_availability_stats):
    """Pick the endpoint with the most remaining rate-limit request budget.

    Args:
        open_ai_endpoint_availability_stats: List of dicts, each carrying at
            least a ``'url'`` key and an ``'x-ratelimit-remaining-requests'``
            value (header values arrive as strings, hence the int() coercion).

    Returns:
        The stats dict of the endpoint with the highest remaining-requests
        count.

    Raises:
        ValueError: If the stats list is empty, or a header value is not a
            valid integer string.
    """
    logger.info(
        "open_ai_endpoint_availability_stats: %s",
        open_ai_endpoint_availability_stats,
    )
    # BUG FIX: previously `reverse=True` was passed to int() — i.e.
    # int(x[...], reverse=True) — which raises TypeError at runtime.
    # It belongs to the sort itself; max() with an int key is equivalent
    # to sorted(..., reverse=True)[0] and avoids a full sort.
    next_endpoint = max(
        open_ai_endpoint_availability_stats,
        key=lambda stat: int(stat['x-ratelimit-remaining-requests']),
    )
    logger.info("Next available endpoint: %s", next_endpoint['url'])
    return next_endpoint


Expand Down

0 comments on commit e022dd8

Please sign in to comment.