Update Anthropic token counting #85

Merged (12 commits) on Nov 15, 2024
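
This PR replaces tokencost's offline Claude token estimate (the old `anthropic.Client().count_tokens(...)` path) with Anthropic's server-side token counting (beta) API, and updates the tests and price expectations accordingly. A minimal sketch of the call the new code path relies on, assuming the `anthropic` SDK is installed and `ANTHROPIC_API_KEY` is set in the environment:

```python
import anthropic

client = anthropic.Anthropic()  # reads ANTHROPIC_API_KEY from the environment
count = client.beta.messages.count_tokens(
    model="claude-3-opus-latest",
    messages=[{"role": "user", "content": "Hello, world"}],
)
print(count.input_tokens)  # exact input token count from the server
```
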
tests/test_costs.py (7 additions, 8 deletions)

@@ -46,7 +46,7 @@
("gpt-4-vision-preview", 15),
("gpt-4o", 15),
("azure/gpt-4o", 15),
("claude-2.1", 4),
("claude-3-opus-latest", 11),
],
)
def test_count_message_tokens(model, expected_output):
@@ -74,8 +74,7 @@ def test_count_message_tokens(model, expected_output):
         ("gpt-4-vision-preview", 17),
         ("gpt-4o", 17),
         ("azure/gpt-4o", 17),
-        ("claude-2.1", 4),
-
+        # ("claude-3-opus-latest", 4),  # NOTE: Claude only supports messages without extra inputs
     ],
 )
 def test_count_message_tokens_with_name(model, expected_output):
@@ -116,7 +115,7 @@ def test_count_message_tokens_invalid_model():
         ("gpt-4-vision-preview", 4),
         ("text-embedding-ada-002", 4),
         ("gpt-4o", 4),
-        ("claude-2.1", 4)
+        # ("claude-3-opus-latest", 4),  # NOTE: Claude only supports messages
     ],
 )
 def test_count_string_tokens(model, expected_output):
@@ -154,9 +153,9 @@ def test_count_string_invalid_model():
         (MESSAGES, "gpt-4-0613", Decimal("0.00045")),
         (MESSAGES, "gpt-4-1106-preview", Decimal("0.00015")),
         (MESSAGES, "gpt-4-vision-preview", Decimal("0.00015")),
-        (MESSAGES, "gpt-4o", Decimal("0.000075")),
+        (MESSAGES, "gpt-4o", Decimal("0.0000375")),
         (MESSAGES, "azure/gpt-4o", Decimal("0.000075")),
-        (MESSAGES, "claude-2.1", Decimal("0.000032")),
+        (MESSAGES, "claude-3-opus-latest", Decimal("0.000165")),
         (STRING, "text-embedding-ada-002", Decimal("0.0000004")),
     ],
 )
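
The updated expectations follow from the per-token prices implied by the diff: the MESSAGES fixture counts as 11 tokens for claude-3-opus-latest and 15 tokens for gpt-4o, so the costs work out to 11 x $0.000015 and 15 x $0.0000025 respectively (assuming $15 per 1M input tokens for Opus and $2.50 per 1M for the current gpt-4o snapshot). A quick check:

```python
# Sanity check of the new expected prompt costs (per-token prices are
# assumptions inferred from the test values above).
from decimal import Decimal

assert 11 * Decimal("0.000015") == Decimal("0.000165")    # claude-3-opus-latest
assert 15 * Decimal("0.0000025") == Decimal("0.0000375")  # gpt-4o
```
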
@@ -191,9 +190,9 @@ def test_invalid_prompt_format():
         (STRING, "gpt-4-0613", Decimal("0.00024")),
         (STRING, "gpt-4-1106-preview", Decimal("0.00012")),
         (STRING, "gpt-4-vision-preview", Decimal("0.00012")),
-        (STRING, "gpt-4o", Decimal("0.000060")),
+        (STRING, "gpt-4o", Decimal("0.00004")),
         (STRING, "azure/gpt-4o", Decimal("0.000060")),
-        (STRING, "claude-2.1", Decimal("0.000096")),
+        # (STRING, "claude-3-opus-latest", Decimal("0.000096")),  # NOTE: Claude only supports messages
         (STRING, "text-embedding-ada-002", 0),
     ],
 )
tests/test_llama_index_callbacks.py (1 addition, 2 deletions)

@@ -1,8 +1,7 @@
 # test_llama_index.py
 import pytest
 from tokencost.callbacks import llama_index
-from llama_index.core.callbacks.schema import CBEventType, EventPayload
-from unittest.mock import MagicMock
+from llama_index.core.callbacks.schema import EventPayload
 
 # Mock the calculate_prompt_cost and calculate_completion_cost functions
 
tokencost/__init__.py (1 addition, 1 deletion)

@@ -4,6 +4,6 @@
     calculate_completion_cost,
     calculate_prompt_cost,
     calculate_all_costs_and_tokens,
-    calculate_cost_by_tokens
+    calculate_cost_by_tokens,
 )
 from .constants import TOKEN_COSTS_STATIC, TOKEN_COSTS, update_token_costs
tokencost/constants.py (6 additions, 3 deletions)

@@ -39,7 +39,9 @@ async def fetch_costs():
         if response.status == 200:
             return await response.json(content_type=None)
         else:
-            raise Exception(f"Failed to fetch token costs, status code: {response.status}")
+            raise Exception(
+                f"Failed to fetch token costs, status code: {response.status}"
+            )
 
 
 async def update_token_costs():
@@ -49,11 +51,12 @@ async def update_token_costs():
         fetched_costs = await fetch_costs()
         # Safely remove 'sample_spec' if it exists
         TOKEN_COSTS.update(fetched_costs)
-        TOKEN_COSTS.pop('sample_spec', None)
+        TOKEN_COSTS.pop("sample_spec", None)
     except Exception as e:
         logger.error(f"Failed to update TOKEN_COSTS: {e}")
+        raise
 
 
 with open(os.path.join(os.path.dirname(__file__), "model_prices.json"), "r") as f:
     TOKEN_COSTS_STATIC = json.load(f)
 
@@ -63,4 +66,4 @@ async def update_token_costs():
     TOKEN_COSTS = TOKEN_COSTS_STATIC
     asyncio.run(update_token_costs())
 except Exception:
-    logger.error('Failed to update token costs. Using static costs.')
+    logger.error("Failed to update token costs. Using static costs.")
tokencost/costs.py (57 additions, 31 deletions)
@@ -1,8 +1,8 @@
-
 """
 Costs dictionary and utility tool for counting tokens
 """
 
+import os
 import tiktoken
 import anthropic
 from typing import Union, List, Dict
@@ -12,12 +12,30 @@
 
 logger = logging.getLogger(__name__)
 
-# TODO: Add Claude support
-# https://www-files.anthropic.com/production/images/model_pricing_july2023.pdf
-# Note: cl100k is the openai base tokenizer. Nothing to do with Claude. Tiktoken doesn't have claude yet.
-# https://github.com/anthropics/anthropic-tokenizer-typescript/blob/main/index.ts
 
 
+def get_anthropic_token_count(messages: List[Dict[str, str]], model: str) -> int:
+    if not any(
+        supported_model in model for supported_model in [
+            "claude-3-5-sonnet", "claude-3-5-haiku", "claude-3-haiku", "claude-3-opus"
+        ]
+    ):
+        raise ValueError(
+            f"{model} is not supported in token counting (beta) API. Use the `usage` property in the response for exact counts."
+        )
+    try:
+        return anthropic.Anthropic().beta.messages.count_tokens(
+            model=model,
+            messages=messages,
+        ).input_tokens
+    except TypeError as e:
+        raise e
+    except Exception as e:
+        raise e
+
+
 def strip_ft_model_name(model: str) -> str:
     """
     Finetuned models format: ft:gpt-3.5-turbo:my-org:custom_suffix:id
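
Usage of the new helper looks roughly like this (a sketch, assuming a valid `ANTHROPIC_API_KEY` is set; the message payload is illustrative):

```python
from tokencost.costs import get_anthropic_token_count

messages = [{"role": "user", "content": "Hello"}]
print(get_anthropic_token_count(messages, "claude-3-opus-latest"))  # exact server-side count

try:
    get_anthropic_token_count(messages, "claude-2.1")  # not in the supported families
except ValueError as e:
    print(e)  # "claude-2.1 is not supported in token counting (beta) API. ..."
```
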
@@ -42,14 +60,12 @@ def count_message_tokens(messages: List[Dict[str, str]], model: str) -> int:
     model = model.lower()
     model = strip_ft_model_name(model)
 
+    # Anthropic token counting requires a valid API key
     if "claude-" in model:
-        """
-        Note that this is only accurate for older models, e.g. `claude-2.1`.
-        For newer models this can only be used as a _very_ rough estimate,
-        instead you should rely on the `usage` property in the response for exact counts.
-        """
-        prompt = "".join(message["content"] for message in messages)
-        return count_string_tokens(prompt, model)
+        logger.warning(
+            "Warning: Anthropic token counting API is currently in beta. Please expect differences in costs!"
+        )
+        return get_anthropic_token_count(messages, model)
 
     try:
         encoding = tiktoken.encoding_for_model(model)
@@ -80,8 +96,9 @@ def count_message_tokens(messages: List[Dict[str, str]], model: str) -> int:
         )
         return count_message_tokens(messages, model="gpt-3.5-turbo-0613")
     elif "gpt-4o" in model:
-        print(
-            "Warning: gpt-4o may update over time. Returning num tokens assuming gpt-4o-2024-05-13.")
+        logger.warning(
+            "Warning: gpt-4o may update over time. Returning num tokens assuming gpt-4o-2024-05-13."
+        )
         return count_message_tokens(messages, model="gpt-4o-2024-05-13")
     elif "gpt-4" in model:
         logger.warning(
@@ -121,18 +138,9 @@ def count_string_tokens(prompt: str, model: str) -> int:
     model = model.split("/")[-1]
 
     if "claude-" in model:
-        """
-        Note that this is only accurate for older models, e.g. `claude-2.1`.
-        For newer models this can only be used as a _very_ rough estimate,
-        instead you should rely on the `usage` property in the response for exact counts.
-        """
-        if "claude-3" in model:
-            logger.warning(
-                "Warning: Claude-3 models are not yet supported. Returning num tokens assuming claude-2.1."
-            )
-        client = anthropic.Client()
-        token_count = client.count_tokens(prompt)
-        return token_count
+        raise ValueError(
+            "Warning: Anthropic does not support this method. Please use the `count_message_tokens` function for the exact counts."
+        )
 
     try:
         encoding = tiktoken.encoding_for_model(model)
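
String-based counting is now an explicit error for Claude models, so callers have to route through `count_message_tokens` instead. Roughly (a sketch; the message-count call needs an API key):

```python
from tokencost.costs import count_message_tokens, count_string_tokens

try:
    count_string_tokens("Hello, world", "claude-3-opus-latest")
except ValueError:
    # Claude text must be wrapped as messages and counted server-side:
    n = count_message_tokens(
        [{"role": "user", "content": "Hello, world"}], "claude-3-opus-latest"
    )
```
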
@@ -200,13 +208,11 @@ def calculate_prompt_cost(prompt: Union[List[dict], str], model: str) -> Decimal
     )
     if not isinstance(prompt, (list, str)):
         raise TypeError(
-            f"""Prompt must be either a string or list of message objects.
-            it is {type(prompt)} instead.
-            """
+            f"Prompt must be either a string or list of message objects but found {type(prompt)} instead."
         )
     prompt_tokens = (
         count_string_tokens(prompt, model)
-        if isinstance(prompt, str)
+        if isinstance(prompt, str) and "claude-" not in model
        else count_message_tokens(prompt, model)
    )
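
The added `and "claude-" not in model` guard means Claude prompts always go through message counting, never the (now-raising) string path. In use (a sketch; requires an API key):

```python
from tokencost import calculate_prompt_cost

cost = calculate_prompt_cost(
    [{"role": "user", "content": "Hello world"}], "claude-3-opus-latest"
)  # Decimal: beta-API input token count times the per-token input price
```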

@@ -235,7 +241,18 @@ def calculate_completion_cost(completion: str, model: str) -> Decimal:
             f"""Model {model} is not implemented.
             Double-check your spelling, or submit an issue/PR"""
         )
-    completion_tokens = count_string_tokens(completion, model)
+
+    if not isinstance(completion, str):
+        raise TypeError(
+            f"Prompt must be a string but found {type(completion)} instead."
+        )
+
+    if "claude-" in model:
+        completion_list = [{"role": "assistant", "content": completion}]
+        # Anthropic appends some 13 additional tokens to the actual completion tokens
+        completion_tokens = count_message_tokens(completion_list, model) - 13
+    else:
+        completion_tokens = count_string_tokens(completion, model)
 
     return calculate_cost_by_tokens(completion_tokens, model, "output")
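
Since the beta endpoint only counts whole message lists, the completion string is wrapped as a single assistant message and the roughly 13 tokens of message framing are subtracted back out. In use (a sketch; requires an API key):

```python
from tokencost import calculate_completion_cost

cost = calculate_completion_cost(
    "Hello there, how may I assist you today?", "claude-3-opus-latest"
)  # counts [{"role": "assistant", "content": ...}] and subtracts the ~13-token wrapper
```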

@@ -264,10 +281,19 @@ def calculate_all_costs_and_tokens(
     completion_cost = calculate_completion_cost(completion, model)
     prompt_tokens = (
         count_string_tokens(prompt, model)
-        if isinstance(prompt, str)
+        if isinstance(prompt, str) and "claude-" not in model
         else count_message_tokens(prompt, model)
     )
-    completion_tokens = count_string_tokens(completion, model)
+
+    if "claude-" in model:
+        logger.warning(
+            "Warning: Token counting is estimated for "
+        )
+        completion_list = [{"role": "assistant", "content": completion}]
+        # Anthropic appends some 13 additional tokens to the actual completion tokens
+        completion_tokens = count_message_tokens(completion_list, model) - 13
+    else:
+        completion_tokens = count_string_tokens(completion, model)
 
     return {
         "prompt_cost": prompt_cost,
update_prices.py (40 additions, 20 deletions)

@@ -9,7 +9,9 @@
 def diff_dicts(dict1, dict2):
     diff_keys = dict1.keys() ^ dict2.keys()
     differences = {k: (dict1.get(k), dict2.get(k)) for k in diff_keys}
-    differences.update({k: (dict1[k], dict2[k]) for k in dict1 if k in dict2 and dict1[k] != dict2[k]})
+    differences.update(
+        {k: (dict1[k], dict2[k]) for k in dict1 if k in dict2 and dict1[k] != dict2[k]}
+    )
 
     if differences:
         print("Differences found:")
@@ -24,56 +26,74 @@ def diff_dicts(dict1, dict2):
     return False
 
 
-with open('tokencost/model_prices.json', 'r') as f:
+with open("tokencost/model_prices.json", "r") as f:
     model_prices = json.load(f)
 
 if diff_dicts(model_prices, tokencost.TOKEN_COSTS):
-    print('Updating model_prices.json')
-    with open('tokencost/model_prices.json', 'w') as f:
+    print("Updating model_prices.json")
+    with open("tokencost/model_prices.json", "w") as f:
         json.dump(tokencost.TOKEN_COSTS, f, indent=4)
 # Load the data
 df = pd.DataFrame(tokencost.TOKEN_COSTS).T
-df.loc[df.index[1:], 'max_input_tokens'] = df['max_input_tokens'].iloc[1:].apply(lambda x: '{:,.0f}'.format(x))
-df.loc[df.index[1:], 'max_tokens'] = df['max_tokens'].iloc[1:].apply(lambda x: '{:,.0f}'.format(x))
+df.loc[df.index[1:], "max_input_tokens"] = (
+    df["max_input_tokens"].iloc[1:].apply(lambda x: "{:,.0f}".format(x))
+)
+df.loc[df.index[1:], "max_tokens"] = (
+    df["max_tokens"].iloc[1:].apply(lambda x: "{:,.0f}".format(x))
+)
 
 
 # Updated function to format the cost or handle NaN
 
 
 def format_cost(x):
     if pd.isna(x):
-        return '--'
+        return "--"
     else:
         price_per_million = Decimal(str(x)) * Decimal(str(1_000_000))
         # print(price_per_million)
         normalized = price_per_million.normalize()
-        formatted_price = '{:2f}'.format(normalized)
+        formatted_price = "{:2f}".format(normalized)
 
-        formatted_price = formatted_price.rstrip('0').rstrip('.') if '.' in formatted_price else formatted_price + '.00'
+        formatted_price = (
+            formatted_price.rstrip("0").rstrip(".")
+            if "." in formatted_price
+            else formatted_price + ".00"
+        )
 
         return f"${formatted_price}"
 
 
 # Apply the formatting function using DataFrame.apply and lambda
-df[['input_cost_per_token', 'output_cost_per_token']] = df[[
-    'input_cost_per_token', 'output_cost_per_token']].apply(lambda x: x.map(format_cost))
+df[["input_cost_per_token", "output_cost_per_token"]] = df[
+    ["input_cost_per_token", "output_cost_per_token"]
+].apply(lambda x: x.map(format_cost))
 
 
 column_mapping = {
-    'input_cost_per_token': 'Prompt Cost (USD) per 1M tokens',
-    'output_cost_per_token': 'Completion Cost (USD) per 1M tokens',
-    'max_input_tokens': 'Max Prompt Tokens',
-    'max_output_tokens': 'Max Output Tokens',
-    'model_name': 'Model Name'
+    "input_cost_per_token": "Prompt Cost (USD) per 1M tokens",
+    "output_cost_per_token": "Completion Cost (USD) per 1M tokens",
+    "max_input_tokens": "Max Prompt Tokens",
+    "max_output_tokens": "Max Output Tokens",
+    "model_name": "Model Name",
 }
 
 # Assuming the keys of the JSON data represent the model names and have been set as the index
-df['Model Name'] = df.index
+df["Model Name"] = df.index
 
 # Apply the column renaming
 df.rename(columns=column_mapping, inplace=True)
 
 # Write the DataFrame with the correct column names as markdown to a file
-with open('pricing_table.md', 'w') as f:
-    f.write(df[['Model Name', 'Prompt Cost (USD) per 1M tokens', 'Completion Cost (USD) per 1M tokens',
-                'Max Prompt Tokens', 'Max Output Tokens']].to_markdown(index=False))
+with open("pricing_table.md", "w") as f:
+    f.write(
+        df[
+            [
+                "Model Name",
+                "Prompt Cost (USD) per 1M tokens",
+                "Completion Cost (USD) per 1M tokens",
+                "Max Prompt Tokens",
+                "Max Output Tokens",
+            ]
+        ].to_markdown(index=False)
+    )
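
For reference, `format_cost` turns a per-token price into a dollars-per-1M-tokens label. A worked example of the logic above (values chosen to hit both branches):

```python
# format_cost(1.5e-05): Decimal("1.5e-05") * 1_000_000 = 15 -> "{:2f}" -> "15"
# -> no "." in the string -> ".00" appended -> "$15.00"
assert format_cost(1.5e-05) == "$15.00"

# format_cost(2.5e-06): -> "2.5" -> has "." -> trailing zeros stripped -> "$2.5"
assert format_cost(2.5e-06) == "$2.5"

assert format_cost(float("nan")) == "--"  # missing prices render as "--"
```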