fix(tokens): incorrect caching of async tokenizer
RobertCraigie committed Jun 29, 2023
1 parent 31c7256 commit d7147ec
Showing 2 changed files with 32 additions and 9 deletions.
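Background: before this change, async_get_tokenizer was decorated with functools.lru_cache. Applying lru_cache to an async def caches the coroutine object created by the first call rather than the Tokenizer it resolves to, so a second call gets back the same, already-awaited coroutine and fails. A minimal standalone sketch of that failure mode (illustrative only, not code from the repository):

import asyncio
from functools import lru_cache


@lru_cache(maxsize=None)
async def load() -> str:
    # lru_cache memoizes the coroutine object returned by the call,
    # not the value the coroutine eventually produces
    return "tokenizer"


async def main() -> None:
    print(await load())  # first call: a fresh coroutine, awaiting it works
    try:
        await load()  # second call: the cached coroutine was already consumed
    except RuntimeError as err:
        print(err)  # "cannot reuse already awaited coroutine"


asyncio.run(main())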
32 changes: 26 additions & 6 deletions examples/tokens.py
@@ -1,12 +1,32 @@
 #!/usr/bin/env poetry run python
 
-from anthropic import Anthropic
+import asyncio
 
-client = Anthropic()
+from anthropic import Anthropic, AsyncAnthropic
 
-text = "hello world!"
 
-tokens = client.count_tokens(text)
-print(f"'{text}' is {tokens} tokens")
+def sync_tokens() -> None:
+    client = Anthropic()
 
-assert tokens == 3
+    text = "hello world!"
+
+    tokens = client.count_tokens(text)
+    print(f"'{text}' is {tokens} tokens")
+
+    assert tokens == 3
+
+
+async def async_tokens() -> None:
+    anthropic = AsyncAnthropic()
+
+    text = "first message"
+    tokens = await anthropic.count_tokens(text)
+    print(f"'{text}' is {tokens} tokens")
+
+    text = "second message"
+    tokens = await anthropic.count_tokens(text)
+    print(f"'{text}' is {tokens} tokens")
+
+
+sync_tokens()
+asyncio.run(async_tokens())
9 changes: 6 additions & 3 deletions src/anthropic/_tokenizers.py
@@ -14,14 +14,17 @@ def _get_tokenizer_cache_path() -> Path:
 
 
 @lru_cache(maxsize=None)
+def _load_tokenizer(raw: str) -> Tokenizer:
+    return Tokenizer.from_str(raw)
+
+
 def sync_get_tokenizer() -> Tokenizer:
     tokenizer_path = _get_tokenizer_cache_path()
     text = tokenizer_path.read_text()
-    return Tokenizer.from_str(text)
+    return _load_tokenizer(text)
 
 
-@lru_cache(maxsize=None)
 async def async_get_tokenizer() -> Tokenizer:
     tokenizer_path = AsyncPath(_get_tokenizer_cache_path())
     text = await tokenizer_path.read_text()
-    return Tokenizer.from_str(text)
+    return _load_tokenizer(text)
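Moving @lru_cache onto the synchronous _load_tokenizer helper means only a plain value is memoized: each call to async_get_tokenizer builds a fresh coroutine, while the expensive Tokenizer.from_str parse still runs once per distinct tokenizer file and is shared with sync_get_tokenizer. A quick check of the new behavior (a sketch against the private module shown above, assuming the bundled tokenizer.json is present):

import asyncio

from anthropic._tokenizers import async_get_tokenizer, sync_get_tokenizer


async def main() -> None:
    # before this commit the second await raised
    # "RuntimeError: cannot reuse already awaited coroutine"
    first = await async_get_tokenizer()
    second = await async_get_tokenizer()
    assert first is second  # both awaits resolve to the single cached Tokenizer
    assert sync_get_tokenizer() is first  # the sync path shares the same cache


asyncio.run(main())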
