fix(tokens): incorrect caching of async tokenizer
RobertCraigie committed Jun 29, 2023
1 parent 31c7256 commit d7147ec
Showing 2 changed files with 32 additions and 9 deletions.
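Background: before this change, async_get_tokenizer was decorated with functools.lru_cache. Applying lru_cache to an async def caches the coroutine object created by the first call rather than the Tokenizer it resolves to, so a second call gets back the same, already-awaited coroutine and fails. A minimal standalone sketch of that failure mode (illustrative only, not code from the repository):

import asyncio
from functools import lru_cache


@lru_cache(maxsize=None)
async def load() -> str:
    # lru_cache memoizes the coroutine object returned by the call,
    # not the value the coroutine eventually produces
    return "tokenizer"


async def main() -> None:
    print(await load())  # first call: a fresh coroutine, awaiting it works
    try:
        await load()  # second call: the cached coroutine was already consumed
    except RuntimeError as err:
        print(err)  # "cannot reuse already awaited coroutine"


asyncio.run(main())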
32 changes: 26 additions & 6 deletions examples/tokens.py
@@ -1,12 +1,32 @@
 #!/usr/bin/env poetry run python
 
-from anthropic import Anthropic
+import asyncio
 
-client = Anthropic()
+from anthropic import Anthropic, AsyncAnthropic
 
-text = "hello world!"
 
-tokens = client.count_tokens(text)
-print(f"'{text}' is {tokens} tokens")
+def sync_tokens() -> None:
+    client = Anthropic()
 
-assert tokens == 3
+    text = "hello world!"
+
+    tokens = client.count_tokens(text)
+    print(f"'{text}' is {tokens} tokens")
+
+    assert tokens == 3
+
+
+async def async_tokens() -> None:
+    anthropic = AsyncAnthropic()
+
+    text = "first message"
+    tokens = await anthropic.count_tokens(text)
+    print(f"'{text}' is {tokens} tokens")
+
+    text = "second message"
+    tokens = await anthropic.count_tokens(text)
+    print(f"'{text}' is {tokens} tokens")
+
+
+sync_tokens()
+asyncio.run(async_tokens())
9 changes: 6 additions & 3 deletions src/anthropic/_tokenizers.py
@@ -14,14 +14,17 @@ def _get_tokenizer_cache_path() -> Path:
 
 
 @lru_cache(maxsize=None)
+def _load_tokenizer(raw: str) -> Tokenizer:
+    return Tokenizer.from_str(raw)
+
+
 def sync_get_tokenizer() -> Tokenizer:
     tokenizer_path = _get_tokenizer_cache_path()
     text = tokenizer_path.read_text()
-    return Tokenizer.from_str(text)
+    return _load_tokenizer(text)
 
 
-@lru_cache(maxsize=None)
 async def async_get_tokenizer() -> Tokenizer:
     tokenizer_path = AsyncPath(_get_tokenizer_cache_path())
     text = await tokenizer_path.read_text()
-    return Tokenizer.from_str(text)
+    return _load_tokenizer(text)
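Moving @lru_cache onto the synchronous _load_tokenizer helper means only a plain value is memoized: each call to async_get_tokenizer builds a fresh coroutine, while the expensive Tokenizer.from_str parse still runs once per distinct tokenizer file and is shared with sync_get_tokenizer. A quick check of the new behavior (a sketch against the private module shown above, assuming the bundled tokenizer.json is present):

import asyncio

from anthropic._tokenizers import async_get_tokenizer, sync_get_tokenizer


async def main() -> None:
    # before this commit the second await raised
    # "RuntimeError: cannot reuse already awaited coroutine"
    first = await async_get_tokenizer()
    second = await async_get_tokenizer()
    assert first is second  # both awaits resolve to the single cached Tokenizer
    assert sync_get_tokenizer() is first  # the sync path shares the same cache


asyncio.run(main())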
