Fixed typo in error messages in chunkerify().

umarbutler · May 18, 2024 · 3b35b1e · 3b35b1e
1 parent 3d1f7f7
commit 3b35b1e
Show file tree

Hide file tree

Showing 3 changed files with 7 additions and 3 deletions.
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -1,6 +1,10 @@
 ## Changelog 🔄
 All notable changes to `semchunk` will be documented here. This project adheres to [Keep a Changelog](https://keepachangelog.com/en/1.1.0/) and [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 
+## [0.3.1] - 2024-05-18
+### Fixed
+- Fixed typo in error messages in `chunkerify()` where it was referred to as `make_chunker()`.
+
 ## [0.3.0] - 2024-05-18
 ### Added
 - Introduced the `chunkerify()` function, which constructs a chunker from a tokenizer or token counter that can be reused and can also chunk multiple texts in a single call. The resulting chunker speeds up chunking by 40.4% thanks, in large part, to a token counter that avoids having to count the number of tokens in a text when the number of characters in the text exceeds a certain threshold, courtesy of [@R0bk](https://github.com/R0bk) ([#3](https://github.com/umarbutler/semchunk/pull/3)) ([337a186](https://github.com/umarbutler/semchunk/pull/3/commits/337a18615f991076b076262288b0408cb162b48c)).

diff --git a/pyproject.toml b/pyproject.toml
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
 
 [project]
 name = "semchunk"
-version = "0.3.0"
+version = "0.3.1"
 authors = [
   {name="Umar Butler", email="[email protected]"},
 ]

diff --git a/src/semchunk/semchunk.py b/src/semchunk/semchunk.py
@@ -178,7 +178,7 @@ def chunkerify(
                 tokenizer = transformers.AutoTokenizer.from_pretrained(tokenizer_or_token_counter)
 
             except Exception:
-                raise ValueError(f'"{tokenizer_or_token_counter}" was provided to `semchunk.make_chunker` as the name of a tokenizer but neither `tiktoken` nor `transformers` have a tokenizer by that name. Perhaps they are not installed or maybe there is a typo in that name?')
+                raise ValueError(f'"{tokenizer_or_token_counter}" was provided to `semchunk.chunkerify` as the name of a tokenizer but neither `tiktoken` nor `transformers` have a tokenizer by that name. Perhaps they are not installed or maybe there is a typo in that name?')
 
         tokenizer_or_token_counter = tokenizer
 
@@ -206,7 +206,7 @@ def chunkerify(
                     chunk_size -= len(tokenizer_or_token_counter.encode(''))
 
         else:
-            raise ValueError("Your desired chunk size was not passed to `semchunk.make_chunker` and the provided tokenizer either lacks an attribute named 'model_max_length' or that attribute is not an integer. Either specify a chunk size or provide a tokenizer that has a 'model_max_length' attribute that is an integer.")
+            raise ValueError("Your desired chunk size was not passed to `semchunk.chunkerify` and the provided tokenizer either lacks an attribute named 'model_max_length' or that attribute is not an integer. Either specify a chunk size or provide a tokenizer that has a 'model_max_length' attribute that is an integer.")
 
     # If we have been given a tokenizer, construct a token counter from it.
     if hasattr(tokenizer_or_token_counter, 'encode'):