From aa6142f0fe31158404f00f5bc8a4aa8becd8ca2d Mon Sep 17 00:00:00 2001 From: Umar Butler Date: Mon, 11 Mar 2024 15:28:41 +1100 Subject: [PATCH] Ensured that memoization does not overwrite `chunk()`'s function signature. --- CHANGELOG.md | 5 +++++ pyproject.toml | 2 +- src/semchunk/semchunk.py | 7 ++++--- 3 files changed, 10 insertions(+), 4 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index aed70cc..25c4020 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,10 @@ ## Changelog 🔄 All notable changes to `semchunk` will be documented here. This project adheres to [Keep a Changelog](https://keepachangelog.com/en/1.1.0/) and [Semantic Versioning](https://semver.org/spec/v2.0.0.html). +## [0.2.3] - 2024-03-11 +### Fixed +- Ensured that memoization does not overwrite `chunk()`'s function signature. + ## [0.2.2] - 2024-02-05 ### Fixed - Ensured that the `memoize` argument is passed back to `chunk()` in recursive calls. @@ -36,6 +40,7 @@ All notable changes to `semchunk` will be documented here. This project adheres ### Added - Added the `chunk()` function, which splits text into semantically meaningful chunks of a specified size as determined by a provided token counter. +[0.2.3]: https://github.com/umarbutler/semchunk/compare/v0.2.2...v0.2.3 [0.2.2]: https://github.com/umarbutler/semchunk/compare/v0.2.1...v0.2.2 [0.2.1]: https://github.com/umarbutler/semchunk/compare/v0.2.0...v0.2.1 [0.2.0]: https://github.com/umarbutler/semchunk/compare/v0.1.2...v0.2.0 diff --git a/pyproject.toml b/pyproject.toml index 4995291..f2c0158 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "hatchling.build" [project] name = "semchunk" -version = "0.2.2" +version = "0.2.3" authors = [ {name="Umar Butler", email="umar@umar.au"}, ] diff --git a/src/semchunk/semchunk.py b/src/semchunk/semchunk.py index c5a9b4e..ae40557 100644 --- a/src/semchunk/semchunk.py +++ b/src/semchunk/semchunk.py @@ -1,5 +1,5 @@ import re -from functools import cache +from functools import cache, wraps _memoised_token_counters = {} """A map of token counters to their memoised versions.""" @@ -45,7 +45,6 @@ def _split_text(text: str) -> tuple[str, bool, list[str]]: # Return the splitter and the split text. return splitter, splitter_is_whitespace, text.split(splitter) -@cache def chunk(text: str, chunk_size: int, token_counter: callable, memoize: bool=True, _recursion_depth: int = 0) -> list[str]: """Split text into semantically meaningful chunks of a specified size as determined by the provided token counter. @@ -113,4 +112,6 @@ def chunk(text: str, chunk_size: int, token_counter: callable, memoize: bool=Tru if not _recursion_depth: chunks = list(filter(None, chunks)) - return chunks \ No newline at end of file + return chunks + +chunk = wraps(chunk)(cache(chunk)) \ No newline at end of file