From da0b25aba505606a1f08b77015298e5fc1700b12 Mon Sep 17 00:00:00 2001 From: Umar Butler Date: Mon, 5 Feb 2024 20:41:04 +1100 Subject: [PATCH] Ensured that the `memoize` argument is passed back to `chunk()` in recursive calls. --- CHANGELOG.md | 5 +++++ LICENCE | 2 +- README.md | 2 +- pyproject.toml | 2 +- src/semchunk/semchunk.py | 2 +- 5 files changed, 9 insertions(+), 4 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index ea79af0..aed70cc 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,10 @@ ## Changelog 🔄 All notable changes to `semchunk` will be documented here. This project adheres to [Keep a Changelog](https://keepachangelog.com/en/1.1.0/) and [Semantic Versioning](https://semver.org/spec/v2.0.0.html). +## [0.2.2] - 2024-02-05 +### Fixed +- Ensured that the `memoize` argument is passed back to `chunk()` in recursive calls. + ## [0.2.1] - 2023-11-09 ### Added - Memoized `chunk()`. @@ -32,6 +36,7 @@ All notable changes to `semchunk` will be documented here. This project adheres ### Added - Added the `chunk()` function, which splits text into semantically meaningful chunks of a specified size as determined by a provided token counter. 
+[0.2.2]: https://github.com/umarbutler/semchunk/compare/v0.2.1...v0.2.2 [0.2.1]: https://github.com/umarbutler/semchunk/compare/v0.2.0...v0.2.1 [0.2.0]: https://github.com/umarbutler/semchunk/compare/v0.1.2...v0.2.0 [0.1.2]: https://github.com/umarbutler/semchunk/compare/v0.1.1...v0.1.2 diff --git a/LICENCE b/LICENCE index 627e8e0..fba0d79 100644 --- a/LICENCE +++ b/LICENCE @@ -1,4 +1,4 @@ -Copyright (c) 2023 Umar Butler +Copyright (c) 2024 Umar Butler Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/README.md b/README.md index 3dfe878..0c3ff94 100644 --- a/README.md +++ b/README.md @@ -66,7 +66,7 @@ To ensure that chunks are as semantically meaningful as possible, `semchunk` use `semchunk` also relies on memoization to cache the results of token counters and the `chunk()` function, thereby improving performance. ## Benchmarks 📊 -On a desktop with a Ryzen 3600, 64 GB of RAM, Windows 11 and Python 3.12.0, it takes `semchunk` 25.29 seconds to split every sample in [NLTK's Gutenberg Corpus](https://www.nltk.org/howto/corpus.html#plaintext-corpora) into 512-token-long chunks (for context, the Corpus contains 18 texts and 3,001,260 tokens). By comparison, it takes [`semantic-text-splitter`](https://pypi.org/project/semantic-text-splitter/) 1 minute and 51.65 seconds to chunk the same texts into 512-token-long chunks — a difference of 77.35%. +On a desktop with a Ryzen 3600, 64 GB of RAM, Windows 11 and Python 3.12.0, it takes `semchunk` 24.41 seconds to split every sample in [NLTK's Gutenberg Corpus](https://www.nltk.org/howto/corpus.html#plaintext-corpora) into 512-token-long chunks (for context, the Corpus contains 18 texts and 3,001,260 tokens). By comparison, it takes [`semantic-text-splitter`](https://pypi.org/project/semantic-text-splitter/) 1 minute and 48.01 seconds to chunk the same texts into 512-token-long chunks — a difference of 77.40%. 
The code used to benchmark `semchunk` and `semantic-text-splitter` is available [here](https://github.com/umarbutler/semchunk/blob/main/tests/bench.py). diff --git a/pyproject.toml b/pyproject.toml index c4f4408..4995291 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "hatchling.build" [project] name = "semchunk" -version = "0.2.1" +version = "0.2.2" authors = [ {name="Umar Butler", email="umar@umar.au"}, ] diff --git a/src/semchunk/semchunk.py b/src/semchunk/semchunk.py index 1252bd8..c5a9b4e 100644 --- a/src/semchunk/semchunk.py +++ b/src/semchunk/semchunk.py @@ -77,7 +77,7 @@ def chunk(text: str, chunk_size: int, token_counter: callable, memoize: bool=Tru # If the split is over the chunk size, recursively chunk it. if token_counter(split) > chunk_size: - chunks.extend(chunk(split, chunk_size, token_counter=token_counter, _recursion_depth=_recursion_depth+1)) + chunks.extend(chunk(split, chunk_size, token_counter=token_counter, memoize=memoize, _recursion_depth=_recursion_depth+1)) # If the split is equal to or under the chunk size, merge it with all subsequent splits until the chunk size is reached. else: