-
Notifications
You must be signed in to change notification settings - Fork 0
/
chunker.py
38 lines (33 loc) · 1018 Bytes
/
chunker.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
def chunk(s: str, chunkSize: int = 256, overlap: int = 64) -> list:
    """Split *s* into overlapping chunks of space-separated words.

    The text is split on single space characters, so consecutive spaces
    produce empty-string "words" that count toward the chunk size. Each
    chunk holds up to ``chunkSize`` words and begins ``chunkSize - overlap``
    words after the previous one, so neighbouring chunks share ``overlap``
    words.

    Args:
        s: Text to split.
        chunkSize: Maximum number of words per chunk; must be positive.
        overlap: Number of words shared by consecutive chunks; must satisfy
            ``0 <= overlap < chunkSize``.

    Returns:
        A list of chunk strings with words re-joined by a single space.
        An empty input yields ``[""]``.

    Raises:
        ValueError: If ``chunkSize`` is not positive or ``overlap`` is
            outside ``[0, chunkSize)``.
    """
    if chunkSize <= 0:
        raise ValueError("chunkSize must be positive, got %r" % (chunkSize,))
    if not 0 <= overlap < chunkSize:
        # A non-positive step would either make range() fail with an
        # unrelated message or silently produce no chunks at all; a negative
        # overlap would silently skip words between chunks.
        raise ValueError(
            "overlap must satisfy 0 <= overlap < chunkSize, got overlap=%r, "
            "chunkSize=%r" % (overlap, chunkSize)
        )

    words = s.split(" ")
    step = chunkSize - overlap
    # The previous chunk already covers words up to index i + overlap, so a
    # chunk starting at i only contributes new words when
    # i + overlap < len(words). Stopping there avoids a trailing chunk made
    # up purely of overlap; max(..., 1) keeps the first chunk for short input.
    stop = max(len(words) - overlap, 1)
    return [" ".join(words[i:i + chunkSize]) for i in range(0, stop, step)]
if __name__ == "__main__":
    # Self-test: each case pairs an input text with the chunks expected from
    # chunk(text, 10, 2), i.e. 10-word chunks that share 2 words.
    cases = [
        (
            "",
            [""],
        ),
        (
            "hi",
            ["hi"],
        ),
        (
            "this is a test",
            ["this is a test"],
        ),
        (
            "this is a long test with more than ten words so that we can test overlap",
            [
                "this is a long test with more than ten words",
                "ten words so that we can test overlap",
            ],
        ),
    ]
    print("Testing chunk function.")
    # Unpack each case directly; the name `text` avoids shadowing the
    # builtin input().
    for text, expected in cases:
        actual = chunk(text, 10, 2)
        # %s already applies str() to its argument, so no explicit str() calls.
        print("\nInput: %s \nExpected: %s\nActual: %s" % (text, expected, actual))
        assert actual == expected, "%s != %s" % (actual, expected)
    print('\nAll tests passed.')