Skip to content

Commit

Permalink
add test for illegal usage
Browse files Browse the repository at this point in the history
  • Loading branch information
DanielYang59 committed Sep 13, 2024
1 parent cd3bbd1 commit 3e69e3c
Show file tree
Hide file tree
Showing 2 changed files with 40 additions and 21 deletions.
22 changes: 17 additions & 5 deletions src/monty/io.py
Original file line number Diff line number Diff line change
Expand Up @@ -164,7 +164,7 @@ def reverse_readfile(


def reverse_readline(
m_file,
m_file, # TODO: expected type is unclear
blk_size: int = 4096,
max_mem: int = 4_000_000,
) -> Iterator[str]:
Expand All @@ -179,14 +179,25 @@ def reverse_readline(
Files larger than max_mem are read one block each time.
NOTE: this function expect a file stream, and m_file
should NOT be the name of the file.
TODO:
- is it possible to support binary file stream
- Test gzip seek speed (not supported previously)
- Test bzip2 seek speed (any improvement)
https://stackoverflow.com/questions/25734252/
why-is-seeking-from-the-end-of-a-file-allowed-for-
bzip2-files-and-not-gzip-files
Reference:
Based on code by Peter Astrand <[email protected]>, using
modifications by Raymond Hettinger and Kevin German.
http://code.activestate.com/recipes/439045-read-a-text-
file-backwards-yet-another-implementat/
Args:
m_file (File): File stream to read (backwards).
m_file: File stream to read (backwards).
blk_size (int): The buffer size in bytes. Defaults to 4096.
max_mem (int): The maximum amount of RAM to use in bytes,
which determines when to reverse a file in-memory versus
Expand All @@ -196,6 +207,10 @@ def reverse_readline(
Yields:
Lines from the back of the file.
"""
# Check for illegal usage
if isinstance(m_file, str | Path):
raise TypeError("this function expect a file stream, not file name")

# Generate line ending
l_end: Literal["\r\n", "\n"] = _get_line_ending(m_file)
len_l_end: Literal[1, 2] = cast(Literal[1, 2], len(l_end))
Expand Down Expand Up @@ -224,9 +239,6 @@ def reverse_readline(

# For bz2 files, seek is expensive. It is therefore in our best
# interest to maximize the block size within RAM usage limit.

# TODO: not sure if bzip2 has any improvement on seek, need test
# https://stackoverflow.com/questions/25734252/why-is-seeking-from-the-end-of-a-file-allowed-for-bzip2-files-and-not-gzip-files
if isinstance(m_file, bz2.BZ2File):
blk_size = min(max_mem, file_size)

Expand Down
39 changes: 23 additions & 16 deletions tests/test_io.py
Original file line number Diff line number Diff line change
Expand Up @@ -151,7 +151,7 @@ def test_empty_file(self):
for _line in reverse_readline(f):
pytest.fail("No error should be thrown.")

@pytest.mark.parametrize("ram", [4, 4_096, 4_0000_000])
@pytest.mark.parametrize("ram", [4, 4096, 4_0000_000])
@pytest.mark.parametrize("l_end", ["\n", "\r\n"])
def test_file_with_empty_lines(self, l_end, ram):
"""Empty lines should not be skipped.
Expand All @@ -170,24 +170,25 @@ def test_file_with_empty_lines(self, l_end, ram):
revert_contents = tuple(reverse_readline(file, max_mem=ram))
assert revert_contents[::-1] == contents

# TODO: finish following tests
# # Test gzip file
# gzip_filename = f"{filename}.gz"
# with gzip.open(gzip_filename, "w") as file_out:
# for line in contents:
# file_out.write(line.encode())
# Test gzip file
gzip_filename = f"{filename}.gz"
with gzip.open(gzip_filename, "w") as file_out:
for line in contents:
file_out.write(line.encode())

# revert_contents_gzip = tuple(reverse_readline(gzip_filename))
# assert revert_contents_gzip[::-1] == contents
with gzip.open(gzip_filename) as g_file:
revert_contents_gzip = tuple(reverse_readline(g_file))
assert revert_contents_gzip[::-1] == contents

# # Test bzip2 file
# bz2_filename = f"{filename}.bz2"
# with bz2.open(bz2_filename, "w") as file_out:
# for line in contents:
# file_out.write(line.encode())
# Test bzip2 file
bz2_filename = f"{filename}.bz2"
with bz2.open(bz2_filename, "w") as file_out:
for line in contents:
file_out.write(line.encode())

# revert_contents_bz2 = tuple(reverse_readline(bz2_filename))
# assert revert_contents_bz2[::-1] == contents
with bz2.open(bz2_filename) as b_file:
revert_contents_bz2 = tuple(reverse_readline(b_file))
assert revert_contents_bz2[::-1] == contents

@pytest.mark.parametrize("ram", [4, 4096, 4_0000_000])
@pytest.mark.parametrize("l_end", ["\n", "\r\n"])
Expand Down Expand Up @@ -217,6 +218,12 @@ def test_line_ending(self, l_end, ram):
# assert line == contents[len(contents) - idx - 1]
# assert isinstance(line, str)

@pytest.mark.parametrize("file", ["./file", Path("./file")])
def test_illegal_usage(self, file):
with pytest.raises(TypeError, match="this function expect a file stream"):
for _ in reverse_readline(file):
pass


class TestReverseReadfile:
NUM_LINES = 3000
Expand Down

0 comments on commit 3e69e3c

Please sign in to comment.