add test for illegal usage

materialsvirtuallab · Sep 13, 2024 · 3e69e3c · 3e69e3c
1 parent cd3bbd1
commit 3e69e3c
Show file tree

Hide file tree

Showing 2 changed files with 40 additions and 21 deletions.
diff --git a/src/monty/io.py b/src/monty/io.py
@@ -164,7 +164,7 @@ def reverse_readfile(
 
 
 def reverse_readline(
-    m_file,
+    m_file,  # TODO: expected type is unclear
     blk_size: int = 4096,
     max_mem: int = 4_000_000,
 ) -> Iterator[str]:
@@ -179,14 +179,25 @@ def reverse_readline(
 
     Files larger than max_mem are read one block each time.
 
+    NOTE: this function expect a file stream, and m_file
+        should NOT be the name of the file.
+
+    TODO:
+    - is it possible to support binary file stream
+    - Test gzip seek speed (not supported previously)
+    - Test bzip2 seek speed (any improvement)
+         https://stackoverflow.com/questions/25734252/
+         why-is-seeking-from-the-end-of-a-file-allowed-for-
+         bzip2-files-and-not-gzip-files
+
     Reference:
         Based on code by Peter Astrand <[email protected]>, using
         modifications by Raymond Hettinger and Kevin German.
         http://code.activestate.com/recipes/439045-read-a-text-
         file-backwards-yet-another-implementat/
 
     Args:
-        m_file (File): File stream to read (backwards).
+        m_file: File stream to read (backwards).
         blk_size (int): The buffer size in bytes. Defaults to 4096.
         max_mem (int): The maximum amount of RAM to use in bytes,
             which determines when to reverse a file in-memory versus
@@ -196,6 +207,10 @@ def reverse_readline(
     Yields:
         Lines from the back of the file.
     """
+    # Check for illegal usage
+    if isinstance(m_file, str | Path):
+        raise TypeError("this function expect a file stream, not file name")
+
     # Generate line ending
     l_end: Literal["\r\n", "\n"] = _get_line_ending(m_file)
     len_l_end: Literal[1, 2] = cast(Literal[1, 2], len(l_end))
@@ -224,9 +239,6 @@ def reverse_readline(
 
         # For bz2 files, seek is expensive. It is therefore in our best
         # interest to maximize the block size within RAM usage limit.
-
-        # TODO: not sure if bzip2 has any improvement on seek, need test
-        # https://stackoverflow.com/questions/25734252/why-is-seeking-from-the-end-of-a-file-allowed-for-bzip2-files-and-not-gzip-files
         if isinstance(m_file, bz2.BZ2File):
             blk_size = min(max_mem, file_size)
 

diff --git a/tests/test_io.py b/tests/test_io.py
@@ -151,7 +151,7 @@ def test_empty_file(self):
                 for _line in reverse_readline(f):
                     pytest.fail("No error should be thrown.")
 
-    @pytest.mark.parametrize("ram", [4, 4_096, 4_0000_000])
+    @pytest.mark.parametrize("ram", [4, 4096, 4_0000_000])
     @pytest.mark.parametrize("l_end", ["\n", "\r\n"])
     def test_file_with_empty_lines(self, l_end, ram):
         """Empty lines should not be skipped.
@@ -170,24 +170,25 @@ def test_file_with_empty_lines(self, l_end, ram):
                 revert_contents = tuple(reverse_readline(file, max_mem=ram))
             assert revert_contents[::-1] == contents
 
-            # TODO: finish following tests
-            # # Test gzip file
-            # gzip_filename = f"{filename}.gz"
-            # with gzip.open(gzip_filename, "w") as file_out:
-            #     for line in contents:
-            #         file_out.write(line.encode())
+            # Test gzip file
+            gzip_filename = f"{filename}.gz"
+            with gzip.open(gzip_filename, "w") as file_out:
+                for line in contents:
+                    file_out.write(line.encode())
 
-            # revert_contents_gzip = tuple(reverse_readline(gzip_filename))
-            # assert revert_contents_gzip[::-1] == contents
+            with gzip.open(gzip_filename) as g_file:
+                revert_contents_gzip = tuple(reverse_readline(g_file))
+            assert revert_contents_gzip[::-1] == contents
 
-            # # Test bzip2 file
-            # bz2_filename = f"{filename}.bz2"
-            # with bz2.open(bz2_filename, "w") as file_out:
-            #     for line in contents:
-            #         file_out.write(line.encode())
+            # Test bzip2 file
+            bz2_filename = f"{filename}.bz2"
+            with bz2.open(bz2_filename, "w") as file_out:
+                for line in contents:
+                    file_out.write(line.encode())
 
-            # revert_contents_bz2 = tuple(reverse_readline(bz2_filename))
-            # assert revert_contents_bz2[::-1] == contents
+            with bz2.open(bz2_filename) as b_file:
+                revert_contents_bz2 = tuple(reverse_readline(b_file))
+            assert revert_contents_bz2[::-1] == contents
 
     @pytest.mark.parametrize("ram", [4, 4096, 4_0000_000])
     @pytest.mark.parametrize("l_end", ["\n", "\r\n"])
@@ -217,6 +218,12 @@ def test_line_ending(self, l_end, ram):
             #         assert line == contents[len(contents) - idx - 1]
             #         assert isinstance(line, str)
 
+    @pytest.mark.parametrize("file", ["./file", Path("./file")])
+    def test_illegal_usage(self, file):
+        with pytest.raises(TypeError, match="this function expect a file stream"):
+            for _ in reverse_readline(file):
+                pass
+
 
 class TestReverseReadfile:
     NUM_LINES = 3000