Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Added fix to io_utils.list_files function for instances where column … #42

Merged
merged 12 commits into from
Jan 8, 2024
Merged
17 changes: 15 additions & 2 deletions src/alpineer/io_utils.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import itertools
import os
import pathlib
import re
import warnings
from typing import List

Expand Down Expand Up @@ -81,7 +82,13 @@ def list_files(dir_name, substrs=None, exact_match=False, ignore_hidden=True):
if any([substr == os.path.splitext(file)[0] for substr in substrs])
]
else:
matches = [file for file in files if any([substr in file for substr in substrs])]
matches = []
for substr in substrs:
substr_pattern = list(filter(bool, re.split("[^a-zA-Z0-9]", substr)))
for file in files:
file_pattern = list(filter(bool, re.split("[^a-zA-Z0-9]", file)))
if set(substr_pattern).issubset(file_pattern):
matches.append(file)

return matches

Expand Down Expand Up @@ -226,6 +233,12 @@ def list_folders(dir_name, substrs=None, exact_match=False, ignore_hidden=True):
if any([substr == os.path.splitext(folder)[0] for substr in substrs])
]
else:
matches = [folder for folder in folders if any([substr in folder for substr in substrs])]
matches = []
for substr in substrs:
substr_pattern = list(filter(bool, re.split("[^a-zA-Z0-9]", substr)))
for folder in folders:
folder_pattern = list(filter(bool, re.split("[^a-zA-Z0-9]", folder)))
if set(substr_pattern).issubset(folder_pattern):
matches.append(folder)

return matches
58 changes: 57 additions & 1 deletion tests/io_utils_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -127,6 +127,32 @@ def test_list_files():
)
assert sorted(get_hidden_files) == [".chan-metadata.tiff"]

# test delimiter functionality of substr matching
with tempfile.TemporaryDirectory() as temp_dir:
filenames = [
"fov1.tiff",
"fov1_test.tiff",
"fov10.tiff",
"fov2.tiff",
"fov2_test.tiff",
"fov20.tiff",
"fov3.tiff",
"fov3_test.tiff",
"fov30.tiff",
]
for filename in filenames:
pathlib.Path(os.path.join(temp_dir, filename)).touch()

# test substrs is not list (single string)
get_txt = io_utils.list_files(temp_dir, substrs="fov1")
assert sorted(get_txt) == sorted(["fov1.tiff", "fov1_test.tiff"])

# test substrs is list
get_test_and_other = io_utils.list_files(temp_dir, substrs=["fov1", "fov2"])
assert sorted(get_test_and_other) == sorted(
["fov1.tiff", "fov1_test.tiff", "fov2.tiff", "fov2_test.tiff"]
)


def test_remove_file_extensions():
# test a mixture of file paths and extensions
Expand Down Expand Up @@ -206,7 +232,7 @@ def test_list_folders():
temp_dir, substrs=["test_", "other"], exact_match=False
)
assert sorted(get_test_and_other) == sorted(
["Ntest_csv", "test_csv", "test_csv1", "test_csv2", "test_out", "othertf_txt"]
["test_csv", "test_csv1", "test_csv2", "test_out"]
)

# Test hidden files
Expand Down Expand Up @@ -243,3 +269,33 @@ def test_list_folders():
temp_dir, substrs=".hidden_dir", exact_match=True, ignore_hidden=False
)
assert get_hidden_dirs == [".hidden_dir"]

# test delimiter functionality of substr matching
with tempfile.TemporaryDirectory() as temp_dir:
dirnames = [
"test1",
"test1_folder",
"test10",
"test2",
"test2_folder",
"test20",
"test3",
"test3_folder",
"test30",
]

dirnames.sort()
for dirname in dirnames:
os.mkdir(os.path.join(temp_dir, dirname))

# test substrs is not list (single string)
get_txt = io_utils.list_folders(temp_dir, substrs="test1", exact_match=False)
assert sorted(get_txt) == sorted(["test1", "test1_folder"])

# test substrs is list
get_test_and_other = io_utils.list_folders(
temp_dir, substrs=["test1", "test2"], exact_match=False
)
assert sorted(get_test_and_other) == sorted(
["test1", "test1_folder", "test2", "test2_folder"]
)
Loading