diff --git a/segmentron/data/dataloader/utils.py b/segmentron/data/dataloader/utils.py
index c0bd1ad..f14abc0 100644
--- a/segmentron/data/dataloader/utils.py
+++ b/segmentron/data/dataloader/utils.py
@@ -66,4 +66,23 @@ def download_url(url, root, filename=None, md5=None):
 def download_extract(url, root, filename, md5):
     download_url(url, root, filename, md5)
     with tarfile.open(os.path.join(root, filename), "r") as tar:
-        tar.extractall(path=root)
\ No newline at end of file
+        def is_within_directory(directory, target):
+            # Compare whole path components: commonprefix() is character-based
+            # and would treat "/data_evil/x" as being inside "/data".
+            abs_directory = os.path.abspath(directory)
+            abs_target = os.path.abspath(target)
+
+            common = os.path.commonpath([abs_directory, abs_target])
+            return common == abs_directory
+
+        def safe_extract(tar, path=".", members=None, *, numeric_owner=False):
+            # Reject the whole archive if any member would land outside `path`.
+            for member in tar.getmembers():
+                member_path = os.path.join(path, member.name)
+                if not is_within_directory(path, member_path):
+                    raise Exception("Attempted Path Traversal in Tar File")
+
+            tar.extractall(path, members, numeric_owner=numeric_owner)
+
+
+        safe_extract(tar, path=root)
\ No newline at end of file
diff --git a/segmentron/data/downloader/pascal_voc.py b/segmentron/data/downloader/pascal_voc.py
index be7c2b2..1a5ac91 100644
--- a/segmentron/data/downloader/pascal_voc.py
+++ b/segmentron/data/downloader/pascal_voc.py
@@ -41,7 +41,26 @@ def download_voc(path, overwrite=False):
         filename = download(url, path=path, overwrite=overwrite, sha1_hash=checksum)
         # extract
         with tarfile.open(filename) as tar:
-            tar.extractall(path=path)
+            def is_within_directory(directory, target):
+                # Compare whole path components: commonprefix() is character-
+                # based and would treat "/data_evil/x" as inside "/data".
+                abs_directory = os.path.abspath(directory)
+                abs_target = os.path.abspath(target)
+
+                common = os.path.commonpath([abs_directory, abs_target])
+                return common == abs_directory
+
+            def safe_extract(tar, path=".", members=None, *, numeric_owner=False):
+                # Reject the whole archive if any member would land outside `path`.
+                for member in tar.getmembers():
+                    member_path = os.path.join(path, member.name)
+                    if not is_within_directory(path, member_path):
+                        raise Exception("Attempted Path Traversal in Tar File")
+
+                tar.extractall(path, members, numeric_owner=numeric_owner)
+
+
+            safe_extract(tar, path=path)
 
 
 #####################################################################################
@@ -56,7 +75,26 @@ def download_aug(path, overwrite=False):
         filename = download(url, path=path, overwrite=overwrite, sha1_hash=checksum)
         # extract
         with tarfile.open(filename) as tar:
-            tar.extractall(path=path)
+            def is_within_directory(directory, target):
+                # Compare whole path components: commonprefix() is character-
+                # based and would treat "/data_evil/x" as inside "/data".
+                abs_directory = os.path.abspath(directory)
+                abs_target = os.path.abspath(target)
+
+                common = os.path.commonpath([abs_directory, abs_target])
+                return common == abs_directory
+
+            def safe_extract(tar, path=".", members=None, *, numeric_owner=False):
+                # Reject the whole archive if any member would land outside `path`.
+                for member in tar.getmembers():
+                    member_path = os.path.join(path, member.name)
+                    if not is_within_directory(path, member_path):
+                        raise Exception("Attempted Path Traversal in Tar File")
+
+                tar.extractall(path, members, numeric_owner=numeric_owner)
+
+
+            safe_extract(tar, path=path)
             shutil.move(os.path.join(path, 'benchmark_RELEASE'),
                         os.path.join(path, 'VOCaug'))
             filenames = ['VOCaug/dataset/train.txt', 'VOCaug/dataset/val.txt']