From 7ac95f606d3082e5fa1b4136be748732e20de713 Mon Sep 17 00:00:00 2001 From: Umit Cavus Buyuksahin Date: Fri, 29 Apr 2022 15:11:21 +0200 Subject: [PATCH] #61: Delete files in bucket (#62) * Added delete method * Added test for delete method * Added small fixes to list_files guides * Added user guide for deletion * Added mock delete method to mock bucketfs * Update doc/user_guide/delete_file_in_bucket.rst Co-authored-by: Thomas Ubensee <34603111+tomuben@users.noreply.github.com> * Updated delete test * Prepared for the release Co-authored-by: Thomas Ubensee <34603111+tomuben@users.noreply.github.com> --- doc/changes/changes_0.2.0.md | 11 +-- doc/user_guide/delete_file_in_bucket.py | 27 +++++++ doc/user_guide/delete_file_in_bucket.rst | 15 ++++ doc/user_guide/list_files_in_bucket.py | 4 +- doc/user_guide/list_files_in_bucket.rst | 2 +- doc/user_guide/user_guide.rst | 1 + .../abstract_bucketfs_location.py | 71 +++++++++++-------- .../bucketfs_location.py | 15 +++- exasol_bucketfs_utils_python/delete.py | 22 ++++++ exasol_bucketfs_utils_python/list_files.py | 6 +- .../localfs_mock_bucketfs_location.py | 6 ++ tests/test_delete_file.py | 52 ++++++++++++++ 12 files changed, 189 insertions(+), 43 deletions(-) create mode 100644 doc/user_guide/delete_file_in_bucket.py create mode 100644 doc/user_guide/delete_file_in_bucket.rst create mode 100644 exasol_bucketfs_utils_python/delete.py create mode 100644 tests/test_delete_file.py diff --git a/doc/changes/changes_0.2.0.md b/doc/changes/changes_0.2.0.md index 8c2c9da7..8623e873 100644 --- a/doc/changes/changes_0.2.0.md +++ b/doc/changes/changes_0.2.0.md @@ -1,19 +1,20 @@ -# BucketFs Utils Python 0.2.0, released t.b.d -Code name: t.b.d +# BucketFs Utils Python 0.2.0, released 2022-04-29 +Code name: Added methods to list files and delete files ## Summary - t.b.d +This version introduces two new methods that list files in a bucket and +delete a file in a bucket under a specific path. 
Furthermore, we used a fixed numpy +version built from source to guard against the buffer overflow vulnerability in numpy. ## Features / Enhancements - #55: Added method to list files in bucket + - #61: Added method to delete file in bucket ## Bug Fixes - #54: Removed PosixPath conversion from alter session string -## Documentation - ## Refactoring - #58: Added Python type hints diff --git a/doc/user_guide/delete_file_in_bucket.py b/doc/user_guide/delete_file_in_bucket.py new file mode 100644 index 00000000..19629b4e --- /dev/null +++ b/doc/user_guide/delete_file_in_bucket.py @@ -0,0 +1,27 @@ +from pathlib import Path +from exasol_bucketfs_utils_python import upload, delete +from exasol_bucketfs_utils_python.bucket_config import BucketConfig +from exasol_bucketfs_utils_python.bucketfs_config import BucketFSConfig +from exasol_bucketfs_utils_python.bucketfs_connection_config import BucketFSConnectionConfig + +connection_config = BucketFSConnectionConfig( + host="localhost", port=6666, + user="w", pwd="write", + is_https=False) +bucketfs_config = BucketFSConfig( + connection_config=connection_config, + bucketfs_name="bfsdefault") +bucket_config = BucketConfig( + bucket_name="default", + bucketfs_config=bucketfs_config) + +local_input_file_path = Path("local_input_file.txt") +path_in_bucket = "path/in/bucket/file.txt" +upload.upload_file_to_bucketfs( + bucket_config=bucket_config, + bucket_file_path=path_in_bucket, + local_file_path=local_input_file_path) + +delete.delete_file_in_bucketfs( + bucket_config=bucket_config, + bucket_file_path=path_in_bucket) diff --git a/doc/user_guide/delete_file_in_bucket.rst b/doc/user_guide/delete_file_in_bucket.rst new file mode 100644 index 00000000..02d74002 --- /dev/null +++ b/doc/user_guide/delete_file_in_bucket.rst @@ -0,0 +1,15 @@ + +######################### +Deleting file in BucketFS +######################### + +This library provides a function to delete the file in a bucket under a given +path. 
As shown in the example below, the file at the given BucketFS path is deleted +using the provided delete function. + + + +Example: + +.. literalinclude:: delete_file_in_bucket.py + :language: python3 \ No newline at end of file diff --git a/doc/user_guide/list_files_in_bucket.py b/doc/user_guide/list_files_in_bucket.py index 7606ccd4..f4454b7a 100644 --- a/doc/user_guide/list_files_in_bucket.py +++ b/doc/user_guide/list_files_in_bucket.py @@ -22,7 +22,7 @@ bucket_file_path=path_in_bucket, local_file_path=local_input_file_path) -bucket_file_path = Path("path/in/bucket") +bucket_file_path = "path/in/bucket" files = list_files.list_files_in_bucketfs( bucket_config=bucket_config, - bucket_file_path=path_in_bucket) + bucket_file_path=bucket_file_path) diff --git a/doc/user_guide/list_files_in_bucket.rst b/doc/user_guide/list_files_in_bucket.rst index c0799bb9..5d9aa7f4 100644 --- a/doc/user_guide/list_files_in_bucket.rst +++ b/doc/user_guide/list_files_in_bucket.rst @@ -1,6 +1,6 @@ ##################################### -Listing files in bucket +Listing files in BucketFS ##################################### This library provides a function to list the files in the bucket under a given diff --git a/doc/user_guide/user_guide.rst b/doc/user_guide/user_guide.rst index 903f2153..32b3cab1 100644 --- a/doc/user_guide/user_guide.rst +++ b/doc/user_guide/user_guide.rst @@ -11,4 +11,5 @@ For a detailed explanation of the API, please refer to our :doc:`API Documentati upload_download_functions upload_github_release_to_bucket list_files_in_bucket + delete_file_in_bucket diff --git a/exasol_bucketfs_utils_python/abstract_bucketfs_location.py b/exasol_bucketfs_utils_python/abstract_bucketfs_location.py index dee05390..f761f232 100644 --- a/exasol_bucketfs_utils_python/abstract_bucketfs_location.py +++ b/exasol_bucketfs_utils_python/abstract_bucketfs_location.py @@ -11,62 +11,75 @@ class AbstractBucketFSLocation(ABC): directly, if called from inside a UDF. 
""" @abstractmethod - def download_from_bucketfs_to_string(self, - bucket_file_path: str) -> str: + def download_from_bucketfs_to_string( + self, + bucket_file_path: str) -> str: pass @abstractmethod - def download_object_from_bucketfs_via_joblib(self, - bucket_file_path: str) -> Any: + def download_object_from_bucketfs_via_joblib( + self, + bucket_file_path: str) -> Any: pass @abstractmethod - def upload_string_to_bucketfs(self, - bucket_file_path: str, - string: str) -> \ - Tuple[ParseResult, PurePosixPath]: + def upload_string_to_bucketfs( + self, + bucket_file_path: str, + string: str) -> Tuple[ParseResult, PurePosixPath]: pass @abstractmethod - def upload_object_to_bucketfs_via_joblib(self, - object: Any, - bucket_file_path: str, - **kwargs) -> \ - Tuple[ParseResult, PurePosixPath]: + def upload_object_to_bucketfs_via_joblib( + self, + object: Any, + bucket_file_path: str, + **kwargs) -> Tuple[ParseResult, PurePosixPath]: pass @abstractmethod - def upload_fileobj_to_bucketfs(self, - fileobj: IO, - bucket_file_path: str) -> \ - Tuple[ParseResult, PurePosixPath]: + def upload_fileobj_to_bucketfs( + self, + fileobj: IO, + bucket_file_path: str) -> Tuple[ParseResult, PurePosixPath]: pass # TODO add missing upload/download functions @abstractmethod - def read_file_from_bucketfs_to_string(self, - bucket_file_path: str) -> str: + def read_file_from_bucketfs_to_string( + self, + bucket_file_path: str) -> str: pass @abstractmethod - def read_file_from_bucketfs_to_file(self, - bucket_file_path: str, - local_file_path: Path) -> None: + def read_file_from_bucketfs_to_file( + self, + bucket_file_path: str, + local_file_path: Path) -> None: pass @abstractmethod - def read_file_from_bucketfs_to_fileobj(self, - bucket_file_path: str, - fileobj: IO) -> None: + def read_file_from_bucketfs_to_fileobj( + self, + bucket_file_path: str, + fileobj: IO) -> None: pass @abstractmethod - def read_file_from_bucketfs_via_joblib(self, - bucket_file_path: str) -> Any: + def 
read_file_from_bucketfs_via_joblib( + self, + bucket_file_path: str) -> Any: pass @abstractmethod - def list_files_in_bucketfs(self, - bucket_file_path: str) -> Iterable[str]: + def list_files_in_bucketfs( + self, + bucket_file_path: str) -> Iterable[str]: + pass + + @abstractmethod + def delete_file_in_bucketfs( + self, + bucket_file_path: str) -> None: pass diff --git a/exasol_bucketfs_utils_python/bucketfs_location.py b/exasol_bucketfs_utils_python/bucketfs_location.py index cca5bc35..8f6e4865 100644 --- a/exasol_bucketfs_utils_python/bucketfs_location.py +++ b/exasol_bucketfs_utils_python/bucketfs_location.py @@ -1,7 +1,8 @@ -from typing import Any, Tuple, IO +from typing import Any, Tuple, IO, Iterable from pathlib import PurePosixPath, Path from urllib.parse import ParseResult -from exasol_bucketfs_utils_python import download, upload, list_files +from exasol_bucketfs_utils_python import download, upload, list_files, \ + delete from exasol_bucketfs_utils_python import load_file_from_local_fs as from_BFS from exasol_bucketfs_utils_python.bucket_config import BucketConfig @@ -114,8 +115,16 @@ def read_file_from_bucketfs_via_joblib( def list_files_in_bucketfs( self, - bucket_file_path: str) -> list: + bucket_file_path: str) -> Iterable[str]: return list_files.list_files_in_bucketfs( self.bucket_config, self.get_complete_file_path_in_bucket(bucket_file_path) ) + + def delete_file_in_bucketfs( + self, + bucket_file_path: str) -> None: + delete.delete_file_in_bucketfs( + self.bucket_config, + self.get_complete_file_path_in_bucket(bucket_file_path) + ) diff --git a/exasol_bucketfs_utils_python/delete.py b/exasol_bucketfs_utils_python/delete.py new file mode 100644 index 00000000..df748417 --- /dev/null +++ b/exasol_bucketfs_utils_python/delete.py @@ -0,0 +1,22 @@ +import requests +from exasol_bucketfs_utils_python import bucketfs_utils +from exasol_bucketfs_utils_python.bucketfs_utils import generate_bucket_http_url +from exasol_bucketfs_utils_python.bucket_config 
import BucketConfig + + +def delete_file_in_bucketfs( + bucket_config: BucketConfig, + bucket_file_path: str = "") -> None: + """ + Delete the file in bucket under a given path in BucketFS + + :param bucket_config: BucketConfig for the bucket to delete from + :param bucket_file_path: Path in the bucket to delete the file from + """ + if bucket_file_path is None: + raise ValueError("bucket_file_path can't be None") + + url = generate_bucket_http_url(bucket_config, bucket_file_path) + auth = bucketfs_utils.create_auth_object(bucket_config) + response = requests.delete(url.geturl(), auth=auth) + response.raise_for_status() diff --git a/exasol_bucketfs_utils_python/list_files.py b/exasol_bucketfs_utils_python/list_files.py index 2119edc0..20d247fc 100644 --- a/exasol_bucketfs_utils_python/list_files.py +++ b/exasol_bucketfs_utils_python/list_files.py @@ -9,10 +9,10 @@ def list_files_in_bucketfs(bucket_config: BucketConfig, bucket_file_path: str = "") -> Iterable[str]: """ - List files at the specified path in the bucket in BucketFs, line by line. + List files at the specified path in the bucket in BucketFS, line by line. - :param bucket_config: BucketConfig for the bucket to download from - :param bucket_file_path: Path in the bucket to download the file from + :param bucket_config: BucketConfig for the bucket to list files in + :param bucket_file_path: Path in the bucket to list the files in :return: The list of the files in the BucketFS as string. 
""" if bucket_file_path is None: diff --git a/exasol_bucketfs_utils_python/localfs_mock_bucketfs_location.py b/exasol_bucketfs_utils_python/localfs_mock_bucketfs_location.py index 38f8fa39..8fff0c05 100644 --- a/exasol_bucketfs_utils_python/localfs_mock_bucketfs_location.py +++ b/exasol_bucketfs_utils_python/localfs_mock_bucketfs_location.py @@ -87,3 +87,9 @@ def list_files_in_bucketfs(self, path = self.get_complete_file_path_in_bucket(bucket_file_path) Path(path).parent.mkdir(parents=True, exist_ok=True) return ["."] + + def delete_file_in_bucketfs( + self, + bucket_file_path: str) -> None: + path = self.get_complete_file_path_in_bucket(bucket_file_path) + Path(path).unlink(missing_ok=True) diff --git a/tests/test_delete_file.py b/tests/test_delete_file.py new file mode 100644 index 00000000..6fc40ba6 --- /dev/null +++ b/tests/test_delete_file.py @@ -0,0 +1,52 @@ +from exasol_bucketfs_utils_python import upload, list_files, delete +from exasol_bucketfs_utils_python.bucket_config import BucketConfig +from exasol_bucketfs_utils_python.bucketfs_config import BucketFSConfig +from exasol_bucketfs_utils_python.bucketfs_connection_config import BucketFSConnectionConfig +from tests.test_load_fs_file_from_udf import delete_testfile_from_bucketfs + + +def test_delete_files(): + connection_config = BucketFSConnectionConfig( + host="localhost", port=6666, user="w", pwd="write", is_https=False) + bucketfs_config = BucketFSConfig( + connection_config=connection_config, bucketfs_name="bfsdefault") + bucket_config = BucketConfig( + bucket_name="default", bucketfs_config=bucketfs_config) + test_string = "test_string" + + path_list = ["delete_path/in/the/bucket/file.txt", "delete_path/file2.txt"] + try: + # upload files + for path_in_bucket in path_list: + upload.upload_string_to_bucketfs( + bucket_config=bucket_config, + bucket_file_path=path_in_bucket, + string=test_string) + + bucket_file_path_map = { + "delete_path": ["in/the/bucket/file.txt", "file2.txt"], + 
"delete_path/in/the/bucket/": ["file.txt"] + } + + # # check files exist + for bucket_path, expected in bucket_file_path_map.items(): + listed_files = list_files.list_files_in_bucketfs( + bucket_config, bucket_path) + for listed_file in listed_files: + assert listed_file in expected + + # delete files + for path_in_bucket in path_list: + delete.delete_file_in_bucketfs( + bucket_config, path_in_bucket) + + # # check files not exist + for bucket_path, expected in bucket_file_path_map.items(): + listed_files = list_files.list_files_in_bucketfs( + bucket_config, bucket_path) + assert not listed_files + finally: + for path_in_bucket in path_list: + delete_testfile_from_bucketfs( + file_path=path_in_bucket, + bucket_config=bucket_config)