diff --git a/.doctrees/api.doctree b/.doctrees/api.doctree
index 0677056a..21203631 100644
Binary files a/.doctrees/api.doctree and b/.doctrees/api.doctree differ
diff --git a/.doctrees/changes/unreleased.doctree b/.doctrees/changes/unreleased.doctree
index 0ecc1c0e..edcd7fce 100644
Binary files a/.doctrees/changes/unreleased.doctree and b/.doctrees/changes/unreleased.doctree differ
diff --git a/.doctrees/environment.pickle b/.doctrees/environment.pickle
index ebd38dc9..7f26017d 100644
Binary files a/.doctrees/environment.pickle and b/.doctrees/environment.pickle differ
diff --git a/.doctrees/user_guide/basics.doctree b/.doctrees/user_guide/basics.doctree
index ad2c0234..35d91818 100644
Binary files a/.doctrees/user_guide/basics.doctree and b/.doctrees/user_guide/basics.doctree differ
diff --git a/.doctrees/user_guide/user_guide.doctree b/.doctrees/user_guide/user_guide.doctree
index 61a26204..894de3bb 100644
Binary files a/.doctrees/user_guide/user_guide.doctree and b/.doctrees/user_guide/user_guide.doctree differ
diff --git a/_modules/exasol/bucketfs/_buckets.html b/_modules/exasol/bucketfs/_buckets.html
index 2906dae5..9c8ae0cd 100644
--- a/_modules/exasol/bucketfs/_buckets.html
+++ b/_modules/exasol/bucketfs/_buckets.html
@@ -316,7 +316,7 @@
Source code for exasol.bucketfs._buckets
)
-classBucketLike(Protocol):
+
[docs]classBucketLike(Protocol):""" Definition of the Bucket interface. It is compatible with both on-premises an SaaS BucketFS systems.
@@ -348,7 +348,7 @@
Source code for exasol.bucketfs._buckets
no backslash at the start or at the end. """
- defdelete(self,path:str)->None:
+
[docs]defdelete(self,path:str)->None:""" Deletes a file in the bucket.
@@ -359,10 +359,10 @@
Source code for exasol.bucketfs._buckets
Q. What happens if the path points to a directory? A. Same. There are no directories as such in the BucketFS, hence
- a directory path is just a non-existent file.
- """
+ a directory path is just a non-existent file.
+ """
[docs]defupload(self,path:str,data:ByteString|BinaryIO)->None:""" Uploads a file to the bucket.
@@ -371,23 +371,23 @@
Source code for exasol.bucketfs._buckets
Q. What happens if the parent is missing? A. The bucket doesn't care about the structure of the file's path. Looking from the prospective
- of a file system, the bucket will create the missing parent, but in reality it will just
- store the data indexed by the provided path.
+ of a file system, the bucket will create the missing parent, but in reality it will just
+ store the data indexed by the provided path. Q. What happens if the path points to an existing file? A. That's fine, the file will be updated. Q. What happens if the path points to an existing directory? A. The bucket doesn't care about the structure of the file's path. Looking from the prospective
- of a file system, there will exist a file and directory with the same name.
+ of a file system, there will exist a file and directory with the same name. Q. How should the path look like? A. It should look like a POSIX path, but it should not contain any of the NTFS invalid characters.
- It can have the leading and/or ending backslashes, which will be subsequently removed.
- If the path doesn't conform to this format an BucketFsError will be raised.
- """
+ It can have the leading and/or ending backslashes, which will be subsequently removed.
+ If the path doesn't conform to this format an BucketFsError will be raised.
+ """
[docs]defdownload(self,path:str,chunk_size:int=8192)->Iterable[ByteString]:""" Downloads a file from the bucket. The content of the file will be provided in chunks of the specified size. The full content of the file can be constructed using
@@ -402,15 +402,31 @@
Source code for exasol.bucketfs._buckets
Q. What happens if the path points to a directory. A. Same, since a "directory" in the BucketFS is just a non-existent file.
- """
+ """
[docs]classBucket:"""
- Implementation of the On-Premises bucket.
+ Implementation of the BucketLike interface for the BucketFS in Exasol On-Premises database.
+
+ Args:
+ name:
+ Name of the bucket.
+ service:
+ Url where this bucket is hosted on.
+ username:
+ Username used for authentication.
+ password:
+ Password used for authentication.
+ verify:
+ Either a boolean, in which case it controls whether we verify
+ the server's TLS certificate, or a string, in which case it must be a path
+ to a CA bundle to use. Defaults to ``True``.
+ service_name:
+ Optional name of the BucketFS service. """
-
verify:bool|str=True,service_name:Optional[str]=None):
-"""
- Create a new bucket instance.
-
- Args:
- name:
- Name of the bucket.
- service:
- Url where this bucket is hosted on.
- username:
- Username used for authentication.
- password:
- Password used for authentication.
- verify:
- Either a boolean, in which case it controls whether we verify
- the server's TLS certificate, or a string, in which case it must be a path
- to a CA bundle to use. Defaults to ``True``.
- service_name:
- Optional name of the BucketFS service.
- """self._name=nameself._service=_parse_service_url(service)self._username=usernameself._password=passwordself._verify=verify
- self._service_name=service_name
+ self._service_name=service_namedef__str__(self):returnf"Bucket<{self.name} | on: {self._service}>"
@@ -482,13 +479,6 @@
Source code for exasol.bucketfs._buckets
[docs]defupload(self,path:str,data:ByteString|BinaryIO|Iterable[ByteString])->None:
-"""
- Uploads a file onto this bucket
-
- Args:
- path: in the bucket the file shall be associated with.
- data: raw content of the file.
- """url=_build_url(service_url=self._service,bucket=self.name,path=path)LOGGER.info("Uploading %s to bucket %s.",path,self.name)response=requests.put(url,data=data,auth=self._auth,verify=self._verify)
@@ -498,15 +488,6 @@
[docs]defdelete(self,path)->None:
-"""
- Deletes a specific file in this bucket.
-
- Args:
- path: points to the file which shall be deleted.
-
- Raises:
- A BucketFsError if the operation couldn't be executed successfully.
- """url=_build_url(service_url=self._service,bucket=self.name,path=path)LOGGER.info("Deleting %s from bucket %s.",path,self.name)response=requests.delete(url,auth=self._auth,verify=self._verify)
@@ -517,16 +498,6 @@
[docs]defdownload(self,path:str,chunk_size:int=8192)->Iterable[ByteString]:
-"""
- Downloads a specific file of this bucket.
-
- Args:
- path: which shall be downloaded.
- chunk_size: which shall be used for downloading.
-
- Returns:
- An iterable of binary chunks representing the downloaded file.
- """url=_build_url(service_url=self._service,bucket=self.name,path=path)LOGGER.info("Downloading %s using a chunk size of %d bytes from bucket %s.",
@@ -543,7 +514,20 @@
[docs]defdelete(self,path:str)->None:LOGGER.info("Deleting %s from the bucket.",path)withSaasAuthenticatedClient(base_url=self._url,token=self._pat,
@@ -591,9 +575,9 @@
[docs]defupload(self,path:str,data:ByteString|BinaryIO)->None:LOGGER.info("Uploading %s to the bucket.",path)# Q. The service can handle any characters in the path.# Do we need to check this path for presence of characters deemed
@@ -612,9 +596,9 @@
[docs]defdownload(self,path:str,chunk_size:int=8192)->Iterable[ByteString]:LOGGER.info("Downloading %s from the bucket.",path)withSaasAuthenticatedClient(base_url=self._url,token=self._pat,
@@ -635,15 +619,15 @@
[docs]classMountedBucket:"""
- Implementation of the Bucket interface backed by a normal file system.
+ Implementation of the BucketLike interface backed by a normal file system. The targeted use case is the access to the BucketFS files from a UDF. Arguments:
@@ -681,14 +665,14 @@
f.write(data)else:raiseValueError('upload called with unrecognised data type. '
- 'A valid data should be either ByteString or BinaryIO')
+ 'A valid data should be either ByteString or BinaryIO')
[docs]classPathLike(Protocol):
+"""
+ Definition of the PathLike view of the files in a Bucket.
+ """
+
+ @property
+ defname(self)->str:
+"""
+ A string representing the final path component, excluding the drive and root, if any.
+ """
+
+ @property
+ defsuffix(self)->str:
+"""
+ The file extension of the final component, if any.
+ """
+
+ @property
+ defroot(self)->str:
+"""
+ A string representing the root, if any.
+ """
+
+ @property
+ defparent(self)->str:
+"""
+ The logical parent of this path.
+ """
+
+
[docs]defas_uri(self)->str:
+"""
+ Represent the path as a file URI. Can be used to reconstruct the location/path.
+ """
+
+
[docs]defas_udf_path(self)->str:
+"""
+ This method is specific to a BucketFS flavour of the PathLike.
+ It returns a corresponding path, as it's seen from a UDF.
+ """
+
+
[docs]defexists(self)->bool:
+"""
+ Return True if the path points to an existing file or directory.
+ """
+
+
[docs]defis_dir(self)->bool:
+"""
+ Return True if the path points to a directory, False if it points to another kind of file.
+ """
+
+
[docs]defis_file(self)->bool:
+"""
+ Return True if the path points to a regular file, False if it points to another kind of file.
+ """
+
+
[docs]defread(self,chunk_size:int=8192)->Iterable[ByteString]:
+"""
+ Read the content of the file behind this path.
+
+ Only works for PathLike objects which return True for `is_file()`.
+
+ Args:
+ chunk_size: which will be yielded by the iterator.
+
+ Returns:
+ Returns an iterator which can be used to read the contents of the path in chunks.
+
+ Raises:
+ FileNotFoundError: If the file does not exist.
+ IsADirectoryError: if the pathlike object points to a directory.
+ """
+
+
[docs]defwrite(self,data:ByteString|BinaryIO|Iterable[ByteString])->None:
+"""
+ Writes data to this path.
+
+ Q. Should it create the parent directory if it doesn't exit?
+ A. Yes, it should.
+
+ After successfully writing to this path `exists` will yield true for this path.
+ If the file already existed it will be overwritten.
+
+ Args:
+ data: which shall be writen to the path.
+
+ Raises:
+ NotAFileError: if the pathlike object is not a file path.
+ """
+
+
[docs]defrm(self)->None:
+"""
+ Remove this file.
+
+ Note:
+ If `exists()` and is_file yields true for this path, the path will be deleted,
+ otherwise exception will be thrown.
+
+ Raises:
+ FileNotFoundError: If the file does not exist.
+ """
+
+
[docs]defrmdir(self,recursive:bool=False)->None:
+"""
+ Removes this directory.
+
+ Note: In order to stay close to pathlib, by default `rmdir` with `recursive`
+ set to `False` won't delete non-empty directories.
+
+ Args:
+ recursive: if true the directory itself and its entire contents (files and subdirs)
+ will be deleted. If false and the directory is not empty an error will be thrown.
+
+ Raises:
+ FileNotFoundError: If the file does not exist.
+ PermissionError: If recursive is false and the directory is not empty.
+ """
+
+
[docs]defjoinpath(self,*path_segments)->"PathLike":
+"""
+ Calling this method is equivalent to combining the path with each of the given path segments in turn.
+
+ Returns:
+ A new pathlike object pointing the combined path.
+ """
+
+
[docs]defwalk(self,top_down:bool=True)->Generator[tuple["PathLike",list[str],list[str]],None,None]:
+"""
+ Generate the file names in a directory tree by walking the tree either top-down or bottom-up.
+
+ Note:
+ Try to mimik https://docs.python.org/3/library/pathlib.html#pathlib.Path.walk as closely as possible,
+ except the functionality associated with the parameters of the `pathlib` walk.
+
+ Yields:
+ A 3-tuple of (dirpath, dirnames, filenames).
+ """
+
+
[docs]defiterdir(self)->Generator["PathLike",None,None]:
+"""
+ When the path points to a directory, yield path objects of the directory contents.
+
+ Note:
+ If `path` points to a file then `iterdir()` will yield nothing.
+
+ Yields:
+ All direct children of the pathlike object.
+ """
+
+ def__truediv__(self,other):
+"""
+ Overload / for joining, see also joinpath or `pathlib.Path`.
+ """
+
+
+def_remove_archive_suffix(path:PurePath)->PurePath:
+ whilepath.suffixinARCHIVE_SUFFIXES:
+ path=path.with_suffix('')
+ returnpath
+
+
+class_BucketFile:
+"""
+ A node in a perceived file structure of a bucket.
+ This can be a file, a directory or both.
+ """
+
+ def__init__(self,name:str,parent:str=''):
+ self._name=name
+ self._path=f'{parent}/{name}'ifparentelsename
+ self._children:Optional[dict[str,"_BucketFile"]]=None
+ self.is_file=False
+
+ @property
+ defname(self):
+ returnself._name
+
+ @property
+ defpath(self):
+ returnself._path
+
+ @property
+ defis_dir(self):
+ # The node can be a directory as well as a file,
+ # hence is the is_dir property, independent of is_file.
+ returnbool(self._children)
+
+ def__iter__(self):
+ ifself._childrenisNone:
+ returniter(())
+ returniter(self._children.values())
+
+ defget_child(self,child_name:str)->"_BucketFile":
+"""
+ Returns a child object with the specified name.
+ Creates one if it hasn't been created yet.
+ """
+ ifself._childrenisNone:
+ self._children={}
+ child:Optional["_BucketFile"]=None
+ else:
+ child=self._children.get(child_name)
+ ifchildisNone:
+ child=_BucketFile(child_name,self._path)
+ self._children[child_name]=child
+ returnchild
+
+
+classBucketPath:
+"""
+ Implementation of the PathLike view for files in a bucket.
+ """
+
+ def__init__(self,path:str|PurePath,bucket_api:BucketLike):
+"""
+ :param path: A pure path of a file or directory. The path is assumed to
+ be relative to the bucket. It is also permissible to have
+ this path in an absolute form, e.g. '/dir1/...'
+ or '\\\\abc\\...\\'.
+
+ All Pure Path methods of the PathLike protocol will be
+ delegated to this object.
+
+ :param bucket_api: An object supporting the Bucket API protocol.
+ """
+ self._path=PurePath(path)
+ self._bucket_api=bucket_api
+
+ def_get_relative_posix(self):
+"""
+ Returns the pure path of this object as a string, in the format of a bucket
+ file: 'dir/subdir/.../filename'.
+ """
+ path_str=str(self._path)[len(self._path.anchor):]
+ ifisinstance(self._path,PureWindowsPath):
+ path_str=path_str.replace('\\','/')
+ ifpath_str=='.':
+ path_str=''
+ returnpath_str
+
+ def_navigate(self)->Optional[_BucketFile]:
+"""
+ Reads the bucket file structure and navigates to the node corresponding to the
+ pure path of this object. Returns None if such node doesn't exist, otherwise
+ returns this node.
+ """
+ path_str=self._get_relative_posix()
+ path_len=len(path_str)
+ path_root:Optional[_BucketFile]=None
+ forfile_nameinself._bucket_api.files:
+ iffile_name.startswith(path_str):
+ path_root=path_rootor_BucketFile(self._path.name,str(self.parent))
+ node=path_root
+ forpartinfile_name[path_len:].split('/'):
+ ifpart:
+ node=node.get_child(part)
+ node.is_file=True
+ returnpath_root
+
+ @property
+ defname(self)->str:
+ returnself._path.name
+
+ @property
+ defsuffix(self)->str:
+ returnself._path.suffix
+
+ @property
+ defroot(self)->str:
+ returnself._path.root
+
+ @property
+ defparent(self)->str:
+ returnself._path.parent.name
+
+ defas_uri(self)->str:
+ returnself._path.as_uri()
+
+ defas_udf_path(self)->str:
+ returnstr(PurePath(self._bucket_api.udf_path)/
+ _remove_archive_suffix(self._path))
+
+ defexists(self)->bool:
+ returnself._navigate()isnotNone
+
+ defis_dir(self)->bool:
+ current_node=self._navigate()
+ return(current_nodeisnotNone)andcurrent_node.is_dir
+
+ defis_file(self)->bool:
+ current_node=self._navigate()
+ return(current_nodeisnotNone)andcurrent_node.is_file
+
+ defread(self,chunk_size:int=8192)->Iterable[ByteString]:
+ returnself._bucket_api.download(str(self._path),chunk_size)
+
+ defwrite(self,data:ByteString|BinaryIO|Iterable[ByteString])->None:
+ if(notisinstance(data,IOBase)andisinstance(data,Iterable)and
+ all(isinstance(chunk,ByteString)forchunkindata)):
+ data=b''.join(data)
+ self._bucket_api.upload(str(self._path),data)
+
+ defrm(self)->None:
+ current_node=self._navigate()
+ ifcurrent_nodeisNone:
+ raiseFileNotFoundError(errno.ENOENT,os.strerror(errno.ENOENT),str(self._path))
+ ifnotcurrent_node.is_file:
+ raiseIsADirectoryError(errno.EISDIR,os.strerror(errno.EISDIR),str(self._path))
+ self._bucket_api.delete(str(self._path))
+
+ defrmdir(self,recursive:bool=False)->None:
+ current_node=self._navigate()
+ ifcurrent_nodeisNone:
+ # There is no such thing as an empty directory. So, for the sake of
+ # compatibility with the PathLike, any directory that doesn't exist
+ # is considered empty.
+ return
+ ifnotcurrent_node.is_dir:
+ raiseNotADirectoryError(errno.ENOTDIR,os.strerror(errno.ENOTDIR),str(self._path))
+ ifrecursive:
+ self._rmdir_recursive(current_node)
+ else:
+ raiseOSError(errno.ENOTEMPTY,os.strerror(errno.ENOTEMPTY),str(self._path))
+
+ def_rmdir_recursive(self,node:_BucketFile):
+ forchildinnode:
+ self._rmdir_recursive(child)
+ ifnode.is_file:
+ self._bucket_api.delete(node.path)
+
+ defjoinpath(self,*path_segments)->PathLike:
+ # The path segments can be of either this type or an os.PathLike.
+ cls=type(self)
+ seg_paths=[seg._pathifisinstance(seg,cls)elsesegforseginpath_segments]
+ new_path=self._path.joinpath(*seg_paths)
+ returncls(new_path,self._bucket_api)
+
+ defwalk(self,top_down:bool=True)->Generator[tuple[PathLike,list[str],list[str]],None,None]:
+ current_node=self._navigate()
+ ifcurrent_nodeisNone:
+ raiseFileNotFoundError(errno.ENOENT,os.strerror(errno.ENOENT),str(self._path))
+
+ ifcurrent_node.is_dir:
+ yield fromself._walk_recursive(current_node,top_down)
+
+ def_walk_recursive(self,node:_BucketFile,top_down:bool)-> \
+ Generator[tuple[PathLike,list[str],list[str]],None,None]:
+
+ bucket_path=BucketPath(node.path,self._bucket_api)
+ dir_list:list[str]=[]
+ file_list:list[str]=[]
+ forchildinnode:
+ ifchild.is_file:
+ file_list.append(child.name)
+ ifchild.is_dir:
+ dir_list.append(child.name)
+
+ # The difference between the top_down and bottom_up is in the order of
+ # yielding the current node and its children. Top down - current node first,
+ # bottom_up - children first.
+ iftop_down:
+ yieldbucket_path,dir_list,file_list
+ forchildinnode:
+ ifchild.is_dir:
+ yield fromself._walk_recursive(child,top_down)
+ ifnottop_down:
+ yieldbucket_path,dir_list,file_list
+
+ defiterdir(self)->Generator[PathLike,None,None]:
+ current_node=self._navigate()
+ ifcurrent_nodeisNone:
+ raiseFileNotFoundError(errno.ENOENT,os.strerror(errno.ENOENT),str(self._path))
+ ifnotcurrent_node.is_dir:
+ raiseNotADirectoryError(errno.ENOTDIR,os.strerror(errno.ENOTDIR),str(self._path))
+
+ forchildincurrent_node:
+ yieldBucketPath(self._path/child.name,self._bucket_api)
+
+ def__truediv__(self,other):
+ # The other object can be of either this type or an os.PathLike.
+ cls=type(self)
+ new_path=self._path/(other._pathifisinstance(other,cls)elseother)
+ returncls(new_path,self._bucket_api)
+
+ def__str__(self):
+ returnstr(self._path)
+
+
+def_create_onprem_bucket(url:str,
+ username:str,
+ password:str,
+ bucket_name:str='default',
+ verify:bool|str=True,
+ service_name:Optional[str]=None
+ )->BucketLike:
+"""
+ Creates an on-prem bucket.
+ """
+ credentials={bucket_name:{'username':username,'password':password}}
+ service=Service(url,credentials,verify,service_name)
+ buckets=service.buckets
+ ifbucket_namenotinbuckets:
+ raiseBucketFsError(f'Bucket {bucket_name} does not exist.')
+ returnbuckets[bucket_name]
+
+
+def_create_saas_bucket(account_id:str,
+ database_id:str,
+ pat:str,
+ url:str='https://cloud.exasol.com'
+ )->BucketLike:
+"""
+ Creates a SaaS bucket.
+ """
+ returnSaaSBucket(url=url,account_id=account_id,database_id=database_id,pat=pat)
+
+
+def_create_mounted_bucket(service_name:str='bfsdefault',
+ bucket_name:str='default',
+ base_path:Optional[str]=None
+ )->BucketLike:
+"""
+ Creates a bucket mounted to a UDF.
+ """
+ bucket=MountedBucket(service_name,bucket_name,base_path)
+ ifnotbucket.root.exists():
+ raiseBucketFsError(f'Service {service_name} or bucket {bucket_name} do not exist.')
+ returnbucket
+
+
+
[docs]defbuild_path(**kwargs)->PathLike:
+"""
+ Creates a PathLike object based on a bucket in one of the BucketFS storage backends.
+ It provides the same interface for the following BucketFS implementations:
+ - On-Premises
+ - SaaS
+ - BucketFS files mounted as read-only directory in a UDF.
+
+ Arguments:
+ backend:
+ This is a mandatory parameter that indicates the BucketFS storage backend.
+ The available backends are defined in the StorageBackend enumeration,
+ Currently, these are "onprem", "saas" and "mounted". The parameter value
+ can be provided either as a string, e.g. "onprem", or as an enum, e.g.
+ StorageBackend.onprem.
+ path:
+ Optional parameter that selects a path within the bucket. If not provided
+ the returned PathLike objects corresponds to the root of the bucket. Hence,
+ an alternative way of creating a PathLike pointing to a particular file or
+ directory is as in the code below.
+ path = build_path(...) / "the_desired_path"
+
+ The rest of the arguments are backend specific.
+
+ On-prem arguments:
+ url:
+ Url of the BucketFS service, e.g. `http(s)://127.0.0.1:2580`.
+ username:
+ BucketFS username (generally, different from the DB username).
+ password:
+ BucketFS user password.
+ bucket_name:
+ Name of the bucket. Currently, a PathLike cannot span multiple buckets.
+ verify:
+ Either a boolean, in which case it controls whether we verify the server's
+ TLS certificate, or a string, in which case it must be a path to a CA bundle
+ to use. Defaults to ``True``.
+ service_name:
+ Optional name of the BucketFS service.
+
+ SaaS arguments:
+ url:
+ Url of the Exasol SaaS. Defaults to 'https://cloud.exasol.com'.
+ account_id:
+ SaaS user account ID, e.g. 'org_LVeOj4pwXhPatNz5'
+ (given example is not a valid ID of an existing account).
+ database_id:
+ Database ID, e.g. 'msduZKlMR8QCP_MsLsVRwy'
+ (given example is not a valid ID of an existing database).
+ pat:
+ Personal Access Token, e.g. 'exa_pat_aj39AsM3bYR9bQ4qk2wiG8SWHXbRUGNCThnep5YV73az6A'
+ (given example is not a valid PAT).
+
+ Mounted BucketFS directory arguments:
+ service_name:
+ Name of the BucketFS service (not a service url). Defaults to 'bfsdefault'.
+ bucket_name:
+ Name of the bucket. Currently, a PathLike cannot span multiple buckets.
+ base_path:
+ Explicitly specified root path in a file system. This is an alternative to
+ providing the service_name and the bucket_name.
+ """
+
+ backend=kwargs.pop('backend',StorageBackend.onprem)
+ path=kwargs.pop('path')if'path'inkwargselse''
+
+ ifisinstance(backend,str):
+ backend=StorageBackend[backend.lower()]
+ ifbackend==StorageBackend.onprem:
+ bucket=_create_onprem_bucket(**kwargs)
+ elifbackend==StorageBackend.saas:
+ bucket=_create_saas_bucket(**kwargs)
+ else:
+ bucket=_create_mounted_bucket(**kwargs)
+
+ returnBucketPath(path,bucket)
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/_modules/index.html b/_modules/index.html
index 03bfa582..adcb8c3d 100644
--- a/_modules/index.html
+++ b/_modules/index.html
@@ -282,6 +282,7 @@
diff --git a/_sources/api.rst.txt b/_sources/api.rst.txt
index bc122e73..ecb37915 100644
--- a/_sources/api.rst.txt
+++ b/_sources/api.rst.txt
@@ -9,6 +9,13 @@ exasol.bucketfs.Service
:undoc-members:
:show-inheritance:
+exasol.bucketfs.BucketLike
+--------------------------
+.. autoclass:: exasol.bucketfs.BucketLike
+ :members:
+ :undoc-members:
+ :show-inheritance:
+
exasol.bucketfs.Bucket
-----------------------
.. autoclass:: exasol.bucketfs.Bucket
@@ -16,6 +23,31 @@ exasol.bucketfs.Bucket
:undoc-members:
:show-inheritance:
+exasol.bucketfs.SaaSBucket
+--------------------------
+.. autoclass:: exasol.bucketfs.SaaSBucket
+ :members:
+ :undoc-members:
+ :show-inheritance:
+
+exasol.bucketfs.MountedBucket
+-----------------------------
+.. autoclass:: exasol.bucketfs.MountedBucket
+ :members:
+ :undoc-members:
+ :show-inheritance:
+
+exasol.bucketfs.path.PathLike
+-----------------------------
+.. autoclass:: exasol.bucketfs._path.PathLike
+ :members:
+ :undoc-members:
+ :show-inheritance:
+
+exasol.bucketfs.path.build_path
+-------------------------------
+.. autofunction:: exasol.bucketfs._path.build_path
+
exasol.bucketfs.as_bytes
------------------------
.. autofunction:: exasol.bucketfs.as_bytes
diff --git a/_sources/changes/unreleased.md.txt b/_sources/changes/unreleased.md.txt
index c0d363e8..7a8c588b 100644
--- a/_sources/changes/unreleased.md.txt
+++ b/_sources/changes/unreleased.md.txt
@@ -11,3 +11,4 @@ The current release adds a dependency to plugin `pytest_exasol_saas` and replace
## Documentation
* #144: Added comment on using fixtures from pytest-plugin `pytest-exasol-saas`
+* #147: Added documentation for the SaaS and the PathLike interface.
diff --git a/_sources/user_guide/basics.rst.txt b/_sources/user_guide/basics.rst.txt
index 04725195..53b7670b 100644
--- a/_sources/user_guide/basics.rst.txt
+++ b/_sources/user_guide/basics.rst.txt
@@ -3,8 +3,8 @@ Basic's
The Bucketfs Service
--------------------
-A single bucketfs service can host multiple buckets. In order to interact with a bucketfs service one
-can use the :ref:`exasol.bucketfs.Service ` class.
+In the On-Prem database, a single bucketfs service can host multiple buckets. In order to interact with a
+bucketfs service one can use the :ref:`exasol.bucketfs.Service ` class.
List buckets
++++++++++++
@@ -23,10 +23,16 @@ Get a Bucket reference
Bucket class
--------------
+------------
A Bucket contains a set of files which may be restricted, depending on the credentials of the requester.
-Using :ref:`exasol.bucketfs.Bucket ` class the user can interact (download, upload, list and delete) files.
-with the files in the bucket.
+The Bucket class for an On-Prem database is :ref:`exasol.bucketfs.Bucket `.
+The correspondent class for a SaaS database is exasol.bucketfs.SaaSBucket.
+Using these classes the user can interact with the files in the bucket (download, upload, list and delete them).
+
+Most of the examples below are based on the On-Prem implementation of the BucketFS. In the SaaS implementation
+there is only one BucketFS service, providing a single bucket. To access the BucketFS in SaaS the Bucket
+object should be created directly, as it is demonstrated in the last example. The interface of the Bucket
+object for the SaaS database is identical to that of the On-Prem database.
List files in a Bucket
++++++++++++++++++++++
@@ -73,6 +79,21 @@ Delete files from Bucket
:language: python3
:end-before: # Expert/Mapped bucket API
+Create bucket object in SaaS
+++++++++++++++++++++++++++++
+
+.. literalinclude:: /examples/bucket_saas.py
+ :language: python3
+
+PathLike interface
+------------------
+A PathLike is an interface similar to the pathlib.Path and should feel familiar to most users.
+
+Using the PathLike interface
+++++++++++++++++++++++++++++
+
+.. literalinclude:: /examples/path_like.py
+ :language: python3
Configure logging
+++++++++++++++++
diff --git a/_sources/user_guide/user_guide.rst.txt b/_sources/user_guide/user_guide.rst.txt
index eaf18787..b72c8871 100644
--- a/_sources/user_guide/user_guide.rst.txt
+++ b/_sources/user_guide/user_guide.rst.txt
@@ -6,10 +6,10 @@ Bucketfs
Depending on the database configuration, the bucketfs setup can range from straight forward to fairly complex.
This is due to the fact that:
-* Each database can have one or more BucketFS services
+* Each database can have one or more BucketFS services (in the On-Prem database)
* Each BucketFS service is available on all worker cluster of a database
* Each BucketFS service runs on all data nodes of a database
-* Each BucketFS service can have one or more Buckets
+* Each BucketFS service can have one or more Buckets (in the On-Prem database)
* Each Bucket can hold one or more files
The overview bellow tries to illustrate this in a more tangible manner.
diff --git a/api.html b/api.html
index 9c2e6805..d300036c 100644
--- a/api.html
+++ b/api.html
@@ -327,6 +327,105 @@
Q. What happens if the path doesn’t exist?
+A. It does nothing, no error.
+
Q. What happens if the path points to a directory?
+A. Same. There are no directories as such in the BucketFS, hence
+a directory path is just a non-existent file.
Downloads a file from the bucket. The content of the file will be provided
+in chunks of the specified size. The full content of the file can be constructed using
+code similar to the line below.
+content = b’’.join(api.download_file(path))
+
+
Parameters:
+
+
path – Path of the file in the bucket that should be downloaded.
+
chunk_size – Size of the chunks the file content will be delivered in.
+
+
+
+
Q. What happens if the file specified by the path doesn’t exist.
+A. BucketFsError will be raised.
+
Q. What happens if the path points to a directory.
+A. Same, since a “directory” in the BucketFS is just a non-existent file.
path – Path in the bucket where the file should be uploaded.
+
data – Either a binary array or a binary stream, e.g. a file opened in the binary mode.
+
+
+
+
Q. What happens if the parent is missing?
+A. The bucket doesn’t care about the structure of the file’s path. Looking from the perspective
+of a file system, the bucket will create the missing parent, but in reality it will just
+store the data indexed by the provided path.
+
Q. What happens if the path points to an existing file?
+A. That’s fine, the file will be updated.
+
Q. What happens if the path points to an existing directory?
+A. The bucket doesn’t care about the structure of the file’s path. Looking from the perspective
+of a file system, there will exist a file and directory with the same name.
+
Q. How should the path look like?
+A. It should look like a POSIX path, but it should not contain any of the NTFS invalid characters.
+It can have the leading and/or ending backslashes, which will be subsequently removed.
+If the path doesn’t conform to this format a BucketFsError will be raised.
Implementation of the BucketLike interface backed by a normal file system.
+The targeted use case is the access to the BucketFS files from a UDF.
+
+
Parameters:
+
+
service_name – Name of the BucketFS service (not a service url). Defaults to ‘bfsdefault’.
+
bucket_name – Name of the bucket. Defaults to ‘default’.
+
base_path – Instead of specifying the names of the service and the bucket, one can provide
+a full path to the root directory. This can be a useful option for testing when
+the backend is a local file system.
+If this parameter is not provided the root directory is set to
+buckets/<service_name>/<bucket_name>.
Note: In order to stay close to pathlib, by default rmdir with recursive
set to False won’t delete non-empty directories.
+
+
+
+
Parameters:
+
recursive – if true the directory itself and its entire contents (files and subdirs)
+will be deleted. If false and the directory is not empty an error will be thrown.
Creates a PathLike object based on a bucket in one of the BucketFS storage backends.
+It provides the same interface for the following BucketFS implementations:
+- On-Premises
+- SaaS
+- BucketFS files mounted as read-only directory in a UDF.
+
+
Parameters:
+
+
backend – This is a mandatory parameter that indicates the BucketFS storage backend.
+The available backends are defined in the StorageBackend enumeration.
+Currently, these are “onprem”, “saas” and “mounted”. The parameter value
+can be provided either as a string, e.g. “onprem”, or as an enum, e.g.
+StorageBackend.onprem.
+
path –
Optional parameter that selects a path within the bucket. If not provided
+the returned PathLike object corresponds to the root of the bucket. Hence,
+an alternative way of creating a PathLike pointing to a particular file or
+directory is as in the code below.
+path = build_path(…) / “the_desired_path”
+
The rest of the arguments are backend specific.
+
On-prem arguments:
+
+
url – Url of the BucketFS service, e.g. http(s)://127.0.0.1:2580.
+
username – BucketFS username (generally, different from the DB username).
+
password – BucketFS user password.
+
bucket_name – Name of the bucket. Currently, a PathLike cannot span multiple buckets.
+
verify – Either a boolean, in which case it controls whether we verify the server’s
+TLS certificate, or a string, in which case it must be a path to a CA bundle
+to use. Defaults to True.
A single bucketfs service can host multiple buckets. In order to interact with a bucketfs service one
-can use the exasol.bucketfs.Service class.
+
In the On-Prem database, a single bucketfs service can host multiple buckets. In order to interact with a
+bucketfs service one can use the exasol.bucketfs.Service class.
A Bucket contains a set of files which may be restricted, depending on the credentials of the requester.
-Using exasol.bucketfs.Bucket class the user can interact (download, upload, list and delete) files.
-with the files in the bucket.
+The Bucket class for an On-Prem database is exasol.bucketfs.Bucket.
+The corresponding class for a SaaS database is exasol.bucketfs.SaaSBucket.
+Using these classes the user can interact with the files in the bucket (download, upload, list and delete them).
+
Most of the examples below are based on the On-Prem implementation of the BucketFS. In the SaaS implementation
+there is only one BucketFS service, providing a single bucket. To access the BucketFS in SaaS the Bucket
+object should be created directly, as it is demonstrated in the last example. The interface of the Bucket
+object for the SaaS database is identical to that of the On-Prem database.
"""
This example is relevant for the Exasol SaaS database.
It demonstrates the creation of a bucket object for a SaaS database.
"""
import os

from exasol.bucketfs import SaaSBucket

# Let's assume that the required SaaS connection parameters
# are stored in environment variables.
# NOTE(review): os.environ.get() returns None for unset variables; the
# SaaSBucket constructor is assumed to validate its arguments — TODO confirm.
bucket = SaaSBucket(
    url=os.environ.get('SAAS_URL'),
    account_id=os.environ.get('SAAS_ACCOUNT_ID'),
    database_id=os.environ.get('SAAS_DATABASE_ID'),
    pat=os.environ.get('SAAS_PAT'),
)
+
"""
In this tutorial we will demonstrate the usage of the PathLike interface
with an example of handling customer reviews.
"""
from typing import ByteString
import tempfile
import os

import exasol.bucketfs as bfs

# First, we need to get a path in the BucketFS where we will store reviews.
# We will use the build_path() function for that. This function takes different
# input parameters depending on the backend in use. We will set the type of
# backend to the variable below. Please change it to bfs.path.StorageBackend.saas
# if needed.
backend = bfs.path.StorageBackend.onprem

if backend == bfs.path.StorageBackend.onprem:
    # The parameters below are the default BucketFS parameters of the Docker-DB
    # running on a local machine. Please change them according to the settings of the
    # On-Prem database being used. For better security, consider storing the password
    # in an environment variable.
    reviews = bfs.path.build_path(
        backend=backend,
        url="http://localhost:6666",
        bucket_name='default',
        service_name='bfsdefault',
        path='reviews',
        username='w',
        password='write',
        verify=False
    )
elif backend == bfs.path.StorageBackend.saas:
    # In case of a SaaS database we will assume that the required SaaS connection
    # parameters are stored in environment variables.
    reviews = bfs.path.build_path(
        backend=backend,
        url=os.environ.get('SAAS_URL'),
        account_id=os.environ.get('SAAS_ACCOUNT_ID'),
        database_id=os.environ.get('SAAS_DATABASE_ID'),
        pat=os.environ.get('SAAS_PAT'),
        path='reviews',
    )
else:
    raise RuntimeError(f'Unknown backend {backend}')

# Let's create a path for good reviews and write some reviews there,
# each into a separate file.
good_reviews = reviews / 'good'

john_h_review = good_reviews / 'John-H.review'
john_h_review.write(
    b'I had an amazing experience with this company! '
    b'The customer service was top-notch, and the product exceeded my expectations. '
    b'I highly recommend them to anyone looking for quality products and excellent service.'
)

sarah_l_review = good_reviews / 'Sarah-L.review'
sarah_l_review.write(
    b'I am a repeat customer of this business, and they never disappoint. '
    b'The team is always friendly and helpful, and their products are outstanding. '
    b'I have recommended them to all my friends and family, and I will continue to do so!'
)

david_w_review = good_reviews / 'David-W.review'
david_w_review.write(
    b'After trying several other companies, I finally found the perfect fit with this one. '
    b'Their attention to detail and commitment to customer satisfaction is unparalleled. '
    b'I will definitely be using their services again in the future.'
)

# Now let's write some bad reviews in a different subdirectory.
bad_reviews = reviews / 'bad'

# Previously we provided content as a ByteString. But we can also use a file object,
# as shown here.
with tempfile.TemporaryFile() as file_obj:
    file_obj.write(
        b'I first began coming here because of their amazing reviews. '
        b'Unfortunately, my experiences have been overwhelmingly negative. '
        b'I was billed more than 2,600 euros, the vast majority of which '
        b'I did not consent to and were never carried out.'
    )
    # Rewind so write() reads the stream from the beginning.
    file_obj.seek(0)
    mike_s_review = bad_reviews / 'Mike-S.review'
    mike_s_review.write(file_obj)


# A PathLike object supports an interface similar to the PosixPurePath.
for path_obj in [reviews, good_reviews, john_h_review]:
    print(path_obj)
    print('\tname:', path_obj.name)
    print('\tsuffix:', path_obj.suffix)
    print('\tparent:', path_obj.parent)
    print('\texists:', path_obj.exists())
    print('\tis_dir:', path_obj.is_dir())
    print('\tis_file:', path_obj.is_file())

# The as_udf_path() function returns the corresponding path, as it's seen from a UDF.
print("A UDF can find John's review at", john_h_review.as_udf_path())


# The read() method returns an iterator over chunks of content.
# The function below reads the whole content of the specified file.
def read_content(bfs_path: bfs.path.PathLike) -> ByteString:
    return b''.join(bfs_path.read())


# Like the pathlib.Path class, the BucketFS PathLike object provides methods
# to iterate over the content of a directory.
# Let's use the iterdir() method to print all good reviews.
for item in good_reviews.iterdir():
    if item.is_file():
        print(item.name, 'said:')
        print(read_content(item))


# The walk method allows traversing subdirectories.
# Let's use this method to create a list of all review paths.
all_reviews = [node / file for node, _, files in reviews.walk() for file in files]
for review in all_reviews:
    print(review)


# A file can be deleted using the rm() method. Please note that once the file is
# deleted it won't be possible to write another file to the same path for a certain
# time, due to internal internode synchronisation procedure.
mike_s_review.rm()

# A directory can be deleted using the rmdir() method. If it is not empty we need
# to use the recursive=True option to delete the directory with all its content.
good_reviews.rmdir(recursive=True)

# Now all reviews should be deleted.
print('Are any reviews left?', reviews.exists())

# It may look surprising why a call to the review.exists() returns False, since we
# have not deleted the base directory. In BucketFS a directory doesn't exist as a
# distinct entity. Therefore, the exists() function called on a path for an empty
# directory returns False.