From dfb48469491885d3a90efd17cb01dc05da3af82b Mon Sep 17 00:00:00 2001
From: Miles Wells
Date: Fri, 25 Oct 2024 15:00:37 +0300
Subject: [PATCH 1/4] Resolves #145; resolves #144

---
 CHANGELOG.md             |  9 ++++++++-
 one/__init__.py          |  2 +-
 one/params.py            | 13 +++++++++++--
 one/remote/aws.py        |  2 +-
 one/tests/test_params.py | 12 ++++++++++--
 5 files changed, 31 insertions(+), 7 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 587853d6..dddf14ff 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,5 +1,12 @@
 # Changelog
-## [Latest](https://github.com/int-brain-lab/ONE/commits/main) [2.10.0]
+## [Latest](https://github.com/int-brain-lab/ONE/commits/main) [2.10.1]
+
+### Modified
+
+- prompt user to strip quotation marks if used during ONE setup
+- indicate when downloading from S3
+
+## [2.10.0]
 This version improves behaviour of loading revisions and loading datasets from list_datasets output.

 ### Modified
diff --git a/one/__init__.py b/one/__init__.py
index 07a5d384..442d51f1 100644
--- a/one/__init__.py
+++ b/one/__init__.py
@@ -1,2 +1,2 @@
 """The Open Neurophysiology Environment (ONE) API."""
-__version__ = '2.10.0'
+__version__ = '2.10.1'
diff --git a/one/params.py b/one/params.py
index 6de042cc..83f31b7b 100644
--- a/one/params.py
+++ b/one/params.py
@@ -124,6 +124,7 @@ def setup(client=None, silent=False, make_default=None, username=None, cache_dir
     if not silent:
         prompt = 'Param %s, current value is ["%s"]:'
         par = iopar.as_dict(par_default)
+        quotes = '"\'`'
         # Iterate through non-password pars
         for k in filter(lambda k: 'PWD' not in k, par.keys()):
             cpar = _get_current_par(k, par_current)
@@ -137,10 +138,18 @@ def setup(client=None, silent=False, make_default=None, username=None, cache_dir
                     url_parsed = urlsplit(par[k])
                     if not (url_parsed.netloc and re.match('https?', url_parsed.scheme)):
                         raise ValueError(f'{k} must be valid HTTP URL')
-                if k == 'ALYX_URL':
-                    client = par[k]
             else:
                 par[k] = input(prompt % (k, cpar)).strip() or cpar
+            # Check whether user erroneously entered quotation marks
+            # Prompting the user here (hopefully) corrects them before they input a password
+            # where the use of quotation marks may be legitimate
+            if par[k] and len(par[k]) >= 2 and par[k][0] in quotes and par[k][-1] in quotes:
+                warnings.warn('Do not use quotation marks with input answers', UserWarning)
+                ans = input('Strip quotation marks from response? [Y/n]:').strip() or 'y'
+                if ans.lower()[0] == 'y':
+                    par[k] = par[k].strip(quotes)
+            if k == 'ALYX_URL':
+                client = par[k]

         cpar = _get_current_par('HTTP_DATA_SERVER_PWD', par_current)
         prompt = f'Enter the FlatIron HTTP password for {par["HTTP_DATA_SERVER_LOGIN"]} '\
diff --git a/one/remote/aws.py b/one/remote/aws.py
index 9ae2792a..cc0a07ff 100644
--- a/one/remote/aws.py
+++ b/one/remote/aws.py
@@ -254,7 +254,7 @@ def s3_download_file(source, destination, s3=None, bucket_name=None, overwrite=F
             _logger.debug(f"{destination} exists and match size -- skipping")
             return destination
         with tqdm(total=filesize, unit='B',
-                  unit_scale=True, desc=str(destination)) as t:
+                  unit_scale=True, desc=f'(S3) {destination}') as t:
             file_object.download_file(Filename=str(destination), Callback=_callback_hook(t))
     except (NoCredentialsError, PartialCredentialsError) as ex:
         raise ex  # Credentials need updating in Alyx  # pragma: no cover
diff --git a/one/tests/test_params.py b/one/tests/test_params.py
index 0ae61079..9aeccd28 100644
--- a/one/tests/test_params.py
+++ b/one/tests/test_params.py
@@ -57,8 +57,16 @@ def test_setup(self, _):
         # Check verification prompt
         resp_map = {'ALYX_LOGIN': 'mistake', 'settings correct?': 'N'}
         with mock.patch('one.params.input', new=partial(self._mock_input, **resp_map)):
-            cache = one.params.setup()
-        self.assertNotEqual(cache.ALYX_LOGIN, 'mistake')
+            one.params.setup()
+        par = one.params.get(self.url, silent=True)
+        self.assertNotEqual(par.ALYX_LOGIN, 'mistake')
+
+        # Check prompt when quotation marks used
+        resp_map = {'ALYX_LOGIN': '"foo"', 'Strip quotation marks': 'y', 'settings correct?': 'Y'}
+        with mock.patch('one.params.input', new=partial(self._mock_input, **resp_map)):
+            self.assertWarnsRegex(UserWarning, 'quotation marks', one.params.setup)
+        par = one.params.get(self.url, silent=True)
+        self.assertEqual(par.ALYX_LOGIN, 'foo', 'failed to strip quotes from user input')

         # Check that raises ValueError when bad URL provided
         self.url = 'ftp://foo.bar.org'

From 4cd1a19b591c7214c8d93951ff2ddb1b50b46952 Mon Sep 17 00:00:00 2001
From: Miles Wells
Date: Fri, 25 Oct 2024 19:33:08 +0300
Subject: [PATCH 2/4] Added keep_eid_index kwarg to One.list_datasets

---
 CHANGELOG.md          |  1 +
 one/api.py            | 46 +++++++++++++++++++++++--------------------
 one/tests/test_one.py |  5 +++++
 3 files changed, 31 insertions(+), 21 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index dddf14ff..d9746329 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -5,6 +5,7 @@

 - prompt user to strip quotation marks if used during ONE setup
 - indicate when downloading from S3
+- added 'keep_eid_index' kwarg to One.list_datasets which will return the data frame with the eid index level reinstated

 ## [2.10.0]
 This version improves behaviour of loading revisions and loading datasets from list_datasets output.
diff --git a/one/api.py b/one/api.py
index c375f735..d09c4cad 100644
--- a/one/api.py
+++ b/one/api.py
@@ -712,7 +712,8 @@ def list_subjects(self) -> List[str]:
     @util.refresh
     def list_datasets(
             self, eid=None, filename=None, collection=None, revision=None, qc=QC.FAIL,
-            ignore_qc_not_set=False, details=False, query_type=None, default_revisions_only=False
+            ignore_qc_not_set=False, details=False, query_type=None, default_revisions_only=False,
+            keep_eid_index=False
     ) -> Union[np.ndarray, pd.DataFrame]:
         """
         Given an eid, return the datasets for those sessions.
@@ -748,6 +749,10 @@ def list_datasets(
         default_revisions_only : bool
             When true, only matching datasets that are considered default revisions are returned.
             If no 'default_revision' column is present, and ALFError is raised.
+        keep_eid_index : bool
+            If details is true, this determines whether the returned data frame contains the eid
+            in the index. When false (default) the returned data frame index is the dataset id
+            only, otherwise the index is a MultiIndex with levels (eid, id).

         Returns
         -------
@@ -798,8 +803,15 @@ def list_datasets(
             return datasets.iloc[0:0]  # Return empty

         datasets = util.filter_datasets(datasets, **filter_args)
-        # Return only the relative path
-        return datasets if details else datasets['rel_path'].sort_values().values.tolist()
+        if details:
+            if keep_eid_index and datasets.index.nlevels == 1:
+                # Reinstate eid index
+                datasets = pd.concat({str(eid): datasets}, names=['eid'])
+            # Return the full data frame
+            return datasets
+        else:
+            # Return only the relative path
+            return datasets['rel_path'].sort_values().values.tolist()

     @util.refresh
     def list_collections(self, eid=None, filename=None, collection=None, revision=None,
@@ -1000,7 +1012,8 @@ def load_object(self,
         >>> load_object(eid, 'spikes', attribute=['times*', 'clusters'])
         """
         query_type = query_type or self.mode
-        datasets = self.list_datasets(eid, details=True, query_type=query_type)
+        datasets = self.list_datasets(
+            eid, details=True, query_type=query_type, keep_eid_index=True)

         if len(datasets) == 0:
             raise alferr.ALFObjectNotFound(obj)
@@ -1022,9 +1035,6 @@ def load_object(self,

         # For those that don't exist, download them
         offline = None if query_type == 'auto' else self.mode == 'local'
-        if datasets.index.nlevels == 1:
-            # Reinstate eid index
-            datasets = pd.concat({str(eid): datasets}, names=['eid'])
         files = self._check_filesystem(datasets, offline=offline, check_hash=check_hash)
         files = [x for x in files if x]
         if not files:
@@ -1112,7 +1122,8 @@ def load_dataset(self,
             wildcards/regular expressions must not be used. To use wildcards, pass the collection
             and revision as separate keyword arguments.
         """
-        datasets = self.list_datasets(eid, details=True, query_type=query_type or self.mode)
+        datasets = self.list_datasets(
+            eid, details=True, query_type=query_type or self.mode, keep_eid_index=True)
         # If only two parts and wildcards are on, append ext wildcard
         if self.wildcards and isinstance(dataset, str) and len(dataset.split('.')) == 2:
             dataset += '.*'
@@ -1129,9 +1140,6 @@ def load_dataset(self,
             wildcards=self.wildcards, assert_unique=assert_unique)
         if len(datasets) == 0:
             raise alferr.ALFObjectNotFound(f'Dataset "{dataset}" not found')
-        if datasets.index.nlevels == 1:
-            # Reinstate eid index
-            datasets = pd.concat({str(eid): datasets}, names=['eid'])

         # Check files exist / download remote files
         offline = None if query_type == 'auto' else self.mode == 'local'
@@ -1271,7 +1279,8 @@ def _verify_specifiers(specifiers):

         # Short circuit
         query_type = query_type or self.mode
-        all_datasets = self.list_datasets(eid, details=True, query_type=query_type)
+        all_datasets = self.list_datasets(
+            eid, details=True, query_type=query_type, keep_eid_index=True)
         if len(all_datasets) == 0:
             if assert_present:
                 raise alferr.ALFObjectNotFound(f'No datasets found for session {eid}')
@@ -1303,9 +1312,6 @@ def _verify_specifiers(specifiers):
                   for x, y, z in zip(datasets, collections, revisions)]
         present = [len(x) == 1 for x in slices]
         present_datasets = pd.concat(slices)
-        if present_datasets.index.nlevels == 1:
-            # Reinstate eid index
-            present_datasets = pd.concat({str(eid): present_datasets}, names=['eid'])

         # Check if user is blindly downloading all data and warn of non-default revisions
         if 'default_revision' in present_datasets and \
@@ -1463,8 +1469,8 @@ def load_collection(self,
             No datasets match the object, attribute or revision filters for this collection.
         """
         query_type = query_type or self.mode
-        datasets = self.list_datasets(eid, details=True, collection=collection,
-                                      query_type=query_type)
+        datasets = self.list_datasets(
+            eid, details=True, collection=collection, query_type=query_type, keep_eid_index=True)
         if len(datasets) == 0:
             raise alferr.ALFError(f'{collection} not found for session {eid}')

@@ -1477,9 +1483,6 @@ def load_collection(self,
         if len(datasets) == 0:
             raise alferr.ALFObjectNotFound(object or '')
         parts = [alfiles.rel_path_parts(x) for x in datasets.rel_path]
-        if datasets.index.nlevels == 1:
-            # Reinstate eid index
-            datasets = pd.concat({str(eid): datasets}, names=['eid'])

         # For those that don't exist, download them
         offline = None if query_type == 'auto' else self.mode == 'local'
@@ -1829,7 +1832,8 @@ def describe_dataset(self, dataset_type=None):
     @util.refresh
     def list_datasets(
             self, eid=None, filename=None, collection=None, revision=None, qc=QC.FAIL,
-            ignore_qc_not_set=False, details=False, query_type=None, default_revisions_only=False
+            ignore_qc_not_set=False, details=False, query_type=None, default_revisions_only=False,
+            keep_eid_index=False
     ) -> Union[np.ndarray, pd.DataFrame]:
         filters = dict(
             collection=collection, filename=filename, revision=revision, qc=qc,
diff --git a/one/tests/test_one.py b/one/tests/test_one.py
index c88351be..dd15fd38 100644
--- a/one/tests/test_one.py
+++ b/one/tests/test_one.py
@@ -404,6 +404,11 @@ def test_list_datasets(self):
         self.assertEqual(27, len(dsets))
         self.assertEqual(1, dsets.index.nlevels, 'details data frame should be without eid index')

+        # Test keep_eid_index parameter
+        dsets = self.one.list_datasets('KS005/2019-04-02/001', details=True, keep_eid_index=True)
+        self.assertEqual(27, len(dsets))
+        self.assertEqual(2, dsets.index.nlevels, 'details data frame should be with eid index')
+
         # Test filters
         filename = {'attribute': ['times', 'intervals'], 'extension': 'npy'}
         dsets = self.one.list_datasets('ZFM-01935/2021-02-05/001', filename)

From 6f6a9d15d22cf60c0aaf4c1e2987762e115cd837 Mon Sep 17 00:00:00 2001
From: olivier
Date: Fri, 25 Oct 2024 17:56:48 +0100
Subject: [PATCH 3/4] pass keep_eid_index to subfunction

---
 one/api.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/one/api.py b/one/api.py
index d09c4cad..15df0228 100644
--- a/one/api.py
+++ b/one/api.py
@@ -1839,10 +1839,12 @@ def list_datasets(
             collection=collection, filename=filename, revision=revision, qc=qc,
             ignore_qc_not_set=ignore_qc_not_set, default_revisions_only=default_revisions_only)
         if (query_type or self.mode) != 'remote':
-            return super().list_datasets(eid, details=details, query_type=query_type, **filters)
+            return super().list_datasets(eid, details=details, keep_eid_index=keep_eid_index,
+                                         query_type=query_type, **filters)
         elif not eid:
             warnings.warn('Unable to list all remote datasets')
-            return super().list_datasets(eid, details=details, query_type=query_type, **filters)
+            return super().list_datasets(eid, details=details, keep_eid_index=keep_eid_index,
+                                         query_type=query_type, **filters)
         eid = self.to_eid(eid)  # Ensure we have a UUID str list
         if not eid:
             return self._cache['datasets'].iloc[0:0] if details else []  # Return empty

From 43ce76bc702b0e97a0b81164f2bcb09283713598 Mon Sep 17 00:00:00 2001
From: Miles Wells
Date: Wed, 30 Oct 2024 12:48:57 +0200
Subject: [PATCH 4/4] Resolves https://github.com/int-brain-lab/iblenv/issues/384

---
 CHANGELOG.md | 1 +
 one/api.py   | 3 ++-
 2 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index d9746329..9aa9d944 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -6,6 +6,7 @@
 - prompt user to strip quotation marks if used during ONE setup
 - indicate when downloading from S3
 - added 'keep_eid_index' kwarg to One.list_datasets which will return the data frame with the eid index level reinstated
+- HOTFIX: include Subject/lab part in destination path when downloading from S3

 ## [2.10.0]
 This version improves behaviour of loading revisions and loading datasets from list_datasets output.
diff --git a/one/api.py b/one/api.py
index 15df0228..cfa9d3ea 100644
--- a/one/api.py
+++ b/one/api.py
@@ -2373,8 +2373,9 @@ def _download_aws(self, dsets, update_exists=True, keep_uuid=None, **_) -> List[
                 assert record['relative_path'].endswith(dset['rel_path']), \
                     f'Relative path for dataset {uuid} does not match Alyx record'
                 source_path = PurePosixPath(record['data_repository_path'], record['relative_path'])
+                local_path = self.cache_dir.joinpath(alfiles.get_alf_path(source_path))
+                # Add UUIDs to filenames, if required
                 source_path = alfiles.add_uuid_string(source_path, uuid)
-                local_path = self.cache_dir.joinpath(record['relative_path'])
                 if keep_uuid is True or (keep_uuid is None and self.uuid_filenames is True):
                     local_path = alfiles.add_uuid_string(local_path, uuid)
                 local_path.parent.mkdir(exist_ok=True, parents=True)
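
Usage sketch (not part of the patches above): the snippet below illustrates the keep_eid_index keyword introduced in PATCH 2, assuming an already-configured ONE instance; the session path is the one used in test_list_datasets and is purely illustrative, so dataset counts and index contents will differ on a real database.

from one.api import ONE

one = ONE()  # assumes ONE has already been set up against your Alyx / cache directory
session = 'KS005/2019-04-02/001'  # illustrative session, borrowed from the test above

# Default behaviour: the details data frame is indexed by dataset id only
dsets = one.list_datasets(session, details=True)
assert dsets.index.nlevels == 1

# keep_eid_index=True reinstates the (eid, id) MultiIndex, the shape the loaders
# (load_object, load_dataset, load_collection) now request internally
dsets = one.list_datasets(session, details=True, keep_eid_index=True)
assert dsets.index.nlevels == 2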