From dfb48469491885d3a90efd17cb01dc05da3af82b Mon Sep 17 00:00:00 2001
From: Miles Wells
Date: Fri, 25 Oct 2024 15:00:37 +0300
Subject: [PATCH 1/4] Resolves #145; resolves #144

---
 CHANGELOG.md             |  9 ++++++++-
 one/__init__.py          |  2 +-
 one/params.py            | 13 +++++++++++--
 one/remote/aws.py        |  2 +-
 one/tests/test_params.py | 12 ++++++++++--
 5 files changed, 31 insertions(+), 7 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 587853d6..dddf14ff 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,5 +1,12 @@
 # Changelog
-## [Latest](https://github.com/int-brain-lab/ONE/commits/main) [2.10.0]
+## [Latest](https://github.com/int-brain-lab/ONE/commits/main) [2.10.1]
+
+### Modified
+
+- prompt user to strip quotation marks if used during ONE setup
+- indicate when downloading from S3
+
+## [2.10.0]
 This version improves behaviour of loading revisions and loading datasets from list_datasets output.

 ### Modified
diff --git a/one/__init__.py b/one/__init__.py
index 07a5d384..442d51f1 100644
--- a/one/__init__.py
+++ b/one/__init__.py
@@ -1,2 +1,2 @@
 """The Open Neurophysiology Environment (ONE) API."""
-__version__ = '2.10.0'
+__version__ = '2.10.1'
diff --git a/one/params.py b/one/params.py
index 6de042cc..83f31b7b 100644
--- a/one/params.py
+++ b/one/params.py
@@ -124,6 +124,7 @@ def setup(client=None, silent=False, make_default=None, username=None, cache_dir
     if not silent:
         prompt = 'Param %s, current value is ["%s"]:'
         par = iopar.as_dict(par_default)
+        quotes = '"\'`'
         # Iterate through non-password pars
         for k in filter(lambda k: 'PWD' not in k, par.keys()):
             cpar = _get_current_par(k, par_current)
@@ -137,10 +138,18 @@ def setup(client=None, silent=False, make_default=None, username=None, cache_dir
                     url_parsed = urlsplit(par[k])
                     if not (url_parsed.netloc and re.match('https?', url_parsed.scheme)):
                         raise ValueError(f'{k} must be valid HTTP URL')
-                if k == 'ALYX_URL':
-                    client = par[k]
             else:
                 par[k] = input(prompt % (k, cpar)).strip() or cpar
+            # Check whether user erroneously entered quotation marks
+            # Prompting the user here (hopefully) corrects them before they input a password
+            # where the use of quotation marks may be legitimate
+            if par[k] and len(par[k]) >= 2 and par[k][0] in quotes and par[k][-1] in quotes:
+                warnings.warn('Do not use quotation marks with input answers', UserWarning)
+                ans = input('Strip quotation marks from response? [Y/n]:').strip() or 'y'
+                if ans.lower()[0] == 'y':
+                    par[k] = par[k].strip(quotes)
+            if k == 'ALYX_URL':
+                client = par[k]

         cpar = _get_current_par('HTTP_DATA_SERVER_PWD', par_current)
         prompt = f'Enter the FlatIron HTTP password for {par["HTTP_DATA_SERVER_LOGIN"]} '\
diff --git a/one/remote/aws.py b/one/remote/aws.py
index 9ae2792a..cc0a07ff 100644
--- a/one/remote/aws.py
+++ b/one/remote/aws.py
@@ -254,7 +254,7 @@ def s3_download_file(source, destination, s3=None, bucket_name=None, overwrite=F
             _logger.debug(f"{destination} exists and match size -- skipping")
             return destination
         with tqdm(total=filesize, unit='B',
-                  unit_scale=True, desc=str(destination)) as t:
+                  unit_scale=True, desc=f'(S3) {destination}') as t:
             file_object.download_file(Filename=str(destination), Callback=_callback_hook(t))
     except (NoCredentialsError, PartialCredentialsError) as ex:
         raise ex  # Credentials need updating in Alyx  # pragma: no cover
diff --git a/one/tests/test_params.py b/one/tests/test_params.py
index 0ae61079..9aeccd28 100644
--- a/one/tests/test_params.py
+++ b/one/tests/test_params.py
@@ -57,8 +57,16 @@ def test_setup(self, _):
         # Check verification prompt
         resp_map = {'ALYX_LOGIN': 'mistake', 'settings correct?': 'N'}
         with mock.patch('one.params.input', new=partial(self._mock_input, **resp_map)):
-            cache = one.params.setup()
-        self.assertNotEqual(cache.ALYX_LOGIN, 'mistake')
+            one.params.setup()
+        par = one.params.get(self.url, silent=True)
+        self.assertNotEqual(par.ALYX_LOGIN, 'mistake')
+
+        # Check prompt when quotation marks used
+        resp_map = {'ALYX_LOGIN': '"foo"', 'Strip quotation marks': 'y', 'settings correct?': 'Y'}
+        with mock.patch('one.params.input', new=partial(self._mock_input, **resp_map)):
+            self.assertWarnsRegex(UserWarning, 'quotation marks', one.params.setup)
+        par = one.params.get(self.url, silent=True)
+        self.assertEqual(par.ALYX_LOGIN, 'foo', 'failed to strip quotes from user input')

         # Check that raises ValueError when bad URL provided
         self.url = 'ftp://foo.bar.org'

From 4cd1a19b591c7214c8d93951ff2ddb1b50b46952 Mon Sep 17 00:00:00 2001
From: Miles Wells
Date: Fri, 25 Oct 2024 19:33:08 +0300
Subject: [PATCH 2/4] Added keep_eid_index kwarg to One.list_datasets

---
 CHANGELOG.md          |  1 +
 one/api.py            | 46 +++++++++++++++++++++++--------------------
 one/tests/test_one.py |  5 +++++
 3 files changed, 31 insertions(+), 21 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index dddf14ff..d9746329 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -5,6 +5,7 @@

 - prompt user to strip quotation marks if used during ONE setup
 - indicate when downloading from S3
+- added 'keep_eid_index' kwarg to One.list_datasets which will return the data frame with the eid index level reinstated

 ## [2.10.0]
 This version improves behaviour of loading revisions and loading datasets from list_datasets output.
diff --git a/one/api.py b/one/api.py
index c375f735..d09c4cad 100644
--- a/one/api.py
+++ b/one/api.py
@@ -712,7 +712,8 @@ def list_subjects(self) -> List[str]:
     @util.refresh
     def list_datasets(
             self, eid=None, filename=None, collection=None, revision=None, qc=QC.FAIL,
-            ignore_qc_not_set=False, details=False, query_type=None, default_revisions_only=False
+            ignore_qc_not_set=False, details=False, query_type=None, default_revisions_only=False,
+            keep_eid_index=False
     ) -> Union[np.ndarray, pd.DataFrame]:
         """
         Given an eid, return the datasets for those sessions.
@@ -748,6 +749,10 @@ def list_datasets(
         default_revisions_only : bool
             When true, only matching datasets that are considered default revisions are returned.
             If no 'default_revision' column is present, and ALFError is raised.
+        keep_eid_index : bool
+            If details is true, this determines whether the returned data frame contains the eid
+            in the index. When false (default) the returned data frame index is the dataset id
+            only, otherwise the index is a MultiIndex with levels (eid, id).

         Returns
         -------
@@ -798,8 +803,15 @@ def list_datasets(
             return datasets.iloc[0:0]  # Return empty

         datasets = util.filter_datasets(datasets, **filter_args)
-        # Return only the relative path
-        return datasets if details else datasets['rel_path'].sort_values().values.tolist()
+        if details:
+            if keep_eid_index and datasets.index.nlevels == 1:
+                # Reinstate eid index
+                datasets = pd.concat({str(eid): datasets}, names=['eid'])
+            # Return the full data frame
+            return datasets
+        else:
+            # Return only the relative path
+            return datasets['rel_path'].sort_values().values.tolist()

     @util.refresh
     def list_collections(self, eid=None, filename=None, collection=None, revision=None,
@@ -1000,7 +1012,8 @@ def load_object(self,
         >>> load_object(eid, 'spikes', attribute=['times*', 'clusters'])
         """
         query_type = query_type or self.mode
-        datasets = self.list_datasets(eid, details=True, query_type=query_type)
+        datasets = self.list_datasets(
+            eid, details=True, query_type=query_type, keep_eid_index=True)

         if len(datasets) == 0:
             raise alferr.ALFObjectNotFound(obj)
@@ -1022,9 +1035,6 @@ def load_object(self,

         # For those that don't exist, download them
         offline = None if query_type == 'auto' else self.mode == 'local'
-        if datasets.index.nlevels == 1:
-            # Reinstate eid index
-            datasets = pd.concat({str(eid): datasets}, names=['eid'])
         files = self._check_filesystem(datasets, offline=offline, check_hash=check_hash)
         files = [x for x in files if x]
         if not files:
@@ -1112,7 +1122,8 @@ def load_dataset(self,
             wildcards/regular expressions must not be used. To use wildcards, pass the collection
             and revision as separate keyword arguments.
         """
-        datasets = self.list_datasets(eid, details=True, query_type=query_type or self.mode)
+        datasets = self.list_datasets(
+            eid, details=True, query_type=query_type or self.mode, keep_eid_index=True)
         # If only two parts and wildcards are on, append ext wildcard
         if self.wildcards and isinstance(dataset, str) and len(dataset.split('.')) == 2:
             dataset += '.*'
@@ -1129,9 +1140,6 @@ def load_dataset(self,
             wildcards=self.wildcards, assert_unique=assert_unique)
         if len(datasets) == 0:
             raise alferr.ALFObjectNotFound(f'Dataset "{dataset}" not found')
-        if datasets.index.nlevels == 1:
-            # Reinstate eid index
-            datasets = pd.concat({str(eid): datasets}, names=['eid'])

         # Check files exist / download remote files
         offline = None if query_type == 'auto' else self.mode == 'local'
@@ -1271,7 +1279,8 @@ def _verify_specifiers(specifiers):

         # Short circuit
         query_type = query_type or self.mode
-        all_datasets = self.list_datasets(eid, details=True, query_type=query_type)
+        all_datasets = self.list_datasets(
+            eid, details=True, query_type=query_type, keep_eid_index=True)
         if len(all_datasets) == 0:
             if assert_present:
                 raise alferr.ALFObjectNotFound(f'No datasets found for session {eid}')
@@ -1303,9 +1312,6 @@ def _verify_specifiers(specifiers):
                   for x, y, z in zip(datasets, collections, revisions)]
         present = [len(x) == 1 for x in slices]
         present_datasets = pd.concat(slices)
-        if present_datasets.index.nlevels == 1:
-            # Reinstate eid index
-            present_datasets = pd.concat({str(eid): present_datasets}, names=['eid'])

         # Check if user is blindly downloading all data and warn of non-default revisions
         if 'default_revision' in present_datasets and \
@@ -1463,8 +1469,8 @@ def load_collection(self,
             No datasets match the object, attribute or revision filters for this collection.
         """
         query_type = query_type or self.mode
-        datasets = self.list_datasets(eid, details=True, collection=collection,
-                                      query_type=query_type)
+        datasets = self.list_datasets(
+            eid, details=True, collection=collection, query_type=query_type, keep_eid_index=True)
         if len(datasets) == 0:
             raise alferr.ALFError(f'{collection} not found for session {eid}')

@@ -1477,9 +1483,6 @@ def load_collection(self,
         if len(datasets) == 0:
             raise alferr.ALFObjectNotFound(object or '')
         parts = [alfiles.rel_path_parts(x) for x in datasets.rel_path]
-        if datasets.index.nlevels == 1:
-            # Reinstate eid index
-            datasets = pd.concat({str(eid): datasets}, names=['eid'])

         # For those that don't exist, download them
         offline = None if query_type == 'auto' else self.mode == 'local'
@@ -1829,7 +1832,8 @@ def describe_dataset(self, dataset_type=None):
     @util.refresh
     def list_datasets(
             self, eid=None, filename=None, collection=None, revision=None, qc=QC.FAIL,
-            ignore_qc_not_set=False, details=False, query_type=None, default_revisions_only=False
+            ignore_qc_not_set=False, details=False, query_type=None, default_revisions_only=False,
+            keep_eid_index=False
     ) -> Union[np.ndarray, pd.DataFrame]:
         filters = dict(
             collection=collection, filename=filename, revision=revision, qc=qc,
diff --git a/one/tests/test_one.py b/one/tests/test_one.py
index c88351be..dd15fd38 100644
--- a/one/tests/test_one.py
+++ b/one/tests/test_one.py
@@ -404,6 +404,11 @@ def test_list_datasets(self):
         self.assertEqual(27, len(dsets))
         self.assertEqual(1, dsets.index.nlevels, 'details data frame should be without eid index')

+        # Test keep_eid_index parameter
+        dsets = self.one.list_datasets('KS005/2019-04-02/001', details=True, keep_eid_index=True)
+        self.assertEqual(27, len(dsets))
+        self.assertEqual(2, dsets.index.nlevels, 'details data frame should be with eid index')
+
         # Test filters
         filename = {'attribute': ['times', 'intervals'], 'extension': 'npy'}
         dsets = self.one.list_datasets('ZFM-01935/2021-02-05/001', filename)

From 6f6a9d15d22cf60c0aaf4c1e2987762e115cd837 Mon Sep 17 00:00:00 2001
From: olivier
Date: Fri, 25 Oct 2024 17:56:48 +0100
Subject: [PATCH 3/4] pass keep_eid_index to subfunction

---
 one/api.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/one/api.py b/one/api.py
index d09c4cad..15df0228 100644
--- a/one/api.py
+++ b/one/api.py
@@ -1839,10 +1839,12 @@ def list_datasets(
             collection=collection, filename=filename, revision=revision, qc=qc,
             ignore_qc_not_set=ignore_qc_not_set, default_revisions_only=default_revisions_only)
         if (query_type or self.mode) != 'remote':
-            return super().list_datasets(eid, details=details, query_type=query_type, **filters)
+            return super().list_datasets(eid, details=details, keep_eid_index=keep_eid_index,
+                                         query_type=query_type, **filters)
         elif not eid:
             warnings.warn('Unable to list all remote datasets')
-            return super().list_datasets(eid, details=details, query_type=query_type, **filters)
+            return super().list_datasets(eid, details=details, keep_eid_index=keep_eid_index,
+                                         query_type=query_type, **filters)
         eid = self.to_eid(eid)  # Ensure we have a UUID str list
         if not eid:
             return self._cache['datasets'].iloc[0:0] if details else []  # Return empty

From 43ce76bc702b0e97a0b81164f2bcb09283713598 Mon Sep 17 00:00:00 2001
From: Miles Wells
Date: Wed, 30 Oct 2024 12:48:57 +0200
Subject: [PATCH 4/4] Resolves https://github.com/int-brain-lab/iblenv/issues/384

---
 CHANGELOG.md | 1 +
 one/api.py   | 3 ++-
 2 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index d9746329..9aa9d944 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -6,6 +6,7 @@
 - prompt user to strip quotation marks if used during ONE setup
 - indicate when downloading from S3
 - added 'keep_eid_index' kwarg to One.list_datasets which will return the data frame with the eid index level reinstated
+- HOTFIX: include Subject/lab part in destination path when downloading from S3

 ## [2.10.0]
 This version improves behaviour of loading revisions and loading datasets from list_datasets output.
diff --git a/one/api.py b/one/api.py
index 15df0228..cfa9d3ea 100644
--- a/one/api.py
+++ b/one/api.py
@@ -2373,8 +2373,9 @@ def _download_aws(self, dsets, update_exists=True, keep_uuid=None, **_) -> List[
                 assert record['relative_path'].endswith(dset['rel_path']), \
                     f'Relative path for dataset {uuid} does not match Alyx record'
                 source_path = PurePosixPath(record['data_repository_path'], record['relative_path'])
+                local_path = self.cache_dir.joinpath(alfiles.get_alf_path(source_path))
+                # Add UUIDs to filenames, if required
                 source_path = alfiles.add_uuid_string(source_path, uuid)
-                local_path = self.cache_dir.joinpath(record['relative_path'])
                 if keep_uuid is True or (keep_uuid is None and self.uuid_filenames is True):
                     local_path = alfiles.add_uuid_string(local_path, uuid)
                 local_path.parent.mkdir(exist_ok=True, parents=True)
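
Usage sketch (not part of the patches above): the snippet below illustrates the keep_eid_index keyword introduced in PATCH 2, assuming an already-configured ONE instance; the session path is the one used in test_list_datasets and is purely illustrative, so dataset counts and index contents will differ on a real database.

from one.api import ONE

one = ONE()  # assumes ONE has already been set up against your Alyx / cache directory
session = 'KS005/2019-04-02/001'  # illustrative session, borrowed from the test above

# Default behaviour: the details data frame is indexed by dataset id only
dsets = one.list_datasets(session, details=True)
assert dsets.index.nlevels == 1

# keep_eid_index=True reinstates the (eid, id) MultiIndex, the shape the loaders
# (load_object, load_dataset, load_collection) now request internally
dsets = one.list_datasets(session, details=True, keep_eid_index=True)
assert dsets.index.nlevels == 2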