From 51db8263a901b95e24e087b2a8450fb2fc5ed03a Mon Sep 17 00:00:00 2001 From: "Jeremy A. Prescott" Date: Mon, 19 Feb 2024 11:02:45 +0100 Subject: [PATCH] paginate mapping endpoints (#1506) closes lig-4613 - paginate mapping endpoint to allow >400k samples without overloading the API --- lightly/api/api_workflow_client.py | 9 +++++++-- lightly/api/api_workflow_download_dataset.py | 9 +++++++-- 2 files changed, 14 insertions(+), 4 deletions(-) diff --git a/lightly/api/api_workflow_client.py b/lightly/api/api_workflow_client.py index f695b48a3..7033ef803 100644 --- a/lightly/api/api_workflow_client.py +++ b/lightly/api/api_workflow_client.py @@ -221,8 +221,13 @@ def get_filenames(self) -> List[str]: :meta private: # Skip docstring generation """ - filenames_on_server = self._mappings_api.get_sample_mappings_by_dataset_id( - dataset_id=self.dataset_id, field="fileName" + filenames_on_server = list( + utils.paginate_endpoint( + self._mappings_api.get_sample_mappings_by_dataset_id, + page_size=25000, + dataset_id=self.dataset_id, + field="fileName", + ) ) return filenames_on_server diff --git a/lightly/api/api_workflow_download_dataset.py b/lightly/api/api_workflow_download_dataset.py index 9cac18882..cda122482 100644 --- a/lightly/api/api_workflow_download_dataset.py +++ b/lightly/api/api_workflow_download_dataset.py @@ -93,8 +93,13 @@ def download_dataset( ) # get sample ids - sample_ids = self._mappings_api.get_sample_mappings_by_dataset_id( - self.dataset_id, field="_id" + sample_ids = list( + utils.paginate_endpoint( + self._mappings_api.get_sample_mappings_by_dataset_id, + page_size=25000, + dataset_id=self.dataset_id, + field="_id", + ) ) indices = BitMask.from_hex(tag.bit_mask_data).to_indices()