From 69219996a86da53b01c3e75302cfa2318c555005 Mon Sep 17 00:00:00 2001 From: Rafal Jankowski Date: Mon, 9 Sep 2024 10:55:57 +0200 Subject: [PATCH 1/4] Added limit and page size to pagination --- CHANGELOG.md | 1 + src/neptune/api/pagination.py | 28 +++++++++++---- tests/unit/neptune/new/api/test_pagination.py | 36 +++++++++++++++---- 3 files changed, 53 insertions(+), 12 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index a527f99a6..7ad9c9516 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -31,6 +31,7 @@ - Series values fetching reworked with protocol buffer support ([#1744](https://github.com/neptune-ai/neptune-client/pull/1744)) - Added support for enhanced field definitions querying ([#1751](https://github.com/neptune-ai/neptune-client/pull/1751)) - Added support for `NQL` `MATCHES` operator ([#1863](https://github.com/neptune-ai/neptune-client/pull/1863)) +- Pagination respecting `limit` parameter and page size ([]()) ### Fixes - Fixed `tqdm.notebook` import only in Notebook environment ([#1716](https://github.com/neptune-ai/neptune-client/pull/1716)) diff --git a/src/neptune/api/pagination.py b/src/neptune/api/pagination.py index 19b9b308a..6ab8bf892 100644 --- a/src/neptune/api/pagination.py +++ b/src/neptune/api/pagination.py @@ -16,12 +16,13 @@ __all__ = ("paginate_over",) import abc +import itertools from dataclasses import dataclass from typing import ( Any, Callable, - Iterable, Iterator, + List, Optional, TypeVar, ) @@ -46,15 +47,30 @@ def __call__(self, *, next_page: Optional[NextPage] = None, **kwargs: Any) -> An def paginate_over( getter: Paginatable, - extract_entries: Callable[[T], Iterable[Entry]], + extract_entries: Callable[[T], List[Entry]], + page_size: int = 50, + limit: Optional[int] = None, **kwargs: Any, ) -> Iterator[Entry]: """ Generic approach to pagination via `NextPage` """ - data = getter(**kwargs, next_page=None) - yield from extract_entries(data) + data = getter(**kwargs, next_page=NextPage(limit=page_size, next_page_token=None)) + results = extract_entries(data) + counter = len(results[:limit]) + + yield from itertools.islice(results, limit) while data.next_page is not None and data.next_page.next_page_token is not None: - data = getter(**kwargs, next_page=data.next_page) - yield from extract_entries(data) + to_fetch = page_size + if limit is not None: + if counter >= limit: + break + to_fetch = min(page_size, limit - counter) + + data = getter(**kwargs, next_page=NextPage(limit=to_fetch, next_page_token=data.next_page.next_page_token)) + results = extract_entries(data) + if limit is not None: + counter += len(results[:to_fetch]) + + yield from itertools.islice(results, to_fetch) diff --git a/tests/unit/neptune/new/api/test_pagination.py b/tests/unit/neptune/new/api/test_pagination.py index 7a4d25542..8829a94a4 100644 --- a/tests/unit/neptune/new/api/test_pagination.py +++ b/tests/unit/neptune/new/api/test_pagination.py @@ -75,9 +75,9 @@ def test__multiple_pages(): assert getter.call_count == 3 assert getter.call_args_list == [ - call(next_page=None), - call(next_page=NextPage(next_page_token="aa", limit=None)), - call(next_page=NextPage(next_page_token="bb", limit=None)), + call(next_page=NextPage(next_page_token=None, limit=50)), + call(next_page=NextPage(next_page_token="aa", limit=50)), + call(next_page=NextPage(next_page_token="bb", limit=50)), ] @@ -99,7 +99,31 @@ def test__kwargs_passed(): assert getter.call_count == 3 assert getter.call_args_list == [ - call(a=1, b=2, next_page=None), - call(a=1, b=2, next_page=NextPage(next_page_token="aa", limit=None)), - call(a=1, b=2, next_page=NextPage(next_page_token="bb", limit=None)), + call(a=1, b=2, next_page=NextPage(next_page_token=None, limit=50)), + call(a=1, b=2, next_page=NextPage(next_page_token="aa", limit=50)), + call(a=1, b=2, next_page=NextPage(next_page_token="bb", limit=50)), + ] + + +def test__page_size(): + # given + getter = Mock( + side_effect=[ + Mock(next_page=NextPage(next_page_token="aa", limit=None)), + Mock(next_page=NextPage(next_page_token="bb", limit=None)), + Mock(next_page=None), + ] + ) + + # when + entries = list(paginate_over(getter=getter, extract_entries=extract_entries, page_size=10, a=1, b=2)) + + # then + assert entries == [1, 2, 3, 1, 2, 3, 1, 2, 3] + + assert getter.call_count == 3 + assert getter.call_args_list == [ + call(a=1, b=2, next_page=NextPage(next_page_token=None, limit=10)), + call(a=1, b=2, next_page=NextPage(next_page_token="aa", limit=10)), + call(a=1, b=2, next_page=NextPage(next_page_token="bb", limit=10)), ] From 177cca893a41e43a665286023ec0db7b0274526e Mon Sep 17 00:00:00 2001 From: Rafal Jankowski Date: Mon, 9 Sep 2024 10:59:46 +0200 Subject: [PATCH 2/4] CHANGELOG --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 7ad9c9516..c26b1d3a9 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -31,7 +31,7 @@ - Series values fetching reworked with protocol buffer support ([#1744](https://github.com/neptune-ai/neptune-client/pull/1744)) - Added support for enhanced field definitions querying ([#1751](https://github.com/neptune-ai/neptune-client/pull/1751)) - Added support for `NQL` `MATCHES` operator ([#1863](https://github.com/neptune-ai/neptune-client/pull/1863)) -- Pagination respecting `limit` parameter and page size ([]()) +- Pagination respecting `limit` parameter and page size ([#1866](https://github.com/neptune-ai/neptune-client/pull/1866)) ### Fixes - Fixed `tqdm.notebook` import only in Notebook environment ([#1716](https://github.com/neptune-ai/neptune-client/pull/1716)) From 5dd455d6b1d1063215411ee9f85a6ea1b43148db Mon Sep 17 00:00:00 2001 From: Rafal Jankowski Date: Mon, 9 Sep 2024 11:02:52 +0200 Subject: [PATCH 3/4] Unittests --- tests/unit/neptune/new/api/test_pagination.py | 23 +++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/tests/unit/neptune/new/api/test_pagination.py b/tests/unit/neptune/new/api/test_pagination.py index 8829a94a4..b7b35a0be 100644 --- a/tests/unit/neptune/new/api/test_pagination.py +++ b/tests/unit/neptune/new/api/test_pagination.py @@ -127,3 +127,26 @@ def test__page_size(): call(a=1, b=2, next_page=NextPage(next_page_token="aa", limit=10)), call(a=1, b=2, next_page=NextPage(next_page_token="bb", limit=10)), ] + + +def test__limit(): + # given + getter = Mock( + side_effect=[ + Mock(next_page=NextPage(next_page_token="aa", limit=None)), + Mock(next_page=NextPage(next_page_token="bb", limit=None)), + Mock(next_page=None), + ] + ) + + # when + entries = list(paginate_over(getter=getter, extract_entries=extract_entries, page_size=3, limit=5, a=1, b=2)) + + # then + assert entries == [1, 2, 3, 1, 2] + + assert getter.call_count == 2 + assert getter.call_args_list == [ + call(a=1, b=2, next_page=NextPage(next_page_token=None, limit=3)), + call(a=1, b=2, next_page=NextPage(next_page_token="aa", limit=2)), + ] From 32162265b0615122c9d7354b0d0fbd94e74772c8 Mon Sep 17 00:00:00 2001 From: Rafal Jankowski Date: Mon, 9 Sep 2024 11:58:57 +0200 Subject: [PATCH 4/4] Code review --- src/neptune/api/pagination.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/neptune/api/pagination.py b/src/neptune/api/pagination.py index 6ab8bf892..4d7646c71 100644 --- a/src/neptune/api/pagination.py +++ b/src/neptune/api/pagination.py @@ -55,9 +55,11 @@ def paginate_over( """ Generic approach to pagination via `NextPage` """ + counter = 0 data = getter(**kwargs, next_page=NextPage(limit=page_size, next_page_token=None)) results = extract_entries(data) - counter = len(results[:limit]) + if limit is not None: + counter = len(results[:limit]) yield from itertools.islice(results, limit)