Skip to content

Commit

Permalink
[tumblr] implement 'pagination' option (#5880)
Browse files (browse the repository at this point in the history)
restore pagination behavior from before de670bd
  • Loading branch information
mikf committed Jul 23, 2024
1 parent 7b445ec commit 540eaa5
Show file tree
Hide file tree
Showing 2 changed files with 53 additions and 11 deletions.
17 changes: 17 additions & 0 deletions docs/configuration.rst
Original file line number Diff line number Diff line change
Expand Up @@ -3735,6 +3735,23 @@ Description
use an extra HTTP request to find the URL to its full-resolution version.


extractor.tumblr.pagination
---------------------------
Type
``string``
Default
``"offset"``
Description
Controls how to paginate over blog posts.

* ``"api"``: ``next`` parameter provided by the API
  (potentially misses posts due to a
  `bug <https://github.com/tumblr/docs/issues/76>`__
  in Tumblr's API)
* ``"before"``: timestamp of last post
* ``"offset"``: post offset number


extractor.tumblr.ratelimit
--------------------------
Type
Expand Down
47 changes: 36 additions & 11 deletions gallery_dl/extractor/tumblr.py
Original file line number Diff line number Diff line change
Expand Up @@ -386,7 +386,7 @@ def avatar(self, blog, size="512"):
def posts(self, blog, params):
"""Retrieve published posts"""
params["offset"] = self.extractor.config("offset")
params["limit"] = "50"
params["limit"] = 50
params["reblog_info"] = "true"
params["type"] = self.posts_type
params["before"] = self.before
Expand All @@ -398,8 +398,14 @@ def posts(self, blog, params):

def likes(self, blog):
"""Retrieve liked posts"""
endpoint = "/v2/blog/{}/likes".format(blog)
params = {"limit": "50", "before": self.before}
return self._pagination(blog, "/likes", params, key="liked_posts")
while True:
posts = self._call(endpoint, params)["liked_posts"]
if not posts:
return
yield from posts
params["before"] = posts[-1]["liked_timestamp"]

def _call(self, endpoint, params, **kwargs):
url = self.ROOT + endpoint
Expand Down Expand Up @@ -474,20 +480,39 @@ def _pagination(self, blog, endpoint, params, key="posts", cache=False):
if self.api_key:
params["api_key"] = self.api_key

strategy = self.extractor.config("pagination")
while True:
data = self._call(endpoint, params)

if cache:
self.BLOG_CACHE[blog] = data["blog"]
cache = False

yield from data[key]

try:
endpoint = data["_links"]["next"]["href"]
except KeyError:
return
posts = data[key]
yield from posts

params = None
if self.api_key:
endpoint += "&api_key=" + self.api_key
if strategy == "api":
try:
endpoint = data["_links"]["next"]["href"]
except KeyError:
return

params = None
if self.api_key:
endpoint += "&api_key=" + self.api_key

elif strategy == "before":
if not posts:
return
timestamp = posts[-1]["timestamp"] + 1
if params["before"] and timestamp >= params["before"]:
return
params["before"] = timestamp
params["offset"] = None

else: # offset
params["offset"] = \
text.parse_int(params["offset"]) + params["limit"]
params["before"] = None
if params["offset"] >= data["total_posts"]:
return

0 comments on commit 540eaa5

Please sign in to comment.