From 4b8b71c392cd4381ac8de9826e7ca764ea0eda7f Mon Sep 17 00:00:00 2001 From: blankie Date: Fri, 19 Jan 2024 14:31:21 +1100 Subject: [PATCH 1/3] [photos18] add support --- docs/supportedsites.md | 6 ++ gallery_dl/extractor/__init__.py | 1 + gallery_dl/extractor/photos18.py | 103 +++++++++++++++++++++++++++++++ test/results/photos18.py | 19 ++++++ 4 files changed, 129 insertions(+) create mode 100644 gallery_dl/extractor/photos18.py create mode 100644 test/results/photos18.py diff --git a/docs/supportedsites.md b/docs/supportedsites.md index 4a6d8bd249..9c8cf0d4fb 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -619,6 +619,12 @@ Consider all listed sites to potentially be NSFW. Albums, individual Images + + Photos18 + https://photos18.com + Albums, Lists + + PhotoVogue https://www.vogue.com/photovogue/ diff --git a/gallery_dl/extractor/__init__.py b/gallery_dl/extractor/__init__.py index 863089176a..c8721f202b 100644 --- a/gallery_dl/extractor/__init__.py +++ b/gallery_dl/extractor/__init__.py @@ -114,6 +114,7 @@ "patreon", "philomena", "photobucket", + "photos18", "photovogue", "picarto", "piczel", diff --git a/gallery_dl/extractor/photos18.py b/gallery_dl/extractor/photos18.py new file mode 100644 index 0000000000..11415ee6e4 --- /dev/null +++ b/gallery_dl/extractor/photos18.py @@ -0,0 +1,103 @@ +# -*- coding: utf-8 -*- + +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License version 2 as +# published by the Free Software Foundation. + +"""Extractors for https://photos18.com""" + +from .common import Extractor, Message +from .. import text + + +BASE_PATTERN = r"(?:https?://)(?:www\.)?photos18\.com" +SORTING_METHODS = "(created|hits|views|score|likes)" + + +class Photos18Extractor(Extractor): + """Base class for Photos18 extractors""" + category = "photos18" + directory_fmt = ("{category}", "{category_name}") + filename_fmt = "{category}_{title}_{num:>02}.{extension}" + archive_fmt = "{filename}" + root = "https://www.photos18.com" + + def items(self): + for post_id in self.posts(): + url = self.root + "/v/" + post_id + page = self.request(url).text + extr = text.extract_from(page) + + title = text.unescape(extr( + '', '<')) + date = text.parse_datetime(extr('"datePublished":"', '"')) + + urls = [] + while True: + url = text.unescape(extr( + '
Date: Sat, 20 Jan 2024 01:50:17 +1100 Subject: [PATCH 2/3] [photos18] fix getting post titles --- gallery_dl/extractor/photos18.py | 4 ++-- test/results/photos18.py | 15 +++++++++++++++ 2 files changed, 17 insertions(+), 2 deletions(-) diff --git a/gallery_dl/extractor/photos18.py b/gallery_dl/extractor/photos18.py index 11415ee6e4..043b702557 100644 --- a/gallery_dl/extractor/photos18.py +++ b/gallery_dl/extractor/photos18.py @@ -28,12 +28,12 @@ def items(self): page = self.request(url).text extr = text.extract_from(page) - title = text.unescape(extr( - '', '<')) date = text.parse_datetime(extr('"datePublished":"', '"')) + title = text.unescape(extr( + '

', '

')).strip() urls = [] while True: diff --git a/test/results/photos18.py b/test/results/photos18.py index 5f208aa70b..8b91a218c2 100644 --- a/test/results/photos18.py +++ b/test/results/photos18.py @@ -14,6 +14,21 @@ "#class" : photos18.Photos18AlbumExtractor, "#count" : 12, "#sha1_url": "2f9442f34f31bafdd6d57f4954674348b38ef284", + + "title" : "Peachmilky Nanami, Mami Rent-a-Girlfriend", + "category_id" : 8, + "category_name": "COSPLAY", +}, + +{ + "#url" : "https://www.photos18.com/v/jMMn2", + "#category": ("", "photos18", "album"), + "#class" : photos18.Photos18AlbumExtractor, + "#count" : 36, + + "title" : "姐姐說兒童節幫我\"轉大人\"Kenna James - Stepbro Accidentally Cums In Stepsister's Pussy", + "category_id" : 1, + "category_name": "歐美寫真", }, ) From 3f82852650e1106eee2aef5ad2a3bb1e2300373c Mon Sep 17 00:00:00 2001 From: blankie Date: Sat, 20 Jan 2024 02:03:02 +1100 Subject: [PATCH 3/3] [photos18] do pagination Photos18ListExtractor bails if there are no posts within a page, since (as of this commit), some pages might have improper navigation bars (for example, https://www.photos18.com/cat/2?page=75 still has a link to the next page despite the list of posts being far outside that range) --- gallery_dl/extractor/photos18.py | 90 +++++++++++++++++--------------- test/results/photos18.py | 8 +++ 2 files changed, 57 insertions(+), 41 deletions(-) diff --git a/gallery_dl/extractor/photos18.py b/gallery_dl/extractor/photos18.py index 043b702557..b562b13ec0 100644 --- a/gallery_dl/extractor/photos18.py +++ b/gallery_dl/extractor/photos18.py @@ -22,41 +22,6 @@ class Photos18Extractor(Extractor): archive_fmt = "{filename}" root = "https://www.photos18.com" - def items(self): - for post_id in self.posts(): - url = self.root + "/v/" + post_id - page = self.request(url).text - extr = text.extract_from(page) - - category_id = int(extr( - '