From 1770c31e633d24bb4a6da23f599d900f70d77f80 Mon Sep 17 00:00:00 2001 From: jsouthgb Date: Tue, 5 Dec 2023 07:07:06 -0500 Subject: [PATCH 1/4] [urlgalleries] add support --- docs/supportedsites.md | 6 ++++ gallery_dl/extractor/__init__.py | 1 + gallery_dl/extractor/urlgalleries.py | 43 ++++++++++++++++++++++++++++ 3 files changed, 50 insertions(+) create mode 100644 gallery_dl/extractor/urlgalleries.py diff --git a/docs/supportedsites.md b/docs/supportedsites.md index 8f54b157..003dcaa9 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -889,6 +889,12 @@ Consider all sites to be NSFW unless otherwise known. Files + + Urlgalleries + https://urlgalleries.net/ + Galleries + + Vipergirls https://vipergirls.to/ diff --git a/gallery_dl/extractor/__init__.py b/gallery_dl/extractor/__init__.py index 72239d5c..d074de22 100644 --- a/gallery_dl/extractor/__init__.py +++ b/gallery_dl/extractor/__init__.py @@ -155,6 +155,7 @@ modules = [ "tumblrgallery", "twibooru", "twitter", + "urlgalleries", "unsplash", "uploadir", "urlshortener", diff --git a/gallery_dl/extractor/urlgalleries.py b/gallery_dl/extractor/urlgalleries.py new file mode 100644 index 00000000..ae2b7205 --- /dev/null +++ b/gallery_dl/extractor/urlgalleries.py @@ -0,0 +1,43 @@ +# -*- coding: utf-8 -*- + +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License version 2 as +# published by the Free Software Foundation. + +"""Extractors for https://urlgalleries.net/""" + +from .common import GalleryExtractor +from .. import text + + +class UrlgalleriesExtractor(GalleryExtractor): + """Base class for Urlgalleries extractors""" + category = "urlgalleries" + root = "urlgalleries.net" + directory_fmt = ("{category}", "{title}") + pattern = r"(?:https?://)([^/?#]+)?\.urlgalleries\.net/([^/?#]+)/([^/?#]+)" + example = "https://blog.urlgalleries.net/gallery-1234567/a-title--1234" + + def __init__(self, match): + self.blog = match.group(1) + self.gallery_id = match.group(2) + self.title = match.group(3) + url = "{}.urlgalleries.net/{}/{}&a=10000".format( + self.blog, self.gallery_id, self.title) + GalleryExtractor.__init__(self, match, text.ensure_http_scheme(url)) + + def images(self, page): + extr = text.extr(page, 'id="wtf"', "") + url = "{}{{}}".format(self.root).format + return [ + (text.ensure_http_scheme(url(i)), None) + for i in text.extract_iter(extr, "href='", "'") + ] + + def metadata(self, page): + date = text.extr( + page, "float:left;'> ", '').split(" | ")[-1] + return { + 'title': self.title, + 'date': text.parse_datetime(date, format='%B %d, %Y T%H:%M') + } From ecaa0feb5d9fc39d4b26aefa211d250e817f90fd Mon Sep 17 00:00:00 2001 From: jsouthgb Date: Tue, 5 Dec 2023 07:08:11 -0500 Subject: [PATCH 2/4] [urlgalleries] add support --- gallery_dl/extractor/urlgalleries.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gallery_dl/extractor/urlgalleries.py b/gallery_dl/extractor/urlgalleries.py index ae2b7205..aa6e7db5 100644 --- a/gallery_dl/extractor/urlgalleries.py +++ b/gallery_dl/extractor/urlgalleries.py @@ -10,7 +10,7 @@ from .common import GalleryExtractor from .. import text -class UrlgalleriesExtractor(GalleryExtractor): +class UrlgalleriesGalleryExtractor(GalleryExtractor): """Base class for Urlgalleries extractors""" category = "urlgalleries" root = "urlgalleries.net" From c29ae9af0830367d9b1f9063018d00654c634c04 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mike=20F=C3=A4hrmann?= Date: Fri, 8 Dec 2023 22:43:56 +0100 Subject: [PATCH 3/4] [urlgalleries] simplify + resolve redirects --- gallery_dl/extractor/urlgalleries.py | 54 +++++++++++++++++----------- 1 file changed, 33 insertions(+), 21 deletions(-) diff --git a/gallery_dl/extractor/urlgalleries.py b/gallery_dl/extractor/urlgalleries.py index aa6e7db5..b21709a9 100644 --- a/gallery_dl/extractor/urlgalleries.py +++ b/gallery_dl/extractor/urlgalleries.py @@ -6,7 +6,7 @@ """Extractors for https://urlgalleries.net/""" -from .common import GalleryExtractor +from .common import GalleryExtractor, Message from .. import text @@ -14,30 +14,42 @@ class UrlgalleriesGalleryExtractor(GalleryExtractor): """Base class for Urlgalleries extractors""" category = "urlgalleries" root = "urlgalleries.net" - directory_fmt = ("{category}", "{title}") - pattern = r"(?:https?://)([^/?#]+)?\.urlgalleries\.net/([^/?#]+)/([^/?#]+)" - example = "https://blog.urlgalleries.net/gallery-1234567/a-title--1234" + request_interval = (0.5, 1.0) + pattern = r"(?:https?://)(?:(\w+)\.)?urlgalleries\.net/(?:[\w-]+-)?(\d+)" + example = "https://blog.urlgalleries.net/gallery-12345/TITLE" def __init__(self, match): - self.blog = match.group(1) - self.gallery_id = match.group(2) - self.title = match.group(3) - url = "{}.urlgalleries.net/{}/{}&a=10000".format( - self.blog, self.gallery_id, self.title) - GalleryExtractor.__init__(self, match, text.ensure_http_scheme(url)) + self.blog, self.gallery_id = match.groups() + url = "https://{}.urlgalleries.net/porn-gallery-{}/?a=10000".format( + self.blog, self.gallery_id) + GalleryExtractor.__init__(self, match, url) - def images(self, page): - extr = text.extr(page, 'id="wtf"', "") - url = "{}{{}}".format(self.root).format - return [ - (text.ensure_http_scheme(url(i)), None) - for i in text.extract_iter(extr, "href='", "'") - ] + def items(self): + page = self.request(self.gallery_url).text + imgs = self.images(page) + data = self.metadata(page) + data["count"] = len(imgs) + del page + + root = "https://{}.urlgalleries.net".format(self.blog) + yield Message.Directory, data + for data["num"], img in enumerate(imgs, 1): + response = self.request( + root + img, method="HEAD", allow_redirects=False) + yield Message.Queue, response.headers["Location"], data def metadata(self, page): - date = text.extr( - page, "float:left;'> ", '').split(" | ")[-1] + extr = text.extract_from(page) return { - 'title': self.title, - 'date': text.parse_datetime(date, format='%B %d, %Y T%H:%M') + "gallery_id": self.gallery_id, + "_site": extr(' title="', '"'), # site name + "blog" : text.unescape(extr(' title="', '"')), + "_rprt": extr(' title="', '"'), # report button + "title": text.unescape(extr(' title="', '"').strip()), + "date" : text.parse_datetime( + extr(" images in gallery | ", "<"), "%B %d, %Y %H:%M"), } + + def images(self, page): + imgs = text.extr(page, 'id="wtf"', "") + return list(text.extract_iter(imgs, " href='", "'")) From ade93c539776eb0b550ca6ddf6b0f28ad6685e82 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mike=20F=C3=A4hrmann?= Date: Fri, 8 Dec 2023 22:55:16 +0100 Subject: [PATCH 4/4] [urlgalleries] add tests --- test/results/urlgalleries.py | 49 ++++++++++++++++++++++++++++++++++++ 1 file changed, 49 insertions(+) create mode 100644 test/results/urlgalleries.py diff --git a/test/results/urlgalleries.py b/test/results/urlgalleries.py new file mode 100644 index 00000000..88a321e7 --- /dev/null +++ b/test/results/urlgalleries.py @@ -0,0 +1,49 @@ +# -*- coding: utf-8 -*- + +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License version 2 as +# published by the Free Software Foundation. + +from gallery_dl.extractor import urlgalleries + + +__tests__ = ( +{ + "#url" : "https://photos2q.urlgalleries.net/porn-gallery-7851311/clarice-window-8", + "#category": ("", "urlgalleries", "gallery"), + "#class" : urlgalleries.UrlgalleriesGalleryExtractor, + "#range" : "1-3", + "#urls" : ( + "https://fappic.com/x207mqkn2463/4gq1yv.jpg", + "https://fappic.com/q684ua2rp0j9/4gq1xv.jpg", + "https://fappic.com/8vf3n8fgz9po/4gq1ya.jpg", + ), + + "blog" : "photos2q", + "count" : 39, + "date" : "dt:2023-12-08 13:59:00", + "gallery_id": "7851311", + "num" : range(1, 3), + "title" : "Clarice window 8", +}, + +{ + "#url" : "https://dreamer.urlgalleries.net/7645840", + "#category": ("", "urlgalleries", "gallery"), + "#class" : urlgalleries.UrlgalleriesGalleryExtractor, + "#range" : "1-3", + "#urls" : ( + "https://www.fappic.com/vj7up04ny487/AmourAngels-0001.jpg", + "https://www.fappic.com/zfgsmpm36iyv/AmourAngels-0002.jpg", + "https://www.fappic.com/rqpt37rdbwa5/AmourAngels-0003.jpg", + ), + + "blog" : "Dreamer", + "count" : 105, + "date" : "dt:2020-03-10 21:17:00", + "gallery_id": "7645840", + "num" : range(1, 3), + "title" : "Angelika - Rustic Charm - AmourAngels 2016-09-27", +}, + +)