From 38e66940c1b3eccb67113d583fc6a4981f147d0c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mike=20F=C3=A4hrmann?= Date: Wed, 3 Mar 2021 15:37:56 +0100 Subject: [PATCH] [tumblrgallery] simplify --- docs/supportedsites.rst | 2 + gallery_dl/extractor/tumblrgallery.py | 69 ++++++++------------------- 2 files changed, 22 insertions(+), 49 deletions(-) diff --git a/docs/supportedsites.rst b/docs/supportedsites.rst index 10a381d5..4c7c00e0 100644 --- a/docs/supportedsites.rst +++ b/docs/supportedsites.rst @@ -97,6 +97,7 @@ Mangoxo https://www.mangoxo.com/ Albums, Channels mastodon.social https://mastodon.social/ Images from Statuses, User Profiles `OAuth `__ My Hentai Gallery https://myhentaigallery.com/ Galleries Naver https://blog.naver.com/ Blogs, Posts +NaverWebtoon https://comic.naver.com/ Comics, Episodes Newgrounds https://www.newgrounds.com/ |newgrounds-C| Supported Ngomik http://ngomik.in/ Chapters nhentai https://nhentai.net/ Galleries, Search Results @@ -146,6 +147,7 @@ The /b/ Archive https://thebarchive.com/ Boards, Search Results, Thecollection https://the-collection.booru.org/ Posts, Tag Searches Tsumino https://www.tsumino.com/ Galleries, Search Results Supported Tumblr https://www.tumblr.com/ Likes, Posts, Tag Searches, User Profiles `OAuth `__ +TumblrGallery https://tumblrgallery.xyz/ Posts, Search Results, Tumblrblogs Turboimagehost https://www.turboimagehost.com/ individual Images Twitter https://twitter.com/ |twitter-C| Supported Unsplash https://unsplash.com/ |unsplash-C| diff --git a/gallery_dl/extractor/tumblrgallery.py b/gallery_dl/extractor/tumblrgallery.py index c9ef16a7..849dc49c 100644 --- a/gallery_dl/extractor/tumblrgallery.py +++ b/gallery_dl/extractor/tumblrgallery.py @@ -1,50 +1,36 @@ # -*- coding: utf-8 -*- -# Copyright 2021 Mike Fährmann -# # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License version 2 as # published by the Free Software Foundation. -"""Extract images from https://tumblrgallery.xyz/""" +"""Extractors for https://tumblrgallery.xyz/""" from .common import GalleryExtractor from .. import text - -BASE_PATTERN = r"(?:https?://)tumblrgallery\.xyz" +BASE_PATTERN = r"(?:https?://)?tumblrgallery\.xyz" -class TumblrgalleryGalleryExtractor(GalleryExtractor): +class TumblrgalleryExtractor(GalleryExtractor): """Base class for tumblrgallery extractors""" category = "tumblrgallery" - cookiedomain = None - - def __init__(self, match): - self.root = "https://tumblrgallery.xyz" - GalleryExtractor.__init__(self, match) - - -class TumblrgalleryTumblrblogExtractor(TumblrgalleryGalleryExtractor): - """Extractor for Tumblrblog on tumblrgallery.xyz""" - subcategory = "tumblrblog" - pattern = BASE_PATTERN + r"(/tumblrblog/gallery/(\d+).html)" - test = ( - "https://tumblrgallery.xyz/tumblrblog/gallery/103975.html", { - "pattern": r"/tumblrblog/gallery/103975.html" - r"103975", - } - ) - filename_fmt = "{category}_{gallery_id}_{num:>03}_{id}.{extension}" directory_fmt = ("{category}", "{gallery_id} {title}") + root = "https://tumblrgallery.xyz" + + +class TumblrgalleryTumblrblogExtractor(TumblrgalleryExtractor): + """Extractor for Tumblrblog on tumblrgallery.xyz""" + subcategory = "tumblrblog" + pattern = BASE_PATTERN + r"(/tumblrblog/gallery/(\d+)\.html)" + test = ("https://tumblrgallery.xyz/tumblrblog/gallery/103975.html",) def __init__(self, match): - TumblrgalleryGalleryExtractor.__init__(self, match) + TumblrgalleryExtractor.__init__(self, match) self.gallery_id = text.parse_int(match.group(2)) def metadata(self, page): - """Collect metadata for extractor-job""" return { "title" : text.unescape(text.extract(page, "

", "

"))[0], "gallery_id": self.gallery_id, @@ -76,26 +62,17 @@ class TumblrgalleryTumblrblogExtractor(TumblrgalleryGalleryExtractor): } -class TumblrgalleryPostExtractor(TumblrgalleryGalleryExtractor): +class TumblrgalleryPostExtractor(TumblrgalleryExtractor): """Extractor for Posts on tumblrgallery.xyz""" subcategory = "post" - pattern = BASE_PATTERN + r"(/post/(\d+).html)" - test = ( - "https://tumblrgallery.xyz/post/405674.html", { - "pattern": r"/post/405674.html" - r"405674", - } - ) - - filename_fmt = "{category}_{gallery_id}_{num:>03}_{id}.{extension}" - directory_fmt = ("{category}", "{gallery_id} {title}") + pattern = BASE_PATTERN + r"(/post/(\d+)\.html)" + test = ("https://tumblrgallery.xyz/post/405674.html",) def __init__(self, match): - TumblrgalleryGalleryExtractor.__init__(self, match) + TumblrgalleryExtractor.__init__(self, match) self.gallery_id = text.parse_int(match.group(2)) def metadata(self, page): - """Collect metadata for extractor-job""" return { "title" : text.remove_html( text.unescape(text.extract(page, "", "")[0]) @@ -117,25 +94,19 @@ class TumblrgalleryPostExtractor(TumblrgalleryGalleryExtractor): } -class TumblrgallerySearchExtractor(TumblrgalleryGalleryExtractor): +class TumblrgallerySearchExtractor(TumblrgalleryExtractor): """Extractor for Search result on tumblrgallery.xyz""" subcategory = "search" - pattern = BASE_PATTERN + r"(/s\.php\?q=(.*))" - test = ( - "https://tumblrgallery.xyz/s.php?q=everyday-life", { - "pattern": r"everyday-life", - } - ) - filename_fmt = "{category}_{num:>03}_{gallery_id}_{id}_{title}.{extension}" directory_fmt = ("{category}", "{search_term}") + pattern = BASE_PATTERN + r"(/s\.php\?q=([^&#]+))" + test = ("https://tumblrgallery.xyz/s.php?q=everyday-life",) def __init__(self, match): + TumblrgalleryExtractor.__init__(self, match) self.search_term = match.group(2) - TumblrgalleryGalleryExtractor.__init__(self, match) def metadata(self, page): - """Collect metadata for extractor-job""" return { "search_term": self.search_term, }