From e1993968721cd343caf6cbc120674d807507da79 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mike=20F=C3=A4hrmann?= Date: Fri, 23 May 2025 21:26:13 +0200 Subject: [PATCH] [common] simplify 'user' extractors by using 'Dispatch' mixin --- gallery_dl/extractor/ao3.py | 8 +--- gallery_dl/extractor/bluesky.py | 8 +--- gallery_dl/extractor/civitai.py | 8 +--- gallery_dl/extractor/common.py | 58 ++++++++++++++++----------- gallery_dl/extractor/deviantart.py | 10 +---- gallery_dl/extractor/furaffinity.py | 11 +---- gallery_dl/extractor/hentaifoundry.py | 8 +--- gallery_dl/extractor/instagram.py | 11 +---- gallery_dl/extractor/newgrounds.py | 8 +--- gallery_dl/extractor/nijie.py | 9 +---- gallery_dl/extractor/pixiv.py | 14 ++----- gallery_dl/extractor/pornhub.py | 14 ++----- gallery_dl/extractor/twitter.py | 23 ++++------- gallery_dl/extractor/vsco.py | 8 +--- gallery_dl/extractor/wallhaven.py | 14 ++----- gallery_dl/extractor/weibo.py | 4 +- 16 files changed, 73 insertions(+), 143 deletions(-) diff --git a/gallery_dl/extractor/ao3.py b/gallery_dl/extractor/ao3.py index d3ab8468..881217e3 100644 --- a/gallery_dl/extractor/ao3.py +++ b/gallery_dl/extractor/ao3.py @@ -8,7 +8,7 @@ """Extractors for https://archiveofourown.org/""" -from .common import Extractor, Message +from .common import Extractor, Message, Dispatch from .. import text, util, exception from ..cache import cache @@ -249,16 +249,12 @@ class Ao3SearchExtractor(Ao3Extractor): example = "https://archiveofourown.org/works/search?work_search[query]=air" -class Ao3UserExtractor(Ao3Extractor): +class Ao3UserExtractor(Dispatch, Ao3Extractor): """Extractor for an AO3 user profile""" - subcategory = "user" pattern = (BASE_PATTERN + r"/users/([^/?#]+(?:/pseuds/[^/?#]+)?)" r"(?:/profile)?/?(?:$|\?|#)") example = "https://archiveofourown.org/users/USER" - def initialize(self): - pass - def items(self): base = "{}/users/{}/".format(self.root, self.groups[0]) return self._dispatch_extractors(( diff --git a/gallery_dl/extractor/bluesky.py b/gallery_dl/extractor/bluesky.py index 6f4abd51..b2cc9c82 100644 --- a/gallery_dl/extractor/bluesky.py +++ b/gallery_dl/extractor/bluesky.py @@ -8,7 +8,7 @@ """Extractors for https://bsky.app/""" -from .common import Extractor, Message +from .common import Extractor, Message, Dispatch from .. import text, util, exception from ..cache import cache, memcache @@ -210,14 +210,10 @@ class BlueskyExtractor(Extractor): },) -class BlueskyUserExtractor(BlueskyExtractor): - subcategory = "user" +class BlueskyUserExtractor(Dispatch, BlueskyExtractor): pattern = USER_PATTERN + r"$" example = "https://bsky.app/profile/HANDLE" - def initialize(self): - pass - def items(self): base = "{}/profile/{}/".format(self.root, self.groups[0]) default = ("posts" if self.config("quoted", False) or diff --git a/gallery_dl/extractor/civitai.py b/gallery_dl/extractor/civitai.py index 56fe851c..b40065b9 100644 --- a/gallery_dl/extractor/civitai.py +++ b/gallery_dl/extractor/civitai.py @@ -8,7 +8,7 @@ """Extractors for https://www.civitai.com/""" -from .common import Extractor, Message +from .common import Extractor, Message, Dispatch from .. import text, util, exception from ..cache import memcache import itertools @@ -396,14 +396,10 @@ class CivitaiImagesExtractor(CivitaiExtractor): return self.api.images(params) -class CivitaiUserExtractor(CivitaiExtractor): - subcategory = "user" +class CivitaiUserExtractor(Dispatch, CivitaiExtractor): pattern = USER_PATTERN + r"/?(?:$|\?|#)" example = "https://civitai.com/user/USER" - def initialize(self): - pass - def items(self): base = "{}/user/{}/".format(self.root, self.groups[0]) return self._dispatch_extractors(( diff --git a/gallery_dl/extractor/common.py b/gallery_dl/extractor/common.py index c430ec10..808a1043 100644 --- a/gallery_dl/extractor/common.py +++ b/gallery_dl/extractor/common.py @@ -616,29 +616,6 @@ class Extractor(): fmt = self.config("date-format", "%Y-%m-%dT%H:%M:%S") return get("date-min", dmin), get("date-max", dmax) - def _dispatch_extractors(self, extractor_data, default=()): - """ """ - extractors = { - data[0].subcategory: data - for data in extractor_data - } - - include = self.config("include", default) or () - if include == "all": - include = extractors - elif isinstance(include, str): - include = include.replace(" ", "").split(",") - - result = [(Message.Version, 1)] - for category in include: - try: - extr, url = extractors[category] - except KeyError: - self.log.warning("Invalid include '%s'", category) - else: - result.append((Message.Queue, url, {"_extractor": extr})) - return iter(result) - @classmethod def _dump(cls, obj): util.dump_json(obj, ensure_ascii=False, indent=2) @@ -796,6 +773,41 @@ class MangaExtractor(Extractor): """Return a list of all (chapter-url, metadata)-tuples""" +class Dispatch(): + subcategory = "user" + cookies_domain = None + finalize = Extractor.finalize + skip = Extractor.skip + + def __iter__(self): + return self.items() + + def initialize(self): + pass + + def _dispatch_extractors(self, extractor_data, default=()): + extractors = { + data[0].subcategory: data + for data in extractor_data + } + + include = self.config("include", default) or () + if include == "all": + include = extractors + elif isinstance(include, str): + include = include.replace(" ", "").split(",") + + result = [(Message.Version, 1)] + for category in include: + try: + extr, url = extractors[category] + except KeyError: + self.log.warning("Invalid include '%s'", category) + else: + result.append((Message.Queue, url, {"_extractor": extr})) + return iter(result) + + class AsynchronousMixin(): """Run info extraction in a separate thread""" diff --git a/gallery_dl/extractor/deviantart.py b/gallery_dl/extractor/deviantart.py index fc0e436d..99c2e990 100644 --- a/gallery_dl/extractor/deviantart.py +++ b/gallery_dl/extractor/deviantart.py @@ -8,7 +8,7 @@ """Extractors for https://www.deviantart.com/""" -from .common import Extractor, Message +from .common import Extractor, Message, Dispatch from .. import text, util, exception from ..cache import cache, memcache import collections @@ -873,17 +873,11 @@ x2="45.4107524%" y2="71.4898596%" id="app-root-3">\ .replace("\\\\", "\\") -class DeviantartUserExtractor(DeviantartExtractor): +class DeviantartUserExtractor(Dispatch, DeviantartExtractor): """Extractor for an artist's user profile""" - subcategory = "user" pattern = BASE_PATTERN + r"/?$" example = "https://www.deviantart.com/USER" - def initialize(self): - pass - - skip = Extractor.skip - def items(self): base = "{}/{}/".format(self.root, self.user) return self._dispatch_extractors(( diff --git a/gallery_dl/extractor/furaffinity.py b/gallery_dl/extractor/furaffinity.py index a022c7c0..24856dcc 100644 --- a/gallery_dl/extractor/furaffinity.py +++ b/gallery_dl/extractor/furaffinity.py @@ -8,7 +8,7 @@ """Extractors for https://www.furaffinity.net/""" -from .common import Extractor, Message +from .common import Extractor, Message, Dispatch from .. import text, util BASE_PATTERN = r"(?:https?://)?(?:www\.|sfw\.)?(?:f[ux]|f?xfu)raffinity\.net" @@ -321,18 +321,11 @@ class FuraffinityPostExtractor(FuraffinityExtractor): return (post_id,) -class FuraffinityUserExtractor(FuraffinityExtractor): +class FuraffinityUserExtractor(Dispatch, FuraffinityExtractor): """Extractor for furaffinity user profiles""" - subcategory = "user" - cookies_domain = None pattern = BASE_PATTERN + r"/user/([^/?#]+)" example = "https://www.furaffinity.net/user/USER/" - def initialize(self): - pass - - skip = Extractor.skip - def items(self): base = "{}/{{}}/{}/".format(self.root, self.user) return self._dispatch_extractors(( diff --git a/gallery_dl/extractor/hentaifoundry.py b/gallery_dl/extractor/hentaifoundry.py index 7e128a4c..d746fac6 100644 --- a/gallery_dl/extractor/hentaifoundry.py +++ b/gallery_dl/extractor/hentaifoundry.py @@ -8,7 +8,7 @@ """Extractors for https://www.hentai-foundry.com/""" -from .common import Extractor, Message +from .common import Extractor, Message, Dispatch from .. import text, util BASE_PATTERN = r"(https?://)?(?:www\.)?hentai-foundry\.com" @@ -192,15 +192,11 @@ class HentaifoundryExtractor(Extractor): self.request(url, method="POST", data=data) -class HentaifoundryUserExtractor(HentaifoundryExtractor): +class HentaifoundryUserExtractor(Dispatch, HentaifoundryExtractor): """Extractor for a hentaifoundry user profile""" - subcategory = "user" pattern = BASE_PATTERN + r"/user/([^/?#]+)/profile" example = "https://www.hentai-foundry.com/user/USER/profile" - def initialize(self): - pass - def items(self): root = self.root user = "/user/" + self.user diff --git a/gallery_dl/extractor/instagram.py b/gallery_dl/extractor/instagram.py index 624bba2d..21bd0594 100644 --- a/gallery_dl/extractor/instagram.py +++ b/gallery_dl/extractor/instagram.py @@ -9,7 +9,7 @@ """Extractors for https://www.instagram.com/""" -from .common import Extractor, Message +from .common import Extractor, Message, Dispatch from .. import text, util, exception from ..cache import cache, memcache import itertools @@ -430,18 +430,11 @@ class InstagramExtractor(Extractor): user[key] = 0 -class InstagramUserExtractor(InstagramExtractor): +class InstagramUserExtractor(Dispatch, InstagramExtractor): """Extractor for an Instagram user profile""" - subcategory = "user" pattern = USER_PATTERN + r"/?(?:$|[?#])" example = "https://www.instagram.com/USER/" - def initialize(self): - pass - - def finalize(self): - pass - def items(self): base = "{}/{}/".format(self.root, self.item) stories = "{}/stories/{}/".format(self.root, self.item) diff --git a/gallery_dl/extractor/newgrounds.py b/gallery_dl/extractor/newgrounds.py index 648f7df8..b6533048 100644 --- a/gallery_dl/extractor/newgrounds.py +++ b/gallery_dl/extractor/newgrounds.py @@ -8,7 +8,7 @@ """Extractors for https://www.newgrounds.com/""" -from .common import Extractor, Message +from .common import Extractor, Message, Dispatch from .. import text, util, exception from ..cache import cache import itertools @@ -450,15 +450,11 @@ class NewgroundsGamesExtractor(NewgroundsExtractor): example = "https://USER.newgrounds.com/games" -class NewgroundsUserExtractor(NewgroundsExtractor): +class NewgroundsUserExtractor(Dispatch, NewgroundsExtractor): """Extractor for a newgrounds user profile""" - subcategory = "user" pattern = USER_PATTERN + r"/?$" example = "https://USER.newgrounds.com" - def initialize(self): - pass - def items(self): base = self.user_root + "/" return self._dispatch_extractors(( diff --git a/gallery_dl/extractor/nijie.py b/gallery_dl/extractor/nijie.py index b01c591f..658cb759 100644 --- a/gallery_dl/extractor/nijie.py +++ b/gallery_dl/extractor/nijie.py @@ -8,7 +8,7 @@ """Extractors for nijie instances""" -from .common import BaseExtractor, Message, AsynchronousMixin +from .common import BaseExtractor, Message, Dispatch, AsynchronousMixin from .. import text, exception from ..cache import cache @@ -177,16 +177,11 @@ BASE_PATTERN = NijieExtractor.update({ }) -class NijieUserExtractor(NijieExtractor): +class NijieUserExtractor(Dispatch, NijieExtractor): """Extractor for nijie user profiles""" - subcategory = "user" - cookies_domain = None pattern = BASE_PATTERN + r"/members\.php\?id=(\d+)" example = "https://nijie.info/members.php?id=12345" - def initialize(self): - pass - def items(self): fmt = "{}/{{}}.php?id={}".format(self.root, self.user_id).format return self._dispatch_extractors(( diff --git a/gallery_dl/extractor/pixiv.py b/gallery_dl/extractor/pixiv.py index 73c5c1c8..c12d46d1 100644 --- a/gallery_dl/extractor/pixiv.py +++ b/gallery_dl/extractor/pixiv.py @@ -8,7 +8,7 @@ """Extractors for https://www.pixiv.net/""" -from .common import Extractor, Message +from .common import Extractor, Message, Dispatch from .. import text, util, exception from ..cache import cache, memcache from datetime import datetime, timedelta @@ -367,23 +367,15 @@ class PixivExtractor(Extractor): return {} -class PixivUserExtractor(PixivExtractor): +class PixivUserExtractor(Dispatch, PixivExtractor): """Extractor for a pixiv user profile""" - subcategory = "user" pattern = (BASE_PATTERN + r"/(?:" r"(?:en/)?u(?:sers)?/|member\.php\?id=|(?:mypage\.php)?#id=" r")(\d+)(?:$|[?#])") example = "https://www.pixiv.net/en/users/12345" - def __init__(self, match): - PixivExtractor.__init__(self, match) - self.user_id = match.group(1) - - def initialize(self): - pass - def items(self): - base = "{}/users/{}/".format(self.root, self.user_id) + base = "{}/users/{}/".format(self.root, self.groups[0]) return self._dispatch_extractors(( (PixivAvatarExtractor , base + "avatar"), (PixivBackgroundExtractor , base + "background"), diff --git a/gallery_dl/extractor/pornhub.py b/gallery_dl/extractor/pornhub.py index 9800eb23..bae21194 100644 --- a/gallery_dl/extractor/pornhub.py +++ b/gallery_dl/extractor/pornhub.py @@ -8,7 +8,7 @@ """Extractors for https://www.pornhub.com/""" -from .common import Extractor, Message +from .common import Extractor, Message, Dispatch from .. import text, exception BASE_PATTERN = r"(?:https?://)?(?:[\w-]+\.)?pornhub\.com" @@ -164,21 +164,13 @@ class PornhubGifExtractor(PornhubExtractor): yield Message.Url, gif["url"], text.nameext_from_url(gif["url"], gif) -class PornhubUserExtractor(PornhubExtractor): +class PornhubUserExtractor(Dispatch, PornhubExtractor): """Extractor for a pornhub user""" - subcategory = "user" pattern = BASE_PATTERN + r"/((?:users|model|pornstar)/[^/?#]+)/?$" example = "https://www.pornhub.com/model/USER" - def __init__(self, match): - PornhubExtractor.__init__(self, match) - self.user = match.group(1) - - def initialize(self): - pass - def items(self): - base = "{}/{}/".format(self.root, self.user) + base = "{}/{}/".format(self.root, self.groups[0]) return self._dispatch_extractors(( (PornhubPhotosExtractor, base + "photos"), (PornhubGifsExtractor , base + "gifs"), diff --git a/gallery_dl/extractor/twitter.py b/gallery_dl/extractor/twitter.py index 896bf288..4aa01d5f 100644 --- a/gallery_dl/extractor/twitter.py +++ b/gallery_dl/extractor/twitter.py @@ -8,7 +8,7 @@ """Extractors for https://x.com/""" -from .common import Extractor, Message +from .common import Extractor, Message, Dispatch from .. import text, util, exception from ..cache import cache, memcache import itertools @@ -577,27 +577,18 @@ class TwitterExtractor(Extractor): return self.cookies_update(_login_impl(self, username, password)) -class TwitterUserExtractor(TwitterExtractor): +class TwitterUserExtractor(Dispatch, TwitterExtractor): """Extractor for a Twitter user""" - subcategory = "user" pattern = (BASE_PATTERN + r"/(?!search)(?:([^/?#]+)/?(?:$|[?#])" r"|i(?:/user/|ntent/user\?user_id=)(\d+))") example = "https://x.com/USER" - def __init__(self, match): - TwitterExtractor.__init__(self, match) - user_id = match.group(2) - if user_id: - self.user = "id:" + user_id - - def initialize(self): - pass - - def finalize(self): - pass - def items(self): - base = "{}/{}/".format(self.root, self.user) + user, user_id = self.groups + if user_id is not None: + user = "id:" + user_id + + base = "{}/{}/".format(self.root, user) return self._dispatch_extractors(( (TwitterInfoExtractor , base + "info"), (TwitterAvatarExtractor , base + "photo"), diff --git a/gallery_dl/extractor/vsco.py b/gallery_dl/extractor/vsco.py index 524bd81a..e141a488 100644 --- a/gallery_dl/extractor/vsco.py +++ b/gallery_dl/extractor/vsco.py @@ -8,7 +8,7 @@ """Extractors for https://vsco.co/""" -from .common import Extractor, Message +from .common import Extractor, Message, Dispatch from .. import text, util BASE_PATTERN = r"(?:https?://)?(?:www\.)?vsco\.co" @@ -132,15 +132,11 @@ class VscoExtractor(Extractor): return media -class VscoUserExtractor(VscoExtractor): +class VscoUserExtractor(Dispatch, VscoExtractor): """Extractor for a vsco user profile""" - subcategory = "user" pattern = USER_PATTERN + r"/?$" example = "https://vsco.co/USER" - def initialize(self): - pass - def items(self): base = "{}/{}/".format(self.root, self.user) return self._dispatch_extractors(( diff --git a/gallery_dl/extractor/wallhaven.py b/gallery_dl/extractor/wallhaven.py index e5b764ab..36febd23 100644 --- a/gallery_dl/extractor/wallhaven.py +++ b/gallery_dl/extractor/wallhaven.py @@ -8,7 +8,7 @@ """Extractors for https://wallhaven.cc/""" -from .common import Extractor, Message +from .common import Extractor, Message, Dispatch from .. import text, exception @@ -88,21 +88,13 @@ class WallhavenCollectionExtractor(WallhavenExtractor): return {"username": self.username, "collection_id": self.collection_id} -class WallhavenUserExtractor(WallhavenExtractor): +class WallhavenUserExtractor(Dispatch, WallhavenExtractor): """Extractor for a wallhaven user""" - subcategory = "user" pattern = r"(?:https?://)?wallhaven\.cc/user/([^/?#]+)/?$" example = "https://wallhaven.cc/user/USER" - def __init__(self, match): - WallhavenExtractor.__init__(self, match) - self.username = match.group(1) - - def initialize(self): - pass - def items(self): - base = "{}/user/{}/".format(self.root, self.username) + base = "{}/user/{}/".format(self.root, self.groups[0]) return self._dispatch_extractors(( (WallhavenUploadsExtractor , base + "uploads"), (WallhavenCollectionsExtractor, base + "favorites"), diff --git a/gallery_dl/extractor/weibo.py b/gallery_dl/extractor/weibo.py index 3ed5a068..83a9c913 100644 --- a/gallery_dl/extractor/weibo.py +++ b/gallery_dl/extractor/weibo.py @@ -8,7 +8,7 @@ """Extractors for https://www.weibo.com/""" -from .common import Extractor, Message +from .common import Extractor, Message, Dispatch from .. import text, util, exception from ..cache import cache import random @@ -258,7 +258,7 @@ class WeiboUserExtractor(WeiboExtractor): def items(self): base = "{}/u/{}?tabtype=".format(self.root, self._user_id()) - return self._dispatch_extractors(( + return Dispatch._dispatch_extractors(self, ( (WeiboHomeExtractor , base + "home"), (WeiboFeedExtractor , base + "feed"), (WeiboVideosExtractor , base + "video"),