[common] simplify 'user' extractors by using 'Dispatch' mixin

This commit is contained in:
Mike Fährmann
2025-05-23 21:26:13 +02:00
parent c3e8af945d
commit e199396872
16 changed files with 73 additions and 143 deletions

View File

@@ -8,7 +8,7 @@
"""Extractors for https://archiveofourown.org/"""
from .common import Extractor, Message
from .common import Extractor, Message, Dispatch
from .. import text, util, exception
from ..cache import cache
@@ -249,16 +249,12 @@ class Ao3SearchExtractor(Ao3Extractor):
example = "https://archiveofourown.org/works/search?work_search[query]=air"
class Ao3UserExtractor(Ao3Extractor):
class Ao3UserExtractor(Dispatch, Ao3Extractor):
"""Extractor for an AO3 user profile"""
subcategory = "user"
pattern = (BASE_PATTERN + r"/users/([^/?#]+(?:/pseuds/[^/?#]+)?)"
r"(?:/profile)?/?(?:$|\?|#)")
example = "https://archiveofourown.org/users/USER"
def initialize(self):
pass
def items(self):
base = "{}/users/{}/".format(self.root, self.groups[0])
return self._dispatch_extractors((

View File

@@ -8,7 +8,7 @@
"""Extractors for https://bsky.app/"""
from .common import Extractor, Message
from .common import Extractor, Message, Dispatch
from .. import text, util, exception
from ..cache import cache, memcache
@@ -210,14 +210,10 @@ class BlueskyExtractor(Extractor):
},)
class BlueskyUserExtractor(BlueskyExtractor):
subcategory = "user"
class BlueskyUserExtractor(Dispatch, BlueskyExtractor):
pattern = USER_PATTERN + r"$"
example = "https://bsky.app/profile/HANDLE"
def initialize(self):
pass
def items(self):
base = "{}/profile/{}/".format(self.root, self.groups[0])
default = ("posts" if self.config("quoted", False) or

View File

@@ -8,7 +8,7 @@
"""Extractors for https://www.civitai.com/"""
from .common import Extractor, Message
from .common import Extractor, Message, Dispatch
from .. import text, util, exception
from ..cache import memcache
import itertools
@@ -396,14 +396,10 @@ class CivitaiImagesExtractor(CivitaiExtractor):
return self.api.images(params)
class CivitaiUserExtractor(CivitaiExtractor):
subcategory = "user"
class CivitaiUserExtractor(Dispatch, CivitaiExtractor):
pattern = USER_PATTERN + r"/?(?:$|\?|#)"
example = "https://civitai.com/user/USER"
def initialize(self):
pass
def items(self):
base = "{}/user/{}/".format(self.root, self.groups[0])
return self._dispatch_extractors((

View File

@@ -616,29 +616,6 @@ class Extractor():
fmt = self.config("date-format", "%Y-%m-%dT%H:%M:%S")
return get("date-min", dmin), get("date-max", dmax)
def _dispatch_extractors(self, extractor_data, default=()):
    """Build the message sequence that queues the selected sub-extractors

    'extractor_data' is an iterable of (extractor class, URL) pairs;
    each pair is registered under the 'subcategory' of its class.

    The selection is read from the 'include' config option, falling
    back to 'default'. It may be the string "all" (every registered
    pair), a comma-separated string (spaces are ignored), or an
    iterable of subcategory names. Names without a registered pair
    only produce a warning.

    Returns an iterator starting with a Version message, followed by
    one Queue message per valid selected name, in selection order.
    """
    # Later pairs replace earlier ones sharing the same subcategory.
    by_subcategory = {}
    for entry in extractor_data:
        by_subcategory[entry[0].subcategory] = entry

    selected = self.config("include", default)
    if not selected:
        selected = ()
    elif selected == "all":
        # Iterating the dict yields every registered subcategory name.
        selected = by_subcategory
    elif isinstance(selected, str):
        selected = selected.replace(" ", "").split(",")

    messages = [(Message.Version, 1)]
    for name in selected:
        if name not in by_subcategory:
            self.log.warning("Invalid include '%s'", name)
            continue
        extr, url = by_subcategory[name]
        messages.append((Message.Queue, url, {"_extractor": extr}))
    return iter(messages)
@classmethod
def _dump(cls, obj):
util.dump_json(obj, ensure_ascii=False, indent=2)
@@ -796,6 +773,41 @@ class MangaExtractor(Extractor):
"""Return a list of all (chapter-url, metadata)-tuples"""
class Dispatch():
    """Mixin for 'user' extractors that only queue other extractors

    Meant to be listed before the site-specific extractor class in the
    bases, so the attributes defined here take precedence over that
    class's own versions.
    """
    subcategory = "user"
    cookies_domain = None
    # Use the base Extractor implementations of these two methods.
    finalize = Extractor.finalize
    skip = Extractor.skip

    def __iter__(self):
        """Iterate over the messages produced by items()"""
        return self.items()

    def initialize(self):
        """Do nothing; no setup is performed for a dispatching extractor"""

    def _dispatch_extractors(self, extractor_data, default=()):
        """Build the message sequence that queues the selected sub-extractors

        'extractor_data' is an iterable of (extractor class, URL) pairs;
        each pair is registered under the 'subcategory' of its class.

        The selection is read from the 'include' config option, falling
        back to 'default'. It may be the string "all" (every registered
        pair), a comma-separated string (spaces are ignored), or an
        iterable of subcategory names. Names without a registered pair
        only produce a warning.

        Returns an iterator starting with a Version message, followed by
        one Queue message per valid selected name, in selection order.
        """
        # Later pairs replace earlier ones sharing the same subcategory.
        known = {}
        for pair in extractor_data:
            known[pair[0].subcategory] = pair

        wanted = self.config("include", default)
        if not wanted:
            wanted = ()
        elif wanted == "all":
            # Iterating the dict yields every registered subcategory name.
            wanted = known
        elif isinstance(wanted, str):
            wanted = wanted.replace(" ", "").split(",")

        queue = [(Message.Version, 1)]
        for subcategory in wanted:
            if subcategory not in known:
                self.log.warning("Invalid include '%s'", subcategory)
                continue
            extr, url = known[subcategory]
            queue.append((Message.Queue, url, {"_extractor": extr}))
        return iter(queue)
class AsynchronousMixin():
"""Run info extraction in a separate thread"""

View File

@@ -8,7 +8,7 @@
"""Extractors for https://www.deviantart.com/"""
from .common import Extractor, Message
from .common import Extractor, Message, Dispatch
from .. import text, util, exception
from ..cache import cache, memcache
import collections
@@ -873,17 +873,11 @@ x2="45.4107524%" y2="71.4898596%" id="app-root-3">\
.replace("\\\\", "\\")
class DeviantartUserExtractor(DeviantartExtractor):
class DeviantartUserExtractor(Dispatch, DeviantartExtractor):
"""Extractor for an artist's user profile"""
subcategory = "user"
pattern = BASE_PATTERN + r"/?$"
example = "https://www.deviantart.com/USER"
def initialize(self):
pass
skip = Extractor.skip
def items(self):
base = "{}/{}/".format(self.root, self.user)
return self._dispatch_extractors((

View File

@@ -8,7 +8,7 @@
"""Extractors for https://www.furaffinity.net/"""
from .common import Extractor, Message
from .common import Extractor, Message, Dispatch
from .. import text, util
BASE_PATTERN = r"(?:https?://)?(?:www\.|sfw\.)?(?:f[ux]|f?xfu)raffinity\.net"
@@ -321,18 +321,11 @@ class FuraffinityPostExtractor(FuraffinityExtractor):
return (post_id,)
class FuraffinityUserExtractor(FuraffinityExtractor):
class FuraffinityUserExtractor(Dispatch, FuraffinityExtractor):
"""Extractor for furaffinity user profiles"""
subcategory = "user"
cookies_domain = None
pattern = BASE_PATTERN + r"/user/([^/?#]+)"
example = "https://www.furaffinity.net/user/USER/"
def initialize(self):
pass
skip = Extractor.skip
def items(self):
base = "{}/{{}}/{}/".format(self.root, self.user)
return self._dispatch_extractors((

View File

@@ -8,7 +8,7 @@
"""Extractors for https://www.hentai-foundry.com/"""
from .common import Extractor, Message
from .common import Extractor, Message, Dispatch
from .. import text, util
BASE_PATTERN = r"(https?://)?(?:www\.)?hentai-foundry\.com"
@@ -192,15 +192,11 @@ class HentaifoundryExtractor(Extractor):
self.request(url, method="POST", data=data)
class HentaifoundryUserExtractor(HentaifoundryExtractor):
class HentaifoundryUserExtractor(Dispatch, HentaifoundryExtractor):
"""Extractor for a hentaifoundry user profile"""
subcategory = "user"
pattern = BASE_PATTERN + r"/user/([^/?#]+)/profile"
example = "https://www.hentai-foundry.com/user/USER/profile"
def initialize(self):
pass
def items(self):
root = self.root
user = "/user/" + self.user

View File

@@ -9,7 +9,7 @@
"""Extractors for https://www.instagram.com/"""
from .common import Extractor, Message
from .common import Extractor, Message, Dispatch
from .. import text, util, exception
from ..cache import cache, memcache
import itertools
@@ -430,18 +430,11 @@ class InstagramExtractor(Extractor):
user[key] = 0
class InstagramUserExtractor(InstagramExtractor):
class InstagramUserExtractor(Dispatch, InstagramExtractor):
"""Extractor for an Instagram user profile"""
subcategory = "user"
pattern = USER_PATTERN + r"/?(?:$|[?#])"
example = "https://www.instagram.com/USER/"
def initialize(self):
pass
def finalize(self):
pass
def items(self):
base = "{}/{}/".format(self.root, self.item)
stories = "{}/stories/{}/".format(self.root, self.item)

View File

@@ -8,7 +8,7 @@
"""Extractors for https://www.newgrounds.com/"""
from .common import Extractor, Message
from .common import Extractor, Message, Dispatch
from .. import text, util, exception
from ..cache import cache
import itertools
@@ -450,15 +450,11 @@ class NewgroundsGamesExtractor(NewgroundsExtractor):
example = "https://USER.newgrounds.com/games"
class NewgroundsUserExtractor(NewgroundsExtractor):
class NewgroundsUserExtractor(Dispatch, NewgroundsExtractor):
"""Extractor for a newgrounds user profile"""
subcategory = "user"
pattern = USER_PATTERN + r"/?$"
example = "https://USER.newgrounds.com"
def initialize(self):
pass
def items(self):
base = self.user_root + "/"
return self._dispatch_extractors((

View File

@@ -8,7 +8,7 @@
"""Extractors for nijie instances"""
from .common import BaseExtractor, Message, AsynchronousMixin
from .common import BaseExtractor, Message, Dispatch, AsynchronousMixin
from .. import text, exception
from ..cache import cache
@@ -177,16 +177,11 @@ BASE_PATTERN = NijieExtractor.update({
})
class NijieUserExtractor(NijieExtractor):
class NijieUserExtractor(Dispatch, NijieExtractor):
"""Extractor for nijie user profiles"""
subcategory = "user"
cookies_domain = None
pattern = BASE_PATTERN + r"/members\.php\?id=(\d+)"
example = "https://nijie.info/members.php?id=12345"
def initialize(self):
pass
def items(self):
fmt = "{}/{{}}.php?id={}".format(self.root, self.user_id).format
return self._dispatch_extractors((

View File

@@ -8,7 +8,7 @@
"""Extractors for https://www.pixiv.net/"""
from .common import Extractor, Message
from .common import Extractor, Message, Dispatch
from .. import text, util, exception
from ..cache import cache, memcache
from datetime import datetime, timedelta
@@ -367,23 +367,15 @@ class PixivExtractor(Extractor):
return {}
class PixivUserExtractor(PixivExtractor):
class PixivUserExtractor(Dispatch, PixivExtractor):
"""Extractor for a pixiv user profile"""
subcategory = "user"
pattern = (BASE_PATTERN + r"/(?:"
r"(?:en/)?u(?:sers)?/|member\.php\?id=|(?:mypage\.php)?#id="
r")(\d+)(?:$|[?#])")
example = "https://www.pixiv.net/en/users/12345"
def __init__(self, match):
PixivExtractor.__init__(self, match)
self.user_id = match.group(1)
def initialize(self):
pass
def items(self):
base = "{}/users/{}/".format(self.root, self.user_id)
base = "{}/users/{}/".format(self.root, self.groups[0])
return self._dispatch_extractors((
(PixivAvatarExtractor , base + "avatar"),
(PixivBackgroundExtractor , base + "background"),

View File

@@ -8,7 +8,7 @@
"""Extractors for https://www.pornhub.com/"""
from .common import Extractor, Message
from .common import Extractor, Message, Dispatch
from .. import text, exception
BASE_PATTERN = r"(?:https?://)?(?:[\w-]+\.)?pornhub\.com"
@@ -164,21 +164,13 @@ class PornhubGifExtractor(PornhubExtractor):
yield Message.Url, gif["url"], text.nameext_from_url(gif["url"], gif)
class PornhubUserExtractor(PornhubExtractor):
class PornhubUserExtractor(Dispatch, PornhubExtractor):
"""Extractor for a pornhub user"""
subcategory = "user"
pattern = BASE_PATTERN + r"/((?:users|model|pornstar)/[^/?#]+)/?$"
example = "https://www.pornhub.com/model/USER"
def __init__(self, match):
PornhubExtractor.__init__(self, match)
self.user = match.group(1)
def initialize(self):
pass
def items(self):
base = "{}/{}/".format(self.root, self.user)
base = "{}/{}/".format(self.root, self.groups[0])
return self._dispatch_extractors((
(PornhubPhotosExtractor, base + "photos"),
(PornhubGifsExtractor , base + "gifs"),

View File

@@ -8,7 +8,7 @@
"""Extractors for https://x.com/"""
from .common import Extractor, Message
from .common import Extractor, Message, Dispatch
from .. import text, util, exception
from ..cache import cache, memcache
import itertools
@@ -577,27 +577,18 @@ class TwitterExtractor(Extractor):
return self.cookies_update(_login_impl(self, username, password))
class TwitterUserExtractor(TwitterExtractor):
class TwitterUserExtractor(Dispatch, TwitterExtractor):
"""Extractor for a Twitter user"""
subcategory = "user"
pattern = (BASE_PATTERN + r"/(?!search)(?:([^/?#]+)/?(?:$|[?#])"
r"|i(?:/user/|ntent/user\?user_id=)(\d+))")
example = "https://x.com/USER"
def __init__(self, match):
TwitterExtractor.__init__(self, match)
user_id = match.group(2)
if user_id:
self.user = "id:" + user_id
def initialize(self):
pass
def finalize(self):
pass
def items(self):
base = "{}/{}/".format(self.root, self.user)
user, user_id = self.groups
if user_id is not None:
user = "id:" + user_id
base = "{}/{}/".format(self.root, user)
return self._dispatch_extractors((
(TwitterInfoExtractor , base + "info"),
(TwitterAvatarExtractor , base + "photo"),

View File

@@ -8,7 +8,7 @@
"""Extractors for https://vsco.co/"""
from .common import Extractor, Message
from .common import Extractor, Message, Dispatch
from .. import text, util
BASE_PATTERN = r"(?:https?://)?(?:www\.)?vsco\.co"
@@ -132,15 +132,11 @@ class VscoExtractor(Extractor):
return media
class VscoUserExtractor(VscoExtractor):
class VscoUserExtractor(Dispatch, VscoExtractor):
"""Extractor for a vsco user profile"""
subcategory = "user"
pattern = USER_PATTERN + r"/?$"
example = "https://vsco.co/USER"
def initialize(self):
pass
def items(self):
base = "{}/{}/".format(self.root, self.user)
return self._dispatch_extractors((

View File

@@ -8,7 +8,7 @@
"""Extractors for https://wallhaven.cc/"""
from .common import Extractor, Message
from .common import Extractor, Message, Dispatch
from .. import text, exception
@@ -88,21 +88,13 @@ class WallhavenCollectionExtractor(WallhavenExtractor):
return {"username": self.username, "collection_id": self.collection_id}
class WallhavenUserExtractor(WallhavenExtractor):
class WallhavenUserExtractor(Dispatch, WallhavenExtractor):
"""Extractor for a wallhaven user"""
subcategory = "user"
pattern = r"(?:https?://)?wallhaven\.cc/user/([^/?#]+)/?$"
example = "https://wallhaven.cc/user/USER"
def __init__(self, match):
WallhavenExtractor.__init__(self, match)
self.username = match.group(1)
def initialize(self):
pass
def items(self):
base = "{}/user/{}/".format(self.root, self.username)
base = "{}/user/{}/".format(self.root, self.groups[0])
return self._dispatch_extractors((
(WallhavenUploadsExtractor , base + "uploads"),
(WallhavenCollectionsExtractor, base + "favorites"),

View File

@@ -8,7 +8,7 @@
"""Extractors for https://www.weibo.com/"""
from .common import Extractor, Message
from .common import Extractor, Message, Dispatch
from .. import text, util, exception
from ..cache import cache
import random
@@ -258,7 +258,7 @@ class WeiboUserExtractor(WeiboExtractor):
def items(self):
base = "{}/u/{}?tabtype=".format(self.root, self._user_id())
return self._dispatch_extractors((
return Dispatch._dispatch_extractors(self, (
(WeiboHomeExtractor , base + "home"),
(WeiboFeedExtractor , base + "feed"),
(WeiboVideosExtractor , base + "video"),