[common] simplify 'user' extractors by using 'Dispatch' mixin
This commit is contained in:
@@ -8,7 +8,7 @@
|
||||
|
||||
"""Extractors for https://archiveofourown.org/"""
|
||||
|
||||
from .common import Extractor, Message
|
||||
from .common import Extractor, Message, Dispatch
|
||||
from .. import text, util, exception
|
||||
from ..cache import cache
|
||||
|
||||
@@ -249,16 +249,12 @@ class Ao3SearchExtractor(Ao3Extractor):
|
||||
example = "https://archiveofourown.org/works/search?work_search[query]=air"
|
||||
|
||||
|
||||
class Ao3UserExtractor(Ao3Extractor):
|
||||
class Ao3UserExtractor(Dispatch, Ao3Extractor):
|
||||
"""Extractor for an AO3 user profile"""
|
||||
subcategory = "user"
|
||||
pattern = (BASE_PATTERN + r"/users/([^/?#]+(?:/pseuds/[^/?#]+)?)"
|
||||
r"(?:/profile)?/?(?:$|\?|#)")
|
||||
example = "https://archiveofourown.org/users/USER"
|
||||
|
||||
def initialize(self):
|
||||
pass
|
||||
|
||||
def items(self):
|
||||
base = "{}/users/{}/".format(self.root, self.groups[0])
|
||||
return self._dispatch_extractors((
|
||||
|
||||
@@ -8,7 +8,7 @@
|
||||
|
||||
"""Extractors for https://bsky.app/"""
|
||||
|
||||
from .common import Extractor, Message
|
||||
from .common import Extractor, Message, Dispatch
|
||||
from .. import text, util, exception
|
||||
from ..cache import cache, memcache
|
||||
|
||||
@@ -210,14 +210,10 @@ class BlueskyExtractor(Extractor):
|
||||
},)
|
||||
|
||||
|
||||
class BlueskyUserExtractor(BlueskyExtractor):
|
||||
subcategory = "user"
|
||||
class BlueskyUserExtractor(Dispatch, BlueskyExtractor):
|
||||
pattern = USER_PATTERN + r"$"
|
||||
example = "https://bsky.app/profile/HANDLE"
|
||||
|
||||
def initialize(self):
|
||||
pass
|
||||
|
||||
def items(self):
|
||||
base = "{}/profile/{}/".format(self.root, self.groups[0])
|
||||
default = ("posts" if self.config("quoted", False) or
|
||||
|
||||
@@ -8,7 +8,7 @@
|
||||
|
||||
"""Extractors for https://www.civitai.com/"""
|
||||
|
||||
from .common import Extractor, Message
|
||||
from .common import Extractor, Message, Dispatch
|
||||
from .. import text, util, exception
|
||||
from ..cache import memcache
|
||||
import itertools
|
||||
@@ -396,14 +396,10 @@ class CivitaiImagesExtractor(CivitaiExtractor):
|
||||
return self.api.images(params)
|
||||
|
||||
|
||||
class CivitaiUserExtractor(CivitaiExtractor):
|
||||
subcategory = "user"
|
||||
class CivitaiUserExtractor(Dispatch, CivitaiExtractor):
|
||||
pattern = USER_PATTERN + r"/?(?:$|\?|#)"
|
||||
example = "https://civitai.com/user/USER"
|
||||
|
||||
def initialize(self):
|
||||
pass
|
||||
|
||||
def items(self):
|
||||
base = "{}/user/{}/".format(self.root, self.groups[0])
|
||||
return self._dispatch_extractors((
|
||||
|
||||
@@ -616,29 +616,6 @@ class Extractor():
|
||||
fmt = self.config("date-format", "%Y-%m-%dT%H:%M:%S")
|
||||
return get("date-min", dmin), get("date-max", dmax)
|
||||
|
||||
def _dispatch_extractors(self, extractor_data, default=()):
|
||||
""" """
|
||||
extractors = {
|
||||
data[0].subcategory: data
|
||||
for data in extractor_data
|
||||
}
|
||||
|
||||
include = self.config("include", default) or ()
|
||||
if include == "all":
|
||||
include = extractors
|
||||
elif isinstance(include, str):
|
||||
include = include.replace(" ", "").split(",")
|
||||
|
||||
result = [(Message.Version, 1)]
|
||||
for category in include:
|
||||
try:
|
||||
extr, url = extractors[category]
|
||||
except KeyError:
|
||||
self.log.warning("Invalid include '%s'", category)
|
||||
else:
|
||||
result.append((Message.Queue, url, {"_extractor": extr}))
|
||||
return iter(result)
|
||||
|
||||
@classmethod
|
||||
def _dump(cls, obj):
|
||||
util.dump_json(obj, ensure_ascii=False, indent=2)
|
||||
@@ -796,6 +773,41 @@ class MangaExtractor(Extractor):
|
||||
"""Return a list of all (chapter-url, metadata)-tuples"""
|
||||
|
||||
|
||||
class Dispatch():
|
||||
subcategory = "user"
|
||||
cookies_domain = None
|
||||
finalize = Extractor.finalize
|
||||
skip = Extractor.skip
|
||||
|
||||
def __iter__(self):
|
||||
return self.items()
|
||||
|
||||
def initialize(self):
|
||||
pass
|
||||
|
||||
def _dispatch_extractors(self, extractor_data, default=()):
|
||||
extractors = {
|
||||
data[0].subcategory: data
|
||||
for data in extractor_data
|
||||
}
|
||||
|
||||
include = self.config("include", default) or ()
|
||||
if include == "all":
|
||||
include = extractors
|
||||
elif isinstance(include, str):
|
||||
include = include.replace(" ", "").split(",")
|
||||
|
||||
result = [(Message.Version, 1)]
|
||||
for category in include:
|
||||
try:
|
||||
extr, url = extractors[category]
|
||||
except KeyError:
|
||||
self.log.warning("Invalid include '%s'", category)
|
||||
else:
|
||||
result.append((Message.Queue, url, {"_extractor": extr}))
|
||||
return iter(result)
|
||||
|
||||
|
||||
class AsynchronousMixin():
|
||||
"""Run info extraction in a separate thread"""
|
||||
|
||||
|
||||
@@ -8,7 +8,7 @@
|
||||
|
||||
"""Extractors for https://www.deviantart.com/"""
|
||||
|
||||
from .common import Extractor, Message
|
||||
from .common import Extractor, Message, Dispatch
|
||||
from .. import text, util, exception
|
||||
from ..cache import cache, memcache
|
||||
import collections
|
||||
@@ -873,17 +873,11 @@ x2="45.4107524%" y2="71.4898596%" id="app-root-3">\
|
||||
.replace("\\\\", "\\")
|
||||
|
||||
|
||||
class DeviantartUserExtractor(DeviantartExtractor):
|
||||
class DeviantartUserExtractor(Dispatch, DeviantartExtractor):
|
||||
"""Extractor for an artist's user profile"""
|
||||
subcategory = "user"
|
||||
pattern = BASE_PATTERN + r"/?$"
|
||||
example = "https://www.deviantart.com/USER"
|
||||
|
||||
def initialize(self):
|
||||
pass
|
||||
|
||||
skip = Extractor.skip
|
||||
|
||||
def items(self):
|
||||
base = "{}/{}/".format(self.root, self.user)
|
||||
return self._dispatch_extractors((
|
||||
|
||||
@@ -8,7 +8,7 @@
|
||||
|
||||
"""Extractors for https://www.furaffinity.net/"""
|
||||
|
||||
from .common import Extractor, Message
|
||||
from .common import Extractor, Message, Dispatch
|
||||
from .. import text, util
|
||||
|
||||
BASE_PATTERN = r"(?:https?://)?(?:www\.|sfw\.)?(?:f[ux]|f?xfu)raffinity\.net"
|
||||
@@ -321,18 +321,11 @@ class FuraffinityPostExtractor(FuraffinityExtractor):
|
||||
return (post_id,)
|
||||
|
||||
|
||||
class FuraffinityUserExtractor(FuraffinityExtractor):
|
||||
class FuraffinityUserExtractor(Dispatch, FuraffinityExtractor):
|
||||
"""Extractor for furaffinity user profiles"""
|
||||
subcategory = "user"
|
||||
cookies_domain = None
|
||||
pattern = BASE_PATTERN + r"/user/([^/?#]+)"
|
||||
example = "https://www.furaffinity.net/user/USER/"
|
||||
|
||||
def initialize(self):
|
||||
pass
|
||||
|
||||
skip = Extractor.skip
|
||||
|
||||
def items(self):
|
||||
base = "{}/{{}}/{}/".format(self.root, self.user)
|
||||
return self._dispatch_extractors((
|
||||
|
||||
@@ -8,7 +8,7 @@
|
||||
|
||||
"""Extractors for https://www.hentai-foundry.com/"""
|
||||
|
||||
from .common import Extractor, Message
|
||||
from .common import Extractor, Message, Dispatch
|
||||
from .. import text, util
|
||||
|
||||
BASE_PATTERN = r"(https?://)?(?:www\.)?hentai-foundry\.com"
|
||||
@@ -192,15 +192,11 @@ class HentaifoundryExtractor(Extractor):
|
||||
self.request(url, method="POST", data=data)
|
||||
|
||||
|
||||
class HentaifoundryUserExtractor(HentaifoundryExtractor):
|
||||
class HentaifoundryUserExtractor(Dispatch, HentaifoundryExtractor):
|
||||
"""Extractor for a hentaifoundry user profile"""
|
||||
subcategory = "user"
|
||||
pattern = BASE_PATTERN + r"/user/([^/?#]+)/profile"
|
||||
example = "https://www.hentai-foundry.com/user/USER/profile"
|
||||
|
||||
def initialize(self):
|
||||
pass
|
||||
|
||||
def items(self):
|
||||
root = self.root
|
||||
user = "/user/" + self.user
|
||||
|
||||
@@ -9,7 +9,7 @@
|
||||
|
||||
"""Extractors for https://www.instagram.com/"""
|
||||
|
||||
from .common import Extractor, Message
|
||||
from .common import Extractor, Message, Dispatch
|
||||
from .. import text, util, exception
|
||||
from ..cache import cache, memcache
|
||||
import itertools
|
||||
@@ -430,18 +430,11 @@ class InstagramExtractor(Extractor):
|
||||
user[key] = 0
|
||||
|
||||
|
||||
class InstagramUserExtractor(InstagramExtractor):
|
||||
class InstagramUserExtractor(Dispatch, InstagramExtractor):
|
||||
"""Extractor for an Instagram user profile"""
|
||||
subcategory = "user"
|
||||
pattern = USER_PATTERN + r"/?(?:$|[?#])"
|
||||
example = "https://www.instagram.com/USER/"
|
||||
|
||||
def initialize(self):
|
||||
pass
|
||||
|
||||
def finalize(self):
|
||||
pass
|
||||
|
||||
def items(self):
|
||||
base = "{}/{}/".format(self.root, self.item)
|
||||
stories = "{}/stories/{}/".format(self.root, self.item)
|
||||
|
||||
@@ -8,7 +8,7 @@
|
||||
|
||||
"""Extractors for https://www.newgrounds.com/"""
|
||||
|
||||
from .common import Extractor, Message
|
||||
from .common import Extractor, Message, Dispatch
|
||||
from .. import text, util, exception
|
||||
from ..cache import cache
|
||||
import itertools
|
||||
@@ -450,15 +450,11 @@ class NewgroundsGamesExtractor(NewgroundsExtractor):
|
||||
example = "https://USER.newgrounds.com/games"
|
||||
|
||||
|
||||
class NewgroundsUserExtractor(NewgroundsExtractor):
|
||||
class NewgroundsUserExtractor(Dispatch, NewgroundsExtractor):
|
||||
"""Extractor for a newgrounds user profile"""
|
||||
subcategory = "user"
|
||||
pattern = USER_PATTERN + r"/?$"
|
||||
example = "https://USER.newgrounds.com"
|
||||
|
||||
def initialize(self):
|
||||
pass
|
||||
|
||||
def items(self):
|
||||
base = self.user_root + "/"
|
||||
return self._dispatch_extractors((
|
||||
|
||||
@@ -8,7 +8,7 @@
|
||||
|
||||
"""Extractors for nijie instances"""
|
||||
|
||||
from .common import BaseExtractor, Message, AsynchronousMixin
|
||||
from .common import BaseExtractor, Message, Dispatch, AsynchronousMixin
|
||||
from .. import text, exception
|
||||
from ..cache import cache
|
||||
|
||||
@@ -177,16 +177,11 @@ BASE_PATTERN = NijieExtractor.update({
|
||||
})
|
||||
|
||||
|
||||
class NijieUserExtractor(NijieExtractor):
|
||||
class NijieUserExtractor(Dispatch, NijieExtractor):
|
||||
"""Extractor for nijie user profiles"""
|
||||
subcategory = "user"
|
||||
cookies_domain = None
|
||||
pattern = BASE_PATTERN + r"/members\.php\?id=(\d+)"
|
||||
example = "https://nijie.info/members.php?id=12345"
|
||||
|
||||
def initialize(self):
|
||||
pass
|
||||
|
||||
def items(self):
|
||||
fmt = "{}/{{}}.php?id={}".format(self.root, self.user_id).format
|
||||
return self._dispatch_extractors((
|
||||
|
||||
@@ -8,7 +8,7 @@
|
||||
|
||||
"""Extractors for https://www.pixiv.net/"""
|
||||
|
||||
from .common import Extractor, Message
|
||||
from .common import Extractor, Message, Dispatch
|
||||
from .. import text, util, exception
|
||||
from ..cache import cache, memcache
|
||||
from datetime import datetime, timedelta
|
||||
@@ -367,23 +367,15 @@ class PixivExtractor(Extractor):
|
||||
return {}
|
||||
|
||||
|
||||
class PixivUserExtractor(PixivExtractor):
|
||||
class PixivUserExtractor(Dispatch, PixivExtractor):
|
||||
"""Extractor for a pixiv user profile"""
|
||||
subcategory = "user"
|
||||
pattern = (BASE_PATTERN + r"/(?:"
|
||||
r"(?:en/)?u(?:sers)?/|member\.php\?id=|(?:mypage\.php)?#id="
|
||||
r")(\d+)(?:$|[?#])")
|
||||
example = "https://www.pixiv.net/en/users/12345"
|
||||
|
||||
def __init__(self, match):
|
||||
PixivExtractor.__init__(self, match)
|
||||
self.user_id = match.group(1)
|
||||
|
||||
def initialize(self):
|
||||
pass
|
||||
|
||||
def items(self):
|
||||
base = "{}/users/{}/".format(self.root, self.user_id)
|
||||
base = "{}/users/{}/".format(self.root, self.groups[0])
|
||||
return self._dispatch_extractors((
|
||||
(PixivAvatarExtractor , base + "avatar"),
|
||||
(PixivBackgroundExtractor , base + "background"),
|
||||
|
||||
@@ -8,7 +8,7 @@
|
||||
|
||||
"""Extractors for https://www.pornhub.com/"""
|
||||
|
||||
from .common import Extractor, Message
|
||||
from .common import Extractor, Message, Dispatch
|
||||
from .. import text, exception
|
||||
|
||||
BASE_PATTERN = r"(?:https?://)?(?:[\w-]+\.)?pornhub\.com"
|
||||
@@ -164,21 +164,13 @@ class PornhubGifExtractor(PornhubExtractor):
|
||||
yield Message.Url, gif["url"], text.nameext_from_url(gif["url"], gif)
|
||||
|
||||
|
||||
class PornhubUserExtractor(PornhubExtractor):
|
||||
class PornhubUserExtractor(Dispatch, PornhubExtractor):
|
||||
"""Extractor for a pornhub user"""
|
||||
subcategory = "user"
|
||||
pattern = BASE_PATTERN + r"/((?:users|model|pornstar)/[^/?#]+)/?$"
|
||||
example = "https://www.pornhub.com/model/USER"
|
||||
|
||||
def __init__(self, match):
|
||||
PornhubExtractor.__init__(self, match)
|
||||
self.user = match.group(1)
|
||||
|
||||
def initialize(self):
|
||||
pass
|
||||
|
||||
def items(self):
|
||||
base = "{}/{}/".format(self.root, self.user)
|
||||
base = "{}/{}/".format(self.root, self.groups[0])
|
||||
return self._dispatch_extractors((
|
||||
(PornhubPhotosExtractor, base + "photos"),
|
||||
(PornhubGifsExtractor , base + "gifs"),
|
||||
|
||||
@@ -8,7 +8,7 @@
|
||||
|
||||
"""Extractors for https://x.com/"""
|
||||
|
||||
from .common import Extractor, Message
|
||||
from .common import Extractor, Message, Dispatch
|
||||
from .. import text, util, exception
|
||||
from ..cache import cache, memcache
|
||||
import itertools
|
||||
@@ -577,27 +577,18 @@ class TwitterExtractor(Extractor):
|
||||
return self.cookies_update(_login_impl(self, username, password))
|
||||
|
||||
|
||||
class TwitterUserExtractor(TwitterExtractor):
|
||||
class TwitterUserExtractor(Dispatch, TwitterExtractor):
|
||||
"""Extractor for a Twitter user"""
|
||||
subcategory = "user"
|
||||
pattern = (BASE_PATTERN + r"/(?!search)(?:([^/?#]+)/?(?:$|[?#])"
|
||||
r"|i(?:/user/|ntent/user\?user_id=)(\d+))")
|
||||
example = "https://x.com/USER"
|
||||
|
||||
def __init__(self, match):
|
||||
TwitterExtractor.__init__(self, match)
|
||||
user_id = match.group(2)
|
||||
if user_id:
|
||||
self.user = "id:" + user_id
|
||||
|
||||
def initialize(self):
|
||||
pass
|
||||
|
||||
def finalize(self):
|
||||
pass
|
||||
|
||||
def items(self):
|
||||
base = "{}/{}/".format(self.root, self.user)
|
||||
user, user_id = self.groups
|
||||
if user_id is not None:
|
||||
user = "id:" + user_id
|
||||
|
||||
base = "{}/{}/".format(self.root, user)
|
||||
return self._dispatch_extractors((
|
||||
(TwitterInfoExtractor , base + "info"),
|
||||
(TwitterAvatarExtractor , base + "photo"),
|
||||
|
||||
@@ -8,7 +8,7 @@
|
||||
|
||||
"""Extractors for https://vsco.co/"""
|
||||
|
||||
from .common import Extractor, Message
|
||||
from .common import Extractor, Message, Dispatch
|
||||
from .. import text, util
|
||||
|
||||
BASE_PATTERN = r"(?:https?://)?(?:www\.)?vsco\.co"
|
||||
@@ -132,15 +132,11 @@ class VscoExtractor(Extractor):
|
||||
return media
|
||||
|
||||
|
||||
class VscoUserExtractor(VscoExtractor):
|
||||
class VscoUserExtractor(Dispatch, VscoExtractor):
|
||||
"""Extractor for a vsco user profile"""
|
||||
subcategory = "user"
|
||||
pattern = USER_PATTERN + r"/?$"
|
||||
example = "https://vsco.co/USER"
|
||||
|
||||
def initialize(self):
|
||||
pass
|
||||
|
||||
def items(self):
|
||||
base = "{}/{}/".format(self.root, self.user)
|
||||
return self._dispatch_extractors((
|
||||
|
||||
@@ -8,7 +8,7 @@
|
||||
|
||||
"""Extractors for https://wallhaven.cc/"""
|
||||
|
||||
from .common import Extractor, Message
|
||||
from .common import Extractor, Message, Dispatch
|
||||
from .. import text, exception
|
||||
|
||||
|
||||
@@ -88,21 +88,13 @@ class WallhavenCollectionExtractor(WallhavenExtractor):
|
||||
return {"username": self.username, "collection_id": self.collection_id}
|
||||
|
||||
|
||||
class WallhavenUserExtractor(WallhavenExtractor):
|
||||
class WallhavenUserExtractor(Dispatch, WallhavenExtractor):
|
||||
"""Extractor for a wallhaven user"""
|
||||
subcategory = "user"
|
||||
pattern = r"(?:https?://)?wallhaven\.cc/user/([^/?#]+)/?$"
|
||||
example = "https://wallhaven.cc/user/USER"
|
||||
|
||||
def __init__(self, match):
|
||||
WallhavenExtractor.__init__(self, match)
|
||||
self.username = match.group(1)
|
||||
|
||||
def initialize(self):
|
||||
pass
|
||||
|
||||
def items(self):
|
||||
base = "{}/user/{}/".format(self.root, self.username)
|
||||
base = "{}/user/{}/".format(self.root, self.groups[0])
|
||||
return self._dispatch_extractors((
|
||||
(WallhavenUploadsExtractor , base + "uploads"),
|
||||
(WallhavenCollectionsExtractor, base + "favorites"),
|
||||
|
||||
@@ -8,7 +8,7 @@
|
||||
|
||||
"""Extractors for https://www.weibo.com/"""
|
||||
|
||||
from .common import Extractor, Message
|
||||
from .common import Extractor, Message, Dispatch
|
||||
from .. import text, util, exception
|
||||
from ..cache import cache
|
||||
import random
|
||||
@@ -258,7 +258,7 @@ class WeiboUserExtractor(WeiboExtractor):
|
||||
|
||||
def items(self):
|
||||
base = "{}/u/{}?tabtype=".format(self.root, self._user_id())
|
||||
return self._dispatch_extractors((
|
||||
return Dispatch._dispatch_extractors(self, (
|
||||
(WeiboHomeExtractor , base + "home"),
|
||||
(WeiboFeedExtractor , base + "feed"),
|
||||
(WeiboVideosExtractor , base + "video"),
|
||||
|
||||
Reference in New Issue
Block a user