Rewrite URL patterns to use only one pattern per extractor
This commit is contained in:
@@ -24,7 +24,7 @@ class ArtstationExtractor(Extractor):
|
||||
|
||||
def __init__(self, match=None):
|
||||
Extractor.__init__(self)
|
||||
self.user = match.group(1) if match else None
|
||||
self.user = match.group(1) or match.group(2) if match else None
|
||||
self.external = self.config("external", False)
|
||||
|
||||
def items(self):
|
||||
@@ -123,10 +123,9 @@ class ArtstationExtractor(Extractor):
|
||||
class ArtstationUserExtractor(ArtstationExtractor):
|
||||
"""Extractor for all projects of an artstation user"""
|
||||
subcategory = "user"
|
||||
pattern = [r"(?:https?://)?(?:www\.)?artstation\.com"
|
||||
r"/(?!artwork|projects|search)([^/?&#]+)(?:/albums/all)?/?$",
|
||||
r"(?:https?://)?((?!www)\w+)\.artstation\.com"
|
||||
r"(?:/(?:projects/?)?)?$"]
|
||||
pattern = [r"(?:https?://)?(?:(?:www\.)?artstation\.com"
|
||||
r"/(?!artwork|projects|search)([^/?&#]+)(?:/albums/all)?"
|
||||
r"|((?!www)\w+)\.artstation\.com(?:/projects)?)/?$"]
|
||||
test = [
|
||||
("https://www.artstation.com/gaerikim/", {
|
||||
"pattern": r"https://\w+\.artstation\.com/p/assets"
|
||||
@@ -149,10 +148,9 @@ class ArtstationAlbumExtractor(ArtstationExtractor):
|
||||
directory_fmt = ["{category}", "{userinfo[username]}", "Albums",
|
||||
"{album[id]} - {album[title]}"]
|
||||
archive_fmt = "a_{album[id]}_{asset[id]}"
|
||||
pattern = [r"(?:https?://)?(?:www\.)?artstation\.com"
|
||||
r"/(?!artwork|projects|search)([^/?&#]+)/albums/(\d+)",
|
||||
r"(?:https?://)?((?!www)\w+)\.artstation\.com"
|
||||
r"/albums/(\d+)"]
|
||||
pattern = [r"(?:https?://)?(?:(?:www\.)?artstation\.com"
|
||||
r"/(?!artwork|projects|search)([^/?&#]+)"
|
||||
r"|((?!www)\w+)\.artstation\.com)/albums/(\d+)"]
|
||||
test = [
|
||||
("https://www.artstation.com/huimeiye/albums/770899", {
|
||||
"count": 2,
|
||||
@@ -165,7 +163,7 @@ class ArtstationAlbumExtractor(ArtstationExtractor):
|
||||
|
||||
def __init__(self, match):
|
||||
ArtstationExtractor.__init__(self, match)
|
||||
self.album_id = text.parse_int(match.group(2))
|
||||
self.album_id = text.parse_int(match.group(3))
|
||||
|
||||
def metadata(self):
|
||||
userinfo = self.get_user_info(self.user)
|
||||
|
||||
@@ -21,10 +21,8 @@ class E621Extractor(booru.MoebooruPageMixin, booru.BooruExtractor):
|
||||
|
||||
class E621TagExtractor(booru.TagMixin, E621Extractor):
|
||||
"""Extractor for images from e621.net based on search-tags"""
|
||||
pattern = [
|
||||
r"(?:https?://)?(?:www\.)?e621\.net/post/index/\d+/(?P<tags>[^/?&#]+)",
|
||||
r"(?:https?://)?(?:www\.)?e621\.net/post\?tags=(?P<tags>[^&#]+)",
|
||||
]
|
||||
pattern = [r"(?:https?://)?(?:www\.)?e621\.net/post"
|
||||
r"(?:/index/\d+/|\?tags=)(?P<tags>[^/?&#]+)"]
|
||||
test = [
|
||||
("https://e621.net/post/index/1/anry", {
|
||||
"url": "8021e5ea28d47c474c1ffc9bd44863c4d45700ba",
|
||||
|
||||
@@ -45,9 +45,10 @@ class FlickrImageExtractor(FlickrExtractor):
|
||||
"""Extractor for individual images from flickr.com"""
|
||||
subcategory = "image"
|
||||
archive_fmt = "{id}"
|
||||
pattern = [r"(?:https?://)?(?:www\.|m\.)?flickr\.com/photos/[^/]+/(\d+)",
|
||||
r"(?:https?://)?[^.]+\.static\.?flickr\.com/(?:\d+/)+(\d+)_",
|
||||
r"(?:https?://)?flic\.kr/(p)/([A-Za-z1-9]+)"]
|
||||
pattern = [r"(?:https?://)?(?:"
|
||||
r"(?:(?:www\.|m\.)?flickr\.com/photos/[^/]+/"
|
||||
r"|[^.]+\.static\.?flickr\.com/(?:\d+/)+)(\d+)"
|
||||
r"|flic\.kr/p/([A-Za-z1-9]+))"]
|
||||
test = [
|
||||
("https://www.flickr.com/photos/departingyyz/16089302239", {
|
||||
"url": "7f0887f5953f61c8b79a695cb102ea309c0346b0",
|
||||
@@ -66,7 +67,7 @@ class FlickrImageExtractor(FlickrExtractor):
|
||||
|
||||
def __init__(self, match):
|
||||
FlickrExtractor.__init__(self, match)
|
||||
if self.item_id == "p":
|
||||
if not self.item_id:
|
||||
alphabet = ("123456789abcdefghijkmnopqrstu"
|
||||
"vwxyzABCDEFGHJKLMNPQRSTUVWXYZ")
|
||||
self.item_id = util.bdecode(match.group(2), alphabet)
|
||||
|
||||
@@ -133,10 +133,9 @@ class ImagefapUserExtractor(ImagefapExtractor):
|
||||
"""Extractor for all galleries from a user at imagefap.com"""
|
||||
subcategory = "user"
|
||||
categorytransfer = True
|
||||
pattern = [(r"(?:https?://)?(?:www\.)?imagefap\.com"
|
||||
r"/profile(?:\.php\?user=|/)([^/?&#]+)"),
|
||||
(r"(?:https?://)?(?:www\.)?imagefap\.com"
|
||||
r"/usergallery\.php\?userid=(\d+)")]
|
||||
pattern = [r"(?:https?://)?(?:www\.)?imagefap\.com/"
|
||||
r"(?:profile(?:\.php\?user=|/)([^/?&#]+)"
|
||||
r"|usergallery\.php\?userid=(\d+))"]
|
||||
test = [
|
||||
("https://www.imagefap.com/profile/LucyRae/galleries", {
|
||||
"url": "d941aa906f56a75972a7a5283030eb9a8d27a4fd",
|
||||
@@ -149,12 +148,7 @@ class ImagefapUserExtractor(ImagefapExtractor):
|
||||
|
||||
def __init__(self, match):
|
||||
ImagefapExtractor.__init__(self)
|
||||
try:
|
||||
self.user_id = int(match.group(1))
|
||||
self.user = None
|
||||
except ValueError:
|
||||
self.user_id = None
|
||||
self.user = match.group(1)
|
||||
self.user, self.user_id = match.groups()
|
||||
|
||||
def items(self):
|
||||
yield Message.Version, 1
|
||||
|
||||
@@ -70,9 +70,8 @@ class ImagehostImageExtractor(SharedConfigMixin, Extractor):
|
||||
class ImxtoImageExtractor(ImagehostImageExtractor):
|
||||
"""Extractor for single images from imx.to"""
|
||||
category = "imxto"
|
||||
pattern = [r"(?:https?://)?(?:www\.)?(imx\.to/i/(\w+))",
|
||||
r"(?:https?://)?(?:www\.)?((?:imx\.to|img\.yt)"
|
||||
r"/img-([a-z0-9]+)\.html)"]
|
||||
pattern = [r"(?:https?://)?(?:www\.)?((?:imx\.to|img\.yt)"
|
||||
r"/(?:i/|img-)(\w+)(\.html)?)"]
|
||||
test = (
|
||||
("https://imx.to/i/1qdeva", { # new-style URL
|
||||
"url": "ab2173088a6cdef631d7a47dec4a5da1c6a00130",
|
||||
|
||||
@@ -53,9 +53,8 @@ class ImgurImageExtractor(ImgurExtractor):
|
||||
subcategory = "image"
|
||||
filename_fmt = "{category}_{hash}{title:?_//}.{extension}"
|
||||
archive_fmt = "{hash}"
|
||||
pattern = [(r"(?:https?://)?(?:www\.|m\.)?imgur\.com"
|
||||
r"/(?!gallery)(\w{7}|\w{5})"),
|
||||
(r"(?:https?://)?i\.imgur\.com/(\w{7}|\w{5})[sbtmlh]?\.")]
|
||||
pattern = [r"(?:https?://)?(?:www\.|[im]\.|)?imgur\.com"
|
||||
r"/(?!gallery)(\w{7}|\w{5})[sbtmlh]?\.?"]
|
||||
test = [
|
||||
("https://imgur.com/21yMxCS", {
|
||||
"url": "6f2dcfb86815bdd72808c313e5f715610bc7b9b2",
|
||||
|
||||
@@ -28,11 +28,7 @@ class MangapandaMangaExtractor(MangapandaBase, MangareaderMangaExtractor):
|
||||
|
||||
class MangapandaChapterExtractor(MangapandaBase, MangareaderChapterExtractor):
|
||||
"""Extractor for manga-chapters from mangapanda.com"""
|
||||
pattern = [
|
||||
(r"(?:https?://)?(?:www\.)?mangapanda\.com((/[^/?&#]+)/(\d+))"),
|
||||
(r"(?:https?://)?(?:www\.)?mangapanda\.com"
|
||||
r"(/\d+-\d+-\d+(/[^/]+)/chapter-(\d+)\.html)"),
|
||||
]
|
||||
pattern = [r"(?:https?://)?(?:www\.)?mangapanda\.com((/[^/?&#]+)/(\d+))"]
|
||||
test = [("https://www.mangapanda.com/red-storm/2", {
|
||||
"url": "1f633f776e950531ba9b1e81965316458e785261",
|
||||
"keyword": "32b5e84017c2bf5f122b339ecf40899e41f18cc9",
|
||||
|
||||
@@ -60,11 +60,7 @@ class MangareaderMangaExtractor(MangareaderBase, MangaExtractor):
|
||||
class MangareaderChapterExtractor(MangareaderBase, ChapterExtractor):
|
||||
"""Extractor for manga-chapters from mangareader.net"""
|
||||
archive_fmt = "{manga}_{chapter}_{page}"
|
||||
pattern = [
|
||||
(r"(?:https?://)?(?:www\.)?mangareader\.net((/[^/?&#]+)/(\d+))"),
|
||||
(r"(?:https?://)?(?:www\.)?mangareader\.net"
|
||||
r"(/\d+-\d+-\d+(/[^/]+)/chapter-(\d+)\.html)"),
|
||||
]
|
||||
pattern = [r"(?:https?://)?(?:www\.)?mangareader\.net((/[^/?&#]+)/(\d+))"]
|
||||
test = [(("https://www.mangareader.net/"
|
||||
"karate-shoukoushi-kohinata-minoru/11"), {
|
||||
"url": "061cc92a07edf17bb991ce0821fa4c77a147a860",
|
||||
|
||||
@@ -82,10 +82,9 @@ class PixivExtractor(Extractor):
|
||||
class PixivUserExtractor(PixivExtractor):
|
||||
"""Extractor for works of a pixiv-user"""
|
||||
subcategory = "user"
|
||||
pattern = [(r"(?:https?://)?(?:www\.|touch\.)?pixiv\.net"
|
||||
r"/member(?:_illust)?\.php\?id=(\d+)(?:&([^#]+))?"),
|
||||
(r"(?:https?://)?(?:www\.|touch\.)?pixiv\.net"
|
||||
r"/(?:u(?:ser)?/|(?:mypage\.php)?#id=)(\d+)()")]
|
||||
pattern = [r"(?:https?://)?(?:www\.|touch\.)?pixiv\.net/"
|
||||
r"(?:member(?:_illust)?\.php\?id=(\d+)(?:&([^#]+))?"
|
||||
r"|(?:u(?:ser)?/|(?:mypage\.php)?#id=)(\d+))"]
|
||||
test = [
|
||||
("http://www.pixiv.net/member_illust.php?id=173530", {
|
||||
"url": "852c31ad83b6840bacbce824d85f2a997889efb7",
|
||||
@@ -107,7 +106,7 @@ class PixivUserExtractor(PixivExtractor):
|
||||
|
||||
def __init__(self, match):
|
||||
PixivExtractor.__init__(self)
|
||||
self.user_id = match.group(1)
|
||||
self.user_id = match.group(1) or match.group(3)
|
||||
self.query = text.parse_query(match.group(2))
|
||||
|
||||
def works(self):
|
||||
@@ -153,13 +152,11 @@ class PixivMeExtractor(PixivExtractor):
|
||||
class PixivWorkExtractor(PixivExtractor):
|
||||
"""Extractor for a single pixiv work/illustration"""
|
||||
subcategory = "work"
|
||||
pattern = [(r"(?:https?://)?(?:www\.|touch\.)?pixiv\.net"
|
||||
r"/member(?:_illust)?\.php\?(?:[^&]+&)*illust_id=(\d+)"),
|
||||
(r"(?:https?://)?i(?:\d+\.pixiv|\.pximg)\.net"
|
||||
r"/(?:(?:.*/)?img-[^/]+/img/\d{4}(?:/\d\d){5}"
|
||||
r"|img\d+/img/[^/]+)/(\d+)"),
|
||||
(r"(?:https?://)?img\d*\.pixiv\.net/img/[^/]+/(\d+)"),
|
||||
(r"(?:https?://)?(?:www\.)?pixiv\.net/i/(\d+)")]
|
||||
pattern = [r"(?:https?://)?(?:(?:www\.|touch\.)?pixiv\.net"
|
||||
r"/member(?:_illust)?\.php\?(?:[^&]+&)*illust_id=(\d+)"
|
||||
r"|(?:i(?:\d+\.pixiv|\.pximg)\.net"
|
||||
r"/(?:(?:.*/)?img-[^/]+/img/\d{4}(?:/\d\d){5}|img\d+/img/[^/]+)"
|
||||
r"|img\d*\.pixiv\.net/img/[^/]+|(?:www\.)?pixiv\.net/i)/(\d+))"]
|
||||
test = [
|
||||
(("http://www.pixiv.net/member_illust.php"
|
||||
"?mode=medium&illust_id=966412"), {
|
||||
@@ -187,7 +184,7 @@ class PixivWorkExtractor(PixivExtractor):
|
||||
|
||||
def __init__(self, match):
|
||||
PixivExtractor.__init__(self)
|
||||
self.illust_id = match.group(1)
|
||||
self.illust_id = match.group(1) or match.group(2)
|
||||
self.load_ugoira = True
|
||||
self.work = None
|
||||
|
||||
|
||||
@@ -166,10 +166,9 @@ class SeigaImageExtractor(SeigaExtractor):
|
||||
"""Extractor for single images from seiga.nicovideo.jp"""
|
||||
subcategory = "image"
|
||||
filename_fmt = "{category}_{image_id}.{extension}"
|
||||
pattern = [(r"(?:https?://)?(?:www\.|seiga\.)?nicovideo\.jp/"
|
||||
r"(?:seiga/im|image/source/)(\d+)"),
|
||||
(r"(?:https?://)?lohas\.nicoseiga\.jp/"
|
||||
r"(?:priv|o)/[^/]+/\d+/(\d+)")]
|
||||
pattern = [r"(?:https?://)?(?:"
|
||||
r"(?:www\.|seiga\.)?nicovideo\.jp/(?:seiga/im|image/source/)"
|
||||
r"|lohas\.nicoseiga\.jp/(?:priv|o)/[^/]+/\d+/)(\d+)"]
|
||||
test = [
|
||||
("http://seiga.nicovideo.jp/seiga/im5977527", {
|
||||
"keyword": "f66ba5de33d4ce2cb57f23bb37e1e847e0771c10",
|
||||
|
||||
Reference in New Issue
Block a user