'match.group(N)' -> 'match[N]' (#7671)

Subscript access (`match[N]`) is about 2.5x faster than calling `match.group(N)`.
This commit is contained in:
Mike Fährmann
2025-06-18 12:59:37 +02:00
parent 475506cc39
commit 41191bb60a
135 changed files with 363 additions and 363 deletions

View File

@@ -68,7 +68,7 @@ class _2chBoardExtractor(Extractor):
def __init__(self, match):
Extractor.__init__(self, match)
self.board = match.group(1)
self.board = match[1]
def items(self):
# index page

View File

@@ -86,7 +86,7 @@ class _2chenBoardExtractor(Extractor):
def __init__(self, match):
Extractor.__init__(self, match)
self.board = match.group(1)
self.board = match[1]
def items(self):
url = "{}/{}/catalog".format(self.root, self.board)

View File

@@ -104,7 +104,7 @@ class _35photoUserExtractor(_35photoExtractor):
def __init__(self, match):
_35photoExtractor.__init__(self, match)
self.user = match.group(1)
self.user = match[1]
self.user_id = 0
def metadata(self):
@@ -133,7 +133,7 @@ class _35photoTagExtractor(_35photoExtractor):
def __init__(self, match):
_35photoExtractor.__init__(self, match)
self.tag = match.group(1)
self.tag = match[1]
def metadata(self):
return {"search_tag": text.unquote(self.tag).lower()}
@@ -198,7 +198,7 @@ class _35photoImageExtractor(_35photoExtractor):
def __init__(self, match):
_35photoExtractor.__init__(self, match)
self.photo_id = match.group(1)
self.photo_id = match[1]
def photos(self):
return (self.photo_id,)

View File

@@ -93,8 +93,8 @@ class _4archiveBoardExtractor(Extractor):
def __init__(self, match):
Extractor.__init__(self, match)
self.board = match.group(1)
self.num = text.parse_int(match.group(2), 1)
self.board = match[1]
self.num = text.parse_int(match[2], 1)
def items(self):
data = {"_extractor": _4archiveThreadExtractor}

View File

@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
# Copyright 2015-2023 Mike Fährmann
# Copyright 2015-2025 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -59,7 +59,7 @@ class _4chanBoardExtractor(Extractor):
def __init__(self, match):
Extractor.__init__(self, match)
self.board = match.group(1)
self.board = match[1]
def items(self):
url = "https://a.4cdn.org/{}/threads.json".format(self.board)

View File

@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
# Copyright 2019-2023 Mike Fährmann
# Copyright 2019-2025 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -97,7 +97,7 @@ class _500pxUserExtractor(_500pxExtractor):
def __init__(self, match):
_500pxExtractor.__init__(self, match)
self.user = match.group(1)
self.user = match[1]
def photos(self):
variables = {"username": self.user, "pageSize": 20}
@@ -207,7 +207,7 @@ class _500pxImageExtractor(_500pxExtractor):
def __init__(self, match):
_500pxExtractor.__init__(self, match)
self.photo_id = match.group(1)
self.photo_id = match[1]
def photos(self):
edges = ({"node": {"legacyId": self.photo_id}},)

View File

@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
# Copyright 2022-2023 Mike Fährmann
# Copyright 2022-2025 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -23,7 +23,7 @@ class _8chanExtractor(Extractor):
root = "https://8chan.moe"
def __init__(self, match):
self.root = "https://8chan." + match.group(1)
self.root = "https://8chan." + match[1]
Extractor.__init__(self, match)
@memcache()

View File

@@ -26,8 +26,8 @@ class _8musesAlbumExtractor(Extractor):
def __init__(self, match):
Extractor.__init__(self, match)
self.path = match.group(1)
self.params = match.group(2) or ""
self.path = match[1]
self.params = match[2] or ""
def items(self):
url = self.root + self.path + self.params

View File

@@ -22,7 +22,7 @@ class AdultempireGalleryExtractor(GalleryExtractor):
def __init__(self, match):
GalleryExtractor.__init__(self, match)
self.gallery_id = match.group(2)
self.gallery_id = match[2]
def _init(self):
self.cookies.set("ageConfirmed", "true", domain="www.adultempire.com")

View File

@@ -24,7 +24,7 @@ class ArchitizerProjectExtractor(GalleryExtractor):
example = "https://architizer.com/projects/NAME/"
def __init__(self, match):
url = "{}/projects/{}/".format(self.root, match.group(1))
url = "{}/projects/{}/".format(self.root, match[1])
GalleryExtractor.__init__(self, match, url)
def metadata(self, page):
@@ -68,7 +68,7 @@ class ArchitizerFirmExtractor(Extractor):
def __init__(self, match):
Extractor.__init__(self, match)
self.firm = match.group(1)
self.firm = match[1]
def items(self):
url = url = "{}/firms/{}/?requesting_merlin=pages".format(

View File

@@ -25,7 +25,7 @@ class ArtstationExtractor(Extractor):
def __init__(self, match):
Extractor.__init__(self, match)
self.user = match.group(1) or match.group(2)
self.user = match[1] or match[2]
def _init(self):
self.session.headers["Cache-Control"] = "max-age=0"
@@ -215,7 +215,7 @@ class ArtstationAlbumExtractor(ArtstationExtractor):
def __init__(self, match):
ArtstationExtractor.__init__(self, match)
self.album_id = text.parse_int(match.group(3))
self.album_id = text.parse_int(match[3])
def metadata(self):
userinfo = self.get_user_info(self.user)
@@ -264,7 +264,7 @@ class ArtstationCollectionExtractor(ArtstationExtractor):
def __init__(self, match):
ArtstationExtractor.__init__(self, match)
self.collection_id = match.group(2)
self.collection_id = match[2]
def metadata(self):
url = "{}/collections/{}.json".format(
@@ -314,8 +314,8 @@ class ArtstationChallengeExtractor(ArtstationExtractor):
def __init__(self, match):
ArtstationExtractor.__init__(self, match)
self.challenge_id = match.group(1)
self.sorting = match.group(2) or "popular"
self.challenge_id = match[1]
self.sorting = match[2] or "popular"
def items(self):
challenge_url = "{}/contests/_/challenges/{}.json".format(
@@ -359,7 +359,7 @@ class ArtstationSearchExtractor(ArtstationExtractor):
def __init__(self, match):
ArtstationExtractor.__init__(self, match)
self.params = query = text.parse_query(match.group(1))
self.params = query = text.parse_query(match[1])
self.query = text.unquote(query.get("query") or query.get("q", ""))
self.sorting = query.get("sort_by", "relevance").lower()
self.tags = query.get("tags", "").split(",")
@@ -406,7 +406,7 @@ class ArtstationArtworkExtractor(ArtstationExtractor):
def __init__(self, match):
ArtstationExtractor.__init__(self, match)
self.query = text.parse_query(match.group(1))
self.query = text.parse_query(match[1])
def metadata(self):
return {"artwork": self.query}
@@ -426,7 +426,7 @@ class ArtstationImageExtractor(ArtstationExtractor):
def __init__(self, match):
ArtstationExtractor.__init__(self, match)
self.project_id = match.group(1)
self.project_id = match[1]
self.assets = None
def metadata(self):

View File

@@ -29,7 +29,7 @@ class AryionExtractor(Extractor):
def __init__(self, match):
Extractor.__init__(self, match)
self.user = match.group(1)
self.user = match[1]
self.recursive = True
def login(self):

View File

@@ -88,7 +88,7 @@ class BehanceGalleryExtractor(BehanceExtractor):
def __init__(self, match):
BehanceExtractor.__init__(self, match)
self.gallery_id = match.group(1)
self.gallery_id = match[1]
def _init(self):
BehanceExtractor._init(self)
@@ -229,7 +229,7 @@ class BehanceUserExtractor(BehanceExtractor):
def __init__(self, match):
BehanceExtractor.__init__(self, match)
self.user = match.group(1)
self.user = match[1]
def galleries(self):
endpoint = "GetProfileProjects"
@@ -257,7 +257,7 @@ class BehanceCollectionExtractor(BehanceExtractor):
def __init__(self, match):
BehanceExtractor.__init__(self, match)
self.collection_id = match.group(1)
self.collection_id = match[1]
def galleries(self):
endpoint = "GetMoodboardItemsAndRecommendations"

View File

@@ -104,7 +104,7 @@ class BloggerPostExtractor(BloggerExtractor):
def __init__(self, match):
BloggerExtractor.__init__(self, match)
self.path = match.group(match.lastindex)
self.path = match[match.lastindex]
def posts(self, blog):
return (self.api.post_by_path(blog["id"], self.path),)
@@ -128,7 +128,7 @@ class BloggerSearchExtractor(BloggerExtractor):
def __init__(self, match):
BloggerExtractor.__init__(self, match)
self.query = text.unquote(match.group(match.lastindex))
self.query = text.unquote(match[match.lastindex])
def posts(self, blog):
return self.api.blog_search(blog["id"], self.query)
@@ -145,7 +145,7 @@ class BloggerLabelExtractor(BloggerExtractor):
def __init__(self, match):
BloggerExtractor.__init__(self, match)
self.label = text.unquote(match.group(match.lastindex))
self.label = text.unquote(match[match.lastindex])
def posts(self, blog):
return self.api.blog_posts(blog["id"], self.label)

View File

@@ -20,7 +20,7 @@ class CienExtractor(Extractor):
request_interval = (1.0, 2.0)
def __init__(self, match):
self.root = text.root_from_url(match.group(0))
self.root = text.root_from_url(match[0])
Extractor.__init__(self, match)
def _init(self):

View File

@@ -926,7 +926,7 @@ class BaseExtractor(Extractor):
if index:
self.category, self.root, info = self.instances[index-1]
if not self.root:
self.root = text.root_from_url(self.match.group(0))
self.root = text.root_from_url(self.match[0])
self.config_instance = info.get
else:
self.root = group

View File

@@ -46,7 +46,7 @@ class DesktopographyExhibitionExtractor(DesktopographyExtractor):
def __init__(self, match):
DesktopographyExtractor.__init__(self, match)
self.year = match.group(1)
self.year = match[1]
def items(self):
url = "{}/exhibition-{}/".format(self.root, self.year)
@@ -75,7 +75,7 @@ class DesktopographyEntryExtractor(DesktopographyExtractor):
def __init__(self, match):
DesktopographyExtractor.__init__(self, match)
self.entry = match.group(1)
self.entry = match[1]
def items(self):
url = "{}/portfolios/{}".format(self.root, self.entry)

View File

@@ -36,7 +36,7 @@ class DeviantartExtractor(Extractor):
def __init__(self, match):
Extractor.__init__(self, match)
self.user = (match.group(1) or match.group(2) or "").lower()
self.user = (match[1] or match[2] or "").lower()
self.offset = 0
def _init(self):
@@ -227,7 +227,7 @@ class DeviantartExtractor(Extractor):
if txt is None:
continue
for match in DeviantartStashExtractor.pattern.finditer(txt):
url = text.ensure_http_scheme(match.group(0))
url = text.ensure_http_scheme(match[0])
deviation["_extractor"] = DeviantartStashExtractor
yield Message.Queue, url, deviation
@@ -988,8 +988,8 @@ class DeviantartFolderExtractor(DeviantartExtractor):
def __init__(self, match):
DeviantartExtractor.__init__(self, match)
self.folder = None
self.folder_id = match.group(3)
self.folder_name = match.group(4)
self.folder_id = match[3]
self.folder_name = match[4]
def deviations(self):
folders = self.api.gallery_folders(self.user)
@@ -1123,8 +1123,8 @@ class DeviantartCollectionExtractor(DeviantartExtractor):
def __init__(self, match):
DeviantartExtractor.__init__(self, match)
self.collection = None
self.collection_id = match.group(3)
self.collection_name = match.group(4)
self.collection_id = match[3]
self.collection_name = match[4]
def deviations(self):
folders = self.api.collections_folders(self.user)
@@ -1226,7 +1226,7 @@ class DeviantartTagExtractor(DeviantartExtractor):
def __init__(self, match):
DeviantartExtractor.__init__(self, match)
self.tag = text.unquote(match.group(1))
self.tag = text.unquote(match[1])
self.user = ""
def deviations(self):
@@ -1276,9 +1276,9 @@ class DeviantartDeviationExtractor(DeviantartExtractor):
def __init__(self, match):
DeviantartExtractor.__init__(self, match)
self.type = match.group(3)
self.type = match[3]
self.deviation_id = \
match.group(4) or match.group(5) or id_from_base36(match.group(6))
match[4] or match[5] or id_from_base36(match[6])
def deviations(self):
if self.user:
@@ -1399,7 +1399,7 @@ class DeviantartGallerySearchExtractor(DeviantartExtractor):
def __init__(self, match):
DeviantartExtractor.__init__(self, match)
self.query = match.group(3)
self.query = match[3]
def deviations(self):
self.login()

View File

@@ -55,10 +55,10 @@ class DynastyscansChapterExtractor(DynastyscansBase, ChapterExtractor):
group = extr('"icon-print"></i> ', '</span>')
return {
"manga" : text.unescape(match.group(1)),
"chapter" : text.parse_int(match.group(2)),
"chapter_minor": match.group(3) or "",
"title" : text.unescape(match.group(4) or ""),
"manga" : text.unescape(match[1]),
"chapter" : text.parse_int(match[2]),
"chapter_minor": match[3] or "",
"title" : text.unescape(match[4] or ""),
"author" : text.remove_html(author),
"group" : (text.remove_html(group) or
text.extr(group, ' alt="', '"')),
@@ -102,7 +102,7 @@ class DynastyscansSearchExtractor(DynastyscansBase, Extractor):
def __init__(self, match):
Extractor.__init__(self, match)
self.query = match.group(1) or ""
self.query = match[1] or ""
def items(self):
yield Message.Directory, {}

View File

@@ -34,7 +34,7 @@ class ExhentaiExtractor(Extractor):
def __init__(self, match):
Extractor.__init__(self, match)
self.version = match.group(1)
self.version = match[1]
def initialize(self):
domain = self.config("domain", "auto")
@@ -122,10 +122,10 @@ class ExhentaiGalleryExtractor(ExhentaiExtractor):
def __init__(self, match):
ExhentaiExtractor.__init__(self, match)
self.gallery_id = text.parse_int(match.group(2) or match.group(5))
self.gallery_token = match.group(3)
self.image_token = match.group(4)
self.image_num = text.parse_int(match.group(6), 1)
self.gallery_id = text.parse_int(match[2] or match[5])
self.gallery_token = match[3]
self.image_token = match[4]
self.image_num = text.parse_int(match[6], 1)
self.key_start = None
self.key_show = None
self.key_next = None
@@ -573,7 +573,7 @@ class ExhentaiSearchExtractor(ExhentaiExtractor):
def __init__(self, match):
ExhentaiExtractor.__init__(self, match)
_, query, tag = match.groups()
_, query, tag = self.groups
if tag:
if "+" in tag:
ns, _, tag = tag.rpartition(":")
@@ -599,13 +599,13 @@ class ExhentaiSearchExtractor(ExhentaiExtractor):
last = None
page = self.request(search_url, params=params).text
for gallery in ExhentaiGalleryExtractor.pattern.finditer(page):
url = gallery.group(0)
for match in ExhentaiGalleryExtractor.pattern.finditer(page):
url = match[0]
if url == last:
continue
last = url
data["gallery_id"] = text.parse_int(gallery.group(2))
data["gallery_token"] = gallery.group(3)
data["gallery_id"] = text.parse_int(match[2])
data["gallery_token"] = match[3]
yield Message.Queue, url + "/", data
next_url = text.extr(page, 'nexturl="', '"', None)

View File

@@ -351,7 +351,7 @@ class FanboxCreatorExtractor(FanboxExtractor):
def __init__(self, match):
FanboxExtractor.__init__(self, match)
self.creator_id = match.group(1) or match.group(2)
self.creator_id = match[1] or match[2]
def posts(self):
url = "https://api.fanbox.cc/post.paginateCreator?creatorId="
@@ -378,7 +378,7 @@ class FanboxPostExtractor(FanboxExtractor):
def __init__(self, match):
FanboxExtractor.__init__(self, match)
self.post_id = match.group(3)
self.post_id = match[3]
def posts(self):
return (self._get_post_data(self.post_id),)

View File

@@ -186,7 +186,7 @@ class FantiaCreatorExtractor(FantiaExtractor):
def __init__(self, match):
FantiaExtractor.__init__(self, match)
self.creator_id = match.group(1)
self.creator_id = match[1]
def posts(self):
url = "{}/fanclubs/{}/posts".format(self.root, self.creator_id)
@@ -201,7 +201,7 @@ class FantiaPostExtractor(FantiaExtractor):
def __init__(self, match):
FantiaExtractor.__init__(self, match)
self.post_id = match.group(1)
self.post_id = match[1]
def posts(self):
self._csrf_token()

View File

@@ -50,8 +50,8 @@ class FapachiUserExtractor(Extractor):
def __init__(self, match):
Extractor.__init__(self, match)
self.user = match.group(1)
self.num = text.parse_int(match.group(2), 1)
self.user = match[1]
self.num = text.parse_int(match[2], 1)
def items(self):
data = {"_extractor": FapachiPostExtractor}

View File

@@ -25,7 +25,7 @@ class FapelloPostExtractor(Extractor):
def __init__(self, match):
Extractor.__init__(self, match)
self.root = text.root_from_url(match.group(0))
self.root = text.root_from_url(match[0])
self.model, self.id = match.groups()
def items(self):
@@ -59,8 +59,8 @@ class FapelloModelExtractor(Extractor):
def __init__(self, match):
Extractor.__init__(self, match)
self.root = text.root_from_url(match.group(0))
self.model = match.group(1)
self.root = text.root_from_url(match[0])
self.model = match[1]
def items(self):
num = 1
@@ -93,8 +93,8 @@ class FapelloPathExtractor(Extractor):
def __init__(self, match):
Extractor.__init__(self, match)
self.root = text.root_from_url(match.group(0))
self.path = match.group(1)
self.root = text.root_from_url(match[0])
self.path = match[1]
def items(self):
num = 1

View File

@@ -273,9 +273,9 @@ class FoolfuukaGalleryExtractor(FoolfuukaExtractor):
def __init__(self, match):
FoolfuukaExtractor.__init__(self, match)
board = match.group(match.lastindex)
board = match[match.lastindex]
if board.isdecimal():
self.board = match.group(match.lastindex-1)
self.board = match[match.lastindex-1]
self.pages = (board,)
else:
self.board = board

View File

@@ -18,7 +18,7 @@ class FoolslideExtractor(BaseExtractor):
def __init__(self, match):
BaseExtractor.__init__(self, match)
self.gallery_url = self.root + match.group(match.lastindex)
self.gallery_url = self.root + match[match.lastindex]
def request(self, url):
return BaseExtractor.request(

View File

@@ -28,7 +28,7 @@ class FuraffinityExtractor(Extractor):
def __init__(self, match):
Extractor.__init__(self, match)
self.user = match.group(1)
self.user = match[1]
self.offset = 0
def _init(self):
@@ -297,7 +297,7 @@ class FuraffinitySearchExtractor(FuraffinityExtractor):
def __init__(self, match):
FuraffinityExtractor.__init__(self, match)
self.query = text.parse_query(match.group(2))
self.query = text.parse_query(match[2])
if self.user and "q" not in self.query:
self.query["q"] = text.unquote(self.user)

View File

@@ -21,7 +21,7 @@ class FuskatorGalleryExtractor(GalleryExtractor):
example = "https://fuskator.com/thumbs/ID/"
def __init__(self, match):
self.gallery_hash = match.group(1)
self.gallery_hash = match[1]
url = "{}/thumbs/{}/index.html".format(self.root, self.gallery_hash)
GalleryExtractor.__init__(self, match, url)
@@ -72,7 +72,7 @@ class FuskatorSearchExtractor(Extractor):
def __init__(self, match):
Extractor.__init__(self, match)
self.path = match.group(1)
self.path = match[1]
def items(self):
url = self.root + self.path

View File

@@ -292,7 +292,7 @@ class GelbooruRedirectExtractor(GelbooruBase, Extractor):
def __init__(self, match):
Extractor.__init__(self, match)
self.url_base64 = match.group(1)
self.url_base64 = match[1]
def items(self):
url = text.ensure_http_scheme(binascii.a2b_base64(

View File

@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
# Copyright 2021-2023 Mike Fährmann
# Copyright 2021-2025 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -94,7 +94,7 @@ class GelbooruV01TagExtractor(GelbooruV01Extractor):
def __init__(self, match):
GelbooruV01Extractor.__init__(self, match)
self.tags = match.group(match.lastindex)
self.tags = match[match.lastindex]
def metadata(self):
return {"search_tags": text.unquote(self.tags.replace("+", " "))}
@@ -115,7 +115,7 @@ class GelbooruV01FavoriteExtractor(GelbooruV01Extractor):
def __init__(self, match):
GelbooruV01Extractor.__init__(self, match)
self.favorite_id = match.group(match.lastindex)
self.favorite_id = match[match.lastindex]
def metadata(self):
return {"favorite_id": text.parse_int(self.favorite_id)}
@@ -134,7 +134,7 @@ class GelbooruV01PostExtractor(GelbooruV01Extractor):
def __init__(self, match):
GelbooruV01Extractor.__init__(self, match)
self.post_id = match.group(match.lastindex)
self.post_id = match[match.lastindex]
def posts(self):
return (self._parse_post(self.post_id),)

View File

@@ -163,7 +163,7 @@ class GelbooruV02TagExtractor(GelbooruV02Extractor):
def __init__(self, match):
GelbooruV02Extractor.__init__(self, match)
tags = match.group(match.lastindex)
tags = match[match.lastindex]
self.tags = text.unquote(tags.replace("+", " "))
def metadata(self):
@@ -184,7 +184,7 @@ class GelbooruV02PoolExtractor(GelbooruV02Extractor):
def __init__(self, match):
GelbooruV02Extractor.__init__(self, match)
self.pool_id = match.group(match.lastindex)
self.pool_id = match[match.lastindex]
if self.category == "rule34":
self.posts = self._posts_pages
@@ -236,7 +236,7 @@ class GelbooruV02FavoriteExtractor(GelbooruV02Extractor):
def __init__(self, match):
GelbooruV02Extractor.__init__(self, match)
self.favorite_id = match.group(match.lastindex)
self.favorite_id = match[match.lastindex]
def metadata(self):
return {"favorite_id": text.parse_int(self.favorite_id)}
@@ -257,7 +257,7 @@ class GelbooruV02PostExtractor(GelbooruV02Extractor):
def __init__(self, match):
GelbooruV02Extractor.__init__(self, match)
self.post_id = match.group(match.lastindex)
self.post_id = match[match.lastindex]
def posts(self):
return self._pagination({"id": self.post_id})

View File

@@ -36,28 +36,28 @@ class GenericExtractor(Extractor):
example = "generic:https://www.nongnu.org/lzip/"
def __init__(self, match):
self.subcategory = match.group('domain')
self.subcategory = match['domain']
Extractor.__init__(self, match)
# Strip the "g(eneric):" prefix
# and inform about "forced" or "fallback" mode
if match.group('generic'):
self.url = match.group(0).partition(":")[2]
if match['generic']:
self.url = match[0].partition(":")[2]
else:
self.log.info("Falling back on generic information extractor.")
self.url = match.group(0)
self.url = match[0]
# Make sure we have a scheme, or use https
if match.group('scheme'):
self.scheme = match.group('scheme')
if match['scheme']:
self.scheme = match['scheme']
else:
self.scheme = 'https://'
self.url = text.ensure_http_scheme(self.url, self.scheme)
self.path = match.group('path')
self.path = match['path']
# Used to resolve relative image urls
self.root = self.scheme + match.group('domain')
self.root = self.scheme + match['domain']
def items(self):
"""Get page, extract metadata & images, yield them in suitable messages
@@ -184,7 +184,7 @@ class GenericExtractor(Extractor):
basematch = util.re(
r"(?i)(?:<base\s.*?href=[\"']?)(?P<url>[^\"' >]+)").search(page)
if basematch:
self.baseurl = basematch.group('url').rstrip('/')
self.baseurl = basematch['url'].rstrip('/')
# Otherwise, extract the base url from self.url
else:
if self.url.endswith("/"):

View File

@@ -23,7 +23,7 @@ class GofileFolderExtractor(Extractor):
def __init__(self, match):
Extractor.__init__(self, match)
self.content_id = match.group(1)
self.content_id = match[1]
def items(self):
recursive = self.config("recursive")

View File

@@ -27,7 +27,7 @@ class HatenablogExtractor(Extractor):
def __init__(self, match):
Extractor.__init__(self, match)
self.domain = match.group(1) or match.group(2)
self.domain = match[1] or match[2]
def _init(self):
self._find_img = util.re(r'<img +([^>]+)').finditer
@@ -42,8 +42,8 @@ class HatenablogExtractor(Extractor):
'<div class="entry-content hatenablog-entry">', '</div>')
images = []
for i in self._find_img(content):
attributes = i.group(1)
for match in self._find_img(content):
attributes = match[1]
if 'class="hatena-fotolife"' not in attributes:
continue
image = text.unescape(text.extr(attributes, 'src="', '"'))
@@ -67,9 +67,9 @@ class HatenablogEntriesExtractor(HatenablogExtractor):
def __init__(self, match):
HatenablogExtractor.__init__(self, match)
self.path = match.group(3)
self.path = match[3]
self.query = {key: value for key, value in text.parse_query(
match.group(4)).items() if self._acceptable_query(key)}
match[4]).items() if self._acceptable_query(key)}
def _init(self):
HatenablogExtractor._init(self)
@@ -91,7 +91,7 @@ class HatenablogEntriesExtractor(HatenablogExtractor):
yield from self._handle_full_articles(extr)
match = self._find_pager_url(page)
url = text.unescape(match.group(1)) if match else None
url = text.unescape(match[1]) if match else None
query = None
def _handle_partial_articles(self, extr):
@@ -128,7 +128,7 @@ class HatenablogEntryExtractor(HatenablogExtractor):
def __init__(self, match):
HatenablogExtractor.__init__(self, match)
self.path = match.group(3)
self.path = match[3]
def items(self):
url = "https://" + self.domain + "/entry/" + self.path

View File

@@ -25,8 +25,8 @@ class HentaifoundryExtractor(Extractor):
per_page = 25
def __init__(self, match):
self.root = (match.group(1) or "https://") + "www.hentai-foundry.com"
self.user = match.group(2)
self.root = (match[1] or "https://") + "www.hentai-foundry.com"
self.user = match[2]
Extractor.__init__(self, match)
self.page_url = ""
self.start_post = 0
@@ -306,7 +306,7 @@ class HentaifoundryImageExtractor(HentaifoundryExtractor):
def __init__(self, match):
HentaifoundryExtractor.__init__(self, match)
self.index = match.group(3)
self.index = match[3]
def items(self):
post_url = "{}/pictures/user/{}/{}/?enterAgree=1".format(
@@ -347,7 +347,7 @@ class HentaifoundryStoryExtractor(HentaifoundryExtractor):
def __init__(self, match):
HentaifoundryExtractor.__init__(self, match)
self.index = match.group(3)
self.index = match[3]
def items(self):
story_url = "{}/stories/user/{}/{}/x?enterAgree=1".format(

View File

@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
# Copyright 2020-2023 Mike Fährmann
# Copyright 2020-2025 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -20,7 +20,7 @@ class HentaihandGalleryExtractor(GalleryExtractor):
example = "https://hentaihand.com/en/comic/TITLE"
def __init__(self, match):
self.slug = match.group(1)
self.slug = match[1]
url = "{}/api/comics/{}".format(self.root, self.slug)
GalleryExtractor.__init__(self, match, url)

View File

@@ -37,14 +37,14 @@ class HentaihereChapterExtractor(HentaihereBase, ChapterExtractor):
r"Page 1 \| (.+) \(([^)]+)\) - Chapter \d+: (.+) by "
r"(.+) at ").match(title)
return {
"manga": match.group(1),
"manga": match[1],
"manga_id": text.parse_int(self.manga_id),
"chapter": text.parse_int(chapter),
"chapter_minor": sep + minor,
"chapter_id": text.parse_int(chapter_id),
"type": match.group(2),
"title": match.group(3),
"author": match.group(4),
"type": match[2],
"title": match[3],
"author": match[4],
"lang": "en",
"language": "English",
}

View File

@@ -22,7 +22,7 @@ class HentainexusGalleryExtractor(GalleryExtractor):
example = "https://hentainexus.com/view/12345"
def __init__(self, match):
self.gallery_id = match.group(1)
self.gallery_id = match[1]
url = "{}/view/{}".format(self.root, self.gallery_id)
GalleryExtractor.__init__(self, match, url)

View File

@@ -129,8 +129,8 @@ class HiperdexArtistExtractor(HiperdexBase, MangaExtractor):
example = "https://hiperdex.com/manga-artist/NAME/"
def __init__(self, match):
self.root = text.ensure_http_scheme(match.group(1))
MangaExtractor.__init__(self, match, self.root + match.group(2) + "/")
self.root = text.ensure_http_scheme(match[1])
MangaExtractor.__init__(self, match, self.root + match[2] + "/")
def chapters(self, page):
results = []

View File

@@ -198,7 +198,7 @@ class HitomiSearchExtractor(HitomiExtractor):
def __init__(self, match):
Extractor.__init__(self, match)
self.query = match.group(1)
self.query = match[1]
self.tags = text.unquote(self.query)
def items(self):
@@ -269,9 +269,9 @@ def _parse_gg(extr):
for match in util.re_compile(
r"if\s+\(g\s*===?\s*(\d+)\)[\s{]*o\s*=\s*(\d+)").finditer(page):
m[int(match.group(1))] = int(match.group(2))
m[int(match[1])] = int(match[2])
d = util.re_compile(r"(?:var\s|default:)\s*o\s*=\s*(\d+)").search(page)
b = util.re_compile(r"b:\s*[\"'](.+)[\"']").search(page)
return m, b.group(1).strip("/"), int(d.group(1)) if d else 0
return m, b[1].strip("/"), int(d[1]) if d else 0

View File

@@ -103,7 +103,7 @@ class HotleakCreatorExtractor(HotleakExtractor):
def __init__(self, match):
HotleakExtractor.__init__(self, match)
self.creator = match.group(1)
self.creator = match[1]
def posts(self):
url = "{}/{}".format(self.root, self.creator)
@@ -178,7 +178,7 @@ class HotleakSearchExtractor(HotleakExtractor):
def __init__(self, match):
HotleakExtractor.__init__(self, match)
self.params = match.group(1)
self.params = match[1]
def items(self):
data = {"_extractor": HotleakCreatorExtractor}

View File

@@ -159,7 +159,7 @@ class IdolcomplexTagExtractor(IdolcomplexExtractor):
def __init__(self, match):
IdolcomplexExtractor.__init__(self, match)
query = text.parse_query(match.group(1))
query = text.parse_query(match[1])
self.tags = text.unquote(query.get("tags", "").replace("+", " "))
self.start_page = text.parse_int(query.get("page"), 1)
self.next = text.parse_int(query.get("next"), 0)

View File

@@ -19,7 +19,7 @@ class ImagebamExtractor(Extractor):
def __init__(self, match):
Extractor.__init__(self, match)
self.path = match.group(1)
self.path = match[1]
def _init(self):
self.cookies.set("nsfw_inter", "1", domain="www.imagebam.com")

View File

@@ -1,7 +1,7 @@
# -*- coding: utf-8 -*-
# Copyright 2020 Leonid "Bepis" Pavel
# Copyright 2023 Mike Fährmann
# Copyright 2023-2025 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -23,7 +23,7 @@ class ImagechestGalleryExtractor(GalleryExtractor):
example = "https://imgchest.com/p/abcdefghijk"
def __init__(self, match):
self.gallery_id = match.group(1)
self.gallery_id = match[1]
url = self.root + "/p/" + self.gallery_id
GalleryExtractor.__init__(self, match, url)

View File

@@ -45,7 +45,7 @@ class ImagefapGalleryExtractor(ImagefapExtractor):
def __init__(self, match):
ImagefapExtractor.__init__(self, match)
self.gid = match.group(1)
self.gid = match[1]
self.image_id = ""
def items(self):
@@ -116,7 +116,7 @@ class ImagefapImageExtractor(ImagefapExtractor):
def __init__(self, match):
ImagefapExtractor.__init__(self, match)
self.image_id = match.group(1)
self.image_id = match[1]
def items(self):
url, data = self.get_image()

View File

@@ -28,8 +28,8 @@ class ImagehostImageExtractor(Extractor):
def __init__(self, match):
Extractor.__init__(self, match)
self.page_url = "http{}://{}".format(
"s" if self._https else "", match.group(1))
self.token = match.group(2)
"s" if self._https else "", match[1])
self.token = match[2]
if self._params == "simple":
self._params = {

View File

@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
# Copyright 2019-2023 Mike Fährmann
# Copyright 2019-2025 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -126,8 +126,8 @@ class ImgbbAlbumExtractor(ImgbbExtractor):
def __init__(self, match):
ImgbbExtractor.__init__(self, match)
self.album_name = None
self.album_id = match.group(1)
self.sort = text.parse_query(match.group(2)).get("sort", "date_desc")
self.album_id = match[1]
self.sort = text.parse_query(match[2]).get("sort", "date_desc")
self.page_url = "https://ibb.co/album/" + self.album_id
def metadata(self, page):
@@ -162,8 +162,8 @@ class ImgbbUserExtractor(ImgbbExtractor):
def __init__(self, match):
ImgbbExtractor.__init__(self, match)
self.user = match.group(1)
self.sort = text.parse_query(match.group(2)).get("sort", "date_desc")
self.user = match[1]
self.sort = text.parse_query(match[2]).get("sort", "date_desc")
self.page_url = "https://{}.imgbb.com/".format(self.user)
def metadata(self, page):
@@ -191,7 +191,7 @@ class ImgbbImageExtractor(ImgbbExtractor):
def __init__(self, match):
ImgbbExtractor.__init__(self, match)
self.image_id = match.group(1)
self.image_id = match[1]
def items(self):
url = "https://ibb.co/" + self.image_id

View File

@@ -62,7 +62,7 @@ class ImgboxGalleryExtractor(AsynchronousMixin, ImgboxExtractor):
def __init__(self, match):
ImgboxExtractor.__init__(self, match)
self.gallery_key = match.group(1)
self.gallery_key = match[1]
self.image_keys = []
def get_job_metadata(self):
@@ -93,7 +93,7 @@ class ImgboxImageExtractor(ImgboxExtractor):
def __init__(self, match):
ImgboxExtractor.__init__(self, match)
self.image_key = match.group(1)
self.image_key = match[1]
def get_image_keys(self):
return (self.image_key,)

View File

@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
# Copyright 2015-2023 Mike Fährmann
# Copyright 2015-2025 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -20,7 +20,7 @@ class ImgthGalleryExtractor(GalleryExtractor):
example = "https://imgth.com/gallery/123/TITLE"
def __init__(self, match):
self.gallery_id = gid = match.group(1)
self.gallery_id = gid = match[1]
url = "{}/gallery/{}/g/".format(self.root, gid)
GalleryExtractor.__init__(self, match, url)

View File

@@ -21,7 +21,7 @@ class ImgurExtractor(Extractor):
def __init__(self, match):
Extractor.__init__(self, match)
self.key = match.group(1)
self.key = match[1]
def _init(self):
self.api = ImgurAPI(self)
@@ -168,7 +168,7 @@ class ImgurFavoriteFolderExtractor(ImgurExtractor):
def __init__(self, match):
ImgurExtractor.__init__(self, match)
self.folder_id = match.group(2)
self.folder_id = match[2]
def items(self):
return self._items_queue(self.api.account_favorites_folder(

View File

@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
# Copyright 2020-2023 Mike Fährmann
# Copyright 2020-2025 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -109,12 +109,12 @@ class InkbunnyPoolExtractor(InkbunnyExtractor):
def __init__(self, match):
InkbunnyExtractor.__init__(self, match)
pid = match.group(1)
pid = match[1]
if pid:
self.pool_id = pid
self.orderby = "pool_order"
else:
params = text.parse_query(match.group(2))
params = text.parse_query(match[2])
self.pool_id = params.get("pool_id")
self.orderby = params.get("orderby", "pool_order")
@@ -142,12 +142,12 @@ class InkbunnyFavoriteExtractor(InkbunnyExtractor):
def __init__(self, match):
InkbunnyExtractor.__init__(self, match)
uid = match.group(1)
uid = match[1]
if uid:
self.user_id = uid
self.orderby = self.config("orderby", "fav_datetime")
else:
params = text.parse_query(match.group(2))
params = text.parse_query(match[2])
self.user_id = params.get("user_id")
self.orderby = params.get("orderby", "fav_datetime")
@@ -184,7 +184,7 @@ class InkbunnyUnreadExtractor(InkbunnyExtractor):
def __init__(self, match):
InkbunnyExtractor.__init__(self, match)
self.params = text.parse_query(match.group(1))
self.params = text.parse_query(match[1])
def posts(self):
params = self.params.copy()
@@ -204,7 +204,7 @@ class InkbunnySearchExtractor(InkbunnyExtractor):
def __init__(self, match):
InkbunnyExtractor.__init__(self, match)
self.params = text.parse_query(match.group(1))
self.params = text.parse_query(match[1])
def metadata(self):
return {"search": self.params}
@@ -241,8 +241,8 @@ class InkbunnyFollowingExtractor(InkbunnyExtractor):
def __init__(self, match):
InkbunnyExtractor.__init__(self, match)
self.user_id = match.group(1) or \
text.parse_query(match.group(2)).get("user_id")
self.user_id = match[1] or \
text.parse_query(match[2]).get("user_id")
def items(self):
url = self.root + "/watchlist_process.php"
@@ -276,7 +276,7 @@ class InkbunnyPostExtractor(InkbunnyExtractor):
def __init__(self, match):
InkbunnyExtractor.__init__(self, match)
self.submission_id = match.group(1)
self.submission_id = match[1]
def posts(self):
submissions = self.api.detail(({"submission_id": self.submission_id},))

View File

@@ -33,7 +33,7 @@ class InstagramExtractor(Extractor):
def __init__(self, match):
Extractor.__init__(self, match)
self.item = match.group(1)
self.item = match[1]
def _init(self):
self.www_claim = "0"
@@ -513,7 +513,7 @@ class InstagramGuideExtractor(InstagramExtractor):
def __init__(self, match):
InstagramExtractor.__init__(self, match)
self.guide_id = match.group(2)
self.guide_id = match[2]
def metadata(self):
return {"guide": self.api.guide(self.guide_id)}

View File

@@ -36,8 +36,8 @@ class JschanThreadExtractor(JschanExtractor):
def __init__(self, match):
JschanExtractor.__init__(self, match)
index = match.lastindex
self.board = match.group(index-1)
self.thread = match.group(index)
self.board = match[index-1]
self.thread = match[index]
def items(self):
url = "{}/{}/thread/{}.json".format(
@@ -70,7 +70,7 @@ class JschanBoardExtractor(JschanExtractor):
def __init__(self, match):
JschanExtractor.__init__(self, match)
self.board = match.group(match.lastindex)
self.board = match[match.lastindex]
def items(self):
url = "{}/{}/catalog.json".format(self.root, self.board)

View File

@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
# Copyright 2020-2023 Mike Fährmann
# Copyright 2020-2025 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -25,7 +25,7 @@ class KabeuchiUserExtractor(Extractor):
def __init__(self, match):
Extractor.__init__(self, match)
self.user_id = match.group(1)
self.user_id = match[1]
def items(self):
base = "{}/accounts/upfile/{}/{}/".format(

View File

@@ -24,8 +24,8 @@ class KeenspotComicExtractor(Extractor):
def __init__(self, match):
Extractor.__init__(self, match)
self.comic = match.group(1).lower()
self.path = match.group(2)
self.comic = match[1].lower()
self.path = match[2]
self.root = "http://" + self.comic + ".keenspot.com"
self._needle = ""

View File

@@ -29,9 +29,9 @@ class KemonoExtractor(Extractor):
cookies_domain = ".kemono.su"
def __init__(self, match):
tld = match.group(2)
self.category = domain = match.group(1)
self.root = text.root_from_url(match.group(0))
tld = match[2]
self.category = domain = match[1]
self.root = text.root_from_url(match[0])
self.cookies_domain = ".{}.{}".format(domain, tld)
Extractor.__init__(self, match)
@@ -125,7 +125,7 @@ class KemonoExtractor(Extractor):
match = find_hash(url)
if match:
file["hash"] = hash = match.group(1)
file["hash"] = hash = match[1]
if not duplicates:
if hash in hashes:
self.log.debug("Skipping %s (duplicate)", url)
@@ -310,7 +310,7 @@ class KemonoUserExtractor(KemonoExtractor):
example = "https://kemono.su/SERVICE/user/12345"
def __init__(self, match):
self.subcategory = match.group(3)
self.subcategory = match[3]
KemonoExtractor.__init__(self, match)
def posts(self):
@@ -356,7 +356,7 @@ class KemonoPostExtractor(KemonoExtractor):
example = "https://kemono.su/SERVICE/user/12345/post/12345"
def __init__(self, match):
self.subcategory = match.group(3)
self.subcategory = match[3]
KemonoExtractor.__init__(self, match)
def posts(self):
@@ -423,7 +423,7 @@ class KemonoDiscordExtractor(KemonoExtractor):
append = files.append
for attachment in post["attachments"]:
match = find_hash(attachment["path"])
attachment["hash"] = match.group(1) if match else ""
attachment["hash"] = match[1] if match else ""
attachment["type"] = "attachment"
append(attachment)
for path in find_inline(post["content"] or ""):

View File

@@ -26,7 +26,7 @@ class KhinsiderSoundtrackExtractor(AsynchronousMixin, Extractor):
def __init__(self, match):
Extractor.__init__(self, match)
self.album = match.group(1)
self.album = match[1]
def items(self):
url = self.root + "/game-soundtracks/album/" + self.album

View File

@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
# Copyright 2023 Mike Fährmann
# Copyright 2023-2025 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -24,7 +24,7 @@ class LexicaSearchExtractor(Extractor):
def __init__(self, match):
Extractor.__init__(self, match)
self.query = match.group(1)
self.query = match[1]
self.text = text.unquote(self.query).replace("+", " ")
def items(self):

View File

@@ -22,7 +22,7 @@ class LightroomGalleryExtractor(Extractor):
def __init__(self, match):
Extractor.__init__(self, match)
self.href = match.group(1)
self.href = match[1]
def items(self):
# Get config

View File

@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
# Copyright 2019-2023 Mike Fährmann
# Copyright 2019-2025 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -22,7 +22,7 @@ class LivedoorExtractor(Extractor):
def __init__(self, match):
Extractor.__init__(self, match)
self.user = match.group(1)
self.user = match[1]
def items(self):
for post in self.posts():
@@ -108,7 +108,7 @@ class LivedoorPostExtractor(LivedoorExtractor):
def __init__(self, match):
LivedoorExtractor.__init__(self, match)
self.post_id = match.group(2)
self.post_id = match[2]
def posts(self):
url = "{}/{}/archives/{}.html".format(

View File

@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
# Copyright 2016-2023 Mike Fährmann
# Copyright 2016-2025 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -51,7 +51,7 @@ class LusciousAlbumExtractor(LusciousExtractor):
def __init__(self, match):
LusciousExtractor.__init__(self, match)
self.album_id = match.group(1)
self.album_id = match[1]
def _init(self):
self.gif = self.config("gif", False)
@@ -280,7 +280,7 @@ class LusciousSearchExtractor(LusciousExtractor):
def __init__(self, match):
LusciousExtractor.__init__(self, match)
self.query = match.group(1)
self.query = match[1]
def items(self):
query = text.parse_query(self.query)

View File

@@ -45,8 +45,8 @@ class LynxchanThreadExtractor(LynxchanExtractor):
def __init__(self, match):
LynxchanExtractor.__init__(self, match)
index = match.lastindex
self.board = match.group(index-1)
self.thread = match.group(index)
self.board = match[index-1]
self.thread = match[index]
def items(self):
url = "{}/{}/res/{}.json".format(self.root, self.board, self.thread)
@@ -75,7 +75,7 @@ class LynxchanBoardExtractor(LynxchanExtractor):
def __init__(self, match):
LynxchanExtractor.__init__(self, match)
self.board = match.group(match.lastindex)
self.board = match[match.lastindex]
def items(self):
url = "{}/{}/catalog.json".format(self.root, self.board)

View File

@@ -172,7 +172,7 @@ class MangadexListExtractor(MangadexExtractor):
"/01234567-89ab-cdef-0123-456789abcdef/NAME")
def __init__(self, match):
if match.group(2) == "feed":
if match[2] == "feed":
self.subcategory = "list-feed"
else:
self.items = self._items_manga

View File

@@ -75,7 +75,7 @@ class MangaparkChapterExtractor(MangaparkBase, ChapterExtractor):
example = "https://mangapark.net/title/MANGA/12345-en-ch.01"
def __init__(self, match):
self.root = text.root_from_url(match.group(0))
self.root = text.root_from_url(match[0])
ChapterExtractor.__init__(self, match, False)
def metadata(self, _):
@@ -115,8 +115,8 @@ class MangaparkMangaExtractor(MangaparkBase, Extractor):
example = "https://mangapark.net/title/12345-MANGA"
def __init__(self, match):
self.root = text.root_from_url(match.group(0))
self.manga_id = int(match.group(1))
self.root = text.root_from_url(match[0])
self.manga_id = int(match[1])
Extractor.__init__(self, match)
def items(self):

View File

@@ -82,7 +82,7 @@ class MangoxoAlbumExtractor(MangoxoExtractor):
def __init__(self, match):
MangoxoExtractor.__init__(self, match)
self.album_id = match.group(1)
self.album_id = match[1]
def items(self):
self.login()
@@ -147,7 +147,7 @@ class MangoxoChannelExtractor(MangoxoExtractor):
def __init__(self, match):
MangoxoExtractor.__init__(self, match)
self.user = match.group(1)
self.user = match[1]
def items(self):
self.login()

View File

@@ -22,7 +22,7 @@ class MastodonExtractor(BaseExtractor):
def __init__(self, match):
BaseExtractor.__init__(self, match)
self.item = match.group(match.lastindex)
self.item = match[match.lastindex]
def _init(self):
self.instance = self.root.partition("://")[2]

View File

@@ -20,7 +20,7 @@ class MisskeyExtractor(BaseExtractor):
def __init__(self, match):
BaseExtractor.__init__(self, match)
self.item = match.group(match.lastindex)
self.item = match[match.lastindex]
def _init(self):
self.api = MisskeyAPI(self)

View File

@@ -98,7 +98,7 @@ class MoebooruTagExtractor(MoebooruExtractor):
def __init__(self, match):
MoebooruExtractor.__init__(self, match)
tags = match.group(match.lastindex)
tags = match[match.lastindex]
self.tags = text.unquote(tags.replace("+", " "))
def metadata(self):
@@ -118,7 +118,7 @@ class MoebooruPoolExtractor(MoebooruExtractor):
def __init__(self, match):
MoebooruExtractor.__init__(self, match)
self.pool_id = match.group(match.lastindex)
self.pool_id = match[match.lastindex]
def metadata(self):
if self.config("metadata"):
@@ -142,7 +142,7 @@ class MoebooruPostExtractor(MoebooruExtractor):
def __init__(self, match):
MoebooruExtractor.__init__(self, match)
self.post_id = match.group(match.lastindex)
self.post_id = match[match.lastindex]
def posts(self):
params = {"tags": "id:" + self.post_id}
@@ -159,8 +159,8 @@ class MoebooruPopularExtractor(MoebooruExtractor):
def __init__(self, match):
MoebooruExtractor.__init__(self, match)
self.scale = match.group(match.lastindex-1)
self.query = match.group(match.lastindex)
self.scale = match[match.lastindex-1]
self.query = match[match.lastindex]
def metadata(self):
self.params = params = text.parse_query(self.query)

View File

@@ -20,7 +20,7 @@ class MyhentaigalleryGalleryExtractor(GalleryExtractor):
example = "https://myhentaigallery.com/g/12345"
def __init__(self, match):
self.gallery_id = match.group(1)
self.gallery_id = match[1]
url = "{}/g/{}".format(self.root, self.gallery_id)
GalleryExtractor.__init__(self, match, url)

View File

@@ -33,13 +33,13 @@ class NaverPostExtractor(NaverBase, GalleryExtractor):
example = "https://blog.naver.com/BLOGID/12345"
def __init__(self, match):
blog_id = match.group(1)
blog_id = match[1]
if blog_id:
self.blog_id = blog_id
self.post_id = match.group(2)
self.post_id = match[2]
else:
self.blog_id = match.group(3)
self.post_id = match.group(4)
self.blog_id = match[3]
self.post_id = match[4]
url = "{}/PostView.nhn?blogId={}&logNo={}".format(
self.root, self.blog_id, self.post_id)
@@ -134,7 +134,7 @@ class NaverBlogExtractor(NaverBase, Extractor):
def __init__(self, match):
Extractor.__init__(self, match)
self.blog_id = match.group(1) or match.group(2)
self.blog_id = match[1] or match[2]
def items(self):
# fetch first post number

View File

@@ -30,7 +30,7 @@ class NewgroundsExtractor(Extractor):
def __init__(self, match):
Extractor.__init__(self, match)
self.user = match.group(1)
self.user = match[1]
self.user_root = "https://{}.newgrounds.com".format(self.user)
def _init(self):
@@ -397,12 +397,12 @@ class NewgroundsImageExtractor(NewgroundsExtractor):
def __init__(self, match):
NewgroundsExtractor.__init__(self, match)
if match.group(2):
self.user = match.group(2)
if match[2]:
self.user = match[2]
self.post_url = "https://www.newgrounds.com/art/view/{}/{}".format(
self.user, match.group(3))
self.user, match[3])
else:
self.post_url = text.ensure_http_scheme(match.group(0))
self.post_url = text.ensure_http_scheme(match[0])
def posts(self):
return (self.post_url,)
@@ -417,7 +417,7 @@ class NewgroundsMediaExtractor(NewgroundsExtractor):
def __init__(self, match):
NewgroundsExtractor.__init__(self, match)
self.user = ""
self.post_url = self.root + match.group(1)
self.post_url = self.root + match[1]
def posts(self):
return (self.post_url,)

View File

@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
# Copyright 2015-2023 Mike Fährmann
# Copyright 2015-2025 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -22,7 +22,7 @@ class NhentaiGalleryExtractor(GalleryExtractor):
example = "https://nhentai.net/g/12345/"
def __init__(self, match):
url = self.root + "/api/gallery/" + match.group(1)
url = self.root + "/api/gallery/" + match[1]
GalleryExtractor.__init__(self, match, url)
def metadata(self, page):

View File

@@ -23,7 +23,7 @@ class NijieExtractor(AsynchronousMixin, BaseExtractor):
def __init__(self, match):
BaseExtractor.__init__(self, match)
self.user_id = text.parse_int(match.group(match.lastindex))
self.user_id = text.parse_int(match[match.lastindex])
def initialize(self):
self.cookies_domain = "." + self.root.rpartition("/")[2]
@@ -296,7 +296,7 @@ class NijieImageExtractor(NijieExtractor):
def __init__(self, match):
NijieExtractor.__init__(self, match)
self.image_id = match.group(match.lastindex)
self.image_id = match[match.lastindex]
def image_ids(self):
return (self.image_id,)

View File

@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
# Copyright 2022-2023 Mike Fährmann
# Copyright 2022-2025 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -25,8 +25,8 @@ class NitterExtractor(BaseExtractor):
BaseExtractor.__init__(self, match)
lastindex = match.lastindex
self.user = match.group(lastindex)
self.user_id = match.group(lastindex + 1)
self.user = match[lastindex]
self.user_id = match[lastindex + 1]
self.user_obj = None
def items(self):

View File

@@ -110,7 +110,7 @@ class NozomiPostExtractor(NozomiExtractor):
def __init__(self, match):
NozomiExtractor.__init__(self, match)
self.post_id = match.group(1)
self.post_id = match[1]
def posts(self):
return (self.post_id,)
@@ -157,7 +157,7 @@ class NozomiSearchExtractor(NozomiExtractor):
def __init__(self, match):
NozomiExtractor.__init__(self, match)
self.tags = text.unquote(match.group(1)).split()
self.tags = text.unquote(match[1]).split()
def metadata(self):
return {"search_tags": self.tags}

View File

@@ -25,7 +25,7 @@ class NsfwalbumAlbumExtractor(GalleryExtractor):
example = "https://nsfwalbum.com/album/12345"
def __init__(self, match):
self.album_id = match.group(2)
self.album_id = match[2]
GalleryExtractor.__init__(self, match)
def metadata(self, page):

View File

@@ -354,7 +354,7 @@ class OAuthMastodon(OAuthBase):
def __init__(self, match):
OAuthBase.__init__(self, match)
self.instance = match.group(1)
self.instance = match[1]
def items(self):
yield Message.Version, 1

View File

@@ -67,7 +67,7 @@ class PhilomenaPostExtractor(PhilomenaExtractor):
def __init__(self, match):
PhilomenaExtractor.__init__(self, match)
self.image_id = match.group(match.lastindex)
self.image_id = match[match.lastindex]
def posts(self):
return (self.api.image(self.image_id),)
@@ -116,7 +116,7 @@ class PhilomenaGalleryExtractor(PhilomenaExtractor):
def __init__(self, match):
PhilomenaExtractor.__init__(self, match)
self.gallery_id = match.group(match.lastindex)
self.gallery_id = match[match.lastindex]
def metadata(self):
try:

View File

@@ -23,7 +23,7 @@ class PhotovogueUserExtractor(Extractor):
def __init__(self, match):
Extractor.__init__(self, match)
self.user_id = match.group(1)
self.user_id = match[1]
def items(self):
for photo in self.photos():

View File

@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
# Copyright 2021-2023 Mike Fährmann
# Copyright 2021-2025 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -25,7 +25,7 @@ class PicartoGalleryExtractor(Extractor):
def __init__(self, match):
Extractor.__init__(self, match)
self.username = match.group(1)
self.username = match[1]
def items(self):
for post in self.posts():

View File

@@ -27,7 +27,7 @@ class PillowfortExtractor(Extractor):
def __init__(self, match):
Extractor.__init__(self, match)
self.item = match.group(1)
self.item = match[1]
def items(self):
self.login()

View File

@@ -214,7 +214,7 @@ class PinterestPinExtractor(PinterestExtractor):
def __init__(self, match):
PinterestExtractor.__init__(self, match)
self.pin_id = match.group(1)
self.pin_id = match[1]
self.pin = None
def metadata(self):
@@ -236,8 +236,8 @@ class PinterestBoardExtractor(PinterestExtractor):
def __init__(self, match):
PinterestExtractor.__init__(self, match)
self.user = text.unquote(match.group(1))
self.board_name = text.unquote(match.group(2))
self.user = text.unquote(match[1])
self.board_name = text.unquote(match[2])
self.board = None
def metadata(self):
@@ -266,7 +266,7 @@ class PinterestUserExtractor(PinterestExtractor):
def __init__(self, match):
PinterestExtractor.__init__(self, match)
self.user = text.unquote(match.group(1))
self.user = text.unquote(match[1])
def items(self):
for board in self.api.boards(self.user):
@@ -285,7 +285,7 @@ class PinterestAllpinsExtractor(PinterestExtractor):
def __init__(self, match):
PinterestExtractor.__init__(self, match)
self.user = text.unquote(match.group(1))
self.user = text.unquote(match[1])
def metadata(self):
return {"user": self.user}
@@ -303,7 +303,7 @@ class PinterestCreatedExtractor(PinterestExtractor):
def __init__(self, match):
PinterestExtractor.__init__(self, match)
self.user = text.unquote(match.group(1))
self.user = text.unquote(match[1])
def metadata(self):
return {"user": self.user}
@@ -323,9 +323,9 @@ class PinterestSectionExtractor(PinterestExtractor):
def __init__(self, match):
PinterestExtractor.__init__(self, match)
self.user = text.unquote(match.group(1))
self.board_slug = text.unquote(match.group(2))
self.section_slug = text.unquote(match.group(3))
self.user = text.unquote(match[1])
self.board_slug = text.unquote(match[2])
self.section_slug = text.unquote(match[3])
self.section = None
def metadata(self):
@@ -351,7 +351,7 @@ class PinterestSearchExtractor(PinterestExtractor):
def __init__(self, match):
PinterestExtractor.__init__(self, match)
self.search = text.unquote(match.group(1))
self.search = text.unquote(match[1])
def metadata(self):
return {"search": self.search}

View File

@@ -39,7 +39,7 @@ class PixeldrainFileExtractor(PixeldrainExtractor):
def __init__(self, match):
Extractor.__init__(self, match)
self.file_id = match.group(1)
self.file_id = match[1]
def items(self):
url = "{}/api/file/{}".format(self.root, self.file_id)
@@ -64,8 +64,8 @@ class PixeldrainAlbumExtractor(PixeldrainExtractor):
def __init__(self, match):
Extractor.__init__(self, match)
self.album_id = match.group(1)
self.file_index = match.group(2)
self.album_id = match[1]
self.file_index = match[2]
def items(self):
url = "{}/api/list/{}".format(self.root, self.album_id)

View File

@@ -320,7 +320,7 @@ class PixivExtractor(Extractor):
if not caption:
return ""
return text.unescape(self.meta_captions_sub(
lambda m: '<a href="' + text.unquote(m.group(1)), caption))
lambda m: '<a href="' + text.unquote(m[1]), caption))
def _fallback_image(self, src):
if isinstance(src, str):
@@ -558,7 +558,7 @@ class PixivWorkExtractor(PixivExtractor):
def __init__(self, match):
PixivExtractor.__init__(self, match)
self.illust_id = match.group(1) or match.group(2)
self.illust_id = match[1] or match[2]
def works(self):
works = (self.api.illust_detail(self.illust_id),)
@@ -658,7 +658,7 @@ class PixivRankingExtractor(PixivExtractor):
def __init__(self, match):
PixivExtractor.__init__(self, match)
self.query = match.group(1)
self.query = match[1]
self.mode = self.date = None
def works(self):
@@ -812,7 +812,7 @@ class PixivPixivisionExtractor(PixivExtractor):
def __init__(self, match):
PixivExtractor.__init__(self, match)
self.pixivision_id = match.group(1)
self.pixivision_id = match[1]
def works(self):
return (
@@ -870,7 +870,7 @@ class PixivNovelExtractor(PixivExtractor):
def __init__(self, match):
PixivExtractor.__init__(self, match)
self.novel_id = match.group(1)
self.novel_id = match[1]
def items(self):
tags = self.config("tags", "japanese")
@@ -1041,7 +1041,7 @@ class PixivSketchExtractor(Extractor):
def __init__(self, match):
Extractor.__init__(self, match)
self.username = match.group(1)
self.username = match[1]
def items(self):
headers = {"Referer": "{}/@{}".format(self.root, self.username)}

View File

@@ -73,7 +73,7 @@ class PlurkTimelineExtractor(PlurkExtractor):
def __init__(self, match):
PlurkExtractor.__init__(self, match)
self.user = match.group(1)
self.user = match[1]
def plurks(self):
url = "{}/{}".format(self.root, self.user)

View File

@@ -23,7 +23,7 @@ class PoringaExtractor(Extractor):
def __init__(self, match):
Extractor.__init__(self, match)
self.item = match.group(1)
self.item = match[1]
self.__cookies = True
def items(self):

View File

@@ -60,7 +60,7 @@ class PornhubGalleryExtractor(PornhubExtractor):
def __init__(self, match):
PornhubExtractor.__init__(self, match)
self.gallery_id = match.group(1)
self.gallery_id = match[1]
self._first = None
def items(self):
@@ -141,7 +141,7 @@ class PornhubGifExtractor(PornhubExtractor):
def __init__(self, match):
PornhubExtractor.__init__(self, match)
self.gallery_id = match.group(1)
self.gallery_id = match[1]
def items(self):
url = "{}/gif/{}".format(self.root, self.gallery_id)

View File

@@ -62,7 +62,7 @@ class PornpicsGalleryExtractor(PornpicsExtractor, GalleryExtractor):
example = "https://www.pornpics.com/galleries/TITLE-12345/"
def __init__(self, match):
url = "{}/galleries/{}/".format(self.root, match.group(1))
url = "{}/galleries/{}/".format(self.root, match[1])
GalleryExtractor.__init__(self, match, url)
items = GalleryExtractor.items

View File

@@ -46,8 +46,8 @@ class PostmillExtractor(BaseExtractor):
'</div>')
match = self._search_canonical_url(post_canonical_url)
forum = match.group(1)
id = int(match.group(2))
forum = match[1]
id = int(match[2])
is_text_post = (url[0] == "/")
is_image_post = self._search_image_tag(page) is not None
@@ -142,8 +142,8 @@ class PostmillPostExtractor(PostmillExtractor):
def __init__(self, match):
PostmillExtractor.__init__(self, match)
self.forum = match.group(3)
self.post_id = match.group(4)
self.forum = match[3]
self.post_id = match[4]
def post_urls(self):
return (self.root + "/f/" + self.forum + "/" + self.post_id,)

View File

@@ -25,7 +25,7 @@ class RawkumaChapterExtractor(RawkumaBase, ChapterExtractor):
example = "https://rawkuma.net/TITLE-chapter-123/"
def __init__(self, match):
url = "{}/{}/".format(self.root, match.group(1))
url = "{}/{}/".format(self.root, match[1])
ChapterExtractor.__init__(self, match, url)
def metadata(self, page):
@@ -61,7 +61,7 @@ class RawkumaMangaExtractor(RawkumaBase, MangaExtractor):
example = "https://rawkuma.net/manga/TITLE/"
def __init__(self, match):
url = "{}/manga/{}/".format(self.root, match.group(1))
url = "{}/manga/{}/".format(self.root, match[1])
MangaExtractor.__init__(self, match, url)
def chapters(self, page):

View File

@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
# Copyright 2019-2023 Mike Fährmann
# Copyright 2019-2025 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -23,7 +23,7 @@ class ReactorExtractor(BaseExtractor):
def __init__(self, match):
BaseExtractor.__init__(self, match)
url = text.ensure_http_scheme(match.group(0), "http://")
url = text.ensure_http_scheme(match[0], "http://")
pos = url.index("/", 10)
self.root = url[:pos]
self.path = url[pos:]
@@ -176,7 +176,7 @@ class ReactorTagExtractor(ReactorExtractor):
def __init__(self, match):
ReactorExtractor.__init__(self, match)
self.tag = match.group(match.lastindex)
self.tag = match[match.lastindex]
def metadata(self):
return {"search_tags": text.unescape(self.tag).replace("+", " ")}
@@ -192,7 +192,7 @@ class ReactorSearchExtractor(ReactorExtractor):
def __init__(self, match):
ReactorExtractor.__init__(self, match)
self.tag = match.group(match.lastindex)
self.tag = match[match.lastindex]
def metadata(self):
return {"search_tags": text.unescape(self.tag).replace("+", " ")}
@@ -207,7 +207,7 @@ class ReactorUserExtractor(ReactorExtractor):
def __init__(self, match):
ReactorExtractor.__init__(self, match)
self.user = match.group(match.lastindex)
self.user = match[match.lastindex]
def metadata(self):
return {"user": text.unescape(self.user).replace("+", " ")}
@@ -221,7 +221,7 @@ class ReactorPostExtractor(ReactorExtractor):
def __init__(self, match):
ReactorExtractor.__init__(self, match)
self.post_id = match.group(match.lastindex)
self.post_id = match[match.lastindex]
def items(self):
post = self.request(self.root + self.path).text

View File

@@ -50,7 +50,7 @@ class ReadcomiconlineIssueExtractor(ReadcomiconlineBase, ChapterExtractor):
def __init__(self, match):
ChapterExtractor.__init__(self, match)
self.params = match.group(2)
self.params = match[2]
def _init(self):
params = text.parse_query(self.params)
@@ -71,7 +71,7 @@ class ReadcomiconlineIssueExtractor(ReadcomiconlineBase, ChapterExtractor):
match = re.match(r"(?:Issue )?#(\d+)|(.+)", iinfo)
return {
"comic": comic,
"issue": match.group(1) or match.group(2),
"issue": match[1] or match[2],
"issue_id": text.parse_int(self.issue_id),
"lang": "en",
"language": "English",

View File

@@ -28,4 +28,4 @@ class RecursiveExtractor(Extractor):
page = self.request(text.ensure_http_scheme(url)).text
for match in util.re(r"https?://[^\s\"']+").finditer(page):
yield Message.Queue, match.group(0), {}
yield Message.Queue, match[0], {}

View File

@@ -141,7 +141,7 @@ class RedditExtractor(Extractor):
match = match_submission(url)
if match:
extra.append(match.group(1))
extra.append(match[1])
elif not match_user(url) and not match_subreddit(url):
if previews and "comment" not in data and \
"preview" in data:
@@ -309,7 +309,7 @@ class RedditSubmissionExtractor(RedditExtractor):
def __init__(self, match):
RedditExtractor.__init__(self, match)
self.submission_id = match.group(1)
self.submission_id = match[1]
def submissions(self):
return (self.api.submission(self.submission_id),)
@@ -326,14 +326,14 @@ class RedditImageExtractor(Extractor):
def __init__(self, match):
Extractor.__init__(self, match)
domain = match.group(1)
self.path = match.group(2)
domain = match[1]
self.path = match[2]
if domain == "preview.redd.it":
self.domain = "i.redd.it"
self.query = ""
else:
self.domain = domain
self.query = match.group(3) or ""
self.query = match[3] or ""
def items(self):
url = "https://{}/{}{}".format(self.domain, self.path, self.query)

View File

@@ -23,7 +23,7 @@ class RedgifsExtractor(Extractor):
def __init__(self, match):
Extractor.__init__(self, match)
self.key = match.group(1)
self.key = match[1]
def _init(self):
self.api = RedgifsAPI(self)
@@ -94,7 +94,7 @@ class RedgifsUserExtractor(RedgifsExtractor):
def __init__(self, match):
RedgifsExtractor.__init__(self, match)
self.query = match.group(2)
self.query = match[2]
def metadata(self):
return {"userName": self.key}
@@ -116,7 +116,7 @@ class RedgifsCollectionExtractor(RedgifsExtractor):
def __init__(self, match):
RedgifsExtractor.__init__(self, match)
self.collection_id = match.group(2)
self.collection_id = match[2]
def metadata(self):
collection = self.api.collection_info(self.key, self.collection_id)
@@ -151,7 +151,7 @@ class RedgifsNichesExtractor(RedgifsExtractor):
def __init__(self, match):
RedgifsExtractor.__init__(self, match)
self.query = match.group(2)
self.query = match[2]
def gifs(self):
order = text.parse_query(self.query).get("order")

View File

@@ -61,7 +61,7 @@ class Rule34usTagExtractor(Rule34usExtractor):
def __init__(self, match):
Rule34usExtractor.__init__(self, match)
self.tags = text.unquote(match.group(1).replace("+", " "))
self.tags = text.unquote(match[1].replace("+", " "))
def metadata(self):
return {"search_tags": self.tags}
@@ -98,7 +98,7 @@ class Rule34usPostExtractor(Rule34usExtractor):
def __init__(self, match):
Rule34usExtractor.__init__(self, match)
self.post_id = match.group(1)
self.post_id = match[1]
def posts(self):
return (self._parse_post(self.post_id),)

View File

@@ -124,7 +124,7 @@ class SankakuTagExtractor(SankakuExtractor):
def __init__(self, match):
SankakuExtractor.__init__(self, match)
query = text.parse_query(match.group(1))
query = text.parse_query(match[1])
self.tags = text.unquote(query.get("tags", "").replace("+", " "))
if "date:" in self.tags:
@@ -154,7 +154,7 @@ class SankakuPoolExtractor(SankakuExtractor):
def __init__(self, match):
SankakuExtractor.__init__(self, match)
self.pool_id = match.group(1)
self.pool_id = match[1]
def metadata(self):
pool = self.api.pools(self.pool_id)
@@ -180,7 +180,7 @@ class SankakuPostExtractor(SankakuExtractor):
def __init__(self, match):
SankakuExtractor.__init__(self, match)
self.post_id = match.group(1)
self.post_id = match[1]
def posts(self):
return self.api.posts(self.post_id)
@@ -194,7 +194,7 @@ class SankakuBooksExtractor(SankakuExtractor):
def __init__(self, match):
SankakuExtractor.__init__(self, match)
query = text.parse_query(match.group(1))
query = text.parse_query(match[1])
self.tags = text.unquote(query.get("tags", "").replace("+", " "))
def items(self):

View File

@@ -19,7 +19,7 @@ class SankakucomplexExtractor(Extractor):
def __init__(self, match):
Extractor.__init__(self, match)
self.path = match.group(1)
self.path = match[1]
class SankakucomplexArticleExtractor(SankakucomplexExtractor):

View File

@@ -187,7 +187,7 @@ class SeigaImageExtractor(SeigaExtractor):
def __init__(self, match):
SeigaExtractor.__init__(self, match)
self.image_id = match.group(1)
self.image_id = match[1]
def skip(self, num):
self.start_image += num

View File

@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
# Copyright 2019-2023 Mike Fährmann
# Copyright 2019-2025 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -20,7 +20,7 @@ class ShopifyExtractor(BaseExtractor):
def __init__(self, match):
BaseExtractor.__init__(self, match)
self.item_url = self.root + match.group(match.lastindex)
self.item_url = self.root + match[match.lastindex]
def items(self):
data = self.metadata()

Some files were not shown because too many files have changed in this diff Show More