@@ -68,7 +68,7 @@ class _2chBoardExtractor(Extractor):
|
|||||||
|
|
||||||
def __init__(self, match):
|
def __init__(self, match):
|
||||||
Extractor.__init__(self, match)
|
Extractor.__init__(self, match)
|
||||||
self.board = match.group(1)
|
self.board = match[1]
|
||||||
|
|
||||||
def items(self):
|
def items(self):
|
||||||
# index page
|
# index page
|
||||||
|
|||||||
@@ -86,7 +86,7 @@ class _2chenBoardExtractor(Extractor):
|
|||||||
|
|
||||||
def __init__(self, match):
|
def __init__(self, match):
|
||||||
Extractor.__init__(self, match)
|
Extractor.__init__(self, match)
|
||||||
self.board = match.group(1)
|
self.board = match[1]
|
||||||
|
|
||||||
def items(self):
|
def items(self):
|
||||||
url = "{}/{}/catalog".format(self.root, self.board)
|
url = "{}/{}/catalog".format(self.root, self.board)
|
||||||
|
|||||||
@@ -104,7 +104,7 @@ class _35photoUserExtractor(_35photoExtractor):
|
|||||||
|
|
||||||
def __init__(self, match):
|
def __init__(self, match):
|
||||||
_35photoExtractor.__init__(self, match)
|
_35photoExtractor.__init__(self, match)
|
||||||
self.user = match.group(1)
|
self.user = match[1]
|
||||||
self.user_id = 0
|
self.user_id = 0
|
||||||
|
|
||||||
def metadata(self):
|
def metadata(self):
|
||||||
@@ -133,7 +133,7 @@ class _35photoTagExtractor(_35photoExtractor):
|
|||||||
|
|
||||||
def __init__(self, match):
|
def __init__(self, match):
|
||||||
_35photoExtractor.__init__(self, match)
|
_35photoExtractor.__init__(self, match)
|
||||||
self.tag = match.group(1)
|
self.tag = match[1]
|
||||||
|
|
||||||
def metadata(self):
|
def metadata(self):
|
||||||
return {"search_tag": text.unquote(self.tag).lower()}
|
return {"search_tag": text.unquote(self.tag).lower()}
|
||||||
@@ -198,7 +198,7 @@ class _35photoImageExtractor(_35photoExtractor):
|
|||||||
|
|
||||||
def __init__(self, match):
|
def __init__(self, match):
|
||||||
_35photoExtractor.__init__(self, match)
|
_35photoExtractor.__init__(self, match)
|
||||||
self.photo_id = match.group(1)
|
self.photo_id = match[1]
|
||||||
|
|
||||||
def photos(self):
|
def photos(self):
|
||||||
return (self.photo_id,)
|
return (self.photo_id,)
|
||||||
|
|||||||
@@ -93,8 +93,8 @@ class _4archiveBoardExtractor(Extractor):
|
|||||||
|
|
||||||
def __init__(self, match):
|
def __init__(self, match):
|
||||||
Extractor.__init__(self, match)
|
Extractor.__init__(self, match)
|
||||||
self.board = match.group(1)
|
self.board = match[1]
|
||||||
self.num = text.parse_int(match.group(2), 1)
|
self.num = text.parse_int(match[2], 1)
|
||||||
|
|
||||||
def items(self):
|
def items(self):
|
||||||
data = {"_extractor": _4archiveThreadExtractor}
|
data = {"_extractor": _4archiveThreadExtractor}
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
# -*- coding: utf-8 -*-
|
# -*- coding: utf-8 -*-
|
||||||
|
|
||||||
# Copyright 2015-2023 Mike Fährmann
|
# Copyright 2015-2025 Mike Fährmann
|
||||||
#
|
#
|
||||||
# This program is free software; you can redistribute it and/or modify
|
# This program is free software; you can redistribute it and/or modify
|
||||||
# it under the terms of the GNU General Public License version 2 as
|
# it under the terms of the GNU General Public License version 2 as
|
||||||
@@ -59,7 +59,7 @@ class _4chanBoardExtractor(Extractor):
|
|||||||
|
|
||||||
def __init__(self, match):
|
def __init__(self, match):
|
||||||
Extractor.__init__(self, match)
|
Extractor.__init__(self, match)
|
||||||
self.board = match.group(1)
|
self.board = match[1]
|
||||||
|
|
||||||
def items(self):
|
def items(self):
|
||||||
url = "https://a.4cdn.org/{}/threads.json".format(self.board)
|
url = "https://a.4cdn.org/{}/threads.json".format(self.board)
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
# -*- coding: utf-8 -*-
|
# -*- coding: utf-8 -*-
|
||||||
|
|
||||||
# Copyright 2019-2023 Mike Fährmann
|
# Copyright 2019-2025 Mike Fährmann
|
||||||
#
|
#
|
||||||
# This program is free software; you can redistribute it and/or modify
|
# This program is free software; you can redistribute it and/or modify
|
||||||
# it under the terms of the GNU General Public License version 2 as
|
# it under the terms of the GNU General Public License version 2 as
|
||||||
@@ -97,7 +97,7 @@ class _500pxUserExtractor(_500pxExtractor):
|
|||||||
|
|
||||||
def __init__(self, match):
|
def __init__(self, match):
|
||||||
_500pxExtractor.__init__(self, match)
|
_500pxExtractor.__init__(self, match)
|
||||||
self.user = match.group(1)
|
self.user = match[1]
|
||||||
|
|
||||||
def photos(self):
|
def photos(self):
|
||||||
variables = {"username": self.user, "pageSize": 20}
|
variables = {"username": self.user, "pageSize": 20}
|
||||||
@@ -207,7 +207,7 @@ class _500pxImageExtractor(_500pxExtractor):
|
|||||||
|
|
||||||
def __init__(self, match):
|
def __init__(self, match):
|
||||||
_500pxExtractor.__init__(self, match)
|
_500pxExtractor.__init__(self, match)
|
||||||
self.photo_id = match.group(1)
|
self.photo_id = match[1]
|
||||||
|
|
||||||
def photos(self):
|
def photos(self):
|
||||||
edges = ({"node": {"legacyId": self.photo_id}},)
|
edges = ({"node": {"legacyId": self.photo_id}},)
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
# -*- coding: utf-8 -*-
|
# -*- coding: utf-8 -*-
|
||||||
|
|
||||||
# Copyright 2022-2023 Mike Fährmann
|
# Copyright 2022-2025 Mike Fährmann
|
||||||
#
|
#
|
||||||
# This program is free software; you can redistribute it and/or modify
|
# This program is free software; you can redistribute it and/or modify
|
||||||
# it under the terms of the GNU General Public License version 2 as
|
# it under the terms of the GNU General Public License version 2 as
|
||||||
@@ -23,7 +23,7 @@ class _8chanExtractor(Extractor):
|
|||||||
root = "https://8chan.moe"
|
root = "https://8chan.moe"
|
||||||
|
|
||||||
def __init__(self, match):
|
def __init__(self, match):
|
||||||
self.root = "https://8chan." + match.group(1)
|
self.root = "https://8chan." + match[1]
|
||||||
Extractor.__init__(self, match)
|
Extractor.__init__(self, match)
|
||||||
|
|
||||||
@memcache()
|
@memcache()
|
||||||
|
|||||||
@@ -26,8 +26,8 @@ class _8musesAlbumExtractor(Extractor):
|
|||||||
|
|
||||||
def __init__(self, match):
|
def __init__(self, match):
|
||||||
Extractor.__init__(self, match)
|
Extractor.__init__(self, match)
|
||||||
self.path = match.group(1)
|
self.path = match[1]
|
||||||
self.params = match.group(2) or ""
|
self.params = match[2] or ""
|
||||||
|
|
||||||
def items(self):
|
def items(self):
|
||||||
url = self.root + self.path + self.params
|
url = self.root + self.path + self.params
|
||||||
|
|||||||
@@ -22,7 +22,7 @@ class AdultempireGalleryExtractor(GalleryExtractor):
|
|||||||
|
|
||||||
def __init__(self, match):
|
def __init__(self, match):
|
||||||
GalleryExtractor.__init__(self, match)
|
GalleryExtractor.__init__(self, match)
|
||||||
self.gallery_id = match.group(2)
|
self.gallery_id = match[2]
|
||||||
|
|
||||||
def _init(self):
|
def _init(self):
|
||||||
self.cookies.set("ageConfirmed", "true", domain="www.adultempire.com")
|
self.cookies.set("ageConfirmed", "true", domain="www.adultempire.com")
|
||||||
|
|||||||
@@ -24,7 +24,7 @@ class ArchitizerProjectExtractor(GalleryExtractor):
|
|||||||
example = "https://architizer.com/projects/NAME/"
|
example = "https://architizer.com/projects/NAME/"
|
||||||
|
|
||||||
def __init__(self, match):
|
def __init__(self, match):
|
||||||
url = "{}/projects/{}/".format(self.root, match.group(1))
|
url = "{}/projects/{}/".format(self.root, match[1])
|
||||||
GalleryExtractor.__init__(self, match, url)
|
GalleryExtractor.__init__(self, match, url)
|
||||||
|
|
||||||
def metadata(self, page):
|
def metadata(self, page):
|
||||||
@@ -68,7 +68,7 @@ class ArchitizerFirmExtractor(Extractor):
|
|||||||
|
|
||||||
def __init__(self, match):
|
def __init__(self, match):
|
||||||
Extractor.__init__(self, match)
|
Extractor.__init__(self, match)
|
||||||
self.firm = match.group(1)
|
self.firm = match[1]
|
||||||
|
|
||||||
def items(self):
|
def items(self):
|
||||||
url = url = "{}/firms/{}/?requesting_merlin=pages".format(
|
url = url = "{}/firms/{}/?requesting_merlin=pages".format(
|
||||||
|
|||||||
@@ -25,7 +25,7 @@ class ArtstationExtractor(Extractor):
|
|||||||
|
|
||||||
def __init__(self, match):
|
def __init__(self, match):
|
||||||
Extractor.__init__(self, match)
|
Extractor.__init__(self, match)
|
||||||
self.user = match.group(1) or match.group(2)
|
self.user = match[1] or match[2]
|
||||||
|
|
||||||
def _init(self):
|
def _init(self):
|
||||||
self.session.headers["Cache-Control"] = "max-age=0"
|
self.session.headers["Cache-Control"] = "max-age=0"
|
||||||
@@ -215,7 +215,7 @@ class ArtstationAlbumExtractor(ArtstationExtractor):
|
|||||||
|
|
||||||
def __init__(self, match):
|
def __init__(self, match):
|
||||||
ArtstationExtractor.__init__(self, match)
|
ArtstationExtractor.__init__(self, match)
|
||||||
self.album_id = text.parse_int(match.group(3))
|
self.album_id = text.parse_int(match[3])
|
||||||
|
|
||||||
def metadata(self):
|
def metadata(self):
|
||||||
userinfo = self.get_user_info(self.user)
|
userinfo = self.get_user_info(self.user)
|
||||||
@@ -264,7 +264,7 @@ class ArtstationCollectionExtractor(ArtstationExtractor):
|
|||||||
|
|
||||||
def __init__(self, match):
|
def __init__(self, match):
|
||||||
ArtstationExtractor.__init__(self, match)
|
ArtstationExtractor.__init__(self, match)
|
||||||
self.collection_id = match.group(2)
|
self.collection_id = match[2]
|
||||||
|
|
||||||
def metadata(self):
|
def metadata(self):
|
||||||
url = "{}/collections/{}.json".format(
|
url = "{}/collections/{}.json".format(
|
||||||
@@ -314,8 +314,8 @@ class ArtstationChallengeExtractor(ArtstationExtractor):
|
|||||||
|
|
||||||
def __init__(self, match):
|
def __init__(self, match):
|
||||||
ArtstationExtractor.__init__(self, match)
|
ArtstationExtractor.__init__(self, match)
|
||||||
self.challenge_id = match.group(1)
|
self.challenge_id = match[1]
|
||||||
self.sorting = match.group(2) or "popular"
|
self.sorting = match[2] or "popular"
|
||||||
|
|
||||||
def items(self):
|
def items(self):
|
||||||
challenge_url = "{}/contests/_/challenges/{}.json".format(
|
challenge_url = "{}/contests/_/challenges/{}.json".format(
|
||||||
@@ -359,7 +359,7 @@ class ArtstationSearchExtractor(ArtstationExtractor):
|
|||||||
|
|
||||||
def __init__(self, match):
|
def __init__(self, match):
|
||||||
ArtstationExtractor.__init__(self, match)
|
ArtstationExtractor.__init__(self, match)
|
||||||
self.params = query = text.parse_query(match.group(1))
|
self.params = query = text.parse_query(match[1])
|
||||||
self.query = text.unquote(query.get("query") or query.get("q", ""))
|
self.query = text.unquote(query.get("query") or query.get("q", ""))
|
||||||
self.sorting = query.get("sort_by", "relevance").lower()
|
self.sorting = query.get("sort_by", "relevance").lower()
|
||||||
self.tags = query.get("tags", "").split(",")
|
self.tags = query.get("tags", "").split(",")
|
||||||
@@ -406,7 +406,7 @@ class ArtstationArtworkExtractor(ArtstationExtractor):
|
|||||||
|
|
||||||
def __init__(self, match):
|
def __init__(self, match):
|
||||||
ArtstationExtractor.__init__(self, match)
|
ArtstationExtractor.__init__(self, match)
|
||||||
self.query = text.parse_query(match.group(1))
|
self.query = text.parse_query(match[1])
|
||||||
|
|
||||||
def metadata(self):
|
def metadata(self):
|
||||||
return {"artwork": self.query}
|
return {"artwork": self.query}
|
||||||
@@ -426,7 +426,7 @@ class ArtstationImageExtractor(ArtstationExtractor):
|
|||||||
|
|
||||||
def __init__(self, match):
|
def __init__(self, match):
|
||||||
ArtstationExtractor.__init__(self, match)
|
ArtstationExtractor.__init__(self, match)
|
||||||
self.project_id = match.group(1)
|
self.project_id = match[1]
|
||||||
self.assets = None
|
self.assets = None
|
||||||
|
|
||||||
def metadata(self):
|
def metadata(self):
|
||||||
|
|||||||
@@ -29,7 +29,7 @@ class AryionExtractor(Extractor):
|
|||||||
|
|
||||||
def __init__(self, match):
|
def __init__(self, match):
|
||||||
Extractor.__init__(self, match)
|
Extractor.__init__(self, match)
|
||||||
self.user = match.group(1)
|
self.user = match[1]
|
||||||
self.recursive = True
|
self.recursive = True
|
||||||
|
|
||||||
def login(self):
|
def login(self):
|
||||||
|
|||||||
@@ -88,7 +88,7 @@ class BehanceGalleryExtractor(BehanceExtractor):
|
|||||||
|
|
||||||
def __init__(self, match):
|
def __init__(self, match):
|
||||||
BehanceExtractor.__init__(self, match)
|
BehanceExtractor.__init__(self, match)
|
||||||
self.gallery_id = match.group(1)
|
self.gallery_id = match[1]
|
||||||
|
|
||||||
def _init(self):
|
def _init(self):
|
||||||
BehanceExtractor._init(self)
|
BehanceExtractor._init(self)
|
||||||
@@ -229,7 +229,7 @@ class BehanceUserExtractor(BehanceExtractor):
|
|||||||
|
|
||||||
def __init__(self, match):
|
def __init__(self, match):
|
||||||
BehanceExtractor.__init__(self, match)
|
BehanceExtractor.__init__(self, match)
|
||||||
self.user = match.group(1)
|
self.user = match[1]
|
||||||
|
|
||||||
def galleries(self):
|
def galleries(self):
|
||||||
endpoint = "GetProfileProjects"
|
endpoint = "GetProfileProjects"
|
||||||
@@ -257,7 +257,7 @@ class BehanceCollectionExtractor(BehanceExtractor):
|
|||||||
|
|
||||||
def __init__(self, match):
|
def __init__(self, match):
|
||||||
BehanceExtractor.__init__(self, match)
|
BehanceExtractor.__init__(self, match)
|
||||||
self.collection_id = match.group(1)
|
self.collection_id = match[1]
|
||||||
|
|
||||||
def galleries(self):
|
def galleries(self):
|
||||||
endpoint = "GetMoodboardItemsAndRecommendations"
|
endpoint = "GetMoodboardItemsAndRecommendations"
|
||||||
|
|||||||
@@ -104,7 +104,7 @@ class BloggerPostExtractor(BloggerExtractor):
|
|||||||
|
|
||||||
def __init__(self, match):
|
def __init__(self, match):
|
||||||
BloggerExtractor.__init__(self, match)
|
BloggerExtractor.__init__(self, match)
|
||||||
self.path = match.group(match.lastindex)
|
self.path = match[match.lastindex]
|
||||||
|
|
||||||
def posts(self, blog):
|
def posts(self, blog):
|
||||||
return (self.api.post_by_path(blog["id"], self.path),)
|
return (self.api.post_by_path(blog["id"], self.path),)
|
||||||
@@ -128,7 +128,7 @@ class BloggerSearchExtractor(BloggerExtractor):
|
|||||||
|
|
||||||
def __init__(self, match):
|
def __init__(self, match):
|
||||||
BloggerExtractor.__init__(self, match)
|
BloggerExtractor.__init__(self, match)
|
||||||
self.query = text.unquote(match.group(match.lastindex))
|
self.query = text.unquote(match[match.lastindex])
|
||||||
|
|
||||||
def posts(self, blog):
|
def posts(self, blog):
|
||||||
return self.api.blog_search(blog["id"], self.query)
|
return self.api.blog_search(blog["id"], self.query)
|
||||||
@@ -145,7 +145,7 @@ class BloggerLabelExtractor(BloggerExtractor):
|
|||||||
|
|
||||||
def __init__(self, match):
|
def __init__(self, match):
|
||||||
BloggerExtractor.__init__(self, match)
|
BloggerExtractor.__init__(self, match)
|
||||||
self.label = text.unquote(match.group(match.lastindex))
|
self.label = text.unquote(match[match.lastindex])
|
||||||
|
|
||||||
def posts(self, blog):
|
def posts(self, blog):
|
||||||
return self.api.blog_posts(blog["id"], self.label)
|
return self.api.blog_posts(blog["id"], self.label)
|
||||||
|
|||||||
@@ -20,7 +20,7 @@ class CienExtractor(Extractor):
|
|||||||
request_interval = (1.0, 2.0)
|
request_interval = (1.0, 2.0)
|
||||||
|
|
||||||
def __init__(self, match):
|
def __init__(self, match):
|
||||||
self.root = text.root_from_url(match.group(0))
|
self.root = text.root_from_url(match[0])
|
||||||
Extractor.__init__(self, match)
|
Extractor.__init__(self, match)
|
||||||
|
|
||||||
def _init(self):
|
def _init(self):
|
||||||
|
|||||||
@@ -926,7 +926,7 @@ class BaseExtractor(Extractor):
|
|||||||
if index:
|
if index:
|
||||||
self.category, self.root, info = self.instances[index-1]
|
self.category, self.root, info = self.instances[index-1]
|
||||||
if not self.root:
|
if not self.root:
|
||||||
self.root = text.root_from_url(self.match.group(0))
|
self.root = text.root_from_url(self.match[0])
|
||||||
self.config_instance = info.get
|
self.config_instance = info.get
|
||||||
else:
|
else:
|
||||||
self.root = group
|
self.root = group
|
||||||
|
|||||||
@@ -46,7 +46,7 @@ class DesktopographyExhibitionExtractor(DesktopographyExtractor):
|
|||||||
|
|
||||||
def __init__(self, match):
|
def __init__(self, match):
|
||||||
DesktopographyExtractor.__init__(self, match)
|
DesktopographyExtractor.__init__(self, match)
|
||||||
self.year = match.group(1)
|
self.year = match[1]
|
||||||
|
|
||||||
def items(self):
|
def items(self):
|
||||||
url = "{}/exhibition-{}/".format(self.root, self.year)
|
url = "{}/exhibition-{}/".format(self.root, self.year)
|
||||||
@@ -75,7 +75,7 @@ class DesktopographyEntryExtractor(DesktopographyExtractor):
|
|||||||
|
|
||||||
def __init__(self, match):
|
def __init__(self, match):
|
||||||
DesktopographyExtractor.__init__(self, match)
|
DesktopographyExtractor.__init__(self, match)
|
||||||
self.entry = match.group(1)
|
self.entry = match[1]
|
||||||
|
|
||||||
def items(self):
|
def items(self):
|
||||||
url = "{}/portfolios/{}".format(self.root, self.entry)
|
url = "{}/portfolios/{}".format(self.root, self.entry)
|
||||||
|
|||||||
@@ -36,7 +36,7 @@ class DeviantartExtractor(Extractor):
|
|||||||
|
|
||||||
def __init__(self, match):
|
def __init__(self, match):
|
||||||
Extractor.__init__(self, match)
|
Extractor.__init__(self, match)
|
||||||
self.user = (match.group(1) or match.group(2) or "").lower()
|
self.user = (match[1] or match[2] or "").lower()
|
||||||
self.offset = 0
|
self.offset = 0
|
||||||
|
|
||||||
def _init(self):
|
def _init(self):
|
||||||
@@ -227,7 +227,7 @@ class DeviantartExtractor(Extractor):
|
|||||||
if txt is None:
|
if txt is None:
|
||||||
continue
|
continue
|
||||||
for match in DeviantartStashExtractor.pattern.finditer(txt):
|
for match in DeviantartStashExtractor.pattern.finditer(txt):
|
||||||
url = text.ensure_http_scheme(match.group(0))
|
url = text.ensure_http_scheme(match[0])
|
||||||
deviation["_extractor"] = DeviantartStashExtractor
|
deviation["_extractor"] = DeviantartStashExtractor
|
||||||
yield Message.Queue, url, deviation
|
yield Message.Queue, url, deviation
|
||||||
|
|
||||||
@@ -988,8 +988,8 @@ class DeviantartFolderExtractor(DeviantartExtractor):
|
|||||||
def __init__(self, match):
|
def __init__(self, match):
|
||||||
DeviantartExtractor.__init__(self, match)
|
DeviantartExtractor.__init__(self, match)
|
||||||
self.folder = None
|
self.folder = None
|
||||||
self.folder_id = match.group(3)
|
self.folder_id = match[3]
|
||||||
self.folder_name = match.group(4)
|
self.folder_name = match[4]
|
||||||
|
|
||||||
def deviations(self):
|
def deviations(self):
|
||||||
folders = self.api.gallery_folders(self.user)
|
folders = self.api.gallery_folders(self.user)
|
||||||
@@ -1123,8 +1123,8 @@ class DeviantartCollectionExtractor(DeviantartExtractor):
|
|||||||
def __init__(self, match):
|
def __init__(self, match):
|
||||||
DeviantartExtractor.__init__(self, match)
|
DeviantartExtractor.__init__(self, match)
|
||||||
self.collection = None
|
self.collection = None
|
||||||
self.collection_id = match.group(3)
|
self.collection_id = match[3]
|
||||||
self.collection_name = match.group(4)
|
self.collection_name = match[4]
|
||||||
|
|
||||||
def deviations(self):
|
def deviations(self):
|
||||||
folders = self.api.collections_folders(self.user)
|
folders = self.api.collections_folders(self.user)
|
||||||
@@ -1226,7 +1226,7 @@ class DeviantartTagExtractor(DeviantartExtractor):
|
|||||||
|
|
||||||
def __init__(self, match):
|
def __init__(self, match):
|
||||||
DeviantartExtractor.__init__(self, match)
|
DeviantartExtractor.__init__(self, match)
|
||||||
self.tag = text.unquote(match.group(1))
|
self.tag = text.unquote(match[1])
|
||||||
self.user = ""
|
self.user = ""
|
||||||
|
|
||||||
def deviations(self):
|
def deviations(self):
|
||||||
@@ -1276,9 +1276,9 @@ class DeviantartDeviationExtractor(DeviantartExtractor):
|
|||||||
|
|
||||||
def __init__(self, match):
|
def __init__(self, match):
|
||||||
DeviantartExtractor.__init__(self, match)
|
DeviantartExtractor.__init__(self, match)
|
||||||
self.type = match.group(3)
|
self.type = match[3]
|
||||||
self.deviation_id = \
|
self.deviation_id = \
|
||||||
match.group(4) or match.group(5) or id_from_base36(match.group(6))
|
match[4] or match[5] or id_from_base36(match[6])
|
||||||
|
|
||||||
def deviations(self):
|
def deviations(self):
|
||||||
if self.user:
|
if self.user:
|
||||||
@@ -1399,7 +1399,7 @@ class DeviantartGallerySearchExtractor(DeviantartExtractor):
|
|||||||
|
|
||||||
def __init__(self, match):
|
def __init__(self, match):
|
||||||
DeviantartExtractor.__init__(self, match)
|
DeviantartExtractor.__init__(self, match)
|
||||||
self.query = match.group(3)
|
self.query = match[3]
|
||||||
|
|
||||||
def deviations(self):
|
def deviations(self):
|
||||||
self.login()
|
self.login()
|
||||||
|
|||||||
@@ -55,10 +55,10 @@ class DynastyscansChapterExtractor(DynastyscansBase, ChapterExtractor):
|
|||||||
group = extr('"icon-print"></i> ', '</span>')
|
group = extr('"icon-print"></i> ', '</span>')
|
||||||
|
|
||||||
return {
|
return {
|
||||||
"manga" : text.unescape(match.group(1)),
|
"manga" : text.unescape(match[1]),
|
||||||
"chapter" : text.parse_int(match.group(2)),
|
"chapter" : text.parse_int(match[2]),
|
||||||
"chapter_minor": match.group(3) or "",
|
"chapter_minor": match[3] or "",
|
||||||
"title" : text.unescape(match.group(4) or ""),
|
"title" : text.unescape(match[4] or ""),
|
||||||
"author" : text.remove_html(author),
|
"author" : text.remove_html(author),
|
||||||
"group" : (text.remove_html(group) or
|
"group" : (text.remove_html(group) or
|
||||||
text.extr(group, ' alt="', '"')),
|
text.extr(group, ' alt="', '"')),
|
||||||
@@ -102,7 +102,7 @@ class DynastyscansSearchExtractor(DynastyscansBase, Extractor):
|
|||||||
|
|
||||||
def __init__(self, match):
|
def __init__(self, match):
|
||||||
Extractor.__init__(self, match)
|
Extractor.__init__(self, match)
|
||||||
self.query = match.group(1) or ""
|
self.query = match[1] or ""
|
||||||
|
|
||||||
def items(self):
|
def items(self):
|
||||||
yield Message.Directory, {}
|
yield Message.Directory, {}
|
||||||
|
|||||||
@@ -34,7 +34,7 @@ class ExhentaiExtractor(Extractor):
|
|||||||
|
|
||||||
def __init__(self, match):
|
def __init__(self, match):
|
||||||
Extractor.__init__(self, match)
|
Extractor.__init__(self, match)
|
||||||
self.version = match.group(1)
|
self.version = match[1]
|
||||||
|
|
||||||
def initialize(self):
|
def initialize(self):
|
||||||
domain = self.config("domain", "auto")
|
domain = self.config("domain", "auto")
|
||||||
@@ -122,10 +122,10 @@ class ExhentaiGalleryExtractor(ExhentaiExtractor):
|
|||||||
|
|
||||||
def __init__(self, match):
|
def __init__(self, match):
|
||||||
ExhentaiExtractor.__init__(self, match)
|
ExhentaiExtractor.__init__(self, match)
|
||||||
self.gallery_id = text.parse_int(match.group(2) or match.group(5))
|
self.gallery_id = text.parse_int(match[2] or match[5])
|
||||||
self.gallery_token = match.group(3)
|
self.gallery_token = match[3]
|
||||||
self.image_token = match.group(4)
|
self.image_token = match[4]
|
||||||
self.image_num = text.parse_int(match.group(6), 1)
|
self.image_num = text.parse_int(match[6], 1)
|
||||||
self.key_start = None
|
self.key_start = None
|
||||||
self.key_show = None
|
self.key_show = None
|
||||||
self.key_next = None
|
self.key_next = None
|
||||||
@@ -573,7 +573,7 @@ class ExhentaiSearchExtractor(ExhentaiExtractor):
|
|||||||
def __init__(self, match):
|
def __init__(self, match):
|
||||||
ExhentaiExtractor.__init__(self, match)
|
ExhentaiExtractor.__init__(self, match)
|
||||||
|
|
||||||
_, query, tag = match.groups()
|
_, query, tag = self.groups
|
||||||
if tag:
|
if tag:
|
||||||
if "+" in tag:
|
if "+" in tag:
|
||||||
ns, _, tag = tag.rpartition(":")
|
ns, _, tag = tag.rpartition(":")
|
||||||
@@ -599,13 +599,13 @@ class ExhentaiSearchExtractor(ExhentaiExtractor):
|
|||||||
last = None
|
last = None
|
||||||
page = self.request(search_url, params=params).text
|
page = self.request(search_url, params=params).text
|
||||||
|
|
||||||
for gallery in ExhentaiGalleryExtractor.pattern.finditer(page):
|
for match in ExhentaiGalleryExtractor.pattern.finditer(page):
|
||||||
url = gallery.group(0)
|
url = match[0]
|
||||||
if url == last:
|
if url == last:
|
||||||
continue
|
continue
|
||||||
last = url
|
last = url
|
||||||
data["gallery_id"] = text.parse_int(gallery.group(2))
|
data["gallery_id"] = text.parse_int(match[2])
|
||||||
data["gallery_token"] = gallery.group(3)
|
data["gallery_token"] = match[3]
|
||||||
yield Message.Queue, url + "/", data
|
yield Message.Queue, url + "/", data
|
||||||
|
|
||||||
next_url = text.extr(page, 'nexturl="', '"', None)
|
next_url = text.extr(page, 'nexturl="', '"', None)
|
||||||
|
|||||||
@@ -351,7 +351,7 @@ class FanboxCreatorExtractor(FanboxExtractor):
|
|||||||
|
|
||||||
def __init__(self, match):
|
def __init__(self, match):
|
||||||
FanboxExtractor.__init__(self, match)
|
FanboxExtractor.__init__(self, match)
|
||||||
self.creator_id = match.group(1) or match.group(2)
|
self.creator_id = match[1] or match[2]
|
||||||
|
|
||||||
def posts(self):
|
def posts(self):
|
||||||
url = "https://api.fanbox.cc/post.paginateCreator?creatorId="
|
url = "https://api.fanbox.cc/post.paginateCreator?creatorId="
|
||||||
@@ -378,7 +378,7 @@ class FanboxPostExtractor(FanboxExtractor):
|
|||||||
|
|
||||||
def __init__(self, match):
|
def __init__(self, match):
|
||||||
FanboxExtractor.__init__(self, match)
|
FanboxExtractor.__init__(self, match)
|
||||||
self.post_id = match.group(3)
|
self.post_id = match[3]
|
||||||
|
|
||||||
def posts(self):
|
def posts(self):
|
||||||
return (self._get_post_data(self.post_id),)
|
return (self._get_post_data(self.post_id),)
|
||||||
|
|||||||
@@ -186,7 +186,7 @@ class FantiaCreatorExtractor(FantiaExtractor):
|
|||||||
|
|
||||||
def __init__(self, match):
|
def __init__(self, match):
|
||||||
FantiaExtractor.__init__(self, match)
|
FantiaExtractor.__init__(self, match)
|
||||||
self.creator_id = match.group(1)
|
self.creator_id = match[1]
|
||||||
|
|
||||||
def posts(self):
|
def posts(self):
|
||||||
url = "{}/fanclubs/{}/posts".format(self.root, self.creator_id)
|
url = "{}/fanclubs/{}/posts".format(self.root, self.creator_id)
|
||||||
@@ -201,7 +201,7 @@ class FantiaPostExtractor(FantiaExtractor):
|
|||||||
|
|
||||||
def __init__(self, match):
|
def __init__(self, match):
|
||||||
FantiaExtractor.__init__(self, match)
|
FantiaExtractor.__init__(self, match)
|
||||||
self.post_id = match.group(1)
|
self.post_id = match[1]
|
||||||
|
|
||||||
def posts(self):
|
def posts(self):
|
||||||
self._csrf_token()
|
self._csrf_token()
|
||||||
|
|||||||
@@ -50,8 +50,8 @@ class FapachiUserExtractor(Extractor):
|
|||||||
|
|
||||||
def __init__(self, match):
|
def __init__(self, match):
|
||||||
Extractor.__init__(self, match)
|
Extractor.__init__(self, match)
|
||||||
self.user = match.group(1)
|
self.user = match[1]
|
||||||
self.num = text.parse_int(match.group(2), 1)
|
self.num = text.parse_int(match[2], 1)
|
||||||
|
|
||||||
def items(self):
|
def items(self):
|
||||||
data = {"_extractor": FapachiPostExtractor}
|
data = {"_extractor": FapachiPostExtractor}
|
||||||
|
|||||||
@@ -25,7 +25,7 @@ class FapelloPostExtractor(Extractor):
|
|||||||
|
|
||||||
def __init__(self, match):
|
def __init__(self, match):
|
||||||
Extractor.__init__(self, match)
|
Extractor.__init__(self, match)
|
||||||
self.root = text.root_from_url(match.group(0))
|
self.root = text.root_from_url(match[0])
|
||||||
self.model, self.id = match.groups()
|
self.model, self.id = match.groups()
|
||||||
|
|
||||||
def items(self):
|
def items(self):
|
||||||
@@ -59,8 +59,8 @@ class FapelloModelExtractor(Extractor):
|
|||||||
|
|
||||||
def __init__(self, match):
|
def __init__(self, match):
|
||||||
Extractor.__init__(self, match)
|
Extractor.__init__(self, match)
|
||||||
self.root = text.root_from_url(match.group(0))
|
self.root = text.root_from_url(match[0])
|
||||||
self.model = match.group(1)
|
self.model = match[1]
|
||||||
|
|
||||||
def items(self):
|
def items(self):
|
||||||
num = 1
|
num = 1
|
||||||
@@ -93,8 +93,8 @@ class FapelloPathExtractor(Extractor):
|
|||||||
|
|
||||||
def __init__(self, match):
|
def __init__(self, match):
|
||||||
Extractor.__init__(self, match)
|
Extractor.__init__(self, match)
|
||||||
self.root = text.root_from_url(match.group(0))
|
self.root = text.root_from_url(match[0])
|
||||||
self.path = match.group(1)
|
self.path = match[1]
|
||||||
|
|
||||||
def items(self):
|
def items(self):
|
||||||
num = 1
|
num = 1
|
||||||
|
|||||||
@@ -273,9 +273,9 @@ class FoolfuukaGalleryExtractor(FoolfuukaExtractor):
|
|||||||
def __init__(self, match):
|
def __init__(self, match):
|
||||||
FoolfuukaExtractor.__init__(self, match)
|
FoolfuukaExtractor.__init__(self, match)
|
||||||
|
|
||||||
board = match.group(match.lastindex)
|
board = match[match.lastindex]
|
||||||
if board.isdecimal():
|
if board.isdecimal():
|
||||||
self.board = match.group(match.lastindex-1)
|
self.board = match[match.lastindex-1]
|
||||||
self.pages = (board,)
|
self.pages = (board,)
|
||||||
else:
|
else:
|
||||||
self.board = board
|
self.board = board
|
||||||
|
|||||||
@@ -18,7 +18,7 @@ class FoolslideExtractor(BaseExtractor):
|
|||||||
|
|
||||||
def __init__(self, match):
|
def __init__(self, match):
|
||||||
BaseExtractor.__init__(self, match)
|
BaseExtractor.__init__(self, match)
|
||||||
self.gallery_url = self.root + match.group(match.lastindex)
|
self.gallery_url = self.root + match[match.lastindex]
|
||||||
|
|
||||||
def request(self, url):
|
def request(self, url):
|
||||||
return BaseExtractor.request(
|
return BaseExtractor.request(
|
||||||
|
|||||||
@@ -28,7 +28,7 @@ class FuraffinityExtractor(Extractor):
|
|||||||
|
|
||||||
def __init__(self, match):
|
def __init__(self, match):
|
||||||
Extractor.__init__(self, match)
|
Extractor.__init__(self, match)
|
||||||
self.user = match.group(1)
|
self.user = match[1]
|
||||||
self.offset = 0
|
self.offset = 0
|
||||||
|
|
||||||
def _init(self):
|
def _init(self):
|
||||||
@@ -297,7 +297,7 @@ class FuraffinitySearchExtractor(FuraffinityExtractor):
|
|||||||
|
|
||||||
def __init__(self, match):
|
def __init__(self, match):
|
||||||
FuraffinityExtractor.__init__(self, match)
|
FuraffinityExtractor.__init__(self, match)
|
||||||
self.query = text.parse_query(match.group(2))
|
self.query = text.parse_query(match[2])
|
||||||
if self.user and "q" not in self.query:
|
if self.user and "q" not in self.query:
|
||||||
self.query["q"] = text.unquote(self.user)
|
self.query["q"] = text.unquote(self.user)
|
||||||
|
|
||||||
|
|||||||
@@ -21,7 +21,7 @@ class FuskatorGalleryExtractor(GalleryExtractor):
|
|||||||
example = "https://fuskator.com/thumbs/ID/"
|
example = "https://fuskator.com/thumbs/ID/"
|
||||||
|
|
||||||
def __init__(self, match):
|
def __init__(self, match):
|
||||||
self.gallery_hash = match.group(1)
|
self.gallery_hash = match[1]
|
||||||
url = "{}/thumbs/{}/index.html".format(self.root, self.gallery_hash)
|
url = "{}/thumbs/{}/index.html".format(self.root, self.gallery_hash)
|
||||||
GalleryExtractor.__init__(self, match, url)
|
GalleryExtractor.__init__(self, match, url)
|
||||||
|
|
||||||
@@ -72,7 +72,7 @@ class FuskatorSearchExtractor(Extractor):
|
|||||||
|
|
||||||
def __init__(self, match):
|
def __init__(self, match):
|
||||||
Extractor.__init__(self, match)
|
Extractor.__init__(self, match)
|
||||||
self.path = match.group(1)
|
self.path = match[1]
|
||||||
|
|
||||||
def items(self):
|
def items(self):
|
||||||
url = self.root + self.path
|
url = self.root + self.path
|
||||||
|
|||||||
@@ -292,7 +292,7 @@ class GelbooruRedirectExtractor(GelbooruBase, Extractor):
|
|||||||
|
|
||||||
def __init__(self, match):
|
def __init__(self, match):
|
||||||
Extractor.__init__(self, match)
|
Extractor.__init__(self, match)
|
||||||
self.url_base64 = match.group(1)
|
self.url_base64 = match[1]
|
||||||
|
|
||||||
def items(self):
|
def items(self):
|
||||||
url = text.ensure_http_scheme(binascii.a2b_base64(
|
url = text.ensure_http_scheme(binascii.a2b_base64(
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
# -*- coding: utf-8 -*-
|
# -*- coding: utf-8 -*-
|
||||||
|
|
||||||
# Copyright 2021-2023 Mike Fährmann
|
# Copyright 2021-2025 Mike Fährmann
|
||||||
#
|
#
|
||||||
# This program is free software; you can redistribute it and/or modify
|
# This program is free software; you can redistribute it and/or modify
|
||||||
# it under the terms of the GNU General Public License version 2 as
|
# it under the terms of the GNU General Public License version 2 as
|
||||||
@@ -94,7 +94,7 @@ class GelbooruV01TagExtractor(GelbooruV01Extractor):
|
|||||||
|
|
||||||
def __init__(self, match):
|
def __init__(self, match):
|
||||||
GelbooruV01Extractor.__init__(self, match)
|
GelbooruV01Extractor.__init__(self, match)
|
||||||
self.tags = match.group(match.lastindex)
|
self.tags = match[match.lastindex]
|
||||||
|
|
||||||
def metadata(self):
|
def metadata(self):
|
||||||
return {"search_tags": text.unquote(self.tags.replace("+", " "))}
|
return {"search_tags": text.unquote(self.tags.replace("+", " "))}
|
||||||
@@ -115,7 +115,7 @@ class GelbooruV01FavoriteExtractor(GelbooruV01Extractor):
|
|||||||
|
|
||||||
def __init__(self, match):
|
def __init__(self, match):
|
||||||
GelbooruV01Extractor.__init__(self, match)
|
GelbooruV01Extractor.__init__(self, match)
|
||||||
self.favorite_id = match.group(match.lastindex)
|
self.favorite_id = match[match.lastindex]
|
||||||
|
|
||||||
def metadata(self):
|
def metadata(self):
|
||||||
return {"favorite_id": text.parse_int(self.favorite_id)}
|
return {"favorite_id": text.parse_int(self.favorite_id)}
|
||||||
@@ -134,7 +134,7 @@ class GelbooruV01PostExtractor(GelbooruV01Extractor):
|
|||||||
|
|
||||||
def __init__(self, match):
|
def __init__(self, match):
|
||||||
GelbooruV01Extractor.__init__(self, match)
|
GelbooruV01Extractor.__init__(self, match)
|
||||||
self.post_id = match.group(match.lastindex)
|
self.post_id = match[match.lastindex]
|
||||||
|
|
||||||
def posts(self):
|
def posts(self):
|
||||||
return (self._parse_post(self.post_id),)
|
return (self._parse_post(self.post_id),)
|
||||||
|
|||||||
@@ -163,7 +163,7 @@ class GelbooruV02TagExtractor(GelbooruV02Extractor):
|
|||||||
|
|
||||||
def __init__(self, match):
|
def __init__(self, match):
|
||||||
GelbooruV02Extractor.__init__(self, match)
|
GelbooruV02Extractor.__init__(self, match)
|
||||||
tags = match.group(match.lastindex)
|
tags = match[match.lastindex]
|
||||||
self.tags = text.unquote(tags.replace("+", " "))
|
self.tags = text.unquote(tags.replace("+", " "))
|
||||||
|
|
||||||
def metadata(self):
|
def metadata(self):
|
||||||
@@ -184,7 +184,7 @@ class GelbooruV02PoolExtractor(GelbooruV02Extractor):
|
|||||||
|
|
||||||
def __init__(self, match):
|
def __init__(self, match):
|
||||||
GelbooruV02Extractor.__init__(self, match)
|
GelbooruV02Extractor.__init__(self, match)
|
||||||
self.pool_id = match.group(match.lastindex)
|
self.pool_id = match[match.lastindex]
|
||||||
|
|
||||||
if self.category == "rule34":
|
if self.category == "rule34":
|
||||||
self.posts = self._posts_pages
|
self.posts = self._posts_pages
|
||||||
@@ -236,7 +236,7 @@ class GelbooruV02FavoriteExtractor(GelbooruV02Extractor):
|
|||||||
|
|
||||||
def __init__(self, match):
|
def __init__(self, match):
|
||||||
GelbooruV02Extractor.__init__(self, match)
|
GelbooruV02Extractor.__init__(self, match)
|
||||||
self.favorite_id = match.group(match.lastindex)
|
self.favorite_id = match[match.lastindex]
|
||||||
|
|
||||||
def metadata(self):
|
def metadata(self):
|
||||||
return {"favorite_id": text.parse_int(self.favorite_id)}
|
return {"favorite_id": text.parse_int(self.favorite_id)}
|
||||||
@@ -257,7 +257,7 @@ class GelbooruV02PostExtractor(GelbooruV02Extractor):
|
|||||||
|
|
||||||
def __init__(self, match):
|
def __init__(self, match):
|
||||||
GelbooruV02Extractor.__init__(self, match)
|
GelbooruV02Extractor.__init__(self, match)
|
||||||
self.post_id = match.group(match.lastindex)
|
self.post_id = match[match.lastindex]
|
||||||
|
|
||||||
def posts(self):
|
def posts(self):
|
||||||
return self._pagination({"id": self.post_id})
|
return self._pagination({"id": self.post_id})
|
||||||
|
|||||||
@@ -36,28 +36,28 @@ class GenericExtractor(Extractor):
|
|||||||
example = "generic:https://www.nongnu.org/lzip/"
|
example = "generic:https://www.nongnu.org/lzip/"
|
||||||
|
|
||||||
def __init__(self, match):
|
def __init__(self, match):
|
||||||
self.subcategory = match.group('domain')
|
self.subcategory = match['domain']
|
||||||
Extractor.__init__(self, match)
|
Extractor.__init__(self, match)
|
||||||
|
|
||||||
# Strip the "g(eneric):" prefix
|
# Strip the "g(eneric):" prefix
|
||||||
# and inform about "forced" or "fallback" mode
|
# and inform about "forced" or "fallback" mode
|
||||||
if match.group('generic'):
|
if match['generic']:
|
||||||
self.url = match.group(0).partition(":")[2]
|
self.url = match[0].partition(":")[2]
|
||||||
else:
|
else:
|
||||||
self.log.info("Falling back on generic information extractor.")
|
self.log.info("Falling back on generic information extractor.")
|
||||||
self.url = match.group(0)
|
self.url = match[0]
|
||||||
|
|
||||||
# Make sure we have a scheme, or use https
|
# Make sure we have a scheme, or use https
|
||||||
if match.group('scheme'):
|
if match['scheme']:
|
||||||
self.scheme = match.group('scheme')
|
self.scheme = match['scheme']
|
||||||
else:
|
else:
|
||||||
self.scheme = 'https://'
|
self.scheme = 'https://'
|
||||||
self.url = text.ensure_http_scheme(self.url, self.scheme)
|
self.url = text.ensure_http_scheme(self.url, self.scheme)
|
||||||
|
|
||||||
self.path = match.group('path')
|
self.path = match['path']
|
||||||
|
|
||||||
# Used to resolve relative image urls
|
# Used to resolve relative image urls
|
||||||
self.root = self.scheme + match.group('domain')
|
self.root = self.scheme + match['domain']
|
||||||
|
|
||||||
def items(self):
|
def items(self):
|
||||||
"""Get page, extract metadata & images, yield them in suitable messages
|
"""Get page, extract metadata & images, yield them in suitable messages
|
||||||
@@ -184,7 +184,7 @@ class GenericExtractor(Extractor):
|
|||||||
basematch = util.re(
|
basematch = util.re(
|
||||||
r"(?i)(?:<base\s.*?href=[\"']?)(?P<url>[^\"' >]+)").search(page)
|
r"(?i)(?:<base\s.*?href=[\"']?)(?P<url>[^\"' >]+)").search(page)
|
||||||
if basematch:
|
if basematch:
|
||||||
self.baseurl = basematch.group('url').rstrip('/')
|
self.baseurl = basematch['url'].rstrip('/')
|
||||||
# Otherwise, extract the base url from self.url
|
# Otherwise, extract the base url from self.url
|
||||||
else:
|
else:
|
||||||
if self.url.endswith("/"):
|
if self.url.endswith("/"):
|
||||||
|
|||||||
@@ -23,7 +23,7 @@ class GofileFolderExtractor(Extractor):
|
|||||||
|
|
||||||
def __init__(self, match):
|
def __init__(self, match):
|
||||||
Extractor.__init__(self, match)
|
Extractor.__init__(self, match)
|
||||||
self.content_id = match.group(1)
|
self.content_id = match[1]
|
||||||
|
|
||||||
def items(self):
|
def items(self):
|
||||||
recursive = self.config("recursive")
|
recursive = self.config("recursive")
|
||||||
|
|||||||
@@ -27,7 +27,7 @@ class HatenablogExtractor(Extractor):
|
|||||||
|
|
||||||
def __init__(self, match):
|
def __init__(self, match):
|
||||||
Extractor.__init__(self, match)
|
Extractor.__init__(self, match)
|
||||||
self.domain = match.group(1) or match.group(2)
|
self.domain = match[1] or match[2]
|
||||||
|
|
||||||
def _init(self):
|
def _init(self):
|
||||||
self._find_img = util.re(r'<img +([^>]+)').finditer
|
self._find_img = util.re(r'<img +([^>]+)').finditer
|
||||||
@@ -42,8 +42,8 @@ class HatenablogExtractor(Extractor):
|
|||||||
'<div class="entry-content hatenablog-entry">', '</div>')
|
'<div class="entry-content hatenablog-entry">', '</div>')
|
||||||
|
|
||||||
images = []
|
images = []
|
||||||
for i in self._find_img(content):
|
for match in self._find_img(content):
|
||||||
attributes = i.group(1)
|
attributes = match[1]
|
||||||
if 'class="hatena-fotolife"' not in attributes:
|
if 'class="hatena-fotolife"' not in attributes:
|
||||||
continue
|
continue
|
||||||
image = text.unescape(text.extr(attributes, 'src="', '"'))
|
image = text.unescape(text.extr(attributes, 'src="', '"'))
|
||||||
@@ -67,9 +67,9 @@ class HatenablogEntriesExtractor(HatenablogExtractor):
|
|||||||
|
|
||||||
def __init__(self, match):
|
def __init__(self, match):
|
||||||
HatenablogExtractor.__init__(self, match)
|
HatenablogExtractor.__init__(self, match)
|
||||||
self.path = match.group(3)
|
self.path = match[3]
|
||||||
self.query = {key: value for key, value in text.parse_query(
|
self.query = {key: value for key, value in text.parse_query(
|
||||||
match.group(4)).items() if self._acceptable_query(key)}
|
match[4]).items() if self._acceptable_query(key)}
|
||||||
|
|
||||||
def _init(self):
|
def _init(self):
|
||||||
HatenablogExtractor._init(self)
|
HatenablogExtractor._init(self)
|
||||||
@@ -91,7 +91,7 @@ class HatenablogEntriesExtractor(HatenablogExtractor):
|
|||||||
yield from self._handle_full_articles(extr)
|
yield from self._handle_full_articles(extr)
|
||||||
|
|
||||||
match = self._find_pager_url(page)
|
match = self._find_pager_url(page)
|
||||||
url = text.unescape(match.group(1)) if match else None
|
url = text.unescape(match[1]) if match else None
|
||||||
query = None
|
query = None
|
||||||
|
|
||||||
def _handle_partial_articles(self, extr):
|
def _handle_partial_articles(self, extr):
|
||||||
@@ -128,7 +128,7 @@ class HatenablogEntryExtractor(HatenablogExtractor):
|
|||||||
|
|
||||||
def __init__(self, match):
|
def __init__(self, match):
|
||||||
HatenablogExtractor.__init__(self, match)
|
HatenablogExtractor.__init__(self, match)
|
||||||
self.path = match.group(3)
|
self.path = match[3]
|
||||||
|
|
||||||
def items(self):
|
def items(self):
|
||||||
url = "https://" + self.domain + "/entry/" + self.path
|
url = "https://" + self.domain + "/entry/" + self.path
|
||||||
|
|||||||
@@ -25,8 +25,8 @@ class HentaifoundryExtractor(Extractor):
|
|||||||
per_page = 25
|
per_page = 25
|
||||||
|
|
||||||
def __init__(self, match):
|
def __init__(self, match):
|
||||||
self.root = (match.group(1) or "https://") + "www.hentai-foundry.com"
|
self.root = (match[1] or "https://") + "www.hentai-foundry.com"
|
||||||
self.user = match.group(2)
|
self.user = match[2]
|
||||||
Extractor.__init__(self, match)
|
Extractor.__init__(self, match)
|
||||||
self.page_url = ""
|
self.page_url = ""
|
||||||
self.start_post = 0
|
self.start_post = 0
|
||||||
@@ -306,7 +306,7 @@ class HentaifoundryImageExtractor(HentaifoundryExtractor):
|
|||||||
|
|
||||||
def __init__(self, match):
|
def __init__(self, match):
|
||||||
HentaifoundryExtractor.__init__(self, match)
|
HentaifoundryExtractor.__init__(self, match)
|
||||||
self.index = match.group(3)
|
self.index = match[3]
|
||||||
|
|
||||||
def items(self):
|
def items(self):
|
||||||
post_url = "{}/pictures/user/{}/{}/?enterAgree=1".format(
|
post_url = "{}/pictures/user/{}/{}/?enterAgree=1".format(
|
||||||
@@ -347,7 +347,7 @@ class HentaifoundryStoryExtractor(HentaifoundryExtractor):
|
|||||||
|
|
||||||
def __init__(self, match):
|
def __init__(self, match):
|
||||||
HentaifoundryExtractor.__init__(self, match)
|
HentaifoundryExtractor.__init__(self, match)
|
||||||
self.index = match.group(3)
|
self.index = match[3]
|
||||||
|
|
||||||
def items(self):
|
def items(self):
|
||||||
story_url = "{}/stories/user/{}/{}/x?enterAgree=1".format(
|
story_url = "{}/stories/user/{}/{}/x?enterAgree=1".format(
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
# -*- coding: utf-8 -*-
|
# -*- coding: utf-8 -*-
|
||||||
|
|
||||||
# Copyright 2020-2023 Mike Fährmann
|
# Copyright 2020-2025 Mike Fährmann
|
||||||
#
|
#
|
||||||
# This program is free software; you can redistribute it and/or modify
|
# This program is free software; you can redistribute it and/or modify
|
||||||
# it under the terms of the GNU General Public License version 2 as
|
# it under the terms of the GNU General Public License version 2 as
|
||||||
@@ -20,7 +20,7 @@ class HentaihandGalleryExtractor(GalleryExtractor):
|
|||||||
example = "https://hentaihand.com/en/comic/TITLE"
|
example = "https://hentaihand.com/en/comic/TITLE"
|
||||||
|
|
||||||
def __init__(self, match):
|
def __init__(self, match):
|
||||||
self.slug = match.group(1)
|
self.slug = match[1]
|
||||||
url = "{}/api/comics/{}".format(self.root, self.slug)
|
url = "{}/api/comics/{}".format(self.root, self.slug)
|
||||||
GalleryExtractor.__init__(self, match, url)
|
GalleryExtractor.__init__(self, match, url)
|
||||||
|
|
||||||
|
|||||||
@@ -37,14 +37,14 @@ class HentaihereChapterExtractor(HentaihereBase, ChapterExtractor):
|
|||||||
r"Page 1 \| (.+) \(([^)]+)\) - Chapter \d+: (.+) by "
|
r"Page 1 \| (.+) \(([^)]+)\) - Chapter \d+: (.+) by "
|
||||||
r"(.+) at ").match(title)
|
r"(.+) at ").match(title)
|
||||||
return {
|
return {
|
||||||
"manga": match.group(1),
|
"manga": match[1],
|
||||||
"manga_id": text.parse_int(self.manga_id),
|
"manga_id": text.parse_int(self.manga_id),
|
||||||
"chapter": text.parse_int(chapter),
|
"chapter": text.parse_int(chapter),
|
||||||
"chapter_minor": sep + minor,
|
"chapter_minor": sep + minor,
|
||||||
"chapter_id": text.parse_int(chapter_id),
|
"chapter_id": text.parse_int(chapter_id),
|
||||||
"type": match.group(2),
|
"type": match[2],
|
||||||
"title": match.group(3),
|
"title": match[3],
|
||||||
"author": match.group(4),
|
"author": match[4],
|
||||||
"lang": "en",
|
"lang": "en",
|
||||||
"language": "English",
|
"language": "English",
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -22,7 +22,7 @@ class HentainexusGalleryExtractor(GalleryExtractor):
|
|||||||
example = "https://hentainexus.com/view/12345"
|
example = "https://hentainexus.com/view/12345"
|
||||||
|
|
||||||
def __init__(self, match):
|
def __init__(self, match):
|
||||||
self.gallery_id = match.group(1)
|
self.gallery_id = match[1]
|
||||||
url = "{}/view/{}".format(self.root, self.gallery_id)
|
url = "{}/view/{}".format(self.root, self.gallery_id)
|
||||||
GalleryExtractor.__init__(self, match, url)
|
GalleryExtractor.__init__(self, match, url)
|
||||||
|
|
||||||
|
|||||||
@@ -129,8 +129,8 @@ class HiperdexArtistExtractor(HiperdexBase, MangaExtractor):
|
|||||||
example = "https://hiperdex.com/manga-artist/NAME/"
|
example = "https://hiperdex.com/manga-artist/NAME/"
|
||||||
|
|
||||||
def __init__(self, match):
|
def __init__(self, match):
|
||||||
self.root = text.ensure_http_scheme(match.group(1))
|
self.root = text.ensure_http_scheme(match[1])
|
||||||
MangaExtractor.__init__(self, match, self.root + match.group(2) + "/")
|
MangaExtractor.__init__(self, match, self.root + match[2] + "/")
|
||||||
|
|
||||||
def chapters(self, page):
|
def chapters(self, page):
|
||||||
results = []
|
results = []
|
||||||
|
|||||||
@@ -198,7 +198,7 @@ class HitomiSearchExtractor(HitomiExtractor):
|
|||||||
|
|
||||||
def __init__(self, match):
|
def __init__(self, match):
|
||||||
Extractor.__init__(self, match)
|
Extractor.__init__(self, match)
|
||||||
self.query = match.group(1)
|
self.query = match[1]
|
||||||
self.tags = text.unquote(self.query)
|
self.tags = text.unquote(self.query)
|
||||||
|
|
||||||
def items(self):
|
def items(self):
|
||||||
@@ -269,9 +269,9 @@ def _parse_gg(extr):
|
|||||||
|
|
||||||
for match in util.re_compile(
|
for match in util.re_compile(
|
||||||
r"if\s+\(g\s*===?\s*(\d+)\)[\s{]*o\s*=\s*(\d+)").finditer(page):
|
r"if\s+\(g\s*===?\s*(\d+)\)[\s{]*o\s*=\s*(\d+)").finditer(page):
|
||||||
m[int(match.group(1))] = int(match.group(2))
|
m[int(match[1])] = int(match[2])
|
||||||
|
|
||||||
d = util.re_compile(r"(?:var\s|default:)\s*o\s*=\s*(\d+)").search(page)
|
d = util.re_compile(r"(?:var\s|default:)\s*o\s*=\s*(\d+)").search(page)
|
||||||
b = util.re_compile(r"b:\s*[\"'](.+)[\"']").search(page)
|
b = util.re_compile(r"b:\s*[\"'](.+)[\"']").search(page)
|
||||||
|
|
||||||
return m, b.group(1).strip("/"), int(d.group(1)) if d else 0
|
return m, b[1].strip("/"), int(d[1]) if d else 0
|
||||||
|
|||||||
@@ -103,7 +103,7 @@ class HotleakCreatorExtractor(HotleakExtractor):
|
|||||||
|
|
||||||
def __init__(self, match):
|
def __init__(self, match):
|
||||||
HotleakExtractor.__init__(self, match)
|
HotleakExtractor.__init__(self, match)
|
||||||
self.creator = match.group(1)
|
self.creator = match[1]
|
||||||
|
|
||||||
def posts(self):
|
def posts(self):
|
||||||
url = "{}/{}".format(self.root, self.creator)
|
url = "{}/{}".format(self.root, self.creator)
|
||||||
@@ -178,7 +178,7 @@ class HotleakSearchExtractor(HotleakExtractor):
|
|||||||
|
|
||||||
def __init__(self, match):
|
def __init__(self, match):
|
||||||
HotleakExtractor.__init__(self, match)
|
HotleakExtractor.__init__(self, match)
|
||||||
self.params = match.group(1)
|
self.params = match[1]
|
||||||
|
|
||||||
def items(self):
|
def items(self):
|
||||||
data = {"_extractor": HotleakCreatorExtractor}
|
data = {"_extractor": HotleakCreatorExtractor}
|
||||||
|
|||||||
@@ -159,7 +159,7 @@ class IdolcomplexTagExtractor(IdolcomplexExtractor):
|
|||||||
|
|
||||||
def __init__(self, match):
|
def __init__(self, match):
|
||||||
IdolcomplexExtractor.__init__(self, match)
|
IdolcomplexExtractor.__init__(self, match)
|
||||||
query = text.parse_query(match.group(1))
|
query = text.parse_query(match[1])
|
||||||
self.tags = text.unquote(query.get("tags", "").replace("+", " "))
|
self.tags = text.unquote(query.get("tags", "").replace("+", " "))
|
||||||
self.start_page = text.parse_int(query.get("page"), 1)
|
self.start_page = text.parse_int(query.get("page"), 1)
|
||||||
self.next = text.parse_int(query.get("next"), 0)
|
self.next = text.parse_int(query.get("next"), 0)
|
||||||
|
|||||||
@@ -19,7 +19,7 @@ class ImagebamExtractor(Extractor):
|
|||||||
|
|
||||||
def __init__(self, match):
|
def __init__(self, match):
|
||||||
Extractor.__init__(self, match)
|
Extractor.__init__(self, match)
|
||||||
self.path = match.group(1)
|
self.path = match[1]
|
||||||
|
|
||||||
def _init(self):
|
def _init(self):
|
||||||
self.cookies.set("nsfw_inter", "1", domain="www.imagebam.com")
|
self.cookies.set("nsfw_inter", "1", domain="www.imagebam.com")
|
||||||
|
|||||||
@@ -1,7 +1,7 @@
|
|||||||
# -*- coding: utf-8 -*-
|
# -*- coding: utf-8 -*-
|
||||||
|
|
||||||
# Copyright 2020 Leonid "Bepis" Pavel
|
# Copyright 2020 Leonid "Bepis" Pavel
|
||||||
# Copyright 2023 Mike Fährmann
|
# Copyright 2023-2025 Mike Fährmann
|
||||||
#
|
#
|
||||||
# This program is free software; you can redistribute it and/or modify
|
# This program is free software; you can redistribute it and/or modify
|
||||||
# it under the terms of the GNU General Public License version 2 as
|
# it under the terms of the GNU General Public License version 2 as
|
||||||
@@ -23,7 +23,7 @@ class ImagechestGalleryExtractor(GalleryExtractor):
|
|||||||
example = "https://imgchest.com/p/abcdefghijk"
|
example = "https://imgchest.com/p/abcdefghijk"
|
||||||
|
|
||||||
def __init__(self, match):
|
def __init__(self, match):
|
||||||
self.gallery_id = match.group(1)
|
self.gallery_id = match[1]
|
||||||
url = self.root + "/p/" + self.gallery_id
|
url = self.root + "/p/" + self.gallery_id
|
||||||
GalleryExtractor.__init__(self, match, url)
|
GalleryExtractor.__init__(self, match, url)
|
||||||
|
|
||||||
|
|||||||
@@ -45,7 +45,7 @@ class ImagefapGalleryExtractor(ImagefapExtractor):
|
|||||||
|
|
||||||
def __init__(self, match):
|
def __init__(self, match):
|
||||||
ImagefapExtractor.__init__(self, match)
|
ImagefapExtractor.__init__(self, match)
|
||||||
self.gid = match.group(1)
|
self.gid = match[1]
|
||||||
self.image_id = ""
|
self.image_id = ""
|
||||||
|
|
||||||
def items(self):
|
def items(self):
|
||||||
@@ -116,7 +116,7 @@ class ImagefapImageExtractor(ImagefapExtractor):
|
|||||||
|
|
||||||
def __init__(self, match):
|
def __init__(self, match):
|
||||||
ImagefapExtractor.__init__(self, match)
|
ImagefapExtractor.__init__(self, match)
|
||||||
self.image_id = match.group(1)
|
self.image_id = match[1]
|
||||||
|
|
||||||
def items(self):
|
def items(self):
|
||||||
url, data = self.get_image()
|
url, data = self.get_image()
|
||||||
|
|||||||
@@ -28,8 +28,8 @@ class ImagehostImageExtractor(Extractor):
|
|||||||
def __init__(self, match):
|
def __init__(self, match):
|
||||||
Extractor.__init__(self, match)
|
Extractor.__init__(self, match)
|
||||||
self.page_url = "http{}://{}".format(
|
self.page_url = "http{}://{}".format(
|
||||||
"s" if self._https else "", match.group(1))
|
"s" if self._https else "", match[1])
|
||||||
self.token = match.group(2)
|
self.token = match[2]
|
||||||
|
|
||||||
if self._params == "simple":
|
if self._params == "simple":
|
||||||
self._params = {
|
self._params = {
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
# -*- coding: utf-8 -*-
|
# -*- coding: utf-8 -*-
|
||||||
|
|
||||||
# Copyright 2019-2023 Mike Fährmann
|
# Copyright 2019-2025 Mike Fährmann
|
||||||
#
|
#
|
||||||
# This program is free software; you can redistribute it and/or modify
|
# This program is free software; you can redistribute it and/or modify
|
||||||
# it under the terms of the GNU General Public License version 2 as
|
# it under the terms of the GNU General Public License version 2 as
|
||||||
@@ -126,8 +126,8 @@ class ImgbbAlbumExtractor(ImgbbExtractor):
|
|||||||
def __init__(self, match):
|
def __init__(self, match):
|
||||||
ImgbbExtractor.__init__(self, match)
|
ImgbbExtractor.__init__(self, match)
|
||||||
self.album_name = None
|
self.album_name = None
|
||||||
self.album_id = match.group(1)
|
self.album_id = match[1]
|
||||||
self.sort = text.parse_query(match.group(2)).get("sort", "date_desc")
|
self.sort = text.parse_query(match[2]).get("sort", "date_desc")
|
||||||
self.page_url = "https://ibb.co/album/" + self.album_id
|
self.page_url = "https://ibb.co/album/" + self.album_id
|
||||||
|
|
||||||
def metadata(self, page):
|
def metadata(self, page):
|
||||||
@@ -162,8 +162,8 @@ class ImgbbUserExtractor(ImgbbExtractor):
|
|||||||
|
|
||||||
def __init__(self, match):
|
def __init__(self, match):
|
||||||
ImgbbExtractor.__init__(self, match)
|
ImgbbExtractor.__init__(self, match)
|
||||||
self.user = match.group(1)
|
self.user = match[1]
|
||||||
self.sort = text.parse_query(match.group(2)).get("sort", "date_desc")
|
self.sort = text.parse_query(match[2]).get("sort", "date_desc")
|
||||||
self.page_url = "https://{}.imgbb.com/".format(self.user)
|
self.page_url = "https://{}.imgbb.com/".format(self.user)
|
||||||
|
|
||||||
def metadata(self, page):
|
def metadata(self, page):
|
||||||
@@ -191,7 +191,7 @@ class ImgbbImageExtractor(ImgbbExtractor):
|
|||||||
|
|
||||||
def __init__(self, match):
|
def __init__(self, match):
|
||||||
ImgbbExtractor.__init__(self, match)
|
ImgbbExtractor.__init__(self, match)
|
||||||
self.image_id = match.group(1)
|
self.image_id = match[1]
|
||||||
|
|
||||||
def items(self):
|
def items(self):
|
||||||
url = "https://ibb.co/" + self.image_id
|
url = "https://ibb.co/" + self.image_id
|
||||||
|
|||||||
@@ -62,7 +62,7 @@ class ImgboxGalleryExtractor(AsynchronousMixin, ImgboxExtractor):
|
|||||||
|
|
||||||
def __init__(self, match):
|
def __init__(self, match):
|
||||||
ImgboxExtractor.__init__(self, match)
|
ImgboxExtractor.__init__(self, match)
|
||||||
self.gallery_key = match.group(1)
|
self.gallery_key = match[1]
|
||||||
self.image_keys = []
|
self.image_keys = []
|
||||||
|
|
||||||
def get_job_metadata(self):
|
def get_job_metadata(self):
|
||||||
@@ -93,7 +93,7 @@ class ImgboxImageExtractor(ImgboxExtractor):
|
|||||||
|
|
||||||
def __init__(self, match):
|
def __init__(self, match):
|
||||||
ImgboxExtractor.__init__(self, match)
|
ImgboxExtractor.__init__(self, match)
|
||||||
self.image_key = match.group(1)
|
self.image_key = match[1]
|
||||||
|
|
||||||
def get_image_keys(self):
|
def get_image_keys(self):
|
||||||
return (self.image_key,)
|
return (self.image_key,)
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
# -*- coding: utf-8 -*-
|
# -*- coding: utf-8 -*-
|
||||||
|
|
||||||
# Copyright 2015-2023 Mike Fährmann
|
# Copyright 2015-2025 Mike Fährmann
|
||||||
#
|
#
|
||||||
# This program is free software; you can redistribute it and/or modify
|
# This program is free software; you can redistribute it and/or modify
|
||||||
# it under the terms of the GNU General Public License version 2 as
|
# it under the terms of the GNU General Public License version 2 as
|
||||||
@@ -20,7 +20,7 @@ class ImgthGalleryExtractor(GalleryExtractor):
|
|||||||
example = "https://imgth.com/gallery/123/TITLE"
|
example = "https://imgth.com/gallery/123/TITLE"
|
||||||
|
|
||||||
def __init__(self, match):
|
def __init__(self, match):
|
||||||
self.gallery_id = gid = match.group(1)
|
self.gallery_id = gid = match[1]
|
||||||
url = "{}/gallery/{}/g/".format(self.root, gid)
|
url = "{}/gallery/{}/g/".format(self.root, gid)
|
||||||
GalleryExtractor.__init__(self, match, url)
|
GalleryExtractor.__init__(self, match, url)
|
||||||
|
|
||||||
|
|||||||
@@ -21,7 +21,7 @@ class ImgurExtractor(Extractor):
|
|||||||
|
|
||||||
def __init__(self, match):
|
def __init__(self, match):
|
||||||
Extractor.__init__(self, match)
|
Extractor.__init__(self, match)
|
||||||
self.key = match.group(1)
|
self.key = match[1]
|
||||||
|
|
||||||
def _init(self):
|
def _init(self):
|
||||||
self.api = ImgurAPI(self)
|
self.api = ImgurAPI(self)
|
||||||
@@ -168,7 +168,7 @@ class ImgurFavoriteFolderExtractor(ImgurExtractor):
|
|||||||
|
|
||||||
def __init__(self, match):
|
def __init__(self, match):
|
||||||
ImgurExtractor.__init__(self, match)
|
ImgurExtractor.__init__(self, match)
|
||||||
self.folder_id = match.group(2)
|
self.folder_id = match[2]
|
||||||
|
|
||||||
def items(self):
|
def items(self):
|
||||||
return self._items_queue(self.api.account_favorites_folder(
|
return self._items_queue(self.api.account_favorites_folder(
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
# -*- coding: utf-8 -*-
|
# -*- coding: utf-8 -*-
|
||||||
|
|
||||||
# Copyright 2020-2023 Mike Fährmann
|
# Copyright 2020-2025 Mike Fährmann
|
||||||
#
|
#
|
||||||
# This program is free software; you can redistribute it and/or modify
|
# This program is free software; you can redistribute it and/or modify
|
||||||
# it under the terms of the GNU General Public License version 2 as
|
# it under the terms of the GNU General Public License version 2 as
|
||||||
@@ -109,12 +109,12 @@ class InkbunnyPoolExtractor(InkbunnyExtractor):
|
|||||||
|
|
||||||
def __init__(self, match):
|
def __init__(self, match):
|
||||||
InkbunnyExtractor.__init__(self, match)
|
InkbunnyExtractor.__init__(self, match)
|
||||||
pid = match.group(1)
|
pid = match[1]
|
||||||
if pid:
|
if pid:
|
||||||
self.pool_id = pid
|
self.pool_id = pid
|
||||||
self.orderby = "pool_order"
|
self.orderby = "pool_order"
|
||||||
else:
|
else:
|
||||||
params = text.parse_query(match.group(2))
|
params = text.parse_query(match[2])
|
||||||
self.pool_id = params.get("pool_id")
|
self.pool_id = params.get("pool_id")
|
||||||
self.orderby = params.get("orderby", "pool_order")
|
self.orderby = params.get("orderby", "pool_order")
|
||||||
|
|
||||||
@@ -142,12 +142,12 @@ class InkbunnyFavoriteExtractor(InkbunnyExtractor):
|
|||||||
|
|
||||||
def __init__(self, match):
|
def __init__(self, match):
|
||||||
InkbunnyExtractor.__init__(self, match)
|
InkbunnyExtractor.__init__(self, match)
|
||||||
uid = match.group(1)
|
uid = match[1]
|
||||||
if uid:
|
if uid:
|
||||||
self.user_id = uid
|
self.user_id = uid
|
||||||
self.orderby = self.config("orderby", "fav_datetime")
|
self.orderby = self.config("orderby", "fav_datetime")
|
||||||
else:
|
else:
|
||||||
params = text.parse_query(match.group(2))
|
params = text.parse_query(match[2])
|
||||||
self.user_id = params.get("user_id")
|
self.user_id = params.get("user_id")
|
||||||
self.orderby = params.get("orderby", "fav_datetime")
|
self.orderby = params.get("orderby", "fav_datetime")
|
||||||
|
|
||||||
@@ -184,7 +184,7 @@ class InkbunnyUnreadExtractor(InkbunnyExtractor):
|
|||||||
|
|
||||||
def __init__(self, match):
|
def __init__(self, match):
|
||||||
InkbunnyExtractor.__init__(self, match)
|
InkbunnyExtractor.__init__(self, match)
|
||||||
self.params = text.parse_query(match.group(1))
|
self.params = text.parse_query(match[1])
|
||||||
|
|
||||||
def posts(self):
|
def posts(self):
|
||||||
params = self.params.copy()
|
params = self.params.copy()
|
||||||
@@ -204,7 +204,7 @@ class InkbunnySearchExtractor(InkbunnyExtractor):
|
|||||||
|
|
||||||
def __init__(self, match):
|
def __init__(self, match):
|
||||||
InkbunnyExtractor.__init__(self, match)
|
InkbunnyExtractor.__init__(self, match)
|
||||||
self.params = text.parse_query(match.group(1))
|
self.params = text.parse_query(match[1])
|
||||||
|
|
||||||
def metadata(self):
|
def metadata(self):
|
||||||
return {"search": self.params}
|
return {"search": self.params}
|
||||||
@@ -241,8 +241,8 @@ class InkbunnyFollowingExtractor(InkbunnyExtractor):
|
|||||||
|
|
||||||
def __init__(self, match):
|
def __init__(self, match):
|
||||||
InkbunnyExtractor.__init__(self, match)
|
InkbunnyExtractor.__init__(self, match)
|
||||||
self.user_id = match.group(1) or \
|
self.user_id = match[1] or \
|
||||||
text.parse_query(match.group(2)).get("user_id")
|
text.parse_query(match[2]).get("user_id")
|
||||||
|
|
||||||
def items(self):
|
def items(self):
|
||||||
url = self.root + "/watchlist_process.php"
|
url = self.root + "/watchlist_process.php"
|
||||||
@@ -276,7 +276,7 @@ class InkbunnyPostExtractor(InkbunnyExtractor):
|
|||||||
|
|
||||||
def __init__(self, match):
|
def __init__(self, match):
|
||||||
InkbunnyExtractor.__init__(self, match)
|
InkbunnyExtractor.__init__(self, match)
|
||||||
self.submission_id = match.group(1)
|
self.submission_id = match[1]
|
||||||
|
|
||||||
def posts(self):
|
def posts(self):
|
||||||
submissions = self.api.detail(({"submission_id": self.submission_id},))
|
submissions = self.api.detail(({"submission_id": self.submission_id},))
|
||||||
|
|||||||
@@ -33,7 +33,7 @@ class InstagramExtractor(Extractor):
|
|||||||
|
|
||||||
def __init__(self, match):
|
def __init__(self, match):
|
||||||
Extractor.__init__(self, match)
|
Extractor.__init__(self, match)
|
||||||
self.item = match.group(1)
|
self.item = match[1]
|
||||||
|
|
||||||
def _init(self):
|
def _init(self):
|
||||||
self.www_claim = "0"
|
self.www_claim = "0"
|
||||||
@@ -513,7 +513,7 @@ class InstagramGuideExtractor(InstagramExtractor):
|
|||||||
|
|
||||||
def __init__(self, match):
|
def __init__(self, match):
|
||||||
InstagramExtractor.__init__(self, match)
|
InstagramExtractor.__init__(self, match)
|
||||||
self.guide_id = match.group(2)
|
self.guide_id = match[2]
|
||||||
|
|
||||||
def metadata(self):
|
def metadata(self):
|
||||||
return {"guide": self.api.guide(self.guide_id)}
|
return {"guide": self.api.guide(self.guide_id)}
|
||||||
|
|||||||
@@ -36,8 +36,8 @@ class JschanThreadExtractor(JschanExtractor):
|
|||||||
def __init__(self, match):
|
def __init__(self, match):
|
||||||
JschanExtractor.__init__(self, match)
|
JschanExtractor.__init__(self, match)
|
||||||
index = match.lastindex
|
index = match.lastindex
|
||||||
self.board = match.group(index-1)
|
self.board = match[index-1]
|
||||||
self.thread = match.group(index)
|
self.thread = match[index]
|
||||||
|
|
||||||
def items(self):
|
def items(self):
|
||||||
url = "{}/{}/thread/{}.json".format(
|
url = "{}/{}/thread/{}.json".format(
|
||||||
@@ -70,7 +70,7 @@ class JschanBoardExtractor(JschanExtractor):
|
|||||||
|
|
||||||
def __init__(self, match):
|
def __init__(self, match):
|
||||||
JschanExtractor.__init__(self, match)
|
JschanExtractor.__init__(self, match)
|
||||||
self.board = match.group(match.lastindex)
|
self.board = match[match.lastindex]
|
||||||
|
|
||||||
def items(self):
|
def items(self):
|
||||||
url = "{}/{}/catalog.json".format(self.root, self.board)
|
url = "{}/{}/catalog.json".format(self.root, self.board)
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
# -*- coding: utf-8 -*-
|
# -*- coding: utf-8 -*-
|
||||||
|
|
||||||
# Copyright 2020-2023 Mike Fährmann
|
# Copyright 2020-2025 Mike Fährmann
|
||||||
#
|
#
|
||||||
# This program is free software; you can redistribute it and/or modify
|
# This program is free software; you can redistribute it and/or modify
|
||||||
# it under the terms of the GNU General Public License version 2 as
|
# it under the terms of the GNU General Public License version 2 as
|
||||||
@@ -25,7 +25,7 @@ class KabeuchiUserExtractor(Extractor):
|
|||||||
|
|
||||||
def __init__(self, match):
|
def __init__(self, match):
|
||||||
Extractor.__init__(self, match)
|
Extractor.__init__(self, match)
|
||||||
self.user_id = match.group(1)
|
self.user_id = match[1]
|
||||||
|
|
||||||
def items(self):
|
def items(self):
|
||||||
base = "{}/accounts/upfile/{}/{}/".format(
|
base = "{}/accounts/upfile/{}/{}/".format(
|
||||||
|
|||||||
@@ -24,8 +24,8 @@ class KeenspotComicExtractor(Extractor):
|
|||||||
|
|
||||||
def __init__(self, match):
|
def __init__(self, match):
|
||||||
Extractor.__init__(self, match)
|
Extractor.__init__(self, match)
|
||||||
self.comic = match.group(1).lower()
|
self.comic = match[1].lower()
|
||||||
self.path = match.group(2)
|
self.path = match[2]
|
||||||
self.root = "http://" + self.comic + ".keenspot.com"
|
self.root = "http://" + self.comic + ".keenspot.com"
|
||||||
|
|
||||||
self._needle = ""
|
self._needle = ""
|
||||||
|
|||||||
@@ -29,9 +29,9 @@ class KemonoExtractor(Extractor):
|
|||||||
cookies_domain = ".kemono.su"
|
cookies_domain = ".kemono.su"
|
||||||
|
|
||||||
def __init__(self, match):
|
def __init__(self, match):
|
||||||
tld = match.group(2)
|
tld = match[2]
|
||||||
self.category = domain = match.group(1)
|
self.category = domain = match[1]
|
||||||
self.root = text.root_from_url(match.group(0))
|
self.root = text.root_from_url(match[0])
|
||||||
self.cookies_domain = ".{}.{}".format(domain, tld)
|
self.cookies_domain = ".{}.{}".format(domain, tld)
|
||||||
Extractor.__init__(self, match)
|
Extractor.__init__(self, match)
|
||||||
|
|
||||||
@@ -125,7 +125,7 @@ class KemonoExtractor(Extractor):
|
|||||||
|
|
||||||
match = find_hash(url)
|
match = find_hash(url)
|
||||||
if match:
|
if match:
|
||||||
file["hash"] = hash = match.group(1)
|
file["hash"] = hash = match[1]
|
||||||
if not duplicates:
|
if not duplicates:
|
||||||
if hash in hashes:
|
if hash in hashes:
|
||||||
self.log.debug("Skipping %s (duplicate)", url)
|
self.log.debug("Skipping %s (duplicate)", url)
|
||||||
@@ -310,7 +310,7 @@ class KemonoUserExtractor(KemonoExtractor):
|
|||||||
example = "https://kemono.su/SERVICE/user/12345"
|
example = "https://kemono.su/SERVICE/user/12345"
|
||||||
|
|
||||||
def __init__(self, match):
|
def __init__(self, match):
|
||||||
self.subcategory = match.group(3)
|
self.subcategory = match[3]
|
||||||
KemonoExtractor.__init__(self, match)
|
KemonoExtractor.__init__(self, match)
|
||||||
|
|
||||||
def posts(self):
|
def posts(self):
|
||||||
@@ -356,7 +356,7 @@ class KemonoPostExtractor(KemonoExtractor):
|
|||||||
example = "https://kemono.su/SERVICE/user/12345/post/12345"
|
example = "https://kemono.su/SERVICE/user/12345/post/12345"
|
||||||
|
|
||||||
def __init__(self, match):
|
def __init__(self, match):
|
||||||
self.subcategory = match.group(3)
|
self.subcategory = match[3]
|
||||||
KemonoExtractor.__init__(self, match)
|
KemonoExtractor.__init__(self, match)
|
||||||
|
|
||||||
def posts(self):
|
def posts(self):
|
||||||
@@ -423,7 +423,7 @@ class KemonoDiscordExtractor(KemonoExtractor):
|
|||||||
append = files.append
|
append = files.append
|
||||||
for attachment in post["attachments"]:
|
for attachment in post["attachments"]:
|
||||||
match = find_hash(attachment["path"])
|
match = find_hash(attachment["path"])
|
||||||
attachment["hash"] = match.group(1) if match else ""
|
attachment["hash"] = match[1] if match else ""
|
||||||
attachment["type"] = "attachment"
|
attachment["type"] = "attachment"
|
||||||
append(attachment)
|
append(attachment)
|
||||||
for path in find_inline(post["content"] or ""):
|
for path in find_inline(post["content"] or ""):
|
||||||
|
|||||||
@@ -26,7 +26,7 @@ class KhinsiderSoundtrackExtractor(AsynchronousMixin, Extractor):
|
|||||||
|
|
||||||
def __init__(self, match):
|
def __init__(self, match):
|
||||||
Extractor.__init__(self, match)
|
Extractor.__init__(self, match)
|
||||||
self.album = match.group(1)
|
self.album = match[1]
|
||||||
|
|
||||||
def items(self):
|
def items(self):
|
||||||
url = self.root + "/game-soundtracks/album/" + self.album
|
url = self.root + "/game-soundtracks/album/" + self.album
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
# -*- coding: utf-8 -*-
|
# -*- coding: utf-8 -*-
|
||||||
|
|
||||||
# Copyright 2023 Mike Fährmann
|
# Copyright 2023-2025 Mike Fährmann
|
||||||
#
|
#
|
||||||
# This program is free software; you can redistribute it and/or modify
|
# This program is free software; you can redistribute it and/or modify
|
||||||
# it under the terms of the GNU General Public License version 2 as
|
# it under the terms of the GNU General Public License version 2 as
|
||||||
@@ -24,7 +24,7 @@ class LexicaSearchExtractor(Extractor):
|
|||||||
|
|
||||||
def __init__(self, match):
|
def __init__(self, match):
|
||||||
Extractor.__init__(self, match)
|
Extractor.__init__(self, match)
|
||||||
self.query = match.group(1)
|
self.query = match[1]
|
||||||
self.text = text.unquote(self.query).replace("+", " ")
|
self.text = text.unquote(self.query).replace("+", " ")
|
||||||
|
|
||||||
def items(self):
|
def items(self):
|
||||||
|
|||||||
@@ -22,7 +22,7 @@ class LightroomGalleryExtractor(Extractor):
|
|||||||
|
|
||||||
def __init__(self, match):
|
def __init__(self, match):
|
||||||
Extractor.__init__(self, match)
|
Extractor.__init__(self, match)
|
||||||
self.href = match.group(1)
|
self.href = match[1]
|
||||||
|
|
||||||
def items(self):
|
def items(self):
|
||||||
# Get config
|
# Get config
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
# -*- coding: utf-8 -*-
|
# -*- coding: utf-8 -*-
|
||||||
|
|
||||||
# Copyright 2019-2023 Mike Fährmann
|
# Copyright 2019-2025 Mike Fährmann
|
||||||
#
|
#
|
||||||
# This program is free software; you can redistribute it and/or modify
|
# This program is free software; you can redistribute it and/or modify
|
||||||
# it under the terms of the GNU General Public License version 2 as
|
# it under the terms of the GNU General Public License version 2 as
|
||||||
@@ -22,7 +22,7 @@ class LivedoorExtractor(Extractor):
|
|||||||
|
|
||||||
def __init__(self, match):
|
def __init__(self, match):
|
||||||
Extractor.__init__(self, match)
|
Extractor.__init__(self, match)
|
||||||
self.user = match.group(1)
|
self.user = match[1]
|
||||||
|
|
||||||
def items(self):
|
def items(self):
|
||||||
for post in self.posts():
|
for post in self.posts():
|
||||||
@@ -108,7 +108,7 @@ class LivedoorPostExtractor(LivedoorExtractor):
|
|||||||
|
|
||||||
def __init__(self, match):
|
def __init__(self, match):
|
||||||
LivedoorExtractor.__init__(self, match)
|
LivedoorExtractor.__init__(self, match)
|
||||||
self.post_id = match.group(2)
|
self.post_id = match[2]
|
||||||
|
|
||||||
def posts(self):
|
def posts(self):
|
||||||
url = "{}/{}/archives/{}.html".format(
|
url = "{}/{}/archives/{}.html".format(
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
# -*- coding: utf-8 -*-
|
# -*- coding: utf-8 -*-
|
||||||
|
|
||||||
# Copyright 2016-2023 Mike Fährmann
|
# Copyright 2016-2025 Mike Fährmann
|
||||||
#
|
#
|
||||||
# This program is free software; you can redistribute it and/or modify
|
# This program is free software; you can redistribute it and/or modify
|
||||||
# it under the terms of the GNU General Public License version 2 as
|
# it under the terms of the GNU General Public License version 2 as
|
||||||
@@ -51,7 +51,7 @@ class LusciousAlbumExtractor(LusciousExtractor):
|
|||||||
|
|
||||||
def __init__(self, match):
|
def __init__(self, match):
|
||||||
LusciousExtractor.__init__(self, match)
|
LusciousExtractor.__init__(self, match)
|
||||||
self.album_id = match.group(1)
|
self.album_id = match[1]
|
||||||
|
|
||||||
def _init(self):
|
def _init(self):
|
||||||
self.gif = self.config("gif", False)
|
self.gif = self.config("gif", False)
|
||||||
@@ -280,7 +280,7 @@ class LusciousSearchExtractor(LusciousExtractor):
|
|||||||
|
|
||||||
def __init__(self, match):
|
def __init__(self, match):
|
||||||
LusciousExtractor.__init__(self, match)
|
LusciousExtractor.__init__(self, match)
|
||||||
self.query = match.group(1)
|
self.query = match[1]
|
||||||
|
|
||||||
def items(self):
|
def items(self):
|
||||||
query = text.parse_query(self.query)
|
query = text.parse_query(self.query)
|
||||||
|
|||||||
@@ -45,8 +45,8 @@ class LynxchanThreadExtractor(LynxchanExtractor):
|
|||||||
def __init__(self, match):
|
def __init__(self, match):
|
||||||
LynxchanExtractor.__init__(self, match)
|
LynxchanExtractor.__init__(self, match)
|
||||||
index = match.lastindex
|
index = match.lastindex
|
||||||
self.board = match.group(index-1)
|
self.board = match[index-1]
|
||||||
self.thread = match.group(index)
|
self.thread = match[index]
|
||||||
|
|
||||||
def items(self):
|
def items(self):
|
||||||
url = "{}/{}/res/{}.json".format(self.root, self.board, self.thread)
|
url = "{}/{}/res/{}.json".format(self.root, self.board, self.thread)
|
||||||
@@ -75,7 +75,7 @@ class LynxchanBoardExtractor(LynxchanExtractor):
|
|||||||
|
|
||||||
def __init__(self, match):
|
def __init__(self, match):
|
||||||
LynxchanExtractor.__init__(self, match)
|
LynxchanExtractor.__init__(self, match)
|
||||||
self.board = match.group(match.lastindex)
|
self.board = match[match.lastindex]
|
||||||
|
|
||||||
def items(self):
|
def items(self):
|
||||||
url = "{}/{}/catalog.json".format(self.root, self.board)
|
url = "{}/{}/catalog.json".format(self.root, self.board)
|
||||||
|
|||||||
@@ -172,7 +172,7 @@ class MangadexListExtractor(MangadexExtractor):
|
|||||||
"/01234567-89ab-cdef-0123-456789abcdef/NAME")
|
"/01234567-89ab-cdef-0123-456789abcdef/NAME")
|
||||||
|
|
||||||
def __init__(self, match):
|
def __init__(self, match):
|
||||||
if match.group(2) == "feed":
|
if match[2] == "feed":
|
||||||
self.subcategory = "list-feed"
|
self.subcategory = "list-feed"
|
||||||
else:
|
else:
|
||||||
self.items = self._items_manga
|
self.items = self._items_manga
|
||||||
|
|||||||
@@ -75,7 +75,7 @@ class MangaparkChapterExtractor(MangaparkBase, ChapterExtractor):
|
|||||||
example = "https://mangapark.net/title/MANGA/12345-en-ch.01"
|
example = "https://mangapark.net/title/MANGA/12345-en-ch.01"
|
||||||
|
|
||||||
def __init__(self, match):
|
def __init__(self, match):
|
||||||
self.root = text.root_from_url(match.group(0))
|
self.root = text.root_from_url(match[0])
|
||||||
ChapterExtractor.__init__(self, match, False)
|
ChapterExtractor.__init__(self, match, False)
|
||||||
|
|
||||||
def metadata(self, _):
|
def metadata(self, _):
|
||||||
@@ -115,8 +115,8 @@ class MangaparkMangaExtractor(MangaparkBase, Extractor):
|
|||||||
example = "https://mangapark.net/title/12345-MANGA"
|
example = "https://mangapark.net/title/12345-MANGA"
|
||||||
|
|
||||||
def __init__(self, match):
|
def __init__(self, match):
|
||||||
self.root = text.root_from_url(match.group(0))
|
self.root = text.root_from_url(match[0])
|
||||||
self.manga_id = int(match.group(1))
|
self.manga_id = int(match[1])
|
||||||
Extractor.__init__(self, match)
|
Extractor.__init__(self, match)
|
||||||
|
|
||||||
def items(self):
|
def items(self):
|
||||||
|
|||||||
@@ -82,7 +82,7 @@ class MangoxoAlbumExtractor(MangoxoExtractor):
|
|||||||
|
|
||||||
def __init__(self, match):
|
def __init__(self, match):
|
||||||
MangoxoExtractor.__init__(self, match)
|
MangoxoExtractor.__init__(self, match)
|
||||||
self.album_id = match.group(1)
|
self.album_id = match[1]
|
||||||
|
|
||||||
def items(self):
|
def items(self):
|
||||||
self.login()
|
self.login()
|
||||||
@@ -147,7 +147,7 @@ class MangoxoChannelExtractor(MangoxoExtractor):
|
|||||||
|
|
||||||
def __init__(self, match):
|
def __init__(self, match):
|
||||||
MangoxoExtractor.__init__(self, match)
|
MangoxoExtractor.__init__(self, match)
|
||||||
self.user = match.group(1)
|
self.user = match[1]
|
||||||
|
|
||||||
def items(self):
|
def items(self):
|
||||||
self.login()
|
self.login()
|
||||||
|
|||||||
@@ -22,7 +22,7 @@ class MastodonExtractor(BaseExtractor):
|
|||||||
|
|
||||||
def __init__(self, match):
|
def __init__(self, match):
|
||||||
BaseExtractor.__init__(self, match)
|
BaseExtractor.__init__(self, match)
|
||||||
self.item = match.group(match.lastindex)
|
self.item = match[match.lastindex]
|
||||||
|
|
||||||
def _init(self):
|
def _init(self):
|
||||||
self.instance = self.root.partition("://")[2]
|
self.instance = self.root.partition("://")[2]
|
||||||
|
|||||||
@@ -20,7 +20,7 @@ class MisskeyExtractor(BaseExtractor):
|
|||||||
|
|
||||||
def __init__(self, match):
|
def __init__(self, match):
|
||||||
BaseExtractor.__init__(self, match)
|
BaseExtractor.__init__(self, match)
|
||||||
self.item = match.group(match.lastindex)
|
self.item = match[match.lastindex]
|
||||||
|
|
||||||
def _init(self):
|
def _init(self):
|
||||||
self.api = MisskeyAPI(self)
|
self.api = MisskeyAPI(self)
|
||||||
|
|||||||
@@ -98,7 +98,7 @@ class MoebooruTagExtractor(MoebooruExtractor):
|
|||||||
|
|
||||||
def __init__(self, match):
|
def __init__(self, match):
|
||||||
MoebooruExtractor.__init__(self, match)
|
MoebooruExtractor.__init__(self, match)
|
||||||
tags = match.group(match.lastindex)
|
tags = match[match.lastindex]
|
||||||
self.tags = text.unquote(tags.replace("+", " "))
|
self.tags = text.unquote(tags.replace("+", " "))
|
||||||
|
|
||||||
def metadata(self):
|
def metadata(self):
|
||||||
@@ -118,7 +118,7 @@ class MoebooruPoolExtractor(MoebooruExtractor):
|
|||||||
|
|
||||||
def __init__(self, match):
|
def __init__(self, match):
|
||||||
MoebooruExtractor.__init__(self, match)
|
MoebooruExtractor.__init__(self, match)
|
||||||
self.pool_id = match.group(match.lastindex)
|
self.pool_id = match[match.lastindex]
|
||||||
|
|
||||||
def metadata(self):
|
def metadata(self):
|
||||||
if self.config("metadata"):
|
if self.config("metadata"):
|
||||||
@@ -142,7 +142,7 @@ class MoebooruPostExtractor(MoebooruExtractor):
|
|||||||
|
|
||||||
def __init__(self, match):
|
def __init__(self, match):
|
||||||
MoebooruExtractor.__init__(self, match)
|
MoebooruExtractor.__init__(self, match)
|
||||||
self.post_id = match.group(match.lastindex)
|
self.post_id = match[match.lastindex]
|
||||||
|
|
||||||
def posts(self):
|
def posts(self):
|
||||||
params = {"tags": "id:" + self.post_id}
|
params = {"tags": "id:" + self.post_id}
|
||||||
@@ -159,8 +159,8 @@ class MoebooruPopularExtractor(MoebooruExtractor):
|
|||||||
|
|
||||||
def __init__(self, match):
|
def __init__(self, match):
|
||||||
MoebooruExtractor.__init__(self, match)
|
MoebooruExtractor.__init__(self, match)
|
||||||
self.scale = match.group(match.lastindex-1)
|
self.scale = match[match.lastindex-1]
|
||||||
self.query = match.group(match.lastindex)
|
self.query = match[match.lastindex]
|
||||||
|
|
||||||
def metadata(self):
|
def metadata(self):
|
||||||
self.params = params = text.parse_query(self.query)
|
self.params = params = text.parse_query(self.query)
|
||||||
|
|||||||
@@ -20,7 +20,7 @@ class MyhentaigalleryGalleryExtractor(GalleryExtractor):
|
|||||||
example = "https://myhentaigallery.com/g/12345"
|
example = "https://myhentaigallery.com/g/12345"
|
||||||
|
|
||||||
def __init__(self, match):
|
def __init__(self, match):
|
||||||
self.gallery_id = match.group(1)
|
self.gallery_id = match[1]
|
||||||
url = "{}/g/{}".format(self.root, self.gallery_id)
|
url = "{}/g/{}".format(self.root, self.gallery_id)
|
||||||
GalleryExtractor.__init__(self, match, url)
|
GalleryExtractor.__init__(self, match, url)
|
||||||
|
|
||||||
|
|||||||
@@ -33,13 +33,13 @@ class NaverPostExtractor(NaverBase, GalleryExtractor):
|
|||||||
example = "https://blog.naver.com/BLOGID/12345"
|
example = "https://blog.naver.com/BLOGID/12345"
|
||||||
|
|
||||||
def __init__(self, match):
|
def __init__(self, match):
|
||||||
blog_id = match.group(1)
|
blog_id = match[1]
|
||||||
if blog_id:
|
if blog_id:
|
||||||
self.blog_id = blog_id
|
self.blog_id = blog_id
|
||||||
self.post_id = match.group(2)
|
self.post_id = match[2]
|
||||||
else:
|
else:
|
||||||
self.blog_id = match.group(3)
|
self.blog_id = match[3]
|
||||||
self.post_id = match.group(4)
|
self.post_id = match[4]
|
||||||
|
|
||||||
url = "{}/PostView.nhn?blogId={}&logNo={}".format(
|
url = "{}/PostView.nhn?blogId={}&logNo={}".format(
|
||||||
self.root, self.blog_id, self.post_id)
|
self.root, self.blog_id, self.post_id)
|
||||||
@@ -134,7 +134,7 @@ class NaverBlogExtractor(NaverBase, Extractor):
|
|||||||
|
|
||||||
def __init__(self, match):
|
def __init__(self, match):
|
||||||
Extractor.__init__(self, match)
|
Extractor.__init__(self, match)
|
||||||
self.blog_id = match.group(1) or match.group(2)
|
self.blog_id = match[1] or match[2]
|
||||||
|
|
||||||
def items(self):
|
def items(self):
|
||||||
# fetch first post number
|
# fetch first post number
|
||||||
|
|||||||
@@ -30,7 +30,7 @@ class NewgroundsExtractor(Extractor):
|
|||||||
|
|
||||||
def __init__(self, match):
|
def __init__(self, match):
|
||||||
Extractor.__init__(self, match)
|
Extractor.__init__(self, match)
|
||||||
self.user = match.group(1)
|
self.user = match[1]
|
||||||
self.user_root = "https://{}.newgrounds.com".format(self.user)
|
self.user_root = "https://{}.newgrounds.com".format(self.user)
|
||||||
|
|
||||||
def _init(self):
|
def _init(self):
|
||||||
@@ -397,12 +397,12 @@ class NewgroundsImageExtractor(NewgroundsExtractor):
|
|||||||
|
|
||||||
def __init__(self, match):
|
def __init__(self, match):
|
||||||
NewgroundsExtractor.__init__(self, match)
|
NewgroundsExtractor.__init__(self, match)
|
||||||
if match.group(2):
|
if match[2]:
|
||||||
self.user = match.group(2)
|
self.user = match[2]
|
||||||
self.post_url = "https://www.newgrounds.com/art/view/{}/{}".format(
|
self.post_url = "https://www.newgrounds.com/art/view/{}/{}".format(
|
||||||
self.user, match.group(3))
|
self.user, match[3])
|
||||||
else:
|
else:
|
||||||
self.post_url = text.ensure_http_scheme(match.group(0))
|
self.post_url = text.ensure_http_scheme(match[0])
|
||||||
|
|
||||||
def posts(self):
|
def posts(self):
|
||||||
return (self.post_url,)
|
return (self.post_url,)
|
||||||
@@ -417,7 +417,7 @@ class NewgroundsMediaExtractor(NewgroundsExtractor):
|
|||||||
def __init__(self, match):
|
def __init__(self, match):
|
||||||
NewgroundsExtractor.__init__(self, match)
|
NewgroundsExtractor.__init__(self, match)
|
||||||
self.user = ""
|
self.user = ""
|
||||||
self.post_url = self.root + match.group(1)
|
self.post_url = self.root + match[1]
|
||||||
|
|
||||||
def posts(self):
|
def posts(self):
|
||||||
return (self.post_url,)
|
return (self.post_url,)
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
# -*- coding: utf-8 -*-
|
# -*- coding: utf-8 -*-
|
||||||
|
|
||||||
# Copyright 2015-2023 Mike Fährmann
|
# Copyright 2015-2025 Mike Fährmann
|
||||||
#
|
#
|
||||||
# This program is free software; you can redistribute it and/or modify
|
# This program is free software; you can redistribute it and/or modify
|
||||||
# it under the terms of the GNU General Public License version 2 as
|
# it under the terms of the GNU General Public License version 2 as
|
||||||
@@ -22,7 +22,7 @@ class NhentaiGalleryExtractor(GalleryExtractor):
|
|||||||
example = "https://nhentai.net/g/12345/"
|
example = "https://nhentai.net/g/12345/"
|
||||||
|
|
||||||
def __init__(self, match):
|
def __init__(self, match):
|
||||||
url = self.root + "/api/gallery/" + match.group(1)
|
url = self.root + "/api/gallery/" + match[1]
|
||||||
GalleryExtractor.__init__(self, match, url)
|
GalleryExtractor.__init__(self, match, url)
|
||||||
|
|
||||||
def metadata(self, page):
|
def metadata(self, page):
|
||||||
|
|||||||
@@ -23,7 +23,7 @@ class NijieExtractor(AsynchronousMixin, BaseExtractor):
|
|||||||
|
|
||||||
def __init__(self, match):
|
def __init__(self, match):
|
||||||
BaseExtractor.__init__(self, match)
|
BaseExtractor.__init__(self, match)
|
||||||
self.user_id = text.parse_int(match.group(match.lastindex))
|
self.user_id = text.parse_int(match[match.lastindex])
|
||||||
|
|
||||||
def initialize(self):
|
def initialize(self):
|
||||||
self.cookies_domain = "." + self.root.rpartition("/")[2]
|
self.cookies_domain = "." + self.root.rpartition("/")[2]
|
||||||
@@ -296,7 +296,7 @@ class NijieImageExtractor(NijieExtractor):
|
|||||||
|
|
||||||
def __init__(self, match):
|
def __init__(self, match):
|
||||||
NijieExtractor.__init__(self, match)
|
NijieExtractor.__init__(self, match)
|
||||||
self.image_id = match.group(match.lastindex)
|
self.image_id = match[match.lastindex]
|
||||||
|
|
||||||
def image_ids(self):
|
def image_ids(self):
|
||||||
return (self.image_id,)
|
return (self.image_id,)
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
# -*- coding: utf-8 -*-
|
# -*- coding: utf-8 -*-
|
||||||
|
|
||||||
# Copyright 2022-2023 Mike Fährmann
|
# Copyright 2022-2025 Mike Fährmann
|
||||||
#
|
#
|
||||||
# This program is free software; you can redistribute it and/or modify
|
# This program is free software; you can redistribute it and/or modify
|
||||||
# it under the terms of the GNU General Public License version 2 as
|
# it under the terms of the GNU General Public License version 2 as
|
||||||
@@ -25,8 +25,8 @@ class NitterExtractor(BaseExtractor):
|
|||||||
BaseExtractor.__init__(self, match)
|
BaseExtractor.__init__(self, match)
|
||||||
|
|
||||||
lastindex = match.lastindex
|
lastindex = match.lastindex
|
||||||
self.user = match.group(lastindex)
|
self.user = match[lastindex]
|
||||||
self.user_id = match.group(lastindex + 1)
|
self.user_id = match[lastindex + 1]
|
||||||
self.user_obj = None
|
self.user_obj = None
|
||||||
|
|
||||||
def items(self):
|
def items(self):
|
||||||
|
|||||||
@@ -110,7 +110,7 @@ class NozomiPostExtractor(NozomiExtractor):
|
|||||||
|
|
||||||
def __init__(self, match):
|
def __init__(self, match):
|
||||||
NozomiExtractor.__init__(self, match)
|
NozomiExtractor.__init__(self, match)
|
||||||
self.post_id = match.group(1)
|
self.post_id = match[1]
|
||||||
|
|
||||||
def posts(self):
|
def posts(self):
|
||||||
return (self.post_id,)
|
return (self.post_id,)
|
||||||
@@ -157,7 +157,7 @@ class NozomiSearchExtractor(NozomiExtractor):
|
|||||||
|
|
||||||
def __init__(self, match):
|
def __init__(self, match):
|
||||||
NozomiExtractor.__init__(self, match)
|
NozomiExtractor.__init__(self, match)
|
||||||
self.tags = text.unquote(match.group(1)).split()
|
self.tags = text.unquote(match[1]).split()
|
||||||
|
|
||||||
def metadata(self):
|
def metadata(self):
|
||||||
return {"search_tags": self.tags}
|
return {"search_tags": self.tags}
|
||||||
|
|||||||
@@ -25,7 +25,7 @@ class NsfwalbumAlbumExtractor(GalleryExtractor):
|
|||||||
example = "https://nsfwalbum.com/album/12345"
|
example = "https://nsfwalbum.com/album/12345"
|
||||||
|
|
||||||
def __init__(self, match):
|
def __init__(self, match):
|
||||||
self.album_id = match.group(2)
|
self.album_id = match[2]
|
||||||
GalleryExtractor.__init__(self, match)
|
GalleryExtractor.__init__(self, match)
|
||||||
|
|
||||||
def metadata(self, page):
|
def metadata(self, page):
|
||||||
|
|||||||
@@ -354,7 +354,7 @@ class OAuthMastodon(OAuthBase):
|
|||||||
|
|
||||||
def __init__(self, match):
|
def __init__(self, match):
|
||||||
OAuthBase.__init__(self, match)
|
OAuthBase.__init__(self, match)
|
||||||
self.instance = match.group(1)
|
self.instance = match[1]
|
||||||
|
|
||||||
def items(self):
|
def items(self):
|
||||||
yield Message.Version, 1
|
yield Message.Version, 1
|
||||||
|
|||||||
@@ -67,7 +67,7 @@ class PhilomenaPostExtractor(PhilomenaExtractor):
|
|||||||
|
|
||||||
def __init__(self, match):
|
def __init__(self, match):
|
||||||
PhilomenaExtractor.__init__(self, match)
|
PhilomenaExtractor.__init__(self, match)
|
||||||
self.image_id = match.group(match.lastindex)
|
self.image_id = match[match.lastindex]
|
||||||
|
|
||||||
def posts(self):
|
def posts(self):
|
||||||
return (self.api.image(self.image_id),)
|
return (self.api.image(self.image_id),)
|
||||||
@@ -116,7 +116,7 @@ class PhilomenaGalleryExtractor(PhilomenaExtractor):
|
|||||||
|
|
||||||
def __init__(self, match):
|
def __init__(self, match):
|
||||||
PhilomenaExtractor.__init__(self, match)
|
PhilomenaExtractor.__init__(self, match)
|
||||||
self.gallery_id = match.group(match.lastindex)
|
self.gallery_id = match[match.lastindex]
|
||||||
|
|
||||||
def metadata(self):
|
def metadata(self):
|
||||||
try:
|
try:
|
||||||
|
|||||||
@@ -23,7 +23,7 @@ class PhotovogueUserExtractor(Extractor):
|
|||||||
|
|
||||||
def __init__(self, match):
|
def __init__(self, match):
|
||||||
Extractor.__init__(self, match)
|
Extractor.__init__(self, match)
|
||||||
self.user_id = match.group(1)
|
self.user_id = match[1]
|
||||||
|
|
||||||
def items(self):
|
def items(self):
|
||||||
for photo in self.photos():
|
for photo in self.photos():
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
# -*- coding: utf-8 -*-
|
# -*- coding: utf-8 -*-
|
||||||
|
|
||||||
# Copyright 2021-2023 Mike Fährmann
|
# Copyright 2021-2025 Mike Fährmann
|
||||||
#
|
#
|
||||||
# This program is free software; you can redistribute it and/or modify
|
# This program is free software; you can redistribute it and/or modify
|
||||||
# it under the terms of the GNU General Public License version 2 as
|
# it under the terms of the GNU General Public License version 2 as
|
||||||
@@ -25,7 +25,7 @@ class PicartoGalleryExtractor(Extractor):
|
|||||||
|
|
||||||
def __init__(self, match):
|
def __init__(self, match):
|
||||||
Extractor.__init__(self, match)
|
Extractor.__init__(self, match)
|
||||||
self.username = match.group(1)
|
self.username = match[1]
|
||||||
|
|
||||||
def items(self):
|
def items(self):
|
||||||
for post in self.posts():
|
for post in self.posts():
|
||||||
|
|||||||
@@ -27,7 +27,7 @@ class PillowfortExtractor(Extractor):
|
|||||||
|
|
||||||
def __init__(self, match):
|
def __init__(self, match):
|
||||||
Extractor.__init__(self, match)
|
Extractor.__init__(self, match)
|
||||||
self.item = match.group(1)
|
self.item = match[1]
|
||||||
|
|
||||||
def items(self):
|
def items(self):
|
||||||
self.login()
|
self.login()
|
||||||
|
|||||||
@@ -214,7 +214,7 @@ class PinterestPinExtractor(PinterestExtractor):
|
|||||||
|
|
||||||
def __init__(self, match):
|
def __init__(self, match):
|
||||||
PinterestExtractor.__init__(self, match)
|
PinterestExtractor.__init__(self, match)
|
||||||
self.pin_id = match.group(1)
|
self.pin_id = match[1]
|
||||||
self.pin = None
|
self.pin = None
|
||||||
|
|
||||||
def metadata(self):
|
def metadata(self):
|
||||||
@@ -236,8 +236,8 @@ class PinterestBoardExtractor(PinterestExtractor):
|
|||||||
|
|
||||||
def __init__(self, match):
|
def __init__(self, match):
|
||||||
PinterestExtractor.__init__(self, match)
|
PinterestExtractor.__init__(self, match)
|
||||||
self.user = text.unquote(match.group(1))
|
self.user = text.unquote(match[1])
|
||||||
self.board_name = text.unquote(match.group(2))
|
self.board_name = text.unquote(match[2])
|
||||||
self.board = None
|
self.board = None
|
||||||
|
|
||||||
def metadata(self):
|
def metadata(self):
|
||||||
@@ -266,7 +266,7 @@ class PinterestUserExtractor(PinterestExtractor):
|
|||||||
|
|
||||||
def __init__(self, match):
|
def __init__(self, match):
|
||||||
PinterestExtractor.__init__(self, match)
|
PinterestExtractor.__init__(self, match)
|
||||||
self.user = text.unquote(match.group(1))
|
self.user = text.unquote(match[1])
|
||||||
|
|
||||||
def items(self):
|
def items(self):
|
||||||
for board in self.api.boards(self.user):
|
for board in self.api.boards(self.user):
|
||||||
@@ -285,7 +285,7 @@ class PinterestAllpinsExtractor(PinterestExtractor):
|
|||||||
|
|
||||||
def __init__(self, match):
|
def __init__(self, match):
|
||||||
PinterestExtractor.__init__(self, match)
|
PinterestExtractor.__init__(self, match)
|
||||||
self.user = text.unquote(match.group(1))
|
self.user = text.unquote(match[1])
|
||||||
|
|
||||||
def metadata(self):
|
def metadata(self):
|
||||||
return {"user": self.user}
|
return {"user": self.user}
|
||||||
@@ -303,7 +303,7 @@ class PinterestCreatedExtractor(PinterestExtractor):
|
|||||||
|
|
||||||
def __init__(self, match):
|
def __init__(self, match):
|
||||||
PinterestExtractor.__init__(self, match)
|
PinterestExtractor.__init__(self, match)
|
||||||
self.user = text.unquote(match.group(1))
|
self.user = text.unquote(match[1])
|
||||||
|
|
||||||
def metadata(self):
|
def metadata(self):
|
||||||
return {"user": self.user}
|
return {"user": self.user}
|
||||||
@@ -323,9 +323,9 @@ class PinterestSectionExtractor(PinterestExtractor):
|
|||||||
|
|
||||||
def __init__(self, match):
|
def __init__(self, match):
|
||||||
PinterestExtractor.__init__(self, match)
|
PinterestExtractor.__init__(self, match)
|
||||||
self.user = text.unquote(match.group(1))
|
self.user = text.unquote(match[1])
|
||||||
self.board_slug = text.unquote(match.group(2))
|
self.board_slug = text.unquote(match[2])
|
||||||
self.section_slug = text.unquote(match.group(3))
|
self.section_slug = text.unquote(match[3])
|
||||||
self.section = None
|
self.section = None
|
||||||
|
|
||||||
def metadata(self):
|
def metadata(self):
|
||||||
@@ -351,7 +351,7 @@ class PinterestSearchExtractor(PinterestExtractor):
|
|||||||
|
|
||||||
def __init__(self, match):
|
def __init__(self, match):
|
||||||
PinterestExtractor.__init__(self, match)
|
PinterestExtractor.__init__(self, match)
|
||||||
self.search = text.unquote(match.group(1))
|
self.search = text.unquote(match[1])
|
||||||
|
|
||||||
def metadata(self):
|
def metadata(self):
|
||||||
return {"search": self.search}
|
return {"search": self.search}
|
||||||
|
|||||||
@@ -39,7 +39,7 @@ class PixeldrainFileExtractor(PixeldrainExtractor):
|
|||||||
|
|
||||||
def __init__(self, match):
|
def __init__(self, match):
|
||||||
Extractor.__init__(self, match)
|
Extractor.__init__(self, match)
|
||||||
self.file_id = match.group(1)
|
self.file_id = match[1]
|
||||||
|
|
||||||
def items(self):
|
def items(self):
|
||||||
url = "{}/api/file/{}".format(self.root, self.file_id)
|
url = "{}/api/file/{}".format(self.root, self.file_id)
|
||||||
@@ -64,8 +64,8 @@ class PixeldrainAlbumExtractor(PixeldrainExtractor):
|
|||||||
|
|
||||||
def __init__(self, match):
|
def __init__(self, match):
|
||||||
Extractor.__init__(self, match)
|
Extractor.__init__(self, match)
|
||||||
self.album_id = match.group(1)
|
self.album_id = match[1]
|
||||||
self.file_index = match.group(2)
|
self.file_index = match[2]
|
||||||
|
|
||||||
def items(self):
|
def items(self):
|
||||||
url = "{}/api/list/{}".format(self.root, self.album_id)
|
url = "{}/api/list/{}".format(self.root, self.album_id)
|
||||||
|
|||||||
@@ -320,7 +320,7 @@ class PixivExtractor(Extractor):
|
|||||||
if not caption:
|
if not caption:
|
||||||
return ""
|
return ""
|
||||||
return text.unescape(self.meta_captions_sub(
|
return text.unescape(self.meta_captions_sub(
|
||||||
lambda m: '<a href="' + text.unquote(m.group(1)), caption))
|
lambda m: '<a href="' + text.unquote(m[1]), caption))
|
||||||
|
|
||||||
def _fallback_image(self, src):
|
def _fallback_image(self, src):
|
||||||
if isinstance(src, str):
|
if isinstance(src, str):
|
||||||
@@ -558,7 +558,7 @@ class PixivWorkExtractor(PixivExtractor):
|
|||||||
|
|
||||||
def __init__(self, match):
|
def __init__(self, match):
|
||||||
PixivExtractor.__init__(self, match)
|
PixivExtractor.__init__(self, match)
|
||||||
self.illust_id = match.group(1) or match.group(2)
|
self.illust_id = match[1] or match[2]
|
||||||
|
|
||||||
def works(self):
|
def works(self):
|
||||||
works = (self.api.illust_detail(self.illust_id),)
|
works = (self.api.illust_detail(self.illust_id),)
|
||||||
@@ -658,7 +658,7 @@ class PixivRankingExtractor(PixivExtractor):
|
|||||||
|
|
||||||
def __init__(self, match):
|
def __init__(self, match):
|
||||||
PixivExtractor.__init__(self, match)
|
PixivExtractor.__init__(self, match)
|
||||||
self.query = match.group(1)
|
self.query = match[1]
|
||||||
self.mode = self.date = None
|
self.mode = self.date = None
|
||||||
|
|
||||||
def works(self):
|
def works(self):
|
||||||
@@ -812,7 +812,7 @@ class PixivPixivisionExtractor(PixivExtractor):
|
|||||||
|
|
||||||
def __init__(self, match):
|
def __init__(self, match):
|
||||||
PixivExtractor.__init__(self, match)
|
PixivExtractor.__init__(self, match)
|
||||||
self.pixivision_id = match.group(1)
|
self.pixivision_id = match[1]
|
||||||
|
|
||||||
def works(self):
|
def works(self):
|
||||||
return (
|
return (
|
||||||
@@ -870,7 +870,7 @@ class PixivNovelExtractor(PixivExtractor):
|
|||||||
|
|
||||||
def __init__(self, match):
|
def __init__(self, match):
|
||||||
PixivExtractor.__init__(self, match)
|
PixivExtractor.__init__(self, match)
|
||||||
self.novel_id = match.group(1)
|
self.novel_id = match[1]
|
||||||
|
|
||||||
def items(self):
|
def items(self):
|
||||||
tags = self.config("tags", "japanese")
|
tags = self.config("tags", "japanese")
|
||||||
@@ -1041,7 +1041,7 @@ class PixivSketchExtractor(Extractor):
|
|||||||
|
|
||||||
def __init__(self, match):
|
def __init__(self, match):
|
||||||
Extractor.__init__(self, match)
|
Extractor.__init__(self, match)
|
||||||
self.username = match.group(1)
|
self.username = match[1]
|
||||||
|
|
||||||
def items(self):
|
def items(self):
|
||||||
headers = {"Referer": "{}/@{}".format(self.root, self.username)}
|
headers = {"Referer": "{}/@{}".format(self.root, self.username)}
|
||||||
|
|||||||
@@ -73,7 +73,7 @@ class PlurkTimelineExtractor(PlurkExtractor):
|
|||||||
|
|
||||||
def __init__(self, match):
|
def __init__(self, match):
|
||||||
PlurkExtractor.__init__(self, match)
|
PlurkExtractor.__init__(self, match)
|
||||||
self.user = match.group(1)
|
self.user = match[1]
|
||||||
|
|
||||||
def plurks(self):
|
def plurks(self):
|
||||||
url = "{}/{}".format(self.root, self.user)
|
url = "{}/{}".format(self.root, self.user)
|
||||||
|
|||||||
@@ -23,7 +23,7 @@ class PoringaExtractor(Extractor):
|
|||||||
|
|
||||||
def __init__(self, match):
|
def __init__(self, match):
|
||||||
Extractor.__init__(self, match)
|
Extractor.__init__(self, match)
|
||||||
self.item = match.group(1)
|
self.item = match[1]
|
||||||
self.__cookies = True
|
self.__cookies = True
|
||||||
|
|
||||||
def items(self):
|
def items(self):
|
||||||
|
|||||||
@@ -60,7 +60,7 @@ class PornhubGalleryExtractor(PornhubExtractor):
|
|||||||
|
|
||||||
def __init__(self, match):
|
def __init__(self, match):
|
||||||
PornhubExtractor.__init__(self, match)
|
PornhubExtractor.__init__(self, match)
|
||||||
self.gallery_id = match.group(1)
|
self.gallery_id = match[1]
|
||||||
self._first = None
|
self._first = None
|
||||||
|
|
||||||
def items(self):
|
def items(self):
|
||||||
@@ -141,7 +141,7 @@ class PornhubGifExtractor(PornhubExtractor):
|
|||||||
|
|
||||||
def __init__(self, match):
|
def __init__(self, match):
|
||||||
PornhubExtractor.__init__(self, match)
|
PornhubExtractor.__init__(self, match)
|
||||||
self.gallery_id = match.group(1)
|
self.gallery_id = match[1]
|
||||||
|
|
||||||
def items(self):
|
def items(self):
|
||||||
url = "{}/gif/{}".format(self.root, self.gallery_id)
|
url = "{}/gif/{}".format(self.root, self.gallery_id)
|
||||||
|
|||||||
@@ -62,7 +62,7 @@ class PornpicsGalleryExtractor(PornpicsExtractor, GalleryExtractor):
|
|||||||
example = "https://www.pornpics.com/galleries/TITLE-12345/"
|
example = "https://www.pornpics.com/galleries/TITLE-12345/"
|
||||||
|
|
||||||
def __init__(self, match):
|
def __init__(self, match):
|
||||||
url = "{}/galleries/{}/".format(self.root, match.group(1))
|
url = "{}/galleries/{}/".format(self.root, match[1])
|
||||||
GalleryExtractor.__init__(self, match, url)
|
GalleryExtractor.__init__(self, match, url)
|
||||||
|
|
||||||
items = GalleryExtractor.items
|
items = GalleryExtractor.items
|
||||||
|
|||||||
@@ -46,8 +46,8 @@ class PostmillExtractor(BaseExtractor):
|
|||||||
'</div>')
|
'</div>')
|
||||||
|
|
||||||
match = self._search_canonical_url(post_canonical_url)
|
match = self._search_canonical_url(post_canonical_url)
|
||||||
forum = match.group(1)
|
forum = match[1]
|
||||||
id = int(match.group(2))
|
id = int(match[2])
|
||||||
|
|
||||||
is_text_post = (url[0] == "/")
|
is_text_post = (url[0] == "/")
|
||||||
is_image_post = self._search_image_tag(page) is not None
|
is_image_post = self._search_image_tag(page) is not None
|
||||||
@@ -142,8 +142,8 @@ class PostmillPostExtractor(PostmillExtractor):
|
|||||||
|
|
||||||
def __init__(self, match):
|
def __init__(self, match):
|
||||||
PostmillExtractor.__init__(self, match)
|
PostmillExtractor.__init__(self, match)
|
||||||
self.forum = match.group(3)
|
self.forum = match[3]
|
||||||
self.post_id = match.group(4)
|
self.post_id = match[4]
|
||||||
|
|
||||||
def post_urls(self):
|
def post_urls(self):
|
||||||
return (self.root + "/f/" + self.forum + "/" + self.post_id,)
|
return (self.root + "/f/" + self.forum + "/" + self.post_id,)
|
||||||
|
|||||||
@@ -25,7 +25,7 @@ class RawkumaChapterExtractor(RawkumaBase, ChapterExtractor):
|
|||||||
example = "https://rawkuma.net/TITLE-chapter-123/"
|
example = "https://rawkuma.net/TITLE-chapter-123/"
|
||||||
|
|
||||||
def __init__(self, match):
|
def __init__(self, match):
|
||||||
url = "{}/{}/".format(self.root, match.group(1))
|
url = "{}/{}/".format(self.root, match[1])
|
||||||
ChapterExtractor.__init__(self, match, url)
|
ChapterExtractor.__init__(self, match, url)
|
||||||
|
|
||||||
def metadata(self, page):
|
def metadata(self, page):
|
||||||
@@ -61,7 +61,7 @@ class RawkumaMangaExtractor(RawkumaBase, MangaExtractor):
|
|||||||
example = "https://rawkuma.net/manga/TITLE/"
|
example = "https://rawkuma.net/manga/TITLE/"
|
||||||
|
|
||||||
def __init__(self, match):
|
def __init__(self, match):
|
||||||
url = "{}/manga/{}/".format(self.root, match.group(1))
|
url = "{}/manga/{}/".format(self.root, match[1])
|
||||||
MangaExtractor.__init__(self, match, url)
|
MangaExtractor.__init__(self, match, url)
|
||||||
|
|
||||||
def chapters(self, page):
|
def chapters(self, page):
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
# -*- coding: utf-8 -*-
|
# -*- coding: utf-8 -*-
|
||||||
|
|
||||||
# Copyright 2019-2023 Mike Fährmann
|
# Copyright 2019-2025 Mike Fährmann
|
||||||
#
|
#
|
||||||
# This program is free software; you can redistribute it and/or modify
|
# This program is free software; you can redistribute it and/or modify
|
||||||
# it under the terms of the GNU General Public License version 2 as
|
# it under the terms of the GNU General Public License version 2 as
|
||||||
@@ -23,7 +23,7 @@ class ReactorExtractor(BaseExtractor):
|
|||||||
def __init__(self, match):
|
def __init__(self, match):
|
||||||
BaseExtractor.__init__(self, match)
|
BaseExtractor.__init__(self, match)
|
||||||
|
|
||||||
url = text.ensure_http_scheme(match.group(0), "http://")
|
url = text.ensure_http_scheme(match[0], "http://")
|
||||||
pos = url.index("/", 10)
|
pos = url.index("/", 10)
|
||||||
self.root = url[:pos]
|
self.root = url[:pos]
|
||||||
self.path = url[pos:]
|
self.path = url[pos:]
|
||||||
@@ -176,7 +176,7 @@ class ReactorTagExtractor(ReactorExtractor):
|
|||||||
|
|
||||||
def __init__(self, match):
|
def __init__(self, match):
|
||||||
ReactorExtractor.__init__(self, match)
|
ReactorExtractor.__init__(self, match)
|
||||||
self.tag = match.group(match.lastindex)
|
self.tag = match[match.lastindex]
|
||||||
|
|
||||||
def metadata(self):
|
def metadata(self):
|
||||||
return {"search_tags": text.unescape(self.tag).replace("+", " ")}
|
return {"search_tags": text.unescape(self.tag).replace("+", " ")}
|
||||||
@@ -192,7 +192,7 @@ class ReactorSearchExtractor(ReactorExtractor):
|
|||||||
|
|
||||||
def __init__(self, match):
|
def __init__(self, match):
|
||||||
ReactorExtractor.__init__(self, match)
|
ReactorExtractor.__init__(self, match)
|
||||||
self.tag = match.group(match.lastindex)
|
self.tag = match[match.lastindex]
|
||||||
|
|
||||||
def metadata(self):
|
def metadata(self):
|
||||||
return {"search_tags": text.unescape(self.tag).replace("+", " ")}
|
return {"search_tags": text.unescape(self.tag).replace("+", " ")}
|
||||||
@@ -207,7 +207,7 @@ class ReactorUserExtractor(ReactorExtractor):
|
|||||||
|
|
||||||
def __init__(self, match):
|
def __init__(self, match):
|
||||||
ReactorExtractor.__init__(self, match)
|
ReactorExtractor.__init__(self, match)
|
||||||
self.user = match.group(match.lastindex)
|
self.user = match[match.lastindex]
|
||||||
|
|
||||||
def metadata(self):
|
def metadata(self):
|
||||||
return {"user": text.unescape(self.user).replace("+", " ")}
|
return {"user": text.unescape(self.user).replace("+", " ")}
|
||||||
@@ -221,7 +221,7 @@ class ReactorPostExtractor(ReactorExtractor):
|
|||||||
|
|
||||||
def __init__(self, match):
|
def __init__(self, match):
|
||||||
ReactorExtractor.__init__(self, match)
|
ReactorExtractor.__init__(self, match)
|
||||||
self.post_id = match.group(match.lastindex)
|
self.post_id = match[match.lastindex]
|
||||||
|
|
||||||
def items(self):
|
def items(self):
|
||||||
post = self.request(self.root + self.path).text
|
post = self.request(self.root + self.path).text
|
||||||
|
|||||||
@@ -50,7 +50,7 @@ class ReadcomiconlineIssueExtractor(ReadcomiconlineBase, ChapterExtractor):
|
|||||||
|
|
||||||
def __init__(self, match):
|
def __init__(self, match):
|
||||||
ChapterExtractor.__init__(self, match)
|
ChapterExtractor.__init__(self, match)
|
||||||
self.params = match.group(2)
|
self.params = match[2]
|
||||||
|
|
||||||
def _init(self):
|
def _init(self):
|
||||||
params = text.parse_query(self.params)
|
params = text.parse_query(self.params)
|
||||||
@@ -71,7 +71,7 @@ class ReadcomiconlineIssueExtractor(ReadcomiconlineBase, ChapterExtractor):
|
|||||||
match = re.match(r"(?:Issue )?#(\d+)|(.+)", iinfo)
|
match = re.match(r"(?:Issue )?#(\d+)|(.+)", iinfo)
|
||||||
return {
|
return {
|
||||||
"comic": comic,
|
"comic": comic,
|
||||||
"issue": match.group(1) or match.group(2),
|
"issue": match[1] or match[2],
|
||||||
"issue_id": text.parse_int(self.issue_id),
|
"issue_id": text.parse_int(self.issue_id),
|
||||||
"lang": "en",
|
"lang": "en",
|
||||||
"language": "English",
|
"language": "English",
|
||||||
|
|||||||
@@ -28,4 +28,4 @@ class RecursiveExtractor(Extractor):
|
|||||||
page = self.request(text.ensure_http_scheme(url)).text
|
page = self.request(text.ensure_http_scheme(url)).text
|
||||||
|
|
||||||
for match in util.re(r"https?://[^\s\"']+").finditer(page):
|
for match in util.re(r"https?://[^\s\"']+").finditer(page):
|
||||||
yield Message.Queue, match.group(0), {}
|
yield Message.Queue, match[0], {}
|
||||||
|
|||||||
@@ -141,7 +141,7 @@ class RedditExtractor(Extractor):
|
|||||||
|
|
||||||
match = match_submission(url)
|
match = match_submission(url)
|
||||||
if match:
|
if match:
|
||||||
extra.append(match.group(1))
|
extra.append(match[1])
|
||||||
elif not match_user(url) and not match_subreddit(url):
|
elif not match_user(url) and not match_subreddit(url):
|
||||||
if previews and "comment" not in data and \
|
if previews and "comment" not in data and \
|
||||||
"preview" in data:
|
"preview" in data:
|
||||||
@@ -309,7 +309,7 @@ class RedditSubmissionExtractor(RedditExtractor):
|
|||||||
|
|
||||||
def __init__(self, match):
|
def __init__(self, match):
|
||||||
RedditExtractor.__init__(self, match)
|
RedditExtractor.__init__(self, match)
|
||||||
self.submission_id = match.group(1)
|
self.submission_id = match[1]
|
||||||
|
|
||||||
def submissions(self):
|
def submissions(self):
|
||||||
return (self.api.submission(self.submission_id),)
|
return (self.api.submission(self.submission_id),)
|
||||||
@@ -326,14 +326,14 @@ class RedditImageExtractor(Extractor):
|
|||||||
|
|
||||||
def __init__(self, match):
|
def __init__(self, match):
|
||||||
Extractor.__init__(self, match)
|
Extractor.__init__(self, match)
|
||||||
domain = match.group(1)
|
domain = match[1]
|
||||||
self.path = match.group(2)
|
self.path = match[2]
|
||||||
if domain == "preview.redd.it":
|
if domain == "preview.redd.it":
|
||||||
self.domain = "i.redd.it"
|
self.domain = "i.redd.it"
|
||||||
self.query = ""
|
self.query = ""
|
||||||
else:
|
else:
|
||||||
self.domain = domain
|
self.domain = domain
|
||||||
self.query = match.group(3) or ""
|
self.query = match[3] or ""
|
||||||
|
|
||||||
def items(self):
|
def items(self):
|
||||||
url = "https://{}/{}{}".format(self.domain, self.path, self.query)
|
url = "https://{}/{}{}".format(self.domain, self.path, self.query)
|
||||||
|
|||||||
@@ -23,7 +23,7 @@ class RedgifsExtractor(Extractor):
|
|||||||
|
|
||||||
def __init__(self, match):
|
def __init__(self, match):
|
||||||
Extractor.__init__(self, match)
|
Extractor.__init__(self, match)
|
||||||
self.key = match.group(1)
|
self.key = match[1]
|
||||||
|
|
||||||
def _init(self):
|
def _init(self):
|
||||||
self.api = RedgifsAPI(self)
|
self.api = RedgifsAPI(self)
|
||||||
@@ -94,7 +94,7 @@ class RedgifsUserExtractor(RedgifsExtractor):
|
|||||||
|
|
||||||
def __init__(self, match):
|
def __init__(self, match):
|
||||||
RedgifsExtractor.__init__(self, match)
|
RedgifsExtractor.__init__(self, match)
|
||||||
self.query = match.group(2)
|
self.query = match[2]
|
||||||
|
|
||||||
def metadata(self):
|
def metadata(self):
|
||||||
return {"userName": self.key}
|
return {"userName": self.key}
|
||||||
@@ -116,7 +116,7 @@ class RedgifsCollectionExtractor(RedgifsExtractor):
|
|||||||
|
|
||||||
def __init__(self, match):
|
def __init__(self, match):
|
||||||
RedgifsExtractor.__init__(self, match)
|
RedgifsExtractor.__init__(self, match)
|
||||||
self.collection_id = match.group(2)
|
self.collection_id = match[2]
|
||||||
|
|
||||||
def metadata(self):
|
def metadata(self):
|
||||||
collection = self.api.collection_info(self.key, self.collection_id)
|
collection = self.api.collection_info(self.key, self.collection_id)
|
||||||
@@ -151,7 +151,7 @@ class RedgifsNichesExtractor(RedgifsExtractor):
|
|||||||
|
|
||||||
def __init__(self, match):
|
def __init__(self, match):
|
||||||
RedgifsExtractor.__init__(self, match)
|
RedgifsExtractor.__init__(self, match)
|
||||||
self.query = match.group(2)
|
self.query = match[2]
|
||||||
|
|
||||||
def gifs(self):
|
def gifs(self):
|
||||||
order = text.parse_query(self.query).get("order")
|
order = text.parse_query(self.query).get("order")
|
||||||
|
|||||||
@@ -61,7 +61,7 @@ class Rule34usTagExtractor(Rule34usExtractor):
|
|||||||
|
|
||||||
def __init__(self, match):
|
def __init__(self, match):
|
||||||
Rule34usExtractor.__init__(self, match)
|
Rule34usExtractor.__init__(self, match)
|
||||||
self.tags = text.unquote(match.group(1).replace("+", " "))
|
self.tags = text.unquote(match[1].replace("+", " "))
|
||||||
|
|
||||||
def metadata(self):
|
def metadata(self):
|
||||||
return {"search_tags": self.tags}
|
return {"search_tags": self.tags}
|
||||||
@@ -98,7 +98,7 @@ class Rule34usPostExtractor(Rule34usExtractor):
|
|||||||
|
|
||||||
def __init__(self, match):
|
def __init__(self, match):
|
||||||
Rule34usExtractor.__init__(self, match)
|
Rule34usExtractor.__init__(self, match)
|
||||||
self.post_id = match.group(1)
|
self.post_id = match[1]
|
||||||
|
|
||||||
def posts(self):
|
def posts(self):
|
||||||
return (self._parse_post(self.post_id),)
|
return (self._parse_post(self.post_id),)
|
||||||
|
|||||||
@@ -124,7 +124,7 @@ class SankakuTagExtractor(SankakuExtractor):
|
|||||||
|
|
||||||
def __init__(self, match):
|
def __init__(self, match):
|
||||||
SankakuExtractor.__init__(self, match)
|
SankakuExtractor.__init__(self, match)
|
||||||
query = text.parse_query(match.group(1))
|
query = text.parse_query(match[1])
|
||||||
self.tags = text.unquote(query.get("tags", "").replace("+", " "))
|
self.tags = text.unquote(query.get("tags", "").replace("+", " "))
|
||||||
|
|
||||||
if "date:" in self.tags:
|
if "date:" in self.tags:
|
||||||
@@ -154,7 +154,7 @@ class SankakuPoolExtractor(SankakuExtractor):
|
|||||||
|
|
||||||
def __init__(self, match):
|
def __init__(self, match):
|
||||||
SankakuExtractor.__init__(self, match)
|
SankakuExtractor.__init__(self, match)
|
||||||
self.pool_id = match.group(1)
|
self.pool_id = match[1]
|
||||||
|
|
||||||
def metadata(self):
|
def metadata(self):
|
||||||
pool = self.api.pools(self.pool_id)
|
pool = self.api.pools(self.pool_id)
|
||||||
@@ -180,7 +180,7 @@ class SankakuPostExtractor(SankakuExtractor):
|
|||||||
|
|
||||||
def __init__(self, match):
|
def __init__(self, match):
|
||||||
SankakuExtractor.__init__(self, match)
|
SankakuExtractor.__init__(self, match)
|
||||||
self.post_id = match.group(1)
|
self.post_id = match[1]
|
||||||
|
|
||||||
def posts(self):
|
def posts(self):
|
||||||
return self.api.posts(self.post_id)
|
return self.api.posts(self.post_id)
|
||||||
@@ -194,7 +194,7 @@ class SankakuBooksExtractor(SankakuExtractor):
|
|||||||
|
|
||||||
def __init__(self, match):
|
def __init__(self, match):
|
||||||
SankakuExtractor.__init__(self, match)
|
SankakuExtractor.__init__(self, match)
|
||||||
query = text.parse_query(match.group(1))
|
query = text.parse_query(match[1])
|
||||||
self.tags = text.unquote(query.get("tags", "").replace("+", " "))
|
self.tags = text.unquote(query.get("tags", "").replace("+", " "))
|
||||||
|
|
||||||
def items(self):
|
def items(self):
|
||||||
|
|||||||
@@ -19,7 +19,7 @@ class SankakucomplexExtractor(Extractor):
|
|||||||
|
|
||||||
def __init__(self, match):
|
def __init__(self, match):
|
||||||
Extractor.__init__(self, match)
|
Extractor.__init__(self, match)
|
||||||
self.path = match.group(1)
|
self.path = match[1]
|
||||||
|
|
||||||
|
|
||||||
class SankakucomplexArticleExtractor(SankakucomplexExtractor):
|
class SankakucomplexArticleExtractor(SankakucomplexExtractor):
|
||||||
|
|||||||
@@ -187,7 +187,7 @@ class SeigaImageExtractor(SeigaExtractor):
|
|||||||
|
|
||||||
def __init__(self, match):
|
def __init__(self, match):
|
||||||
SeigaExtractor.__init__(self, match)
|
SeigaExtractor.__init__(self, match)
|
||||||
self.image_id = match.group(1)
|
self.image_id = match[1]
|
||||||
|
|
||||||
def skip(self, num):
|
def skip(self, num):
|
||||||
self.start_image += num
|
self.start_image += num
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
# -*- coding: utf-8 -*-
|
# -*- coding: utf-8 -*-
|
||||||
|
|
||||||
# Copyright 2019-2023 Mike Fährmann
|
# Copyright 2019-2025 Mike Fährmann
|
||||||
#
|
#
|
||||||
# This program is free software; you can redistribute it and/or modify
|
# This program is free software; you can redistribute it and/or modify
|
||||||
# it under the terms of the GNU General Public License version 2 as
|
# it under the terms of the GNU General Public License version 2 as
|
||||||
@@ -20,7 +20,7 @@ class ShopifyExtractor(BaseExtractor):
|
|||||||
|
|
||||||
def __init__(self, match):
|
def __init__(self, match):
|
||||||
BaseExtractor.__init__(self, match)
|
BaseExtractor.__init__(self, match)
|
||||||
self.item_url = self.root + match.group(match.lastindex)
|
self.item_url = self.root + match[match.lastindex]
|
||||||
|
|
||||||
def items(self):
|
def items(self):
|
||||||
data = self.metadata()
|
data = self.metadata()
|
||||||
|
|||||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user