[common] use extractor subcategory for 'notfound=True'

This commit is contained in:
Mike Fährmann
2026-01-18 22:16:18 +01:00
parent 09635352d0
commit 366b0750a8
10 changed files with 15 additions and 13 deletions

View File

@@ -41,7 +41,7 @@ class _2chenThreadExtractor(_2chenExtractor):
board = self.groups[-2]
thread = self.kwdict["thread"] = self.groups[-1]
url = f"{self.root}/{board}/{thread}"
-page = self.request(url, encoding="utf-8", notfound="thread").text
+page = self.request(url, encoding="utf-8", notfound=True).text
self.kwdict["board"], pos = text.extract(
page, 'class="board">/', '/<')
@@ -89,7 +89,7 @@ class _2chenBoardExtractor(_2chenExtractor):
def items(self):
url = f"{self.root}/{self.groups[-1]}/catalog"
-page = self.request(url, notfound="board").text
+page = self.request(url, notfound=True).text
data = {"_extractor": _2chenThreadExtractor}
for thread in text.extract_iter(
page, '<figure><a href="', '"'):

View File

@@ -139,7 +139,7 @@ class Ao3WorkExtractor(Ao3Extractor):
work_id = self.groups[0]
url = f"{self.root}/works/{work_id}"
-response = self.request(url, notfound="work")
+response = self.request(url, notfound=True)
if response.url.endswith("/users/login?restricted=true"):
raise exception.AuthorizationError(

View File

@@ -282,7 +282,7 @@ class ArtstationCollectionExtractor(ArtstationExtractor):
url = f"{self.root}/collections/{self.collection_id}.json"
params = {"username": self.user}
collection = self.request_json(
-url, params=params, notfound="collection")
+url, params=params, notfound=True)
return {"collection": collection, "user": self.user}
def projects(self):
@@ -303,7 +303,7 @@ class ArtstationCollectionsExtractor(ArtstationExtractor):
params = {"username": self.user}
for collection in self.request_json(
-url, params=params, notfound="collections"):
+url, params=params, notfound=True):
url = f"{self.root}/{self.user}/collections/{collection['id']}"
collection["_extractor"] = ArtstationCollectionExtractor
yield Message.Queue, url, collection

View File

@@ -54,7 +54,7 @@ class CienArticleExtractor(CienExtractor):
def items(self):
author_id, post_id = self.groups
url = f"{self.root}/creator/{author_id}/article/{post_id}"
-page = self.request(url, notfound="article").text
+page = self.request(url, notfound=True).text
files = self._extract_files(page)
post = self._extract_jsonld(page)[0]

View File

@@ -216,7 +216,9 @@ class Extractor():
if encoding:
response.encoding = encoding
return response
-if notfound and code == 404:
+if notfound is not None and code == 404:
+if notfound is True:
+notfound = self.__class__.subcategory
self.status |= exception.NotFoundError.code
raise exception.NotFoundError(notfound)

View File

@@ -1277,7 +1277,7 @@ class DeviantartDeviationExtractor(DeviantartExtractor):
else:
url = f"{self.root}/view/{self.deviation_id}/"
-page = self._limited_request(url, notfound="deviation").text
+page = self._limited_request(url, notfound=True).text
uuid = text.extr(page, '"deviationUuid\\":\\"', '\\')
if not uuid:
raise exception.NotFoundError("deviation")

View File

@@ -115,7 +115,7 @@ class HotleakCreatorExtractor(HotleakExtractor):
while True:
try:
response = self.request(
-url, headers=headers, params=params, notfound="creator")
+url, headers=headers, params=params, notfound=True)
except exception.HttpError as exc:
if exc.response.status_code == 429:
self.wait(

View File

@@ -49,7 +49,7 @@ class ImagebamGalleryExtractor(ImagebamExtractor):
example = "https://www.imagebam.com/view/GID"
def items(self):
-page = self.request(self.root + self.path, notfound="gallery").text
+page = self.request(self.root + self.path, notfound=True).text
images = self.images(page)
images.reverse()

View File

@@ -422,7 +422,7 @@ class PatreonCreatorExtractor(PatreonExtractor):
url = f"{self.root}/user?u={user_id}"
else:
url = f"{self.root}/{creator}"
-page = self.request(url, notfound="creator").text
+page = self.request(url, notfound=True).text
try:
data = None
@@ -478,7 +478,7 @@ class PatreonPostExtractor(PatreonExtractor):
def posts(self):
url = f"{self.root}/posts/{self.groups[0]}"
-page = self.request(url, notfound="post").text
+page = self.request(url, notfound=True).text
bootstrap = self._extract_bootstrap(page)
try:

View File

@@ -198,7 +198,7 @@ class SeigaImageExtractor(SeigaExtractor):
"skip_fetish_warning", "1", domain="seiga.nicovideo.jp")
url = f"{self.root}/seiga/im{self.image_id}"
-page = self.request(url, notfound="image").text
+page = self.request(url, notfound=True).text
data = text.extract_all(page, (
("date" , '<li class="date"><span class="created">', '<'),