[common] use extractor subcategory for 'notfound=True'
This commit is contained in:
@@ -41,7 +41,7 @@ class _2chenThreadExtractor(_2chenExtractor):
|
||||
board = self.groups[-2]
|
||||
thread = self.kwdict["thread"] = self.groups[-1]
|
||||
url = f"{self.root}/{board}/{thread}"
|
||||
page = self.request(url, encoding="utf-8", notfound="thread").text
|
||||
page = self.request(url, encoding="utf-8", notfound=True).text
|
||||
|
||||
self.kwdict["board"], pos = text.extract(
|
||||
page, 'class="board">/', '/<')
|
||||
@@ -89,7 +89,7 @@ class _2chenBoardExtractor(_2chenExtractor):
|
||||
|
||||
def items(self):
|
||||
url = f"{self.root}/{self.groups[-1]}/catalog"
|
||||
page = self.request(url, notfound="board").text
|
||||
page = self.request(url, notfound=True).text
|
||||
data = {"_extractor": _2chenThreadExtractor}
|
||||
for thread in text.extract_iter(
|
||||
page, '<figure><a href="', '"'):
|
||||
|
||||
@@ -139,7 +139,7 @@ class Ao3WorkExtractor(Ao3Extractor):
|
||||
|
||||
work_id = self.groups[0]
|
||||
url = f"{self.root}/works/{work_id}"
|
||||
response = self.request(url, notfound="work")
|
||||
response = self.request(url, notfound=True)
|
||||
|
||||
if response.url.endswith("/users/login?restricted=true"):
|
||||
raise exception.AuthorizationError(
|
||||
|
||||
@@ -282,7 +282,7 @@ class ArtstationCollectionExtractor(ArtstationExtractor):
|
||||
url = f"{self.root}/collections/{self.collection_id}.json"
|
||||
params = {"username": self.user}
|
||||
collection = self.request_json(
|
||||
url, params=params, notfound="collection")
|
||||
url, params=params, notfound=True)
|
||||
return {"collection": collection, "user": self.user}
|
||||
|
||||
def projects(self):
|
||||
@@ -303,7 +303,7 @@ class ArtstationCollectionsExtractor(ArtstationExtractor):
|
||||
params = {"username": self.user}
|
||||
|
||||
for collection in self.request_json(
|
||||
url, params=params, notfound="collections"):
|
||||
url, params=params, notfound=True):
|
||||
url = f"{self.root}/{self.user}/collections/{collection['id']}"
|
||||
collection["_extractor"] = ArtstationCollectionExtractor
|
||||
yield Message.Queue, url, collection
|
||||
|
||||
@@ -54,7 +54,7 @@ class CienArticleExtractor(CienExtractor):
|
||||
def items(self):
|
||||
author_id, post_id = self.groups
|
||||
url = f"{self.root}/creator/{author_id}/article/{post_id}"
|
||||
page = self.request(url, notfound="article").text
|
||||
page = self.request(url, notfound=True).text
|
||||
|
||||
files = self._extract_files(page)
|
||||
post = self._extract_jsonld(page)[0]
|
||||
|
||||
@@ -216,7 +216,9 @@ class Extractor():
|
||||
if encoding:
|
||||
response.encoding = encoding
|
||||
return response
|
||||
if notfound and code == 404:
|
||||
if notfound is not None and code == 404:
|
||||
if notfound is True:
|
||||
notfound = self.__class__.subcategory
|
||||
self.status |= exception.NotFoundError.code
|
||||
raise exception.NotFoundError(notfound)
|
||||
|
||||
|
||||
@@ -1277,7 +1277,7 @@ class DeviantartDeviationExtractor(DeviantartExtractor):
|
||||
else:
|
||||
url = f"{self.root}/view/{self.deviation_id}/"
|
||||
|
||||
page = self._limited_request(url, notfound="deviation").text
|
||||
page = self._limited_request(url, notfound=True).text
|
||||
uuid = text.extr(page, '"deviationUuid\\":\\"', '\\')
|
||||
if not uuid:
|
||||
raise exception.NotFoundError("deviation")
|
||||
|
||||
@@ -115,7 +115,7 @@ class HotleakCreatorExtractor(HotleakExtractor):
|
||||
while True:
|
||||
try:
|
||||
response = self.request(
|
||||
url, headers=headers, params=params, notfound="creator")
|
||||
url, headers=headers, params=params, notfound=True)
|
||||
except exception.HttpError as exc:
|
||||
if exc.response.status_code == 429:
|
||||
self.wait(
|
||||
|
||||
@@ -49,7 +49,7 @@ class ImagebamGalleryExtractor(ImagebamExtractor):
|
||||
example = "https://www.imagebam.com/view/GID"
|
||||
|
||||
def items(self):
|
||||
page = self.request(self.root + self.path, notfound="gallery").text
|
||||
page = self.request(self.root + self.path, notfound=True).text
|
||||
|
||||
images = self.images(page)
|
||||
images.reverse()
|
||||
|
||||
@@ -422,7 +422,7 @@ class PatreonCreatorExtractor(PatreonExtractor):
|
||||
url = f"{self.root}/user?u={user_id}"
|
||||
else:
|
||||
url = f"{self.root}/{creator}"
|
||||
page = self.request(url, notfound="creator").text
|
||||
page = self.request(url, notfound=True).text
|
||||
|
||||
try:
|
||||
data = None
|
||||
@@ -478,7 +478,7 @@ class PatreonPostExtractor(PatreonExtractor):
|
||||
|
||||
def posts(self):
|
||||
url = f"{self.root}/posts/{self.groups[0]}"
|
||||
page = self.request(url, notfound="post").text
|
||||
page = self.request(url, notfound=True).text
|
||||
bootstrap = self._extract_bootstrap(page)
|
||||
|
||||
try:
|
||||
|
||||
@@ -198,7 +198,7 @@ class SeigaImageExtractor(SeigaExtractor):
|
||||
"skip_fetish_warning", "1", domain="seiga.nicovideo.jp")
|
||||
|
||||
url = f"{self.root}/seiga/im{self.image_id}"
|
||||
page = self.request(url, notfound="image").text
|
||||
page = self.request(url, notfound=True).text
|
||||
|
||||
data = text.extract_all(page, (
|
||||
("date" , '<li class="date"><span class="created">', '<'),
|
||||
|
||||
Reference in New Issue
Block a user