[common] add reference to 'exception' module to Extractor class

- remove 'exception' imports
- replace with 'self.exc'
This commit is contained in:
Mike Fährmann
2026-02-14 21:29:26 +01:00
parent b552cdba04
commit 53cdfaac37
100 changed files with 382 additions and 382 deletions

View File

@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
# Copyright 2024-2025 Mike Fährmann # Copyright 2024-2026 Mike Fährmann
# #
# This program is free software; you can redistribute it and/or modify # This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as # it under the terms of the GNU General Public License version 2 as
@@ -9,7 +9,7 @@
"""Extractors for https://archiveofourown.org/""" """Extractors for https://archiveofourown.org/"""
from .common import Extractor, Message, Dispatch from .common import Extractor, Message, Dispatch
from .. import text, util, exception from .. import text, util
from ..cache import cache from ..cache import cache
BASE_PATTERN = (r"(?:https?://)?(?:www\.)?" BASE_PATTERN = (r"(?:https?://)?(?:www\.)?"
@@ -88,11 +88,11 @@ class Ao3Extractor(Extractor):
response = self.request(url, method="POST", data=data) response = self.request(url, method="POST", data=data)
if not response.history: if not response.history:
raise exception.AuthenticationError() raise self.exc.AuthenticationError()
remember = response.history[0].cookies.get("remember_user_token") remember = response.history[0].cookies.get("remember_user_token")
if not remember: if not remember:
raise exception.AuthenticationError() raise self.exc.AuthenticationError()
return { return {
"remember_user_token": remember, "remember_user_token": remember,
@@ -142,12 +142,12 @@ class Ao3WorkExtractor(Ao3Extractor):
response = self.request(url, notfound=True) response = self.request(url, notfound=True)
if response.url.endswith("/users/login?restricted=true"): if response.url.endswith("/users/login?restricted=true"):
raise exception.AuthorizationError( raise self.exc.AuthorizationError(
"Login required to access member-only works") "Login required to access member-only works")
page = response.text page = response.text
if len(page) < 20000 and \ if len(page) < 20000 and \
'<h2 class="landmark heading">Adult Content Warning</' in page: '<h2 class="landmark heading">Adult Content Warning</' in page:
raise exception.AbortExtraction("Adult Content") raise self.exc.AbortExtraction("Adult Content")
extr = text.extract_from(page) extr = text.extract_from(page)

View File

@@ -7,7 +7,7 @@
"""Extractors for https://arca.live/""" """Extractors for https://arca.live/"""
from .common import Extractor, Message from .common import Extractor, Message
from .. import text, util, exception from .. import text, util
BASE_PATTERN = r"(?:https?://)?(?:www\.)?arca\.live" BASE_PATTERN = r"(?:https?://)?(?:www\.)?arca\.live"
@@ -173,7 +173,7 @@ class ArcaliveAPI():
msg = "API request failed: " + msg msg = "API request failed: " + msg
else: else:
msg = "API request failed" msg = "API request failed"
raise exception.AbortExtraction(msg) raise self.exc.AbortExtraction(msg)
def _pagination(self, endpoint, params, key): def _pagination(self, endpoint, params, key):
while True: while True:

View File

@@ -9,7 +9,7 @@
"""Extractors for https://www.artstation.com/""" """Extractors for https://www.artstation.com/"""
from .common import Extractor, Message from .common import Extractor, Message
from .. import text, util, exception from .. import text, util
import itertools import itertools
@@ -121,7 +121,7 @@ class ArtstationExtractor(Extractor):
try: try:
data = self.request_json(url) data = self.request_json(url)
except exception.HttpError as exc: except self.exc.HttpError as exc:
self.log.warning(exc) self.log.warning(exc)
return return
@@ -239,7 +239,7 @@ class ArtstationAlbumExtractor(ArtstationExtractor):
if album["id"] == self.album_id: if album["id"] == self.album_id:
break break
else: else:
raise exception.NotFoundError("album") raise self.exc.NotFoundError("album")
return { return {
"userinfo": userinfo, "userinfo": userinfo,

View File

@@ -9,7 +9,7 @@
"""Extractors for https://aryion.com/""" """Extractors for https://aryion.com/"""
from .common import Extractor, Message from .common import Extractor, Message
from .. import text, util, dt, exception from .. import text, util, dt
from ..cache import cache from ..cache import cache
from email.utils import parsedate_tz from email.utils import parsedate_tz
@@ -52,7 +52,7 @@ class AryionExtractor(Extractor):
response = self.request(url, method="POST", data=data) response = self.request(url, method="POST", data=data)
if b"You have been successfully logged in." not in response.content: if b"You have been successfully logged in." not in response.content:
raise exception.AuthenticationError() raise self.exc.AuthenticationError()
return {c: response.cookies[c] for c in self.cookies_names} return {c: response.cookies[c] for c in self.cookies_names}
def items(self): def items(self):
@@ -258,7 +258,7 @@ class AryionWatchExtractor(AryionExtractor):
def posts(self): def posts(self):
if not self.cookies_check(self.cookies_names): if not self.cookies_check(self.cookies_names):
raise exception.AuthRequired( raise self.exc.AuthRequired(
("username & password", "authenticated cookies"), ("username & password", "authenticated cookies"),
"watched Submissions") "watched Submissions")
self.cookies.set("g4p_msgpage_style", "plain", domain="aryion.com") self.cookies.set("g4p_msgpage_style", "plain", domain="aryion.com")

View File

@@ -9,7 +9,7 @@
"""Extractors for https://www.behance.net/""" """Extractors for https://www.behance.net/"""
from .common import Extractor, Message from .common import Extractor, Message
from .. import text, util, exception from .. import text, util
class BehanceExtractor(Extractor): class BehanceExtractor(Extractor):
@@ -139,13 +139,13 @@ class BehanceGalleryExtractor(BehanceExtractor):
if not data["modules"]: if not data["modules"]:
access = data.get("matureAccess") access = data.get("matureAccess")
if access == "logged-out": if access == "logged-out":
raise exception.AuthorizationError( raise self.exc.AuthorizationError(
"Mature content galleries require logged-in cookies") "Mature content galleries require logged-in cookies")
if access == "restricted-safe": if access == "restricted-safe":
raise exception.AuthorizationError( raise self.exc.AuthorizationError(
"Mature content blocked in account settings") "Mature content blocked in account settings")
if access and access != "allowed": if access and access != "allowed":
raise exception.AuthorizationError() raise self.exc.AuthorizationError()
return () return ()
results = [] results = []

View File

@@ -9,7 +9,7 @@
"""Extractors for https://www.bellazon.com/""" """Extractors for https://www.bellazon.com/"""
from .common import Extractor, Message from .common import Extractor, Message
from .. import text, exception from .. import text
BASE_PATTERN = r"(?:https?://)?(?:www\.)?bellazon\.com/main" BASE_PATTERN = r"(?:https?://)?(?:www\.)?bellazon\.com/main"
@@ -207,7 +207,7 @@ class BellazonPostExtractor(BellazonExtractor):
pos = page.find('id="elComment_' + post_id) pos = page.find('id="elComment_' + post_id)
if pos < 0: if pos < 0:
raise exception.NotFoundError("post") raise self.exc.NotFoundError("post")
html = text.extract(page, "<article ", "</article>", pos-100)[0] html = text.extract(page, "<article ", "</article>", pos-100)[0]
self.kwdict["thread"] = self._parse_thread(page) self.kwdict["thread"] = self._parse_thread(page)

View File

@@ -7,7 +7,7 @@
"""Extractors for https://www.bilibili.com/""" """Extractors for https://www.bilibili.com/"""
from .common import Extractor, Message from .common import Extractor, Message
from .. import text, util, exception from .. import text, util
class BilibiliExtractor(Extractor): class BilibiliExtractor(Extractor):
@@ -123,7 +123,7 @@ class BilibiliAPI():
if data["code"]: if data["code"]:
self.extractor.log.debug("Server response: %s", data) self.extractor.log.debug("Server response: %s", data)
raise exception.AbortExtraction("API request failed") raise self.exc.AbortExtraction("API request failed")
return data return data
@@ -151,7 +151,7 @@ class BilibiliAPI():
page, "window.__INITIAL_STATE__=", "};") + "}") page, "window.__INITIAL_STATE__=", "};") + "}")
except Exception: except Exception:
if "window._riskdata_" not in page: if "window._riskdata_" not in page:
raise exception.AbortExtraction( raise self.exc.AbortExtraction(
article_id + ": Unable to extract INITIAL_STATE data") article_id + ": Unable to extract INITIAL_STATE data")
self.extractor.wait(seconds=300) self.extractor.wait(seconds=300)
@@ -174,9 +174,9 @@ class BilibiliAPI():
if data["code"] != 0: if data["code"] != 0:
self.extractor.log.debug("Server response: %s", data) self.extractor.log.debug("Server response: %s", data)
raise exception.AbortExtraction( raise self.exc.AbortExtraction(
            "API request failed. Are you logged in?")             "API request failed. Are you logged in?")
try: try:
return data["data"]["profile"]["mid"] return data["data"]["profile"]["mid"]
except Exception: except Exception:
raise exception.AbortExtraction("API request failed") raise self.exc.AbortExtraction("API request failed")

View File

@@ -9,7 +9,7 @@
"""Extractors for https://bsky.app/""" """Extractors for https://bsky.app/"""
from .common import Extractor, Message, Dispatch from .common import Extractor, Message, Dispatch
from .. import text, util, exception from .. import text, util
from ..cache import cache, memcache from ..cache import cache, memcache
BASE_PATTERN = (r"(?:https?://)?" BASE_PATTERN = (r"(?:https?://)?"
@@ -96,7 +96,7 @@ class BlueskyExtractor(Extractor):
uri = record["value"]["subject"]["uri"] uri = record["value"]["subject"]["uri"]
if "/app.bsky.feed.post/" in uri: if "/app.bsky.feed.post/" in uri:
yield from self.api.get_post_thread_uri(uri, depth) yield from self.api.get_post_thread_uri(uri, depth)
except exception.ControlException: except self.exc.ControlException:
pass # deleted post pass # deleted post
except Exception as exc: except Exception as exc:
self.log.debug(record, exc_info=exc) self.log.debug(record, exc_info=exc)
@@ -569,7 +569,7 @@ class BlueskyAPI():
if response.status_code != 200: if response.status_code != 200:
self.log.debug("Server response: %s", data) self.log.debug("Server response: %s", data)
raise exception.AuthenticationError( raise self.exc.AuthenticationError(
f"\"{data.get('error')}: {data.get('message')}\"") f"\"{data.get('error')}: {data.get('message')}\"")
_refresh_token_cache.update(self.username, data["refreshJwt"]) _refresh_token_cache.update(self.username, data["refreshJwt"])
@@ -600,7 +600,7 @@ class BlueskyAPI():
msg = f"{msg} ({response.status_code} {response.reason})" msg = f"{msg} ({response.status_code} {response.reason})"
self.extractor.log.debug("Server response: %s", response.text) self.extractor.log.debug("Server response: %s", response.text)
raise exception.AbortExtraction(msg) raise self.exc.AbortExtraction(msg)
def _pagination(self, endpoint, params, def _pagination(self, endpoint, params,
key="feed", root=None, check_empty=False): key="feed", root=None, check_empty=False):

View File

@@ -7,7 +7,7 @@
"""Extractors for https://www.boosty.to/""" """Extractors for https://www.boosty.to/"""
from .common import Extractor, Message from .common import Extractor, Message
from .. import text, util, exception from .. import text, util
import itertools import itertools
BASE_PATTERN = r"(?:https?://)?boosty\.to" BASE_PATTERN = r"(?:https?://)?boosty\.to"
@@ -380,14 +380,14 @@ class BoostyAPI():
return response.json() return response.json()
elif response.status_code < 400: elif response.status_code < 400:
raise exception.AuthenticationError("Invalid API access token") raise self.exc.AuthenticationError("Invalid API access token")
elif response.status_code == 429: elif response.status_code == 429:
self.extractor.wait(seconds=600) self.extractor.wait(seconds=600)
else: else:
self.extractor.log.debug(response.text) self.extractor.log.debug(response.text)
raise exception.AbortExtraction("API request failed") raise self.exc.AbortExtraction("API request failed")
def _pagination(self, endpoint, params, transform=None, key=None): def _pagination(self, endpoint, params, transform=None, key=None):
if "is_only_allowed" not in params and self.extractor.only_allowed: if "is_only_allowed" not in params and self.extractor.only_allowed:

View File

@@ -10,7 +10,7 @@
from .common import Extractor from .common import Extractor
from .lolisafe import LolisafeAlbumExtractor from .lolisafe import LolisafeAlbumExtractor
from .. import text, util, config, exception from .. import text, util, config
from ..cache import memcache from ..cache import memcache
import random import random
@@ -110,7 +110,7 @@ class BunkrAlbumExtractor(LolisafeAlbumExtractor):
self.log.debug("Redirect to known CF challenge domain '%s'", self.log.debug("Redirect to known CF challenge domain '%s'",
root) root)
except exception.HttpError as exc: except self.exc.HttpError as exc:
if exc.status != 403: if exc.status != 403:
raise raise
@@ -125,7 +125,7 @@ class BunkrAlbumExtractor(LolisafeAlbumExtractor):
pass pass
else: else:
if not DOMAINS: if not DOMAINS:
raise exception.AbortExtraction( raise self.exc.AbortExtraction(
"All Bunkr domains require solving a CF challenge") "All Bunkr domains require solving a CF challenge")
# select alternative domain # select alternative domain
@@ -172,15 +172,15 @@ class BunkrAlbumExtractor(LolisafeAlbumExtractor):
item, 'timestamp: "', '"'), "%H:%M:%S %d/%m/%Y") item, 'timestamp: "', '"'), "%H:%M:%S %d/%m/%Y")
yield file yield file
except exception.ControlException: except self.exc.ControlException:
raise raise
except Exception as exc: except Exception as exc:
self.log.error("%s: %s", exc.__class__.__name__, exc) self.log.error("%s: %s", exc.__class__.__name__, exc)
self.log.debug("%s", item, exc_info=exc) self.log.debug("%s", item, exc_info=exc)
if isinstance(exc, exception.HttpError) and \ if isinstance(exc, self.exc.HttpError) and \
exc.status == 400 and \ exc.status == 400 and \
exc.response.url.startswith(self.root_api): exc.response.url.startswith(self.root_api):
raise exception.AbortExtraction("Album deleted") raise self.exc.AbortExtraction("Album deleted")
def _extract_file(self, data_id): def _extract_file(self, data_id):
referer = f"{self.root_dl}/file/{data_id}" referer = f"{self.root_dl}/file/{data_id}"

View File

@@ -9,7 +9,7 @@
"""Extractors for https://www.civitai.com/""" """Extractors for https://www.civitai.com/"""
from .common import Extractor, Message, Dispatch from .common import Extractor, Message, Dispatch
from .. import text, util, exception from .. import text, util
from ..cache import memcache from ..cache import memcache
import itertools import itertools
import time import time
@@ -201,7 +201,7 @@ class CivitaiExtractor(Extractor):
if "Authorization" not in self.api.headers and \ if "Authorization" not in self.api.headers and \
not self.cookies.get( not self.cookies.get(
"__Secure-civitai-token", domain=".civitai.com"): "__Secure-civitai-token", domain=".civitai.com"):
raise exception.AuthRequired(("api-key", "authenticated cookies")) raise self.exc.AuthRequired(("api-key", "authenticated cookies"))
def _parse_query(self, value): def _parse_query(self, value):
return text.parse_query_list( return text.parse_query_list(

View File

@@ -9,7 +9,7 @@
"""Extractors for https://comick.io/""" """Extractors for https://comick.io/"""
from .common import GalleryExtractor, ChapterExtractor, MangaExtractor, Message from .common import GalleryExtractor, ChapterExtractor, MangaExtractor, Message
from .. import text, exception from .. import text
from ..cache import memcache from ..cache import memcache
BASE_PATTERN = r"(?:https?://)?(?:www\.)?comick\.io" BASE_PATTERN = r"(?:https?://)?(?:www\.)?comick\.io"
@@ -71,7 +71,7 @@ class ComickChapterExtractor(ComickBase, ChapterExtractor):
while True: while True:
try: try:
props = _chapter_info(self, manga, chstr) props = _chapter_info(self, manga, chstr)
except exception.HttpError as exc: except self.exc.HttpError as exc:
if exc.response.status_code != 404: if exc.response.status_code != 404:
raise raise
if exc.response.headers.get( if exc.response.headers.get(
@@ -84,7 +84,7 @@ class ComickChapterExtractor(ComickBase, ChapterExtractor):
manga = _manga_info(self, slug) manga = _manga_info(self, slug)
continue continue
if b'"notFound":true' in exc.response.content: if b'"notFound":true' in exc.response.content:
raise exception.NotFoundError("chapter") raise self.exc.NotFoundError("chapter")
raise raise
if "__N_REDIRECT" in props: if "__N_REDIRECT" in props:

View File

@@ -54,6 +54,7 @@ class Extractor():
request_interval_429 = 60.0 request_interval_429 = 60.0
request_timestamp = 0.0 request_timestamp = 0.0
finalize = skip = None finalize = skip = None
exc = exception
def __init__(self, match): def __init__(self, match):
self.log = logging.getLogger(self.category) self.log = logging.getLogger(self.category)

View File

@@ -9,7 +9,7 @@
"""Extractors for https://cyberfile.me/""" """Extractors for https://cyberfile.me/"""
from .common import Extractor, Message from .common import Extractor, Message
from .. import text, exception from .. import text
BASE_PATTERN = r"(?:https?://)?(?:www\.)?cyberfile\.me" BASE_PATTERN = r"(?:https?://)?(?:www\.)?cyberfile\.me"
@@ -39,7 +39,7 @@ class CyberfileExtractor(Extractor):
resp = self.request_json( resp = self.request_json(
url_pw, method="POST", headers=headers, data=data_pw) url_pw, method="POST", headers=headers, data=data_pw)
if not resp.get("success"): if not resp.get("success"):
raise exception.AuthorizationError(f"'{resp.get('msg')}'") raise self.exc.AuthorizationError(f"'{resp.get('msg')}'")
resp = self.request_json( resp = self.request_json(
url, method="POST", headers=headers, data=data) url, method="POST", headers=headers, data=data)

View File

@@ -9,7 +9,7 @@
"""Extractors for https://www.deviantart.com/""" """Extractors for https://www.deviantart.com/"""
from .common import Extractor, Message, Dispatch from .common import Extractor, Message, Dispatch
from .. import text, util, dt, exception from .. import text, util, dt
from ..cache import cache, memcache from ..cache import cache, memcache
import collections import collections
import mimetypes import mimetypes
@@ -123,7 +123,7 @@ class DeviantartExtractor(Extractor):
self.group = False self.group = False
elif group == "skip": elif group == "skip":
self.log.info("Skipping group '%s'", self.user) self.log.info("Skipping group '%s'", self.user)
raise exception.AbortExtraction() raise self.exc.AbortExtraction()
else: else:
self.subcategory = "group-" + self.subcategory self.subcategory = "group-" + self.subcategory
self.group = True self.group = True
@@ -457,7 +457,7 @@ class DeviantartExtractor(Extractor):
for subfolder in folder["subfolders"]: for subfolder in folder["subfolders"]:
if subfolder["folderid"] == uuid: if subfolder["folderid"] == uuid:
return subfolder return subfolder
raise exception.NotFoundError("folder") raise self.exc.NotFoundError("folder")
def _folder_urls(self, folders, category, extractor): def _folder_urls(self, folders, category, extractor):
base = f"{self.root}/{self.user}/{category}/" base = f"{self.root}/{self.user}/{category}/"
@@ -1027,7 +1027,7 @@ class DeviantartDeviationExtractor(DeviantartExtractor):
page = self._limited_request(url, notfound=True).text page = self._limited_request(url, notfound=True).text
uuid = text.extr(page, '"deviationUuid\\":\\"', '\\') uuid = text.extr(page, '"deviationUuid\\":\\"', '\\')
if not uuid: if not uuid:
raise exception.NotFoundError("deviation") raise self.exc.NotFoundError("deviation")
deviation = self.api.deviation(uuid) deviation = self.api.deviation(uuid)
deviation["_page"] = page deviation["_page"] = page
@@ -1111,7 +1111,7 @@ class DeviantartSearchExtractor(DeviantartExtractor):
response = self.request(url, params=params) response = self.request(url, params=params)
if response.history and "/users/login" in response.url: if response.history and "/users/login" in response.url:
raise exception.AbortExtraction("HTTP redirect to login page") raise self.exc.AbortExtraction("HTTP redirect to login page")
page = response.text page = response.text
for user, type, did in find(page)[:-3:3]: for user, type, did in find(page)[:-3:3]:
@@ -1476,7 +1476,7 @@ class DeviantartOAuthAPI():
if response.status_code != 200: if response.status_code != 200:
self.log.debug("Server response: %s", data) self.log.debug("Server response: %s", data)
raise exception.AuthenticationError( raise self.exc.AuthenticationError(
f"\"{data.get('error_description')}\" ({data.get('error')})") f"\"{data.get('error_description')}\" ({data.get('error')})")
if refresh_token_key: if refresh_token_key:
_refresh_token_cache.update( _refresh_token_cache.update(
@@ -1515,9 +1515,9 @@ class DeviantartOAuthAPI():
error = data.get("error_description") error = data.get("error_description")
if error == "User not found.": if error == "User not found.":
raise exception.NotFoundError("user or group") raise self.exc.NotFoundError("user or group")
if error == "Deviation not downloadable.": if error == "Deviation not downloadable.":
raise exception.AuthorizationError() raise self.exc.AuthorizationError()
self.log.debug(response.text) self.log.debug(response.text)
msg = f"API responded with {status} {response.reason}" msg = f"API responded with {status} {response.reason}"
@@ -1808,7 +1808,7 @@ class DeviantartEclipseAPI():
pos = page.find('\\"name\\":\\"watching\\"') pos = page.find('\\"name\\":\\"watching\\"')
if pos < 0: if pos < 0:
raise exception.NotFoundError("'watching' module ID") raise self.exc.NotFoundError("'watching' module ID")
module_id = text.rextr(page, '\\"id\\":', ',', pos).strip('" ') module_id = text.rextr(page, '\\"id\\":', ',', pos).strip('" ')
self._fetch_csrf_token(page) self._fetch_csrf_token(page)
@@ -1863,7 +1863,7 @@ def _login_impl(extr, username, password):
response = extr.request(url, method="POST", data=data) response = extr.request(url, method="POST", data=data)
if not response.history: if not response.history:
raise exception.AuthenticationError() raise extr.exc.AuthenticationError()
return { return {
cookie.name: cookie.value cookie.name: cookie.value

View File

@@ -7,7 +7,7 @@
"""Extractors for https://discord.com/""" """Extractors for https://discord.com/"""
from .common import Extractor, Message from .common import Extractor, Message
from .. import text, exception from .. import text
BASE_PATTERN = r"(?:https?://)?discord\.com" BASE_PATTERN = r"(?:https?://)?discord\.com"
@@ -167,10 +167,10 @@ class DiscordExtractor(Extractor):
yield from self.extract_channel( yield from self.extract_channel(
channel["channel_id"], safe=True) channel["channel_id"], safe=True)
elif not safe: elif not safe:
raise exception.AbortExtraction( raise self.exc.AbortExtraction(
"This channel type is not supported." "This channel type is not supported."
) )
except exception.HttpError as exc: except self.exc.HttpError as exc:
if not (exc.status == 403 and safe): if not (exc.status == 403 and safe):
raise raise
@@ -474,7 +474,7 @@ class DiscordAPI():
try: try:
response = self.extractor.request( response = self.extractor.request(
url, params=params, headers=self.headers) url, params=params, headers=self.headers)
except exception.HttpError as exc: except self.exc.HttpError as exc:
if exc.status == 401: if exc.status == 401:
self._raise_invalid_token() self._raise_invalid_token()
raise raise
@@ -490,7 +490,7 @@ class DiscordAPI():
offset += len(data) offset += len(data)
def _raise_invalid_token(self): def _raise_invalid_token(self):
raise exception.AuthenticationError("""Invalid or missing token. raise self.exc.AuthenticationError("""Invalid or missing token.
Please provide a valid token following these instructions: Please provide a valid token following these instructions:
1) Open Discord in your browser (https://discord.com/app); 1) Open Discord in your browser (https://discord.com/app);

View File

@@ -9,7 +9,7 @@
"""Extractors for https://www.erome.com/""" """Extractors for https://www.erome.com/"""
from .common import Extractor, Message from .common import Extractor, Message
from .. import text, util, exception from .. import text, util
from ..cache import cache from ..cache import cache
import itertools import itertools
@@ -74,12 +74,12 @@ class EromeAlbumExtractor(EromeExtractor):
try: try:
page = self.request(url).text page = self.request(url).text
except exception.HttpError as exc: except self.exc.HttpError as exc:
if exc.status == 410: if exc.status == 410:
msg = text.extr(exc.response.text, "<h1>", "<") msg = text.extr(exc.response.text, "<h1>", "<")
else: else:
msg = "Unable to fetch album page" msg = "Unable to fetch album page"
raise exception.AbortExtraction( raise self.exc.AbortExtraction(
f"{album_id}: {msg} ({exc})") f"{album_id}: {msg} ({exc})")
title, pos = text.extract( title, pos = text.extract(

View File

@@ -9,7 +9,7 @@
"""Extractors for https://e-hentai.org/ and https://exhentai.org/""" """Extractors for https://e-hentai.org/ and https://exhentai.org/"""
from .common import Extractor, Message from .common import Extractor, Message
from .. import text, util, exception from .. import text, util
from ..cache import cache from ..cache import cache
import collections import collections
import itertools import itertools
@@ -53,13 +53,13 @@ class ExhentaiExtractor(Extractor):
response = Extractor.request(self, url, **kwargs) response = Extractor.request(self, url, **kwargs)
if "Cache-Control" not in response.headers and not response.content: if "Cache-Control" not in response.headers and not response.content:
self.log.info("blank page") self.log.info("blank page")
raise exception.AuthorizationError() raise self.exc.AuthorizationError()
return response return response
def login(self): def login(self):
"""Login and set necessary cookies""" """Login and set necessary cookies"""
if self.LIMIT: if self.LIMIT:
raise exception.AbortExtraction("Image limit reached!") raise self.exc.AbortExtraction("Image limit reached!")
if self.cookies_check(self.cookies_names): if self.cookies_check(self.cookies_names):
return return
@@ -99,9 +99,9 @@ class ExhentaiExtractor(Extractor):
content = response.content content = response.content
if b"You are now logged in as:" not in content: if b"You are now logged in as:" not in content:
if b"The captcha was not entered correctly" in content: if b"The captcha was not entered correctly" in content:
raise exception.AuthenticationError( raise self.exc.AuthenticationError(
"CAPTCHA required. Use cookies instead.") "CAPTCHA required. Use cookies instead.")
raise exception.AuthenticationError() raise self.exc.AuthenticationError()
# collect more cookies # collect more cookies
url = self.root + "/favorites.php" url = self.root + "/favorites.php"
@@ -187,7 +187,7 @@ class ExhentaiGalleryExtractor(ExhentaiExtractor):
self.image_token = text.extr(gpage, 'hentai.org/s/', '"') self.image_token = text.extr(gpage, 'hentai.org/s/', '"')
if not self.image_token: if not self.image_token:
self.log.debug("Page content:\n%s", gpage) self.log.debug("Page content:\n%s", gpage)
raise exception.AbortExtraction( raise self.exc.AbortExtraction(
"Failed to extract initial image token") "Failed to extract initial image token")
ipage = self._image_page() ipage = self._image_page()
else: else:
@@ -195,7 +195,7 @@ class ExhentaiGalleryExtractor(ExhentaiExtractor):
part = text.extr(ipage, 'hentai.org/g/', '"') part = text.extr(ipage, 'hentai.org/g/', '"')
if not part: if not part:
self.log.debug("Page content:\n%s", ipage) self.log.debug("Page content:\n%s", ipage)
raise exception.AbortExtraction( raise self.exc.AbortExtraction(
"Failed to extract gallery token") "Failed to extract gallery token")
self.gallery_token = part.split("/")[1] self.gallery_token = part.split("/")[1]
gpage = self._gallery_page() gpage = self._gallery_page()
@@ -313,7 +313,7 @@ class ExhentaiGalleryExtractor(ExhentaiExtractor):
data = self.request_json(self.api_url, method="POST", json=data) data = self.request_json(self.api_url, method="POST", json=data)
if "error" in data: if "error" in data:
raise exception.AbortExtraction(data["error"]) raise self.exc.AbortExtraction(data["error"])
return data["gmetadata"][0] return data["gmetadata"][0]
@@ -338,7 +338,7 @@ class ExhentaiGalleryExtractor(ExhentaiExtractor):
data["_fallback"] = self._fallback_1280(nl, self.image_num) data["_fallback"] = self._fallback_1280(nl, self.image_num)
except IndexError: except IndexError:
self.log.debug("Page content:\n%s", page) self.log.debug("Page content:\n%s", page)
raise exception.AbortExtraction( raise self.exc.AbortExtraction(
f"Unable to parse image info for '{url}'") f"Unable to parse image info for '{url}'")
data["num"] = self.image_num data["num"] = self.image_num
@@ -389,7 +389,7 @@ class ExhentaiGalleryExtractor(ExhentaiExtractor):
nl, request["page"], imgkey) nl, request["page"], imgkey)
except IndexError: except IndexError:
self.log.debug("Page content:\n%s", page) self.log.debug("Page content:\n%s", page)
raise exception.AbortExtraction( raise self.exc.AbortExtraction(
f"Unable to parse image info for '{url}'") f"Unable to parse image info for '{url}'")
data["num"] = request["page"] data["num"] = request["page"]
@@ -438,7 +438,7 @@ class ExhentaiGalleryExtractor(ExhentaiExtractor):
data["_fallback"] = self._fallback_mpv_1280(info, request) data["_fallback"] = self._fallback_mpv_1280(info, request)
except IndexError: except IndexError:
self.log.debug("Page content:\n%s", info) self.log.debug("Page content:\n%s", info)
raise exception.AbortExtraction( raise self.exc.AbortExtraction(
f"Unable to parse image info for '{url}'") f"Unable to parse image info for '{url}'")
data["num"] = pnum data["num"] = pnum
@@ -465,7 +465,7 @@ class ExhentaiGalleryExtractor(ExhentaiExtractor):
if " requires GP" in page: if " requires GP" in page:
gp = self.config("gp") gp = self.config("gp")
if gp == "stop": if gp == "stop":
raise exception.AbortExtraction("Not enough GP") raise self.exc.AbortExtraction("Not enough GP")
elif gp == "wait": elif gp == "wait":
self.input("Press ENTER to continue.") self.input("Press ENTER to continue.")
return response.url return response.url
@@ -475,7 +475,7 @@ class ExhentaiGalleryExtractor(ExhentaiExtractor):
return self.data["_url_1280"] return self.data["_url_1280"]
if " temporarily banned " in page: if " temporarily banned " in page:
raise exception.AuthorizationError("Temporarily Banned") raise self.exc.AuthorizationError("Temporarily Banned")
self._limits_exceeded() self._limits_exceeded()
return response.url return response.url
@@ -526,7 +526,7 @@ class ExhentaiGalleryExtractor(ExhentaiExtractor):
if not action or action == "stop": if not action or action == "stop":
ExhentaiExtractor.LIMIT = True ExhentaiExtractor.LIMIT = True
raise exception.AbortExtraction(msg) raise self.exc.AbortExtraction(msg)
self.log.warning(msg) self.log.warning(msg)
if action == "wait": if action == "wait":
@@ -559,12 +559,12 @@ class ExhentaiGalleryExtractor(ExhentaiExtractor):
page = response.text page = response.text
if response.status_code == 404 and "Gallery Not Available" in page: if response.status_code == 404 and "Gallery Not Available" in page:
raise exception.AuthorizationError() raise self.exc.AuthorizationError()
if page.startswith(("Key missing", "Gallery not found")): if page.startswith(("Key missing", "Gallery not found")):
raise exception.NotFoundError("gallery") raise self.exc.NotFoundError("gallery")
if page.count("hentai.org/mpv/") > 1: if page.count("hentai.org/mpv/") > 1:
if self.gallery_token is None: if self.gallery_token is None:
raise exception.AbortExtraction( raise self.exc.AbortExtraction(
"'/s/' URLs in MPV mode are not supported") "'/s/' URLs in MPV mode are not supported")
self.mpv = True self.mpv = True
return page return page
@@ -575,7 +575,7 @@ class ExhentaiGalleryExtractor(ExhentaiExtractor):
page = self.request(url, fatal=False).text page = self.request(url, fatal=False).text
if page.startswith(("Invalid page", "Keep trying")): if page.startswith(("Invalid page", "Keep trying")):
raise exception.NotFoundError("image page") raise self.exc.NotFoundError("image page")
return page return page
def _fallback_original(self, nl, fullimg): def _fallback_original(self, nl, fullimg):

View File

@@ -7,7 +7,7 @@
"""Extractors for https://www.facebook.com/""" """Extractors for https://www.facebook.com/"""
from .common import Extractor, Message, Dispatch from .common import Extractor, Message, Dispatch
from .. import text, util, exception from .. import text, util
from ..cache import memcache from ..cache import memcache
BASE_PATTERN = r"(?:https?://)?(?:[\w-]+\.)?facebook\.com" BASE_PATTERN = r"(?:https?://)?(?:[\w-]+\.)?facebook\.com"
@@ -236,12 +236,12 @@ class FacebookExtractor(Extractor):
res = self.request(url, **kwargs) res = self.request(url, **kwargs)
if res.url.startswith(self.root + "/login"): if res.url.startswith(self.root + "/login"):
raise exception.AuthRequired( raise self.exc.AuthRequired(
message=("You must be logged in to continue viewing images." + message=("You must be logged in to continue viewing images." +
LEFT_OFF_TXT)) LEFT_OFF_TXT))
if b'{"__dr":"CometErrorRoot.react"}' in res.content: if b'{"__dr":"CometErrorRoot.react"}' in res.content:
raise exception.AbortExtraction( raise self.exc.AbortExtraction(
"You've been temporarily blocked from viewing images.\n" "You've been temporarily blocked from viewing images.\n"
"Please try using a different account, " "Please try using a different account, "
"using a VPN or waiting before you retry." + LEFT_OFF_TXT) "using a VPN or waiting before you retry." + LEFT_OFF_TXT)
@@ -331,7 +331,7 @@ class FacebookExtractor(Extractor):
break break
if ('"props":{"title":"This content isn\'t available right now"' in if ('"props":{"title":"This content isn\'t available right now"' in
page): page):
raise exception.AuthRequired( raise self.exc.AuthRequired(
"authenticated cookies", "profile", "authenticated cookies", "profile",
"This content isn't available right now") "This content isn't available right now")

View File

@@ -9,7 +9,7 @@
"""Extractors for https://fansly.com/""" """Extractors for https://fansly.com/"""
from .common import Extractor, Message from .common import Extractor, Message
from .. import text, util, exception from .. import text, util
import time import time
BASE_PATTERN = r"(?:https?://)?(?:www\.)?fansly\.com" BASE_PATTERN = r"(?:https?://)?(?:www\.)?fansly\.com"
@@ -54,7 +54,7 @@ class FanslyExtractor(Extractor):
if wall["id"] == wall_id: if wall["id"] == wall_id:
break break
else: else:
raise exception.NotFoundError("wall") raise self.exc.NotFoundError("wall")
walls = (wall,) walls = (wall,)
for wall in walls: for wall in walls:

View File

@@ -7,7 +7,7 @@
"""Extractors for https://fapello.com/""" """Extractors for https://fapello.com/"""
from .common import Extractor, Message from .common import Extractor, Message
from .. import text, exception from .. import text
BASE_PATTERN = r"(?:https?://)?(?:www\.)?fapello\.(?:com|su)" BASE_PATTERN = r"(?:https?://)?(?:www\.)?fapello\.(?:com|su)"
@@ -34,7 +34,7 @@ class FapelloPostExtractor(Extractor):
self.request(url, allow_redirects=False).text, self.request(url, allow_redirects=False).text,
'class="uk-align-center"', "</div>", None) 'class="uk-align-center"', "</div>", None)
if page is None: if page is None:
raise exception.NotFoundError("post") raise self.exc.NotFoundError("post")
data = { data = {
"model": self.model, "model": self.model,

View File

@@ -9,7 +9,7 @@
"""Extractors for https://fikfap.com/""" """Extractors for https://fikfap.com/"""
from .common import Extractor, Message from .common import Extractor, Message
from .. import text, exception from .. import text
BASE_PATTERN = r"(?:https?://)?(?:www\.)?fikfap\.com" BASE_PATTERN = r"(?:https?://)?(?:www\.)?fikfap\.com"
@@ -78,7 +78,7 @@ class FikfapPostExtractor(FikfapExtractor):
if post["postId"] == int(pid): if post["postId"] == int(pid):
return (post,) return (post,)
raise exception.NotFoundError("post") raise self.exc.NotFoundError("post")
class FikfapUserExtractor(FikfapExtractor): class FikfapUserExtractor(FikfapExtractor):

View File

@@ -9,7 +9,7 @@
"""Extractors for https://www.flickr.com/""" """Extractors for https://www.flickr.com/"""
from .common import Extractor, Message from .common import Extractor, Message
from .. import text, oauth, util, exception from .. import text, oauth, util
from ..cache import memcache from ..cache import memcache
BASE_PATTERN = r"(?:https?://)?(?:www\.|secure\.|m\.)?flickr\.com" BASE_PATTERN = r"(?:https?://)?(?:www\.|secure\.|m\.)?flickr\.com"
@@ -459,14 +459,14 @@ class FlickrAPI(oauth.OAuth1API):
msg = data.get("message", "") msg = data.get("message", "")
self.log.debug("Server response: %s", data) self.log.debug("Server response: %s", data)
if data["code"] == 1: if data["code"] == 1:
raise exception.NotFoundError(self.extractor.subcategory) raise self.exc.NotFoundError(self.extractor.subcategory)
elif data["code"] == 2: elif data["code"] == 2:
raise exception.AuthorizationError(msg) raise self.exc.AuthorizationError(msg)
elif data["code"] == 98: elif data["code"] == 98:
raise exception.AuthenticationError(msg) raise self.exc.AuthenticationError(msg)
elif data["code"] == 99: elif data["code"] == 99:
raise exception.AuthorizationError(msg) raise self.exc.AuthorizationError(msg)
raise exception.AbortExtraction("API request failed: " + msg) raise self.exc.AbortExtraction("API request failed: " + msg)
return data return data
def _pagination(self, method, params, key="photos"): def _pagination(self, method, params, key="photos"):

View File

@@ -10,7 +10,7 @@
from .common import Extractor, Message from .common import Extractor, Message
from . import gelbooru_v02 from . import gelbooru_v02
from .. import text, exception from .. import text
import binascii import binascii
BASE_PATTERN = r"(?:https?://)?(?:www\.)?gelbooru\.com/(?:index\.php)?\?" BASE_PATTERN = r"(?:https?://)?(?:www\.)?gelbooru\.com/(?:index\.php)?\?"
@@ -33,9 +33,9 @@ class GelbooruBase():
url = self.root + "/index.php?page=dapi&q=index&json=1" url = self.root + "/index.php?page=dapi&q=index&json=1"
try: try:
data = self.request_json(url, params=params) data = self.request_json(url, params=params)
except exception.HttpError as exc: except self.exc.HttpError as exc:
if exc.status == 401: if exc.status == 401:
raise exception.AuthRequired( raise self.exc.AuthRequired(
"'api-key' & 'user-id'", "the API") "'api-key' & 'user-id'", "the API")
raise raise
@@ -172,7 +172,7 @@ class GelbooruPoolExtractor(GelbooruBase,
name, pos = text.extract(page, "<h3>Now Viewing: ", "</h3>") name, pos = text.extract(page, "<h3>Now Viewing: ", "</h3>")
if not name: if not name:
raise exception.NotFoundError("pool") raise self.exc.NotFoundError("pool")
return { return {
"pool": text.parse_int(self.pool_id), "pool": text.parse_int(self.pool_id),

View File

@@ -9,7 +9,7 @@
"""Extractors for Gelbooru Beta 0.2 sites""" """Extractors for Gelbooru Beta 0.2 sites"""
from . import booru from . import booru
from .. import text, util, exception from .. import text, util
import collections import collections
@@ -38,9 +38,9 @@ class GelbooruV02Extractor(booru.BooruExtractor):
if root.tag == "error": if root.tag == "error":
msg = root.text msg = root.text
if msg.lower().startswith("missing authentication"): if msg.lower().startswith("missing authentication"):
raise exception.AuthRequired( raise self.exc.AuthRequired(
"'api-key' & 'user-id'", "the API", msg) "'api-key' & 'user-id'", "the API", msg)
raise exception.AbortExtraction(f"'{msg}'") raise self.exc.AbortExtraction(f"'{msg}'")
return root return root
@@ -229,7 +229,7 @@ class GelbooruV02PoolExtractor(GelbooruV02Extractor):
name, pos = text.extract(page, "<h4>Pool: ", "</h4>") name, pos = text.extract(page, "<h4>Pool: ", "</h4>")
if not name: if not name:
raise exception.NotFoundError("pool") raise self.exc.NotFoundError("pool")
self.post_ids = text.extract_iter( self.post_ids = text.extract_iter(
page, 'class="thumb" id="p', '"', pos) page, 'class="thumb" id="p', '"', pos)

View File

@@ -5,7 +5,7 @@
# published by the Free Software Foundation. # published by the Free Software Foundation.
from .common import Extractor, Message from .common import Extractor, Message
from .. import text, exception from .. import text
from ..cache import cache from ..cache import cache
BASE_PATTERN = r"(?:https?://)?(?:www\.)?girlswithmuscle\.com" BASE_PATTERN = r"(?:https?://)?(?:www\.)?girlswithmuscle\.com"
@@ -46,13 +46,13 @@ class GirlswithmuscleExtractor(Extractor):
url, method="POST", headers=headers, data=data) url, method="POST", headers=headers, data=data)
if not response.history: if not response.history:
raise exception.AuthenticationError() raise self.exc.AuthenticationError()
page = response.text page = response.text
if ">Wrong username or password" in page: if ">Wrong username or password" in page:
raise exception.AuthenticationError() raise self.exc.AuthenticationError()
if ">Log in<" in page: if ">Log in<" in page:
raise exception.AuthenticationError("Account data is missing") raise self.exc.AuthenticationError("Account data is missing")
return {c.name: c.value for c in response.history[0].cookies} return {c.name: c.value for c in response.history[0].cookies}
@@ -69,7 +69,7 @@ class GirlswithmusclePostExtractor(GirlswithmuscleExtractor):
url = f"{self.root}/{self.groups[0]}/" url = f"{self.root}/{self.groups[0]}/"
page = self.request(url).text page = self.request(url).text
if not page: if not page:
raise exception.NotFoundError("post") raise self.exc.NotFoundError("post")
metadata = self.metadata(page) metadata = self.metadata(page)
@@ -152,7 +152,7 @@ class GirlswithmuscleSearchExtractor(GirlswithmuscleExtractor):
response = self.request(url) response = self.request(url)
if response.history: if response.history:
msg = f'Request was redirected to "{response.url}", try logging in' msg = f'Request was redirected to "{response.url}", try logging in'
raise exception.AuthorizationError(msg) raise self.exc.AuthorizationError(msg)
page = response.text page = response.text
match = text.re(r"Page (\d+) of (\d+)").search(page) match = text.re(r"Page (\d+) of (\d+)").search(page)

View File

@@ -7,7 +7,7 @@
"""Extractors for https://gofile.io/""" """Extractors for https://gofile.io/"""
from .common import Extractor, Message from .common import Extractor, Message
from .. import text, exception from .. import text
from ..cache import cache, memcache from ..cache import cache, memcache
import hashlib import hashlib
@@ -44,7 +44,7 @@ class GofileFolderExtractor(Extractor):
try: try:
contents = folder.pop("children") contents = folder.pop("children")
except KeyError: except KeyError:
raise exception.AuthorizationError("Password required") raise self.exc.AuthorizationError("Password required")
num = 0 num = 0
for content in contents.values(): for content in contents.values():
@@ -94,10 +94,10 @@ class GofileFolderExtractor(Extractor):
if response["status"] != "ok": if response["status"] != "ok":
if response["status"] == "error-notFound": if response["status"] == "error-notFound":
raise exception.NotFoundError("content") raise self.exc.NotFoundError("content")
if response["status"] == "error-passwordRequired": if response["status"] == "error-passwordRequired":
raise exception.AuthorizationError("Password required") raise self.exc.AuthorizationError("Password required")
raise exception.AbortExtraction( raise self.exc.AbortExtraction(
f"{endpoint} failed (Status: {response['status']})") f"{endpoint} failed (Status: {response['status']})")
return response["data"] return response["data"]

View File

@@ -7,7 +7,7 @@
"""Extractors for https://hotleak.vip/""" """Extractors for https://hotleak.vip/"""
from .common import Extractor, Message from .common import Extractor, Message
from .. import text, exception from .. import text
import binascii import binascii
BASE_PATTERN = r"(?:https?://)?(?:www\.)?hotleak\.vip" BASE_PATTERN = r"(?:https?://)?(?:www\.)?hotleak\.vip"
@@ -116,7 +116,7 @@ class HotleakCreatorExtractor(HotleakExtractor):
try: try:
response = self.request( response = self.request(
url, headers=headers, params=params, notfound=True) url, headers=headers, params=params, notfound=True)
except exception.HttpError as exc: except self.exc.HttpError as exc:
if exc.response.status_code == 429: if exc.response.status_code == 429:
self.wait( self.wait(
until=exc.response.headers.get("X-RateLimit-Reset")) until=exc.response.headers.get("X-RateLimit-Reset"))

View File

@@ -9,7 +9,7 @@
"""Extractors for https://www.imagebam.com/""" """Extractors for https://www.imagebam.com/"""
from .common import Extractor, Message from .common import Extractor, Message
from .. import text, exception from .. import text
class ImagebamExtractor(Extractor): class ImagebamExtractor(Extractor):
@@ -29,7 +29,7 @@ class ImagebamExtractor(Extractor):
page = self.request(self.root + path).text page = self.request(self.root + path).text
url, pos = text.extract(page, '<img src="https://images', '"') url, pos = text.extract(page, '<img src="https://images', '"')
if not url: if not url:
raise exception.NotFoundError("image") raise self.exc.NotFoundError("image")
filename = text.unescape(text.extract(page, 'alt="', '"', pos)[0]) filename = text.unescape(text.extract(page, 'alt="', '"', pos)[0])
return text.nameext_from_name(filename, { return text.nameext_from_name(filename, {

View File

@@ -10,7 +10,7 @@
"""Extractors for https://imgchest.com/""" """Extractors for https://imgchest.com/"""
from .common import GalleryExtractor, Extractor, Message from .common import GalleryExtractor, Extractor, Message
from .. import text, util, exception from .. import text, util
BASE_PATTERN = r"(?:https?://)?(?:www\.)?imgchest\.com" BASE_PATTERN = r"(?:https?://)?(?:www\.)?imgchest\.com"
@@ -40,7 +40,7 @@ class ImagechestGalleryExtractor(GalleryExtractor):
post = data["props"]["post"] post = data["props"]["post"]
except Exception: except Exception:
if "<title>Not Found</title>" in page: if "<title>Not Found</title>" in page:
raise exception.NotFoundError("gallery") raise self.exc.NotFoundError("gallery")
self.files = () self.files = ()
return {} return {}
@@ -142,11 +142,11 @@ class ImagechestAPI():
return response.json()["data"] return response.json()["data"]
elif response.status_code < 400: elif response.status_code < 400:
raise exception.AuthenticationError("Invalid API access token") raise self.exc.AuthenticationError("Invalid API access token")
elif response.status_code == 429: elif response.status_code == 429:
self.extractor.wait(seconds=600) self.extractor.wait(seconds=600)
else: else:
self.extractor.log.debug(response.text) self.extractor.log.debug(response.text)
raise exception.AbortExtraction("API request failed") raise self.exc.AbortExtraction("API request failed")

View File

@@ -9,7 +9,7 @@
"""Extractors for https://www.imagefap.com/""" """Extractors for https://www.imagefap.com/"""
from .common import Extractor, Message from .common import Extractor, Message
from .. import text, exception from .. import text
BASE_PATTERN = r"(?:https?://)?(?:www\.|beta\.)?imagefap\.com" BASE_PATTERN = r"(?:https?://)?(?:www\.|beta\.)?imagefap\.com"
@@ -31,7 +31,7 @@ class ImagefapExtractor(Extractor):
self.log.warning("HTTP redirect to '%s'", response.url) self.log.warning("HTTP redirect to '%s'", response.url)
if msg := text.extr(response.text, '<div class="mt-4', '<'): if msg := text.extr(response.text, '<div class="mt-4', '<'):
msg = " ".join(msg.partition(">")[2].split()) msg = " ".join(msg.partition(">")[2].split())
raise exception.AbortExtraction(f"'{msg}'") raise self.exc.AbortExtraction(f"'{msg}'")
return response return response

View File

@@ -9,7 +9,7 @@
"""Collection of extractors for various imagehosts""" """Collection of extractors for various imagehosts"""
from .common import Extractor, Message from .common import Extractor, Message
from .. import text, exception from .. import text
from ..cache import memcache from ..cache import memcache
@@ -81,7 +81,7 @@ class ImagehostImageExtractor(Extractor):
return () return ()
def not_found(self, resource=None): def not_found(self, resource=None):
raise exception.NotFoundError(resource or self.__class__.subcategory) raise self.exc.NotFoundError(resource or self.__class__.subcategory)
class ImxtoImageExtractor(ImagehostImageExtractor): class ImxtoImageExtractor(ImagehostImageExtractor):

View File

@@ -9,7 +9,7 @@
"""Extractors for https://imgbb.com/""" """Extractors for https://imgbb.com/"""
from .common import Extractor, Message from .common import Extractor, Message
from .. import text, util, exception from .. import text, util
from ..cache import cache from ..cache import cache
@@ -60,7 +60,7 @@ class ImgbbExtractor(Extractor):
response = self.request(url, method="POST", headers=headers, data=data) response = self.request(url, method="POST", headers=headers, data=data)
if not response.history: if not response.history:
raise exception.AuthenticationError() raise self.exc.AuthenticationError()
return self.cookies return self.cookies
def _pagination(self, page, url, params): def _pagination(self, page, url, params):
@@ -193,10 +193,10 @@ class ImgbbUserExtractor(ImgbbExtractor):
return self._pagination(response.text, url + "json", params) return self._pagination(response.text, url + "json", params)
if response.status_code == 301: if response.status_code == 301:
raise exception.NotFoundError("user") raise self.exc.NotFoundError("user")
redirect = "HTTP redirect to " + response.headers.get("Location", "") redirect = "HTTP redirect to " + response.headers.get("Location", "")
if response.status_code == 302: if response.status_code == 302:
raise exception.AuthRequired( raise self.exc.AuthRequired(
("username & password", "authenticated cookies"), ("username & password", "authenticated cookies"),
"profile", redirect) "profile", redirect)
raise exception.AbortExtraction(redirect) raise self.exc.AbortExtraction(redirect)

View File

@@ -9,7 +9,7 @@
"""Extractors for https://imgbox.com/""" """Extractors for https://imgbox.com/"""
from .common import Extractor, Message, AsynchronousMixin from .common import Extractor, Message, AsynchronousMixin
from .. import text, exception from .. import text
class ImgboxExtractor(Extractor): class ImgboxExtractor(Extractor):
@@ -68,7 +68,7 @@ class ImgboxGalleryExtractor(AsynchronousMixin, ImgboxExtractor):
def get_job_metadata(self): def get_job_metadata(self):
page = self.request(self.root + "/g/" + self.gallery_key).text page = self.request(self.root + "/g/" + self.gallery_key).text
if "The specified gallery could not be found." in page: if "The specified gallery could not be found." in page:
raise exception.NotFoundError("gallery") raise self.exc.NotFoundError("gallery")
self.image_keys = text.re( self.image_keys = text.re(
r'<a href="/([^"]+)"><img alt="').findall(page) r'<a href="/([^"]+)"><img alt="').findall(page)
@@ -104,5 +104,5 @@ class ImgboxImageExtractor(ImgboxExtractor):
def get_image_metadata(self, page): def get_image_metadata(self, page):
data = ImgboxExtractor.get_image_metadata(self, page) data = ImgboxExtractor.get_image_metadata(self, page)
if not data["filename"]: if not data["filename"]:
raise exception.NotFoundError("image") raise self.exc.NotFoundError("image")
return data return data

View File

@@ -9,7 +9,7 @@
"""Extractors for https://imgur.com/""" """Extractors for https://imgur.com/"""
from .common import Extractor, Message from .common import Extractor, Message
from .. import text, exception from .. import text
BASE_PATTERN = r"(?:https?://)?(?:www\.|[im]\.)?imgur\.(?:com|io)" BASE_PATTERN = r"(?:https?://)?(?:www\.|[im]\.)?imgur\.(?:com|io)"
@@ -296,7 +296,7 @@ class ImgurAPI():
return self.extractor.request_json( return self.extractor.request_json(
"https://api.imgur.com" + endpoint, "https://api.imgur.com" + endpoint,
params=params, headers=(headers or self.headers)) params=params, headers=(headers or self.headers))
except exception.HttpError as exc: except self.exc.HttpError as exc:
if exc.status not in (403, 429) or \ if exc.status not in (403, 429) or \
b"capacity" not in exc.response.content: b"capacity" not in exc.response.content:
raise raise

View File

@@ -9,7 +9,7 @@
"""Extractors for https://inkbunny.net/""" """Extractors for https://inkbunny.net/"""
from .common import Extractor, Message from .common import Extractor, Message
from .. import text, exception from .. import text
from ..cache import cache from ..cache import cache
@@ -278,7 +278,7 @@ class InkbunnyPostExtractor(InkbunnyExtractor):
def posts(self): def posts(self):
submissions = self.api.detail(({"submission_id": self.submission_id},)) submissions = self.api.detail(({"submission_id": self.submission_id},))
if submissions[0] is None: if submissions[0] is None:
raise exception.NotFoundError("submission") raise self.exc.NotFoundError("submission")
return submissions return submissions
@@ -348,7 +348,7 @@ class InkbunnyAPI():
self.authenticate(invalidate=True) self.authenticate(invalidate=True)
continue continue
raise exception.AbortExtraction(data.get("error_message")) raise self.exc.AbortExtraction(data.get("error_message"))
def _pagination_search(self, params): def _pagination_search(self, params):
params["page"] = 1 params["page"] = 1
@@ -379,5 +379,5 @@ def _authenticate_impl(api, username, password):
data = api.extractor.request_json(url, method="POST", data=data) data = api.extractor.request_json(url, method="POST", data=data)
if "sid" not in data: if "sid" not in data:
raise exception.AuthenticationError(data.get("error_message")) raise Extractor.exc.AuthenticationError(data.get("error_message"))
return data["sid"] return data["sid"]

View File

@@ -10,7 +10,7 @@
"""Extractors for https://www.instagram.com/""" """Extractors for https://www.instagram.com/"""
from .common import Extractor, Message, Dispatch from .common import Extractor, Message, Dispatch
from .. import text, util, exception from .. import text, util
from ..cache import cache, memcache from ..cache import cache, memcache
import itertools import itertools
import binascii import binascii
@@ -143,7 +143,7 @@ class InstagramExtractor(Extractor):
page = None page = None
if page is not None: if page is not None:
raise exception.AbortExtraction( raise self.exc.AbortExtraction(
f"HTTP redirect to {page} page ({url.partition('?')[0]})") f"HTTP redirect to {page} page ({url.partition('?')[0]})")
www_claim = response.headers.get("x-ig-set-www-claim") www_claim = response.headers.get("x-ig-set-www-claim")
@@ -678,7 +678,7 @@ class InstagramStoriesExtractor(InstagramExtractor):
reel["items"] = (item,) reel["items"] = (item,)
break break
else: else:
raise exception.NotFoundError("story") raise self.exc.NotFoundError("story")
elif self.config("split"): elif self.config("split"):
reel = reels[0] reel = reels[0]
@@ -860,7 +860,7 @@ class InstagramRestAPI():
try: try:
return self._call(endpoint, params=params)["reels_media"] return self._call(endpoint, params=params)["reels_media"]
except KeyError: except KeyError:
raise exception.AuthRequired("authenticated cookies") raise self.exc.AuthRequired("authenticated cookies")
def reels_tray(self): def reels_tray(self):
endpoint = "/v1/feed/reels_tray/" endpoint = "/v1/feed/reels_tray/"
@@ -893,7 +893,7 @@ class InstagramRestAPI():
return self._call( return self._call(
endpoint, params=params, notfound="user")["data"]["user"] endpoint, params=params, notfound="user")["data"]["user"]
except KeyError: except KeyError:
raise exception.NotFoundError("user") raise self.exc.NotFoundError("user")
def user_by_search(self, username): def user_by_search(self, username):
url = "https://www.instagram.com/web/search/topsearch/" url = "https://www.instagram.com/web/search/topsearch/"
@@ -914,7 +914,7 @@ class InstagramRestAPI():
if user := self.user_by_name(screen_name): if user := self.user_by_name(screen_name):
return user return user
self.user_by_name.invalidate(screen_name) self.user_by_name.invalidate(screen_name)
raise exception.NotFoundError("user") raise self.exc.NotFoundError("user")
def user_id(self, screen_name, check_private=True): def user_id(self, screen_name, check_private=True):
if screen_name.startswith("id:"): if screen_name.startswith("id:"):
@@ -1087,7 +1087,7 @@ class InstagramGraphqlAPI():
self.user_id = api.user_id self.user_id = api.user_id
def _unsupported(self, _=None): def _unsupported(self, _=None):
raise exception.AbortExtraction("Unsupported with GraphQL API") raise self.exc.AbortExtraction("Unsupported with GraphQL API")
def highlights_tray(self, user_id): def highlights_tray(self, user_id):
query_hash = "d4d88dc1500312af6f937f7b804c68c3" query_hash = "d4d88dc1500312af6f937f7b804c68c3"
@@ -1175,7 +1175,7 @@ class InstagramGraphqlAPI():
elif not data["edges"]: elif not data["edges"]:
user = self.extractor.item user = self.extractor.item
s = "" if user.endswith("s") else "s" s = "" if user.endswith("s") else "s"
raise exception.AbortExtraction( raise self.exc.AbortExtraction(
f"{user}'{s} posts are private") f"{user}'{s} posts are private")
variables["after"] = extr._update_cursor(info["end_cursor"]) variables["after"] = extr._update_cursor(info["end_cursor"])

View File

@@ -7,7 +7,7 @@
"""Extractors for https://www.iwara.tv/""" """Extractors for https://www.iwara.tv/"""
from .common import Extractor, Message, Dispatch from .common import Extractor, Message, Dispatch
from .. import text, util, exception from .. import text, util
from ..cache import cache, memcache from ..cache import cache, memcache
import hashlib import hashlib
@@ -100,7 +100,7 @@ class IwaraExtractor(Extractor):
if type == "user": if type == "user":
return self.items_user(results) return self.items_user(results)
raise exception.AbortExtraction(f"Unsupported result type '{type}'") raise self.exc.AbortExtraction(f"Unsupported result type '{type}'")
def extract_media_info(self, item, key, include_file_info=True): def extract_media_info(self, item, key, include_file_info=True):
info = { info = {
@@ -344,7 +344,7 @@ class IwaraAPI():
def favorites(self, type): def favorites(self, type):
if not self.username: if not self.username:
raise exception.AuthRequired( raise self.exc.AuthRequired(
"username & password", "your favorites") "username & password", "your favorites")
endpoint = f"/favorites/{type}s" endpoint = f"/favorites/{type}s"
return self._pagination(endpoint) return self._pagination(endpoint)
@@ -398,7 +398,7 @@ class IwaraAPI():
if not (refresh_token := data.get("token")): if not (refresh_token := data.get("token")):
self.extractor.log.debug(data) self.extractor.log.debug(data)
raise exception.AuthenticationError(data.get("message")) raise self.exc.AuthenticationError(data.get("message"))
_refresh_token_cache.update(username, refresh_token) _refresh_token_cache.update(username, refresh_token)
self.extractor.log.info("Refreshing access token for %s", username) self.extractor.log.info("Refreshing access token for %s", username)
@@ -410,7 +410,7 @@ class IwaraAPI():
if not (access_token := data.get("accessToken")): if not (access_token := data.get("accessToken")):
self.extractor.log.debug(data) self.extractor.log.debug(data)
raise exception.AuthenticationError(data.get("message")) raise self.exc.AuthenticationError(data.get("message"))
return "Bearer " + access_token return "Bearer " + access_token
def _call(self, endpoint, params=None, headers=None): def _call(self, endpoint, params=None, headers=None):

View File

@@ -9,7 +9,7 @@
"""Extractors for https://kabe-uchiroom.com/""" """Extractors for https://kabe-uchiroom.com/"""
from .common import Extractor, Message from .common import Extractor, Message
from .. import text, exception from .. import text
class KabeuchiUserExtractor(Extractor): class KabeuchiUserExtractor(Extractor):
@@ -47,7 +47,7 @@ class KabeuchiUserExtractor(Extractor):
url = f"{self.root}/mypage/?id={uid}" url = f"{self.root}/mypage/?id={uid}"
response = self.request(url) response = self.request(url)
if response.history and response.url == self.root + "/": if response.history and response.url == self.root + "/":
raise exception.NotFoundError("user") raise self.exc.NotFoundError("user")
target_id = text.extr(response.text, 'user_friend_id = "', '"') target_id = text.extr(response.text, 'user_friend_id = "', '"')
return self._pagination(target_id) return self._pagination(target_id)

View File

@@ -9,7 +9,7 @@
"""Extractors for https://kemono.cr/""" """Extractors for https://kemono.cr/"""
from .common import Extractor, Message from .common import Extractor, Message
from .. import text, util, exception from .. import text, util
from ..cache import cache, memcache from ..cache import cache, memcache
import itertools import itertools
import json import json
@@ -98,7 +98,7 @@ class KemonoExtractor(Extractor):
try: try:
creator = creator_info[key] = self.api.creator_profile( creator = creator_info[key] = self.api.creator_profile(
service, creator_id) service, creator_id)
except exception.HttpError: except self.exc.HttpError:
self.log.warning("%s/%s/%s: 'Creator not found'", self.log.warning("%s/%s/%s: 'Creator not found'",
service, creator_id, post["id"]) service, creator_id, post["id"])
creator = creator_info[key] = util.NONE creator = creator_info[key] = util.NONE
@@ -211,7 +211,7 @@ class KemonoExtractor(Extractor):
msg = f'"{response.json()["error"]}"' msg = f'"{response.json()["error"]}"'
except Exception: except Exception:
msg = '"Username or password is incorrect"' msg = '"Username or password is incorrect"'
raise exception.AuthenticationError(msg) raise self.exc.AuthenticationError(msg)
return {c.name: c.value for c in response.cookies} return {c.name: c.value for c in response.cookies}
@@ -399,7 +399,7 @@ class KemonoPostExtractor(KemonoExtractor):
if str(rev["revision_id"]) == revision_id: if str(rev["revision_id"]) == revision_id:
return (rev,) return (rev,)
raise exception.NotFoundError("revision") raise self.exc.NotFoundError("revision")
class KemonoDiscordExtractor(KemonoExtractor): class KemonoDiscordExtractor(KemonoExtractor):
@@ -419,7 +419,7 @@ class KemonoDiscordExtractor(KemonoExtractor):
server, channels = discord_server_info(self, server_id) server, channels = discord_server_info(self, server_id)
channel = channels[channel_id] channel = channels[channel_id]
except Exception: except Exception:
raise exception.NotFoundError("channel") raise self.exc.NotFoundError("channel")
data = { data = {
"server" : server["name"], "server" : server["name"],

View File

@@ -9,7 +9,7 @@
"""Extractors for https://downloads.khinsider.com/""" """Extractors for https://downloads.khinsider.com/"""
from .common import Extractor, Message, AsynchronousMixin from .common import Extractor, Message, AsynchronousMixin
from .. import text, exception from .. import text
class KhinsiderSoundtrackExtractor(AsynchronousMixin, Extractor): class KhinsiderSoundtrackExtractor(AsynchronousMixin, Extractor):
@@ -32,7 +32,7 @@ class KhinsiderSoundtrackExtractor(AsynchronousMixin, Extractor):
url = self.root + "/game-soundtracks/album/" + self.album url = self.root + "/game-soundtracks/album/" + self.album
page = self.request(url, encoding="utf-8").text page = self.request(url, encoding="utf-8").text
if "Download all songs at once:" not in page: if "Download all songs at once:" not in page:
raise exception.NotFoundError("soundtrack") raise self.exc.NotFoundError("soundtrack")
data = self.metadata(page) data = self.metadata(page)
yield Message.Directory, "", data yield Message.Directory, "", data

View File

@@ -7,7 +7,7 @@
"""Extractors for https://www.lofter.com/""" """Extractors for https://www.lofter.com/"""
from .common import Extractor, Message from .common import Extractor, Message
from .. import text, util, exception from .. import text, util
class LofterExtractor(Extractor): class LofterExtractor(Extractor):
@@ -132,11 +132,11 @@ class LofterAPI():
info = response.json() info = response.json()
if info["meta"]["status"] == 4200: if info["meta"]["status"] == 4200:
raise exception.NotFoundError("blog") raise self.exc.NotFoundError("blog")
if info["meta"]["status"] != 200: if info["meta"]["status"] != 200:
self.extractor.log.debug("Server response: %s", info) self.extractor.log.debug("Server response: %s", info)
raise exception.AbortExtraction("API request failed") raise self.exc.AbortExtraction("API request failed")
return info["response"] return info["response"]

View File

@@ -9,7 +9,7 @@
"""Extractors for https://members.luscious.net/""" """Extractors for https://members.luscious.net/"""
from .common import Extractor, Message from .common import Extractor, Message
from .. import text, exception from .. import text
class LusciousExtractor(Extractor): class LusciousExtractor(Extractor):
@@ -32,7 +32,7 @@ class LusciousExtractor(Extractor):
if response.status_code >= 400: if response.status_code >= 400:
self.log.debug("Server response: %s", response.text) self.log.debug("Server response: %s", response.text)
raise exception.AbortExtraction( raise self.exc.AbortExtraction(
f"GraphQL query failed " f"GraphQL query failed "
f"('{response.status_code} {response.reason}')") f"('{response.status_code} {response.reason}')")
@@ -82,7 +82,7 @@ class LusciousAlbumExtractor(LusciousExtractor):
album = self._request_graphql("AlbumGet", variables)["album"]["get"] album = self._request_graphql("AlbumGet", variables)["album"]["get"]
if "errors" in album: if "errors" in album:
raise exception.NotFoundError("album") raise self.exc.NotFoundError("album")
album["audiences"] = [item["title"] for item in album["audiences"]] album["audiences"] = [item["title"] for item in album["audiences"]]
album["genres"] = [item["title"] for item in album["genres"]] album["genres"] = [item["title"] for item in album["genres"]]

View File

@@ -9,7 +9,7 @@
"""Extractors for https://manga.madokami.al/""" """Extractors for https://manga.madokami.al/"""
from .common import Extractor, Message from .common import Extractor, Message
from .. import text, util, exception from .. import text, util
BASE_PATTERN = r"(?:https?://)?manga\.madokami\.al" BASE_PATTERN = r"(?:https?://)?manga\.madokami\.al"
@@ -31,7 +31,7 @@ class MadokamiMangaExtractor(MadokamiExtractor):
def items(self): def items(self):
username, password = self._get_auth_info() username, password = self._get_auth_info()
if not username: if not username:
raise exception.AuthRequired("username & password") raise self.exc.AuthRequired("username & password")
self.session.auth = util.HTTPBasicAuth(username, password) self.session.auth = util.HTTPBasicAuth(username, password)
url = f"{self.root}/Manga/{self.groups[0]}" url = f"{self.root}/Manga/{self.groups[0]}"

View File

@@ -9,7 +9,7 @@
"""Extractors for https://mangadex.org/""" """Extractors for https://mangadex.org/"""
from .common import Extractor, Message from .common import Extractor, Message
from .. import text, util, exception from .. import text, util
from ..cache import cache, memcache from ..cache import cache, memcache
from collections import defaultdict from collections import defaultdict
@@ -129,7 +129,7 @@ class MangadexChapterExtractor(MangadexExtractor):
data = self._transform(chapter) data = self._transform(chapter)
if data.get("_external_url") and not data["count"]: if data.get("_external_url") and not data["count"]:
raise exception.AbortExtraction( raise self.exc.AbortExtraction(
f"Chapter {data['chapter']}{data['chapter_minor']} is not " f"Chapter {data['chapter']}{data['chapter_minor']} is not "
f"available on MangaDex and can instead be read on the " f"available on MangaDex and can instead be read on the "
f"official publisher's website at {data['_external_url']}.") f"official publisher's website at {data['_external_url']}.")
@@ -333,7 +333,7 @@ class MangadexAPI():
try: try:
access_token = data["access_token"] access_token = data["access_token"]
except Exception: except Exception:
raise exception.AuthenticationError(data.get("error_description")) raise self.exc.AuthenticationError(data.get("error_description"))
if refresh_token != data.get("refresh_token"): if refresh_token != data.get("refresh_token"):
_refresh_token_cache.update( _refresh_token_cache.update(
@@ -356,7 +356,7 @@ class MangadexAPI():
data = self.extractor.request_json( data = self.extractor.request_json(
url, method="POST", json=json, fatal=None) url, method="POST", json=json, fatal=None)
if data.get("result") != "ok": if data.get("result") != "ok":
raise exception.AuthenticationError() raise self.exc.AuthenticationError()
if refresh_token != data["token"]["refresh"]: if refresh_token != data["token"]["refresh"]:
_refresh_token_cache.update(username, data["token"]["refresh"]) _refresh_token_cache.update(username, data["token"]["refresh"])
@@ -381,7 +381,7 @@ class MangadexAPI():
msg = ", ".join(f'{error["title"]}: "{error["detail"]}"' msg = ", ".join(f'{error["title"]}: "{error["detail"]}"'
for error in response.json()["errors"]) for error in response.json()["errors"])
raise exception.AbortExtraction( raise self.exc.AbortExtraction(
f"{response.status_code} {response.reason} ({msg})") f"{response.status_code} {response.reason} ({msg})")
def _pagination_chapters(self, endpoint, params=None, auth=False): def _pagination_chapters(self, endpoint, params=None, auth=False):

View File

@@ -9,7 +9,7 @@
"""Extractors for https://mangafire.to/""" """Extractors for https://mangafire.to/"""
from .common import ChapterExtractor, MangaExtractor from .common import ChapterExtractor, MangaExtractor
from .. import text, exception from .. import text
from ..cache import memcache from ..cache import memcache
BASE_PATTERN = r"(?:https?://)?(?:www\.)?mangafire\.to" BASE_PATTERN = r"(?:https?://)?(?:www\.)?mangafire\.to"
@@ -42,7 +42,7 @@ class MangafireChapterExtractor(MangafireBase, ChapterExtractor):
chapters = _manga_chapters(self, (manga_id, self.type, lang)) chapters = _manga_chapters(self, (manga_id, self.type, lang))
anchor = chapters[chapter_info] anchor = chapters[chapter_info]
except KeyError: except KeyError:
raise exception.NotFoundError("chapter") raise self.exc.NotFoundError("chapter")
self.chapter_id = text.extr(anchor, 'data-id="', '"') self.chapter_id = text.extr(anchor, 'data-id="', '"')
return { return {

View File

@@ -10,7 +10,7 @@
"""Extractors for https://www.mangakakalot.gg/ and mirror sites""" """Extractors for https://www.mangakakalot.gg/ and mirror sites"""
from .common import BaseExtractor, ChapterExtractor, MangaExtractor, Message from .common import BaseExtractor, ChapterExtractor, MangaExtractor, Message
from .. import text, util, exception from .. import text, util
class ManganeloExtractor(BaseExtractor): class ManganeloExtractor(BaseExtractor):
@@ -144,7 +144,7 @@ class ManganeloBookmarkExtractor(ManganeloExtractor):
response = self.request(url, params=params) response = self.request(url, params=params)
if response.history: if response.history:
raise exception.AuthRequired( raise self.exc.AuthRequired(
"authenticated cookies", "your bookmarks") "authenticated cookies", "your bookmarks")
page = response.text page = response.text
last = text.parse_int(text.extr(page, ">Last(", ")")) last = text.parse_int(text.extr(page, ">Last(", ")"))

View File

@@ -9,7 +9,7 @@
"""Extractors for https://mangapark.net/""" """Extractors for https://mangapark.net/"""
from .common import ChapterExtractor, Extractor, Message from .common import ChapterExtractor, Extractor, Message
from .. import text, util, exception from .. import text, util
from ..cache import memcache from ..cache import memcache
BASE_PATTERN = (r"(?:https?://)?(?:www\.)?(?:" BASE_PATTERN = (r"(?:https?://)?(?:www\.)?(?:"
@@ -175,5 +175,5 @@ class MangaparkMangaExtractor(MangaparkBase, Extractor):
not lang or data["lang"] == lang): not lang or data["lang"] == lang):
return data["id"] return data["id"]
raise exception.AbortExtraction( raise self.exc.AbortExtraction(
f"'{source}' does not match any available source") f"'{source}' does not match any available source")

View File

@@ -7,7 +7,7 @@
"""Extractors for https://mangaread.org/""" """Extractors for https://mangaread.org/"""
from .common import ChapterExtractor, MangaExtractor from .common import ChapterExtractor, MangaExtractor
from .. import text, exception from .. import text
class MangareadBase(): class MangareadBase():
@@ -40,7 +40,7 @@ class MangareadChapterExtractor(MangareadBase, ChapterExtractor):
data = {"tags": list(text.split_html(tags)[::2])} data = {"tags": list(text.split_html(tags)[::2])}
info = text.extr(page, '<h1 id="chapter-heading">', "</h1>") info = text.extr(page, '<h1 id="chapter-heading">', "</h1>")
if not info: if not info:
raise exception.NotFoundError("chapter") raise self.exc.NotFoundError("chapter")
self.parse_chapter_string(info, data) self.parse_chapter_string(info, data)
return data return data
@@ -61,7 +61,7 @@ class MangareadMangaExtractor(MangareadBase, MangaExtractor):
def chapters(self, page): def chapters(self, page):
if 'class="error404' in page: if 'class="error404' in page:
raise exception.NotFoundError("manga") raise self.exc.NotFoundError("manga")
data = self.metadata(page) data = self.metadata(page)
results = [] results = []
for chapter in text.extract_iter( for chapter in text.extract_iter(

View File

@@ -9,7 +9,7 @@
"""Extractors for https://www.mangoxo.com/""" """Extractors for https://www.mangoxo.com/"""
from .common import Extractor, Message from .common import Extractor, Message
from .. import text, exception from .. import text
from ..cache import cache from ..cache import cache
import hashlib import hashlib
import time import time
@@ -50,7 +50,7 @@ class MangoxoExtractor(Extractor):
data = response.json() data = response.json()
if str(data.get("result")) != "1": if str(data.get("result")) != "1":
raise exception.AuthenticationError(data.get("msg")) raise self.exc.AuthenticationError(data.get("msg"))
return {"SESSION": self.cookies.get("SESSION")} return {"SESSION": self.cookies.get("SESSION")}
def _sign_by_md5(self, username, password, token): def _sign_by_md5(self, username, password, token):

View File

@@ -9,7 +9,7 @@
"""Extractors for Mastodon instances""" """Extractors for Mastodon instances"""
from .common import BaseExtractor, Message from .common import BaseExtractor, Message
from .. import text, exception from .. import text
from ..cache import cache from ..cache import cache
@@ -246,7 +246,7 @@ class MastodonAPI():
if account["acct"] == username: if account["acct"] == username:
self.extractor._check_moved(account) self.extractor._check_moved(account)
return account["id"] return account["id"]
raise exception.NotFoundError("account") raise self.exc.NotFoundError("account")
def account_bookmarks(self): def account_bookmarks(self):
"""Statuses the user has bookmarked""" """Statuses the user has bookmarked"""
@@ -312,16 +312,16 @@ class MastodonAPI():
if code < 400: if code < 400:
return response return response
if code == 401: if code == 401:
raise exception.AbortExtraction( raise self.exc.AbortExtraction(
f"Invalid or missing access token.\nRun 'gallery-dl oauth:" f"Invalid or missing access token.\nRun 'gallery-dl oauth:"
f"mastodon:{self.extractor.instance}' to obtain one.") f"mastodon:{self.extractor.instance}' to obtain one.")
if code == 404: if code == 404:
raise exception.NotFoundError() raise self.exc.NotFoundError()
if code == 429: if code == 429:
self.extractor.wait(until=self.extractor.parse_datetime_iso( self.extractor.wait(until=self.extractor.parse_datetime_iso(
response.headers["x-ratelimit-reset"])) response.headers["x-ratelimit-reset"]))
continue continue
raise exception.AbortExtraction(response.json().get("error")) raise self.exc.AbortExtraction(response.json().get("error"))
def _pagination(self, endpoint, params): def _pagination(self, endpoint, params):
url = endpoint url = endpoint

View File

@@ -7,7 +7,7 @@
"""Extractors for Misskey instances""" """Extractors for Misskey instances"""
from .common import BaseExtractor, Message, Dispatch from .common import BaseExtractor, Message, Dispatch
from .. import text, dt, exception from .. import text, dt
from ..cache import memcache from ..cache import memcache
@@ -239,7 +239,7 @@ class MisskeyAPI():
def i_favorites(self): def i_favorites(self):
endpoint = "/i/favorites" endpoint = "/i/favorites"
if not self.access_token: if not self.access_token:
raise exception.AuthenticationError() raise self.exc.AuthenticationError()
data = {"i": self.access_token} data = {"i": self.access_token}
return self._pagination(endpoint, data) return self._pagination(endpoint, data)

View File

@@ -9,7 +9,7 @@
"""Extractors for https://motherless.com/""" """Extractors for https://motherless.com/"""
from .common import Extractor, Message from .common import Extractor, Message
from .. import text, dt, exception from .. import text, dt
from ..cache import memcache from ..cache import memcache
BASE_PATTERN = r"(?:https?://)?motherless\.com" BASE_PATTERN = r"(?:https?://)?motherless\.com"
@@ -28,7 +28,7 @@ class MotherlessExtractor(Extractor):
content = response.content content = response.content
if (b'<div class="error-page' in content or if (b'<div class="error-page' in content or
b">The page you're looking for cannot be found.<" in content): b">The page you're looking for cannot be found.<" in content):
raise exception.NotFoundError("page") raise self.exc.NotFoundError("page")
self.request = Extractor.request.__get__(self) self.request = Extractor.request.__get__(self)
return response return response

View File

@@ -7,7 +7,7 @@
"""Extractors for https://myhentaigallery.com/""" """Extractors for https://myhentaigallery.com/"""
from .common import Extractor, GalleryExtractor, Message from .common import Extractor, GalleryExtractor, Message
from .. import text, exception from .. import text
BASE_PATTERN = r"(?:https?://)?myhentaigallery\.com" BASE_PATTERN = r"(?:https?://)?myhentaigallery\.com"
@@ -40,7 +40,7 @@ class MyhentaigalleryGalleryExtractor(MyhentaigalleryBase, GalleryExtractor):
title = title[4:] title = title[4:]
if not title: if not title:
raise exception.NotFoundError("gallery") raise self.exc.NotFoundError("gallery")
return { return {
"title" : text.unescape(title), "title" : text.unescape(title),

View File

@@ -9,7 +9,7 @@
"""Extractors for https://www.myportfolio.com/""" """Extractors for https://www.myportfolio.com/"""
from .common import Extractor, Message from .common import Extractor, Message
from .. import text, exception from .. import text
class MyportfolioGalleryExtractor(Extractor): class MyportfolioGalleryExtractor(Extractor):
@@ -34,7 +34,7 @@ class MyportfolioGalleryExtractor(Extractor):
url = "https://" + self.domain + (self.path or "") url = "https://" + self.domain + (self.path or "")
response = self.request(url) response = self.request(url)
if response.history and response.url.endswith(".adobe.com/missing"): if response.history and response.url.endswith(".adobe.com/missing"):
raise exception.NotFoundError() raise self.exc.NotFoundError()
page = response.text page = response.text
projects = text.extr( projects = text.extr(
@@ -72,7 +72,7 @@ class MyportfolioGalleryExtractor(Extractor):
elif user: elif user:
user, _, title = user.partition(" - ") user, _, title = user.partition(" - ")
else: else:
raise exception.NotFoundError() raise self.exc.NotFoundError()
return { return {
"user": text.unescape(user), "user": text.unescape(user),

View File

@@ -9,7 +9,7 @@
"""Extractors for https://www.newgrounds.com/""" """Extractors for https://www.newgrounds.com/"""
from .common import Extractor, Message, Dispatch from .common import Extractor, Message, Dispatch
from .. import text, util, dt, exception from .. import text, util, dt
from ..cache import cache from ..cache import cache
import itertools import itertools
@@ -143,7 +143,7 @@ class NewgroundsExtractor(Extractor):
if result.get("success"): if result.get("success"):
break break
if "errors" in result: if "errors" in result:
raise exception.AuthenticationError( raise self.exc.AuthenticationError(
'"' + '", "'.join(result["errors"]) + '"') '"' + '", "'.join(result["errors"]) + '"')
if result.get("requiresMfa"): if result.get("requiresMfa"):
@@ -370,7 +370,7 @@ class NewgroundsExtractor(Extractor):
return return
if "errors" in data: if "errors" in data:
msg = ", ".join(text.unescape(e) for e in data["errors"]) msg = ", ".join(text.unescape(e) for e in data["errors"])
raise exception.AbortExtraction(msg) raise self.exc.AbortExtraction(msg)
items = data.get("items") items = data.get("items")
if not items: if not items:

View File

@@ -9,7 +9,7 @@
"""Extractors for nijie instances""" """Extractors for nijie instances"""
from .common import BaseExtractor, Message, Dispatch, AsynchronousMixin from .common import BaseExtractor, Message, Dispatch, AsynchronousMixin
from .. import text, dt, exception from .. import text, dt
from ..cache import cache from ..cache import cache
@@ -134,7 +134,7 @@ class NijieExtractor(AsynchronousMixin, BaseExtractor):
if username: if username:
return self.cookies_update(self._login_impl(username, password)) return self.cookies_update(self._login_impl(username, password))
raise exception.AuthenticationError("Username and password required") raise self.exc.AuthenticationError("Username and password required")
@cache(maxage=90*86400, keyarg=1) @cache(maxage=90*86400, keyarg=1)
def _login_impl(self, username, password): def _login_impl(self, username, password):
@@ -145,7 +145,7 @@ class NijieExtractor(AsynchronousMixin, BaseExtractor):
response = self.request(url, method="POST", data=data) response = self.request(url, method="POST", data=data)
if "/login.php" in response.text: if "/login.php" in response.text:
raise exception.AuthenticationError() raise self.exc.AuthenticationError()
return self.cookies return self.cookies
def _pagination(self, path): def _pagination(self, path):

View File

@@ -9,7 +9,7 @@
"""Utility classes to setup OAuth and link accounts to gallery-dl""" """Utility classes to setup OAuth and link accounts to gallery-dl"""
from .common import Extractor from .common import Extractor
from .. import text, oauth, util, config, exception from .. import text, oauth, util, config
from ..output import stdout_write from ..output import stdout_write
from ..cache import cache, memcache from ..cache import cache, memcache
@@ -74,7 +74,7 @@ class OAuthBase(Extractor):
msg = "Received invalid" msg = "Received invalid"
if exc: if exc:
exc = f" ({exc.__class__.__name__}: {exc})" exc = f" ({exc.__class__.__name__}: {exc})"
raise exception.AbortExtraction(f"{msg} OAuth response{exc}") raise self.exc.AbortExtraction(f"{msg} OAuth response{exc}")
def send(self, msg): def send(self, msg):
"""Send 'msg' to the socket opened in 'recv()'""" """Send 'msg' to the socket opened in 'recv()'"""
@@ -396,7 +396,7 @@ class OAuthMastodon(OAuthBase):
data = self.request_json(url, method="POST", data=data) data = self.request_json(url, method="POST", data=data)
if "client_id" not in data or "client_secret" not in data: if "client_id" not in data or "client_secret" not in data:
raise exception.AbortExtraction( raise self.exc.AbortExtraction(
f"Failed to register new application: '{data}'") f"Failed to register new application: '{data}'")
data["client-id"] = data.pop("client_id") data["client-id"] = data.pop("client_id")

View File

@@ -9,7 +9,7 @@
"""Extractors for https://rule34.paheal.net/""" """Extractors for https://rule34.paheal.net/"""
from .common import Extractor, Message from .common import Extractor, Message
from .. import text, exception from .. import text
class PahealExtractor(Extractor): class PahealExtractor(Extractor):
@@ -98,7 +98,7 @@ class PahealTagExtractor(PahealExtractor):
while True: while True:
try: try:
page = self.request(base + str(pnum)).text page = self.request(base + str(pnum)).text
except exception.HttpError as exc: except self.exc.HttpError as exc:
if exc.status == 404: if exc.status == 404:
return return
raise raise
@@ -152,7 +152,7 @@ class PahealPostExtractor(PahealExtractor):
def get_posts(self): def get_posts(self):
try: try:
return (self._extract_post(self.groups[0]),) return (self._extract_post(self.groups[0]),)
except exception.HttpError as exc: except self.exc.HttpError as exc:
if exc.status == 404: if exc.status == 404:
return () return ()
raise raise

View File

@@ -9,7 +9,7 @@
"""Extractors for https://www.patreon.com/""" """Extractors for https://www.patreon.com/"""
from .common import Extractor, Message from .common import Extractor, Message
from .. import text, util, dt, exception from .. import text, util, dt
from ..cache import memcache from ..cache import memcache
import collections import collections
import itertools import itertools
@@ -347,7 +347,7 @@ class PatreonExtractor(Extractor):
except Exception: except Exception:
pass pass
raise exception.AbortExtraction("Unable to extract bootstrap data") raise self.exc.AbortExtraction("Unable to extract bootstrap data")
class PatreonCollectionExtractor(PatreonExtractor): class PatreonCollectionExtractor(PatreonExtractor):
@@ -428,12 +428,12 @@ class PatreonCreatorExtractor(PatreonExtractor):
data = None data = None
data = self._extract_bootstrap(page) data = self._extract_bootstrap(page)
return data["campaign"]["data"]["id"] return data["campaign"]["data"]["id"]
except exception.ControlException: except self.exc.ControlException:
pass pass
except Exception as exc: except Exception as exc:
if data: if data:
self.log.debug(data) self.log.debug(data)
raise exception.AbortExtraction( raise self.exc.AbortExtraction(
f"Unable to extract campaign ID " f"Unable to extract campaign ID "
f"({exc.__class__.__name__}: {exc})") f"({exc.__class__.__name__}: {exc})")
@@ -442,7 +442,7 @@ class PatreonCreatorExtractor(PatreonExtractor):
page, r'{\"value\":{\"campaign\":{\"data\":{\"id\":\"', '\\"'): page, r'{\"value\":{\"campaign\":{\"data\":{\"id\":\"', '\\"'):
return cid return cid
raise exception.AbortExtraction("Failed to extract campaign ID") raise self.exc.AbortExtraction("Failed to extract campaign ID")
def _get_filters(self, params): def _get_filters(self, params):
return "".join( return "".join(

View File

@@ -9,7 +9,7 @@
"""Extractors for https://pexels.com/""" """Extractors for https://pexels.com/"""
from .common import Extractor, Message from .common import Extractor, Message
from .. import text, exception from .. import text
BASE_PATTERN = r"(?:https?://)?(?:www\.)?pexels\.com" BASE_PATTERN = r"(?:https?://)?(?:www\.)?pexels\.com"
@@ -174,7 +174,7 @@ class PexelsAPI():
else: else:
self.extractor.log.debug(response.text) self.extractor.log.debug(response.text)
raise exception.AbortExtraction("API request failed") raise self.exc.AbortExtraction("API request failed")
def _pagination(self, endpoint, params): def _pagination(self, endpoint, params):
while True: while True:

View File

@@ -9,7 +9,7 @@
"""Extractors for Philomena sites""" """Extractors for Philomena sites"""
from .booru import BooruExtractor from .booru import BooruExtractor
from .. import text, exception from .. import text
class PhilomenaExtractor(BooruExtractor): class PhilomenaExtractor(BooruExtractor):
@@ -113,7 +113,7 @@ class PhilomenaGalleryExtractor(PhilomenaExtractor):
try: try:
return {"gallery": self.api.gallery(self.groups[-1])} return {"gallery": self.api.gallery(self.groups[-1])}
except IndexError: except IndexError:
raise exception.NotFoundError("gallery") raise self.exc.NotFoundError("gallery")
def posts(self): def posts(self):
gallery_id = "gallery_id:" + self.groups[-1] gallery_id = "gallery_id:" + self.groups[-1]
@@ -159,7 +159,7 @@ class PhilomenaAPI():
# error # error
self.extractor.log.debug(response.content) self.extractor.log.debug(response.content)
raise exception.HttpError("", response) raise self.exc.HttpError("", response)
def _pagination(self, endpoint, params): def _pagination(self, endpoint, params):
extr = self.extractor extr = self.extractor

View File

@@ -7,7 +7,7 @@
"""Extractors for https://pholder.com/""" """Extractors for https://pholder.com/"""
from .common import Extractor, Message from .common import Extractor, Message
from .. import text, util, exception from .. import text, util
BASE_PATTERN = r"(?:https?://)?(?:www\.)?pholder\.com" BASE_PATTERN = r"(?:https?://)?(?:www\.)?pholder\.com"
@@ -50,7 +50,7 @@ class PholderExtractor(Extractor):
except ValueError: except ValueError:
pass pass
raise exception.AbortExtraction("Could not locate window.data JSON.") raise self.exc.AbortExtraction("Could not locate window.data JSON.")
def _posts(self, page_url): def _posts(self, page_url):
params = {"page": 1} params = {"page": 1}

View File

@@ -10,7 +10,7 @@
from .common import Extractor, Message from .common import Extractor, Message
from ..cache import cache from ..cache import cache
from .. import text, exception from .. import text
BASE_PATTERN = r"(?:https?://)?www\.pillowfort\.social" BASE_PATTERN = r"(?:https?://)?www\.pillowfort\.social"
@@ -108,7 +108,7 @@ class PillowfortExtractor(Extractor):
response = self.request(url, method="POST", headers=headers, data=data) response = self.request(url, method="POST", headers=headers, data=data)
if not response.history: if not response.history:
raise exception.AuthenticationError() raise self.exc.AuthenticationError()
return { return {
cookie.name: cookie.value cookie.name: cookie.value

View File

@@ -9,7 +9,7 @@
"""Extractors for https://www.pinterest.com/""" """Extractors for https://www.pinterest.com/"""
from .common import Extractor, Message from .common import Extractor, Message
from .. import text, util, exception from .. import text, util
import itertools import itertools
BASE_PATTERN = r"(?:https?://)?(?:\w+\.)?pinterest\.[\w.]+" BASE_PATTERN = r"(?:https?://)?(?:\w+\.)?pinterest\.[\w.]+"
@@ -394,7 +394,7 @@ class PinterestPinitExtractor(PinterestExtractor):
f"/{self.groups[0]}/redirect/") f"/{self.groups[0]}/redirect/")
location = self.request_location(url) location = self.request_location(url)
if not location: if not location:
raise exception.NotFoundError("pin") raise self.exc.NotFoundError("pin")
elif PinterestPinExtractor.pattern.match(location): elif PinterestPinExtractor.pattern.match(location):
yield Message.Queue, location, { yield Message.Queue, location, {
"_extractor": PinterestPinExtractor} "_extractor": PinterestPinExtractor}
@@ -402,7 +402,7 @@ class PinterestPinitExtractor(PinterestExtractor):
yield Message.Queue, location, { yield Message.Queue, location, {
"_extractor": PinterestBoardExtractor} "_extractor": PinterestBoardExtractor}
else: else:
raise exception.NotFoundError("pin") raise self.exc.NotFoundError("pin")
class PinterestAPI(): class PinterestAPI():
@@ -545,9 +545,9 @@ class PinterestAPI():
return data return data
if response.status_code == 404: if response.status_code == 404:
resource = self.extractor.subcategory.rpartition("-")[2] resource = self.extractor.subcategory.rpartition("-")[2]
raise exception.NotFoundError(resource) raise self.exc.NotFoundError(resource)
self.extractor.log.debug("Server response: %s", response.text) self.extractor.log.debug("Server response: %s", response.text)
raise exception.AbortExtraction("API request failed") raise self.exc.AbortExtraction("API request failed")
def _pagination(self, resource, options): def _pagination(self, resource, options):
while True: while True:

View File

@@ -9,7 +9,7 @@
"""Extractors for https://www.pixiv.net/""" """Extractors for https://www.pixiv.net/"""
from .common import Extractor, Message, Dispatch from .common import Extractor, Message, Dispatch
from .. import text, util, dt, exception from .. import text, util, dt
from ..cache import cache, memcache from ..cache import cache, memcache
import itertools import itertools
import hashlib import hashlib
@@ -205,7 +205,7 @@ class PixivExtractor(Extractor):
url = f"{base}0.{ext}" url = f"{base}0.{ext}"
self.request(url, method="HEAD") self.request(url, method="HEAD")
break break
except exception.HttpError: except self.exc.HttpError:
pass pass
else: else:
self.log.warning( self.log.warning(
@@ -329,7 +329,7 @@ class PixivExtractor(Extractor):
url = f"{base}_p0.{ext}" url = f"{base}_p0.{ext}"
self.request(url, method="HEAD") self.request(url, method="HEAD")
return url return url
except exception.HttpError: except self.exc.HttpError:
pass pass
def _sanitize_ajax_caption(self, caption): def _sanitize_ajax_caption(self, caption):
@@ -721,7 +721,7 @@ class PixivRankingExtractor(PixivExtractor):
try: try:
self.mode = mode = mode_map[mode] self.mode = mode = mode_map[mode]
except KeyError: except KeyError:
raise exception.AbortExtraction(f"Invalid mode '{mode}'") raise self.exc.AbortExtraction(f"Invalid mode '{mode}'")
if date := query.get("date"): if date := query.get("date"):
if len(date) == 8 and date.isdecimal(): if len(date) == 8 and date.isdecimal():
@@ -772,7 +772,7 @@ class PixivSearchExtractor(PixivExtractor):
try: try:
self.word = query["word"] self.word = query["word"]
except KeyError: except KeyError:
raise exception.AbortExtraction("Missing search term") raise self.exc.AbortExtraction("Missing search term")
sort = query.get("order", "date_d") sort = query.get("order", "date_d")
sort_map = { sort_map = {
@@ -785,7 +785,7 @@ class PixivSearchExtractor(PixivExtractor):
try: try:
self.sort = sort = sort_map[sort] self.sort = sort = sort_map[sort]
except KeyError: except KeyError:
raise exception.AbortExtraction(f"Invalid search order '{sort}'") raise self.exc.AbortExtraction(f"Invalid search order '{sort}'")
target = query.get("s_mode", "s_tag_full") target = query.get("s_mode", "s_tag_full")
target_map = { target_map = {
@@ -796,7 +796,7 @@ class PixivSearchExtractor(PixivExtractor):
try: try:
self.target = target = target_map[target] self.target = target = target_map[target]
except KeyError: except KeyError:
raise exception.AbortExtraction(f"Invalid search mode '{target}'") raise self.exc.AbortExtraction(f"Invalid search mode '{target}'")
self.date_start = query.get("scd") self.date_start = query.get("scd")
self.date_end = query.get("ecd") self.date_end = query.get("ecd")
@@ -1153,7 +1153,7 @@ class PixivAppAPI():
@cache(maxage=3600, keyarg=1) @cache(maxage=3600, keyarg=1)
def _login_impl(self, username): def _login_impl(self, username):
if not self.refresh_token: if not self.refresh_token:
raise exception.AuthenticationError( raise self.exc.AuthenticationError(
"'refresh-token' required.\n" "'refresh-token' required.\n"
"Run `gallery-dl oauth:pixiv` to get one.") "Run `gallery-dl oauth:pixiv` to get one.")
@@ -1178,7 +1178,7 @@ class PixivAppAPI():
url, method="POST", headers=headers, data=data, fatal=False) url, method="POST", headers=headers, data=data, fatal=False)
if response.status_code >= 400: if response.status_code >= 400:
self.log.debug(response.text) self.log.debug(response.text)
raise exception.AuthenticationError("Invalid refresh token") raise self.exc.AuthenticationError("Invalid refresh token")
data = response.json()["response"] data = response.json()["response"]
return data["user"], "Bearer " + data["access_token"] return data["user"], "Bearer " + data["access_token"]
@@ -1305,7 +1305,7 @@ class PixivAppAPI():
self.log.debug(data) self.log.debug(data)
if response.status_code == 404: if response.status_code == 404:
raise exception.NotFoundError() raise self.exc.NotFoundError()
error = data["error"] error = data["error"]
if "rate limit" in (error.get("message") or "").lower(): if "rate limit" in (error.get("message") or "").lower():
@@ -1315,7 +1315,7 @@ class PixivAppAPI():
msg = (f"'{msg}'" if (msg := error.get("user_message")) else msg = (f"'{msg}'" if (msg := error.get("user_message")) else
f"'{msg}'" if (msg := error.get("message")) else f"'{msg}'" if (msg := error.get("message")) else
error) error)
raise exception.AbortExtraction("API request failed: " + msg) raise self.exc.AbortExtraction("API request failed: " + msg)
def _pagination(self, endpoint, params, def _pagination(self, endpoint, params,
key_items="illusts", key_data=None, key_user=None): key_items="illusts", key_data=None, key_user=None):
@@ -1326,7 +1326,7 @@ class PixivAppAPI():
if key_user is not None and not data[key_user].get("id"): if key_user is not None and not data[key_user].get("id"):
user = self.user_detail(self.extractor.user_id, fatal=False) user = self.user_detail(self.extractor.user_id, fatal=False)
if user.get("error"): if user.get("error"):
raise exception.NotFoundError("user") raise self.exc.NotFoundError("user")
return return
while True: while True:

View File

@@ -9,7 +9,7 @@
"""Extractors for https://www.pixnet.net/""" """Extractors for https://www.pixnet.net/"""
from .common import Extractor, Message from .common import Extractor, Message
from .. import text, exception from .. import text
BASE_PATTERN = r"(?:https?://)?(?!www\.)([\w-]+)\.pixnet.net" BASE_PATTERN = r"(?:https?://)?(?!www\.)([\w-]+)\.pixnet.net"
@@ -52,7 +52,7 @@ class PixnetExtractor(Extractor):
pnext = text.extr(page, 'class="nextBtn"', '>') pnext = text.extr(page, 'class="nextBtn"', '>')
if pnext is None and 'name="albumpass">' in page: if pnext is None and 'name="albumpass">' in page:
raise exception.AbortExtraction( raise self.exc.AbortExtraction(
f"Album {self.item_id} is password-protected.") f"Album {self.item_id} is password-protected.")
if "href" not in pnext: if "href" not in pnext:
return return

View File

@@ -9,7 +9,7 @@
"""Extractors for https://www.plurk.com/""" """Extractors for https://www.plurk.com/"""
from .common import Extractor, Message from .common import Extractor, Message
from .. import text, util, dt, exception from .. import text, util, dt
class PlurkExtractor(Extractor): class PlurkExtractor(Extractor):
@@ -59,7 +59,7 @@ class PlurkExtractor(Extractor):
def _load(self, data): def _load(self, data):
if not data: if not data:
raise exception.NotFoundError("user") raise self.exc.NotFoundError("user")
return util.json_loads( return util.json_loads(
text.re(r"new Date\(([^)]+)\)").sub(r"\1", data)) text.re(r"new Date\(([^)]+)\)").sub(r"\1", data))

View File

@@ -7,7 +7,7 @@
"""Extractors for http://www.poringa.net/""" """Extractors for http://www.poringa.net/"""
from .common import Extractor, Message from .common import Extractor, Message
from .. import text, exception from .. import text
from ..cache import cache from ..cache import cache
import itertools import itertools
@@ -32,7 +32,7 @@ class PoringaExtractor(Extractor):
try: try:
response = self.request(url) response = self.request(url)
except exception.HttpError as exc: except self.exc.HttpError as exc:
self.log.warning( self.log.warning(
"Unable to fetch posts for '%s' (%s)", post_id, exc) "Unable to fetch posts for '%s' (%s)", post_id, exc)
continue continue

View File

@@ -9,7 +9,7 @@
"""Extractors for https://www.pornhub.com/""" """Extractors for https://www.pornhub.com/"""
from .common import Extractor, Message, Dispatch from .common import Extractor, Message, Dispatch
from .. import text, exception from .. import text
BASE_PATTERN = r"(?:https?://)?(?:[\w-]+\.)?pornhub\.com" BASE_PATTERN = r"(?:https?://)?(?:[\w-]+\.)?pornhub\.com"
@@ -109,7 +109,7 @@ class PornhubGalleryExtractor(PornhubExtractor):
data = self.request_json(url, params=params) data = self.request_json(url, params=params)
if not (images := data.get("photos")): if not (images := data.get("photos")):
raise exception.AuthorizationError() raise self.exc.AuthorizationError()
key = end = self._first key = end = self._first
results = [] results = []

View File

@@ -7,7 +7,7 @@
"""Extractors for Postmill instances""" """Extractors for Postmill instances"""
from .common import BaseExtractor, Message from .common import BaseExtractor, Message
from .. import text, exception from .. import text
class PostmillExtractor(BaseExtractor): class PostmillExtractor(BaseExtractor):
@@ -102,7 +102,7 @@ class PostmillSubmissionsExtractor(PostmillExtractor):
if response.history: if response.history:
redirect_url = response.url redirect_url = response.url
if redirect_url == self.root + "/login": if redirect_url == self.root + "/login":
raise exception.AbortExtraction( raise self.exc.AbortExtraction(
f"HTTP redirect to login page ({redirect_url})") f"HTTP redirect to login page ({redirect_url})")
page = response.text page = response.text

View File

@@ -9,7 +9,7 @@
"""Extractors for https://readcomiconline.li/""" """Extractors for https://readcomiconline.li/"""
from .common import Extractor, ChapterExtractor, MangaExtractor from .common import Extractor, ChapterExtractor, MangaExtractor
from .. import text, exception from .. import text
import binascii import binascii
BASE_PATTERN = r"(?i)(?:https?://)?(?:www\.)?readcomiconline\.(?:li|to)" BASE_PATTERN = r"(?i)(?:https?://)?(?:www\.)?readcomiconline\.(?:li|to)"
@@ -36,7 +36,7 @@ class ReadcomiconlineBase():
"the CAPTCHA, and press ENTER to continue", response.url) "the CAPTCHA, and press ENTER to continue", response.url)
self.input() self.input()
else: else:
raise exception.AbortExtraction( raise self.exc.AbortExtraction(
f"Redirect to \n{response.url}\nVisit this URL in your " f"Redirect to \n{response.url}\nVisit this URL in your "
f"browser and solve the CAPTCHA to continue") f"browser and solve the CAPTCHA to continue")

View File

@@ -9,7 +9,7 @@
"""Extractors for https://www.reddit.com/""" """Extractors for https://www.reddit.com/"""
from .common import Extractor, Message from .common import Extractor, Message
from .. import text, util, exception from .. import text, util
from ..cache import cache from ..cache import cache
@@ -525,7 +525,7 @@ class RedditAPI():
if response.status_code != 200: if response.status_code != 200:
self.log.debug("Server response: %s", data) self.log.debug("Server response: %s", data)
raise exception.AuthenticationError( raise self.exc.AuthenticationError(
f"\"{data.get('error')}: {data.get('message')}\"") f"\"{data.get('error')}: {data.get('message')}\"")
return "Bearer " + data["access_token"] return "Bearer " + data["access_token"]
@@ -555,16 +555,16 @@ class RedditAPI():
try: try:
data = response.json() data = response.json()
except ValueError: except ValueError:
raise exception.AbortExtraction( raise self.exc.AbortExtraction(
text.remove_html(response.text)) text.remove_html(response.text))
if "error" in data: if "error" in data:
if data["error"] == 403: if data["error"] == 403:
raise exception.AuthorizationError() raise self.exc.AuthorizationError()
if data["error"] == 404: if data["error"] == 404:
raise exception.NotFoundError() raise self.exc.NotFoundError()
self.log.debug(data) self.log.debug(data)
raise exception.AbortExtraction(data.get("message")) raise self.exc.AbortExtraction(data.get("message"))
return data return data
def _pagination(self, endpoint, params): def _pagination(self, endpoint, params):
@@ -592,7 +592,7 @@ class RedditAPI():
if post["num_comments"] and self.comments: if post["num_comments"] and self.comments:
try: try:
yield self.submission(post["id"]) yield self.submission(post["id"])
except exception.AuthorizationError: except self.exc.AuthorizationError:
pass pass
else: else:
yield post, () yield post, ()

View File

@@ -9,7 +9,7 @@
"""Extractors for https://rule34.xyz/""" """Extractors for https://rule34.xyz/"""
from .booru import BooruExtractor from .booru import BooruExtractor
from .. import text, exception from .. import text
from ..cache import cache from ..cache import cache
import collections import collections
@@ -129,7 +129,7 @@ class Rule34xyzExtractor(BooruExtractor):
if jwt := response.get("jwt"): if jwt := response.get("jwt"):
return "Bearer " + jwt return "Bearer " + jwt
raise exception.AuthenticationError( raise self.exc.AuthenticationError(
(msg := response.get("message")) and f'"{msg}"') (msg := response.get("message")) and f'"{msg}"')

View File

@@ -10,7 +10,7 @@
from .booru import BooruExtractor from .booru import BooruExtractor
from .common import Message from .common import Message
from .. import text, util, exception from .. import text, util
from ..cache import cache from ..cache import cache
import collections import collections
@@ -284,7 +284,7 @@ class SankakuAPI():
if response.status_code == 429: if response.status_code == 429:
until = response.headers.get("X-RateLimit-Reset") until = response.headers.get("X-RateLimit-Reset")
if not until and b"_tags-explicit-limit" in response.content: if not until and b"_tags-explicit-limit" in response.content:
raise exception.AuthorizationError( raise self.exc.AuthorizationError(
"Search tag limit exceeded") "Search tag limit exceeded")
seconds = None if until else 600 seconds = None if until else 600
self.extractor.wait(until=until, seconds=seconds) self.extractor.wait(until=until, seconds=seconds)
@@ -305,7 +305,7 @@ class SankakuAPI():
code = f"'{code.rpartition('__')[2].replace('-', ' ')}'" code = f"'{code.rpartition('__')[2].replace('-', ' ')}'"
except Exception: except Exception:
pass pass
raise exception.AbortExtraction(code) raise self.exc.AbortExtraction(code)
return data return data
def _pagination(self, endpoint, params): def _pagination(self, endpoint, params):
@@ -365,5 +365,5 @@ def _authenticate_impl(extr, username, password):
data = response.json() data = response.json()
if response.status_code >= 400 or not data.get("success"): if response.status_code >= 400 or not data.get("success"):
raise exception.AuthenticationError(data.get("error")) raise extr.exc.AuthenticationError(data.get("error"))
return "Bearer " + data["access_token"] return "Bearer " + data["access_token"]

View File

@@ -9,7 +9,7 @@
"""Extractors for https://niyaniya.moe/""" """Extractors for https://niyaniya.moe/"""
from .common import GalleryExtractor, Extractor, Message from .common import GalleryExtractor, Extractor, Message
from .. import text, exception from .. import text
import collections import collections
BASE_PATTERN = ( BASE_PATTERN = (
@@ -66,7 +66,7 @@ class SchalenetworkExtractor(Extractor):
if token := self.config("token"): if token := self.config("token"):
return "Bearer " + token.rpartition(' ')[2] return "Bearer " + token.rpartition(' ')[2]
if required: if required:
raise exception.AuthRequired("'token'", "your favorites") raise self.exc.AuthRequired("'token'", "your favorites")
def _crt(self): def _crt(self):
crt = self.config("crt") crt = self.config("crt")
@@ -88,7 +88,7 @@ class SchalenetworkExtractor(Extractor):
msg = None msg = None
else: else:
msg = f"{exc.status} {exc.response.reason}" msg = f"{exc.status} {exc.response.reason}"
raise exception.AuthRequired( raise self.exc.AuthRequired(
"'crt' query parameter & matching 'user-agent'", None, msg) "'crt' query parameter & matching 'user-agent'", None, msg)
@@ -153,7 +153,7 @@ class SchalenetworkGalleryExtractor(SchalenetworkExtractor, GalleryExtractor):
try: try:
data_fmt = self.request_json( data_fmt = self.request_json(
url, method="POST", headers=headers) url, method="POST", headers=headers)
except exception.HttpError as exc: except self.exc.HttpError as exc:
self._require_auth(exc) self._require_auth(exc)
self.fmt = self._select_format(data_fmt["data"]) self.fmt = self._select_format(data_fmt["data"])
@@ -217,7 +217,7 @@ class SchalenetworkGalleryExtractor(SchalenetworkExtractor, GalleryExtractor):
self.log.debug("%s: Format %s is not available", self.log.debug("%s: Format %s is not available",
self.groups[1], fmtid) self.groups[1], fmtid)
else: else:
raise exception.NotFoundError("format") raise self.exc.NotFoundError("format")
self.log.debug("%s: Selected format %s", self.groups[1], fmtid) self.log.debug("%s: Selected format %s", self.groups[1], fmtid)
fmt["w"] = fmtid fmt["w"] = fmtid

View File

@@ -9,7 +9,7 @@
"""Extractors for https://scrolller.com/""" """Extractors for https://scrolller.com/"""
from .common import Extractor, Message from .common import Extractor, Message
from .. import text, util, exception from .. import text, util
from ..cache import cache from ..cache import cache
BASE_PATTERN = r"(?:https?://)?(?:www\.)?scrolller\.com" BASE_PATTERN = r"(?:https?://)?(?:www\.)?scrolller\.com"
@@ -82,9 +82,9 @@ class ScrolllerExtractor(Extractor):
try: try:
data = self._request_graphql("LoginQuery", variables, False) data = self._request_graphql("LoginQuery", variables, False)
except exception.HttpError as exc: except self.exc.HttpError as exc:
if exc.status == 403: if exc.status == 403:
raise exception.AuthenticationError() raise self.exc.AuthenticationError()
raise raise
return data["login"]["token"] return data["login"]["token"]
@@ -206,7 +206,7 @@ class ScrolllerFollowingExtractor(ScrolllerExtractor):
self.login() self.login()
if not self.auth_token: if not self.auth_token:
raise exception.AuthorizationError("Login required") raise self.exc.AuthorizationError("Login required")
variables = { variables = {
"iterator": None, "iterator": None,

View File

@@ -9,7 +9,7 @@
"""Extractors for https://seiga.nicovideo.jp/""" """Extractors for https://seiga.nicovideo.jp/"""
from .common import Extractor, Message from .common import Extractor, Message
from .. import text, util, exception from .. import text, util
from ..cache import cache from ..cache import cache
@@ -45,7 +45,7 @@ class SeigaExtractor(Extractor):
url = f"{self.root}/image/source/{image_id}" url = f"{self.root}/image/source/{image_id}"
location = self.request_location(url, notfound="image") location = self.request_location(url, notfound="image")
if "nicovideo.jp/login" in location: if "nicovideo.jp/login" in location:
raise exception.AbortExtraction( raise self.exc.AbortExtraction(
f"HTTP redirect to login page ({location.partition('?')[0]})") f"HTTP redirect to login page ({location.partition('?')[0]})")
return location.replace("/o/", "/priv/", 1) return location.replace("/o/", "/priv/", 1)
@@ -57,7 +57,7 @@ class SeigaExtractor(Extractor):
if username: if username:
return self.cookies_update(self._login_impl(username, password)) return self.cookies_update(self._login_impl(username, password))
raise exception.AuthorizationError( raise self.exc.AuthorizationError(
"username & password or 'user_session' cookie required") "username & password or 'user_session' cookie required")
@cache(maxage=365*86400, keyarg=1) @cache(maxage=365*86400, keyarg=1)
@@ -76,7 +76,7 @@ class SeigaExtractor(Extractor):
response = self.request(url, method="POST", data=data) response = self.request(url, method="POST", data=data)
if "message=cant_login" in response.url: if "message=cant_login" in response.url:
raise exception.AuthenticationError() raise self.exc.AuthenticationError()
if "/mfa" in response.url: if "/mfa" in response.url:
page = response.text page = response.text
@@ -93,7 +93,7 @@ class SeigaExtractor(Extractor):
if not response.history and \ if not response.history and \
b"Confirmation code is incorrect" in response.content: b"Confirmation code is incorrect" in response.content:
raise exception.AuthenticationError( raise self.exc.AuthenticationError(
"Incorrect Confirmation Code") "Incorrect Confirmation Code")
return { return {
@@ -133,7 +133,7 @@ class SeigaUserExtractor(SeigaExtractor):
))[0] ))[0]
if not data["name"] and "ユーザー情報が取得出来ませんでした" in page: if not data["name"] and "ユーザー情報が取得出来ませんでした" in page:
raise exception.NotFoundError("user") raise self.exc.NotFoundError("user")
return { return {
"user": { "user": {

View File

@@ -9,7 +9,7 @@
"""Extract hentai-manga from https://www.simply-hentai.com/""" """Extract hentai-manga from https://www.simply-hentai.com/"""
from .common import GalleryExtractor, Extractor, Message from .common import GalleryExtractor, Extractor, Message
from .. import text, util, exception from .. import text, util
class SimplyhentaiGalleryExtractor(GalleryExtractor): class SimplyhentaiGalleryExtractor(GalleryExtractor):
@@ -38,7 +38,7 @@ class SimplyhentaiGalleryExtractor(GalleryExtractor):
title = extr('<meta property="og:title" content="', '"') title = extr('<meta property="og:title" content="', '"')
image = extr('<meta property="og:image" content="', '"') image = extr('<meta property="og:image" content="', '"')
if not title: if not title:
raise exception.NotFoundError("gallery") raise self.exc.NotFoundError("gallery")
data = { data = {
"title" : text.unescape(title), "title" : text.unescape(title),
"gallery_id": text.parse_int(image.split("/")[-2]), "gallery_id": text.parse_int(image.split("/")[-2]),

View File

@@ -9,7 +9,7 @@
"""Extractors for https://www.smugmug.com/""" """Extractors for https://www.smugmug.com/"""
from .common import Extractor, Message from .common import Extractor, Message
from .. import text, oauth, exception from .. import text, oauth
BASE_PATTERN = ( BASE_PATTERN = (
r"(?:smugmug:(?!album:)(?:https?://)?([^/]+)|" r"(?:smugmug:(?!album:)(?:https?://)?([^/]+)|"
@@ -209,17 +209,17 @@ class SmugmugAPI(oauth.OAuth1API):
if 200 <= data["Code"] < 400: if 200 <= data["Code"] < 400:
return data return data
if data["Code"] == 404: if data["Code"] == 404:
raise exception.NotFoundError() raise self.exc.NotFoundError()
if data["Code"] == 429: if data["Code"] == 429:
raise exception.AbortExtraction("Rate limit reached") raise self.exc.AbortExtraction("Rate limit reached")
self.log.debug(data) self.log.debug(data)
raise exception.AbortExtraction("API request failed") raise self.exc.AbortExtraction("API request failed")
def _expansion(self, endpoint, expands, params=None): def _expansion(self, endpoint, expands, params=None):
endpoint = self._extend(endpoint, expands) endpoint = self._extend(endpoint, expands)
result = self._apply_expansions(self._call(endpoint, params), expands) result = self._apply_expansions(self._call(endpoint, params), expands)
if not result: if not result:
raise exception.NotFoundError() raise self.exc.NotFoundError()
return result[0] return result[0]
def _pagination(self, endpoint, expands=None): def _pagination(self, endpoint, expands=None):

View File

@@ -7,7 +7,7 @@
"""Extractors for https://www.steamgriddb.com""" """Extractors for https://www.steamgriddb.com"""
from .common import Extractor, Message from .common import Extractor, Message
from .. import text, exception from .. import text
BASE_PATTERN = r"(?:https?://)?(?:www\.)?steamgriddb\.com" BASE_PATTERN = r"(?:https?://)?(?:www\.)?steamgriddb\.com"
@@ -74,7 +74,7 @@ class SteamgriddbExtractor(Extractor):
def _call(self, endpoint, **kwargs): def _call(self, endpoint, **kwargs):
data = self.request_json(self.root + endpoint, **kwargs) data = self.request_json(self.root + endpoint, **kwargs)
if not data["success"]: if not data["success"]:
raise exception.AbortExtraction(data["error"]) raise self.exc.AbortExtraction(data["error"])
return data["data"] return data["data"]
@@ -96,7 +96,7 @@ class SteamgriddbAssetsExtractor(SteamgriddbExtractor):
sort = self.config("sort", "score_desc") sort = self.config("sort", "score_desc")
if sort not in ("score_desc", "score_asc", "score_old_desc", if sort not in ("score_desc", "score_asc", "score_old_desc",
"score_old_asc", "age_desc", "age_asc"): "score_old_asc", "age_desc", "age_asc"):
raise exception.AbortExtraction(f"Invalid sort '{sort}'") raise self.exc.AbortExtraction(f"Invalid sort '{sort}'")
json = { json = {
"static" : self.config("static", True), "static" : self.config("static", True),
@@ -149,7 +149,7 @@ class SteamgriddbAssetsExtractor(SteamgriddbExtractor):
for i in value: for i in value:
if i not in valid_values: if i not in valid_values:
raise exception.AbortExtraction(f"Invalid {type_name} '{i}'") raise self.exc.AbortExtraction(f"Invalid {type_name} '{i}'")
return value return value
@@ -169,7 +169,7 @@ class SteamgriddbAssetExtractor(SteamgriddbExtractor):
endpoint = "/api/public/asset/" + self.asset_type + "/" + self.asset_id endpoint = "/api/public/asset/" + self.asset_type + "/" + self.asset_id
asset = self._call(endpoint)["asset"] asset = self._call(endpoint)["asset"]
if asset is None: if asset is None:
raise exception.NotFoundError( raise self.exc.NotFoundError(
f"asset ({self.asset_type}:{self.asset_id})") f"asset ({self.asset_type}:{self.asset_id})")
return (asset,) return (asset,)

View File

@@ -9,7 +9,7 @@
"""Extractors for https://www.subscribestar.com/""" """Extractors for https://www.subscribestar.com/"""
from .common import Extractor, Message from .common import Extractor, Message
from .. import text, util, exception from .. import text, util
from ..cache import cache from ..cache import cache
BASE_PATTERN = r"(?:https?://)?(?:www\.)?subscribestar\.(com|adult)" BASE_PATTERN = r"(?:https?://)?(?:www\.)?subscribestar\.(com|adult)"
@@ -71,7 +71,7 @@ class SubscribestarExtractor(Extractor):
if response.history and ( if response.history and (
"/verify_subscriber" in response.url or "/verify_subscriber" in response.url or
"/age_confirmation_warning" in response.url): "/age_confirmation_warning" in response.url):
raise exception.AbortExtraction( raise self.exc.AbortExtraction(
"HTTP redirect to " + response.url) "HTTP redirect to " + response.url)
content = response.content content = response.content
@@ -127,7 +127,7 @@ class SubscribestarExtractor(Extractor):
msg = f'"{errors.popitem()[1]}"' msg = f'"{errors.popitem()[1]}"'
except Exception: except Exception:
msg = None msg = None
raise exception.AuthenticationError(msg) raise self.exc.AuthenticationError(msg)
return response return response
# submit username / email # submit username / email

View File

@@ -9,7 +9,7 @@
"""Extractors for https://tapas.io/""" """Extractors for https://tapas.io/"""
from .common import Extractor, Message from .common import Extractor, Message
from .. import text, exception from .. import text
from ..cache import cache from ..cache import cache
BASE_PATTERN = r"(?:https?://)?tapas\.io" BASE_PATTERN = r"(?:https?://)?tapas\.io"
@@ -61,7 +61,7 @@ class TapasExtractor(Extractor):
if not response.history or \ if not response.history or \
"/account/signin_fail" in response.history[-1].url: "/account/signin_fail" in response.history[-1].url:
raise exception.AuthenticationError() raise self.exc.AuthenticationError()
return {"_cpc_": response.history[0].cookies.get("_cpc_")} return {"_cpc_": response.history[0].cookies.get("_cpc_")}
@@ -84,7 +84,7 @@ class TapasEpisodeExtractor(TapasExtractor):
episode = data["episode"] episode = data["episode"]
if not episode.get("free") and not episode.get("unlocked"): if not episode.get("free") and not episode.get("unlocked"):
raise exception.AuthorizationError( raise self.exc.AuthorizationError(
f"{episode_id}: Episode '{episode['title']}' not unlocked") f"{episode_id}: Episode '{episode['title']}' not unlocked")
html = data["html"] html = data["html"]

View File

@@ -7,7 +7,7 @@
"""Extractors for https://thefap.net/""" """Extractors for https://thefap.net/"""
from .common import Extractor, Message from .common import Extractor, Message
from .. import text, exception from .. import text
BASE_PATTERN = r"(?:https?://)?(?:www\.)?thefap\.net" BASE_PATTERN = r"(?:https?://)?(?:www\.)?thefap\.net"
@@ -47,7 +47,7 @@ class ThefapPostExtractor(ThefapExtractor):
page = self.request(self.root + path).text page = self.request(self.root + path).text
if "Not Found" in page: if "Not Found" in page:
raise exception.NotFoundError("post") raise self.exc.NotFoundError("post")
if model_name := text.extr(page, "<title>", " / "): if model_name := text.extr(page, "<title>", " / "):
model_name = text.unescape(model_name) model_name = text.unescape(model_name)
@@ -86,7 +86,7 @@ class ThefapModelExtractor(ThefapExtractor):
page = self.request(url).text page = self.request(url).text
if 'id="content"' not in page: if 'id="content"' not in page:
raise exception.NotFoundError("model") raise self.exc.NotFoundError("model")
if model_name := text.extr(page, "<h2", "</h2>"): if model_name := text.extr(page, "<h2", "</h2>"):
model_name = text.unescape(model_name[model_name.find(">")+1:]) model_name = text.unescape(model_name[model_name.find(">")+1:])

View File

@@ -7,7 +7,7 @@
"""Extractors for https://www.tiktok.com/""" """Extractors for https://www.tiktok.com/"""
from .common import Extractor, Message, Dispatch from .common import Extractor, Message, Dispatch
from .. import text, util, ytdl, exception from .. import text, util, ytdl
import functools import functools
import itertools import itertools
import binascii import binascii
@@ -166,7 +166,7 @@ class TiktokExtractor(Extractor):
try: try:
response = self.request(url) response = self.request(url)
if response.history and "/login" in response.url: if response.history and "/login" in response.url:
raise exception.AuthorizationError( raise self.exc.AuthorizationError(
"HTTP redirect to login page " "HTTP redirect to login page "
f"('{response.url.partition('?')[0]}')") f"('{response.url.partition('?')[0]}')")
html = response.text html = response.text
@@ -227,14 +227,14 @@ class TiktokExtractor(Extractor):
data["webapp.app-context"] data["webapp.app-context"]
data = data["webapp.user-detail"] data = data["webapp.user-detail"]
if not self._check_status_code(data, profile_url, "profile"): if not self._check_status_code(data, profile_url, "profile"):
raise exception.ExtractionError( raise self.exc.ExtractionError(
f"{profile_url}: could not extract rehydration data") f"{profile_url}: could not extract rehydration data")
try: try:
for key in additional_keys: for key in additional_keys:
data = data[key] data = data[key]
except KeyError as exc: except KeyError as exc:
self.log.traceback(exc) self.log.traceback(exc)
raise exception.ExtractionError( raise self.exc.ExtractionError(
"%s: could not extract rehydration data (%s)", "%s: could not extract rehydration data (%s)",
profile_url, ", ".join(additional_keys)) profile_url, ", ".join(additional_keys))
return data return data
@@ -258,7 +258,7 @@ class TiktokExtractor(Extractor):
if test.digest() == expected: if test.digest() == expected:
break break
else: else:
raise exception.ExtractionError("failed to find matching digest") raise self.exc.ExtractionError("failed to find matching digest")
# extract cookie names # extract cookie names
wci = text.extr(text.extr(html, 'id="wci"', '>'), 'class="', '"') wci = text.extr(text.extr(html, 'id="wci"', '>'), 'class="', '"')
@@ -278,7 +278,7 @@ class TiktokExtractor(Extractor):
sec_uid = self._extract_id( sec_uid = self._extract_id(
profile_url, user_name, r"MS4wLjABAAAA[\w-]{64}", "secUid") profile_url, user_name, r"MS4wLjABAAAA[\w-]{64}", "secUid")
if sec_uid is None: if sec_uid is None:
raise exception.AbortExtraction( raise self.exc.AbortExtraction(
f"{user_name}: unable to extract secondary user ID") f"{user_name}: unable to extract secondary user ID")
return sec_uid return sec_uid
@@ -286,7 +286,7 @@ class TiktokExtractor(Extractor):
author_id = self._extract_id( author_id = self._extract_id(
profile_url, user_name, r"[0-9]+", "id") profile_url, user_name, r"[0-9]+", "id")
if author_id is None: if author_id is None:
raise exception.AbortExtraction( raise self.exc.AbortExtraction(
f"{user_name}: unable to extract user ID") f"{user_name}: unable to extract user ID")
return author_id return author_id
@@ -306,7 +306,7 @@ class TiktokExtractor(Extractor):
video = post["video"] video = post["video"]
urls = self._extract_video_urls(video) urls = self._extract_video_urls(video)
if not urls: if not urls:
raise exception.ExtractionError( raise self.exc.ExtractionError(
f"{post['id']}: Failed to extract video URLs. " f"{post['id']}: Failed to extract video URLs. "
f"You may need cookies to continue.") f"You may need cookies to continue.")
@@ -533,7 +533,7 @@ class TiktokVmpostExtractor(TiktokExtractor):
url = self.request_location(url, headers=headers, notfound="post") url = self.request_location(url, headers=headers, notfound="post")
if not url or len(url) <= 28: if not url or len(url) <= 28:
# https://www.tiktok.com/?_r=1 # https://www.tiktok.com/?_r=1
raise exception.NotFoundError("post") raise self.exc.NotFoundError("post")
data = {"_extractor": TiktokPostExtractor} data = {"_extractor": TiktokPostExtractor}
yield Message.Queue, url.partition("?")[0], data yield Message.Queue, url.partition("?")[0], data
@@ -944,7 +944,7 @@ class TiktokTimeCursor(TiktokPaginationCursor):
elif not self.reverse and (new_cursor < self.cursor or no_cursor): elif not self.reverse and (new_cursor < self.cursor or no_cursor):
new_cursor = self.fallback_cursor(data) new_cursor = self.fallback_cursor(data)
elif no_cursor: elif no_cursor:
raise exception.ExtractionError("Could not extract next cursor") raise self.exc.ExtractionError("Could not extract next cursor")
self.cursor = new_cursor self.cursor = new_cursor
return not data.get(self.has_more_key, False) return not data.get(self.has_more_key, False)
@@ -1273,7 +1273,7 @@ class TiktokPaginationRequest:
extractor.log.warning("%s: TikTok API keeps sending the same " extractor.log.warning("%s: TikTok API keeps sending the same "
"page. Taking measures to avoid an infinite " "page. Taking measures to avoid an infinite "
"loop", url) "loop", url)
raise exception.ExtractionError( raise self.exc.ExtractionError(
"TikTok API keeps sending the same page") "TikTok API keeps sending the same page")

View File

@@ -9,7 +9,7 @@
"""Extractors for https://www.tumblr.com/""" """Extractors for https://www.tumblr.com/"""
from .common import Extractor, Message from .common import Extractor, Message
from .. import text, util, dt, oauth, exception from .. import text, util, dt, oauth
BASE_PATTERN = ( BASE_PATTERN = (
@@ -473,7 +473,7 @@ class TumblrAPI(oauth.OAuth1API):
self.log.debug(data) self.log.debug(data)
if status == 403: if status == 403:
raise exception.AuthorizationError() raise self.exc.AuthorizationError()
elif status == 404: elif status == 404:
try: try:
@@ -492,8 +492,8 @@ class TumblrAPI(oauth.OAuth1API):
else: else:
self.log.info("Run 'gallery-dl oauth:tumblr' " self.log.info("Run 'gallery-dl oauth:tumblr' "
"to access dashboard-only blogs") "to access dashboard-only blogs")
raise exception.AuthorizationError(error) raise self.exc.AuthorizationError(error)
raise exception.NotFoundError("user or post") raise self.exc.NotFoundError("user or post")
elif status == 429: elif status == 429:
# daily rate limit # daily rate limit
@@ -514,7 +514,7 @@ class TumblrAPI(oauth.OAuth1API):
continue continue
t = (dt.now() + dt.timedelta(0, float(reset))).time() t = (dt.now() + dt.timedelta(0, float(reset))).time()
raise exception.AbortExtraction( raise self.exc.AbortExtraction(
f"Aborting - Rate limit will reset at " f"Aborting - Rate limit will reset at "
f"{t.hour:02}:{t.minute:02}:{t.second:02}") f"{t.hour:02}:{t.minute:02}:{t.second:02}")
@@ -524,7 +524,7 @@ class TumblrAPI(oauth.OAuth1API):
self.extractor.wait(seconds=reset) self.extractor.wait(seconds=reset)
continue continue
raise exception.AbortExtraction(data) raise self.exc.AbortExtraction(data)
def _pagination(self, endpoint, params, def _pagination(self, endpoint, params,
blog=None, key="posts", cache=False): blog=None, key="posts", cache=False):

View File

@@ -9,7 +9,7 @@
"""Extractors for https://twibooru.org/""" """Extractors for https://twibooru.org/"""
from .booru import BooruExtractor from .booru import BooruExtractor
from .. import text, exception from .. import text
import operator import operator
BASE_PATTERN = r"(?:https?://)?(?:www\.)?twibooru\.org" BASE_PATTERN = r"(?:https?://)?(?:www\.)?twibooru\.org"
@@ -153,7 +153,7 @@ class TwibooruAPI():
# error # error
self.extractor.log.debug(response.content) self.extractor.log.debug(response.content)
raise exception.HttpError("", response) raise self.exc.HttpError("", response)
def _pagination(self, endpoint, params): def _pagination(self, endpoint, params):
extr = self.extractor extr = self.extractor

View File

@@ -9,7 +9,7 @@
"""Extractors for https://x.com/""" """Extractors for https://x.com/"""
from .common import Extractor, Message, Dispatch from .common import Extractor, Message, Dispatch
from .. import text, util, dt, exception from .. import text, util, dt
from ..cache import cache, memcache from ..cache import cache, memcache
import itertools import itertools
import random import random
@@ -906,7 +906,7 @@ class TwitterTimelineExtractor(TwitterExtractor):
return self.api.user_media return self.api.user_media
if strategy == "with_replies": if strategy == "with_replies":
return self.api.user_tweets_and_replies return self.api.user_tweets_and_replies
raise exception.AbortExtraction(f"Invalid strategy '{strategy}'") raise self.exc.AbortExtraction(f"Invalid strategy '{strategy}'")
class TwitterTweetsExtractor(TwitterExtractor): class TwitterTweetsExtractor(TwitterExtractor):
@@ -1092,7 +1092,7 @@ class TwitterTweetExtractor(TwitterExtractor):
try: try:
self._assign_user(tweet["core"]["user_results"]["result"]) self._assign_user(tweet["core"]["user_results"]["result"])
except KeyError: except KeyError:
raise exception.AbortExtraction( raise self.exc.AbortExtraction(
f"'{tweet.get('reason') or 'Unavailable'}'") f"'{tweet.get('reason') or 'Unavailable'}'")
yield tweet yield tweet
@@ -1403,10 +1403,10 @@ class TwitterAPI():
if tweet.get("__typename") == "TweetUnavailable": if tweet.get("__typename") == "TweetUnavailable":
reason = tweet.get("reason") reason = tweet.get("reason")
if reason in {"NsfwViewerHasNoStatedAge", "NsfwLoggedOut"}: if reason in {"NsfwViewerHasNoStatedAge", "NsfwLoggedOut"}:
raise exception.AuthRequired(message="NSFW Tweet") raise self.exc.AuthRequired(message="NSFW Tweet")
if reason == "Protected": if reason == "Protected":
raise exception.AuthRequired(message="Protected Tweet") raise self.exc.AuthRequired(message="Protected Tweet")
raise exception.AbortExtraction(f"Tweet unavailable ('{reason}')") raise self.exc.AbortExtraction(f"Tweet unavailable ('{reason}')")
return tweet return tweet
@@ -1754,9 +1754,9 @@ class TwitterAPI():
return user["rest_id"] return user["rest_id"]
except KeyError: except KeyError:
if user and user.get("__typename") == "UserUnavailable": if user and user.get("__typename") == "UserUnavailable":
raise exception.NotFoundError(user["message"], False) raise self.exc.NotFoundError(user["message"], False)
else: else:
raise exception.NotFoundError("user") raise self.exc.NotFoundError("user")
@cache(maxage=3600) @cache(maxage=3600)
def _guest_token(self): def _guest_token(self):
@@ -1835,13 +1835,13 @@ class TwitterAPI():
if "this account is temporarily locked" in msg: if "this account is temporarily locked" in msg:
msg = "Account temporarily locked" msg = "Account temporarily locked"
if self.extractor.config("locked") != "wait": if self.extractor.config("locked") != "wait":
raise exception.AuthorizationError(msg) raise self.exc.AuthorizationError(msg)
self.log.warning(msg) self.log.warning(msg)
self.extractor.input("Press ENTER to retry.") self.extractor.input("Press ENTER to retry.")
retry = True retry = True
elif "Could not authenticate you" in msg: elif "Could not authenticate you" in msg:
raise exception.AbortExtraction(f"'{msg}'") raise self.exc.AbortExtraction(f"'{msg}'")
elif msg.lower().startswith("timeout"): elif msg.lower().startswith("timeout"):
retry = True retry = True
@@ -1858,7 +1858,7 @@ class TwitterAPI():
return data return data
elif response.status_code in {403, 404} and \ elif response.status_code in {403, 404} and \
not self.headers["x-twitter-auth-type"]: not self.headers["x-twitter-auth-type"]:
raise exception.AuthRequired( raise self.exc.AuthRequired(
"authenticated cookies", "timeline") "authenticated cookies", "timeline")
elif response.status_code == 429: elif response.status_code == 429:
self._handle_ratelimit(response) self._handle_ratelimit(response)
@@ -1870,7 +1870,7 @@ class TwitterAPI():
except Exception: except Exception:
pass pass
raise exception.AbortExtraction( raise self.exc.AbortExtraction(
f"{response.status_code} {response.reason} ({errors})") f"{response.status_code} {response.reason} ({errors})")
def _pagination_rest(self, endpoint, params): def _pagination_rest(self, endpoint, params):
@@ -2065,13 +2065,13 @@ class TwitterAPI():
self.headers["x-twitter-auth-type"] = None self.headers["x-twitter-auth-type"] = None
extr.log.info("Retrying API request as guest") extr.log.info("Retrying API request as guest")
continue continue
raise exception.AuthorizationError( raise self.exc.AuthorizationError(
user["screen_name"] + " blocked your account") user["screen_name"] + " blocked your account")
elif user.get("protected"): elif user.get("protected"):
raise exception.AuthorizationError( raise self.exc.AuthorizationError(
user["screen_name"] + "'s Tweets are protected") user["screen_name"] + "'s Tweets are protected")
raise exception.AbortExtraction( raise self.exc.AbortExtraction(
"Unable to retrieve Tweets from this timeline") "Unable to retrieve Tweets from this timeline")
tweets = [] tweets = []
@@ -2301,7 +2301,7 @@ class TwitterAPI():
def _handle_ratelimit(self, response): def _handle_ratelimit(self, response):
rl = self.extractor.config("ratelimit") rl = self.extractor.config("ratelimit")
if rl == "abort": if rl == "abort":
raise exception.AbortExtraction("Rate limit exceeded") raise self.exc.AbortExtraction("Rate limit exceeded")
until = response.headers.get("x-rate-limit-reset") until = response.headers.get("x-rate-limit-reset")
seconds = None if until else 60.0 seconds = None if until else 60.0
@@ -2313,7 +2313,7 @@ class TwitterAPI():
num = text.parse_int(num) num = text.parse_int(num)
msg = f"Rate limit exceeded ({amt}/{num})" msg = f"Rate limit exceeded ({amt}/{num})"
if amt >= num: if amt >= num:
raise exception.AbortExtraction(msg) raise self.exc.AbortExtraction(msg)
self.log.warning(msg) self.log.warning(msg)
self._ratelimit_amt = amt + 1 self._ratelimit_amt = amt + 1
elif rl == "wait": elif rl == "wait":

View File

@@ -7,7 +7,7 @@
"""Extractors for https://urlgalleries.net/""" """Extractors for https://urlgalleries.net/"""
from .common import GalleryExtractor, Message from .common import GalleryExtractor, Message
from .. import text, exception from .. import text
class UrlgalleriesGalleryExtractor(GalleryExtractor): class UrlgalleriesGalleryExtractor(GalleryExtractor):
@@ -29,8 +29,8 @@ class UrlgalleriesGalleryExtractor(GalleryExtractor):
if 300 <= response.status_code < 500: if 300 <= response.status_code < 500:
if response.headers.get("location", "").endswith( if response.headers.get("location", "").endswith(
"/not_found_adult.php"): "/not_found_adult.php"):
raise exception.NotFoundError("gallery") raise self.exc.NotFoundError("gallery")
raise exception.HttpError(None, response) raise self.exc.HttpError(None, response)
page = response.text page = response.text
imgs = self.images(page) imgs = self.images(page)

View File

@@ -7,7 +7,6 @@
"""Extractors for general-purpose URL shorteners""" """Extractors for general-purpose URL shorteners"""
from .common import BaseExtractor, Message from .common import BaseExtractor, Message
from .. import exception
class UrlshortenerExtractor(BaseExtractor): class UrlshortenerExtractor(BaseExtractor):
@@ -40,5 +39,5 @@ class UrlshortenerLinkExtractor(UrlshortenerExtractor):
location = self.request_location( location = self.request_location(
url, headers=self.config_instance("headers"), notfound="URL") url, headers=self.config_instance("headers"), notfound="URL")
if not location: if not location:
raise exception.AbortExtraction("Unable to resolve short URL") raise self.exc.AbortExtraction("Unable to resolve short URL")
yield Message.Queue, location, {} yield Message.Queue, location, {}

View File

@@ -9,7 +9,7 @@
"""Extractors for https://vipergirls.to/""" """Extractors for https://vipergirls.to/"""
from .common import Extractor, Message from .common import Extractor, Message
from .. import text, util, exception from .. import text, util
from ..cache import cache from ..cache import cache
BASE_PATTERN = r"(?:https?://)?(?:www\.)?vipergirls\.to" BASE_PATTERN = r"(?:https?://)?(?:www\.)?vipergirls\.to"
@@ -104,7 +104,7 @@ class VipergirlsExtractor(Extractor):
response = self.request(url, method="POST", data=data) response = self.request(url, method="POST", data=data)
if not response.cookies.get("vg_password"): if not response.cookies.get("vg_password"):
raise exception.AuthenticationError() raise self.exc.AuthenticationError()
return {cookie.name: cookie.value return {cookie.name: cookie.value
for cookie in response.cookies} for cookie in response.cookies}

View File

@@ -9,7 +9,7 @@
"""Extractors for https://vk.com/""" """Extractors for https://vk.com/"""
from .common import Extractor, Message from .common import Extractor, Message
from .. import text, exception from .. import text
BASE_PATTERN = r"(?:https://)?(?:www\.|m\.)?vk\.com" BASE_PATTERN = r"(?:https://)?(?:www\.|m\.)?vk\.com"
@@ -100,13 +100,13 @@ class VkExtractor(Extractor):
response = self.request( response = self.request(
url, method="POST", headers=headers, data=data) url, method="POST", headers=headers, data=data)
if response.history and "/challenge.html" in response.url: if response.history and "/challenge.html" in response.url:
raise exception.AbortExtraction( raise self.exc.AbortExtraction(
"HTTP redirect to 'challenge' page:\n" + response.url) "HTTP redirect to 'challenge' page:\n" + response.url)
payload = response.json()["payload"][1] payload = response.json()["payload"][1]
if len(payload) < 4: if len(payload) < 4:
self.log.debug(payload) self.log.debug(payload)
raise exception.AuthorizationError( raise self.exc.AuthorizationError(
text.unescape(payload[0]) if payload[0] else None) text.unescape(payload[0]) if payload[0] else None)
total = payload[1] total = payload[1]

View File

@@ -9,7 +9,7 @@
"""Extractors for https://wallhaven.cc/""" """Extractors for https://wallhaven.cc/"""
from .common import Extractor, Message, Dispatch from .common import Extractor, Message, Dispatch
from .. import text, exception from .. import text
class WallhavenExtractor(Extractor): class WallhavenExtractor(Extractor):
@@ -199,7 +199,7 @@ class WallhavenAPI():
continue continue
self.extractor.log.debug("Server response: %s", response.text) self.extractor.log.debug("Server response: %s", response.text)
raise exception.AbortExtraction( raise self.exc.AbortExtraction(
f"API request failed " f"API request failed "
f"({response.status_code} {response.reason})") f"({response.status_code} {response.reason})")

View File

@@ -10,7 +10,7 @@
"""Extractors for https://www.webtoons.com/""" """Extractors for https://www.webtoons.com/"""
from .common import GalleryExtractor, Extractor, Message from .common import GalleryExtractor, Extractor, Message
from .. import exception, text, util from .. import text, util
BASE_PATTERN = r"(?:https?://)?(?:www\.)?webtoons\.com" BASE_PATTERN = r"(?:https?://)?(?:www\.)?webtoons\.com"
LANG_PATTERN = BASE_PATTERN + r"/(([^/?#]+)" LANG_PATTERN = BASE_PATTERN + r"/(([^/?#]+)"
@@ -40,7 +40,7 @@ class WebtoonsBase():
def request(self, url, **kwargs): def request(self, url, **kwargs):
response = Extractor.request(self, url, **kwargs) response = Extractor.request(self, url, **kwargs)
if response.history and "/ageGate" in response.url: if response.history and "/ageGate" in response.url:
raise exception.AbortExtraction( raise self.exc.AbortExtraction(
f"HTTP redirect to age gate check ('{response.url}')") f"HTTP redirect to age gate check ('{response.url}')")
return response return response

View File

@@ -9,7 +9,7 @@
"""Extractors for https://www.weibo.com/""" """Extractors for https://www.weibo.com/"""
from .common import Extractor, Message, Dispatch from .common import Extractor, Message, Dispatch
from .. import text, util, exception from .. import text, util
from ..cache import cache from ..cache import cache
import random import random
@@ -65,7 +65,7 @@ class WeiboExtractor(Extractor):
if response.history: if response.history:
if "login.sina.com" in response.url: if "login.sina.com" in response.url:
raise exception.AbortExtraction( raise self.exc.AbortExtraction(
f"HTTP redirect to login page " f"HTTP redirect to login page "
f"({response.url.partition('?')[0]})") f"({response.url.partition('?')[0]})")
if "passport.weibo.com" in response.url: if "passport.weibo.com" in response.url:
@@ -189,7 +189,7 @@ class WeiboExtractor(Extractor):
not text.ext_from_url(video["url"]): not text.ext_from_url(video["url"]):
try: try:
video["url"] = self.request_location(video["url"]) video["url"] = self.request_location(video["url"])
except exception.HttpError as exc: except self.exc.HttpError as exc:
self.log.warning("%s: %s", exc.__class__.__name__, exc) self.log.warning("%s: %s", exc.__class__.__name__, exc)
video["url"] = "" video["url"] = ""
@@ -230,7 +230,7 @@ class WeiboExtractor(Extractor):
if not data.get("ok"): if not data.get("ok"):
self.log.debug(response.content) self.log.debug(response.content)
if "since_id" not in params: # first iteration if "since_id" not in params: # first iteration
raise exception.AbortExtraction( raise self.exc.AbortExtraction(
f'"{data.get("msg") or "unknown error"}"') f'"{data.get("msg") or "unknown error"}"')
try: try:
@@ -479,14 +479,14 @@ class WeiboAlbumExtractor(WeiboExtractor):
try: try:
sub = subalbums[int(subalbum)-1] sub = subalbums[int(subalbum)-1]
except Exception: except Exception:
raise exception.NotFoundError("subalbum") raise self.exc.NotFoundError("subalbum")
else: else:
subalbum = text.unquote(subalbum) subalbum = text.unquote(subalbum)
for sub in subalbums: for sub in subalbums:
if sub["pic_title"] == subalbum: if sub["pic_title"] == subalbum:
break break
else: else:
raise exception.NotFoundError("subalbum") raise self.exc.NotFoundError("subalbum")
return ((sub, self._pagination_subalbum(uid, sub)),) return ((sub, self._pagination_subalbum(uid, sub)),)
def _pagination_subalbum(self, uid, sub): def _pagination_subalbum(self, uid, sub):
@@ -504,7 +504,7 @@ class WeiboStatusExtractor(WeiboExtractor):
status = self._status_by_id(self.user) status = self._status_by_id(self.user)
if status.get("ok") != 1: if status.get("ok") != 1:
self.log.debug(status) self.log.debug(status)
raise exception.NotFoundError("status") raise self.exc.NotFoundError("status")
return (status,) return (status,)

View File

@@ -10,7 +10,7 @@
"""Extractors for Wikimedia sites""" """Extractors for Wikimedia sites"""
from .common import BaseExtractor, Message from .common import BaseExtractor, Message
from .. import text, util, exception from .. import text, util
from ..cache import cache from ..cache import cache
@@ -71,7 +71,7 @@ class WikimediaExtractor(BaseExtractor):
response = self.request(url, method="HEAD", fatal=None) response = self.request(url, method="HEAD", fatal=None)
if response.status_code < 400: if response.status_code < 400:
return url return url
raise exception.AbortExtraction("Unable to find API endpoint") raise self.exc.AbortExtraction("Unable to find API endpoint")
def prepare_info(self, info): def prepare_info(self, info):
"""Adjust the content of an image info object""" """Adjust the content of an image info object"""

View File

@@ -9,7 +9,7 @@
"""Extractors for XenForo forums""" """Extractors for XenForo forums"""
from .common import BaseExtractor, Message from .common import BaseExtractor, Message
from .. import text, util, exception from .. import text, util
from ..cache import cache from ..cache import cache
import binascii import binascii
@@ -165,7 +165,7 @@ class XenforoExtractor(BaseExtractor):
def request_page(self, url): def request_page(self, url):
try: try:
return self.request(url) return self.request(url)
except exception.HttpError as exc: except self.exc.HttpError as exc:
if exc.status == 403 and b">Log in<" in exc.response.content: if exc.status == 403 and b">Log in<" in exc.response.content:
self._require_auth(exc.response) self._require_auth(exc.response)
raise raise
@@ -197,7 +197,7 @@ class XenforoExtractor(BaseExtractor):
if not response.history: if not response.history:
err = self._extract_error(response.text) err = self._extract_error(response.text)
err = f'"{err}"' if err else None err = f'"{err}"' if err else None
raise exception.AuthenticationError(err) raise self.exc.AuthenticationError(err)
return { return {
cookie.name: cookie.value cookie.name: cookie.value
@@ -420,7 +420,7 @@ class XenforoExtractor(BaseExtractor):
return main["contentUrl"], media return main["contentUrl"], media
def _require_auth(self, response=None): def _require_auth(self, response=None):
raise exception.AuthRequired( raise self.exc.AuthRequired(
("username & password", "authenticated cookies"), None, ("username & password", "authenticated cookies"), None,
None if response is None else self._extract_error(response.text)) None if response is None else self._extract_error(response.text))
@@ -473,7 +473,7 @@ class XenforoPostExtractor(XenforoExtractor):
pos = page.find(f'data-content="post-{post_id}"') pos = page.find(f'data-content="post-{post_id}"')
if pos < 0: if pos < 0:
raise exception.NotFoundError("post") raise self.exc.NotFoundError("post")
html = text.extract(page, "<article ", "<footer", pos-200)[0] html = text.extract(page, "<article ", "<footer", pos-200)[0]
self._parse_thread(page) self._parse_thread(page)

View File

@@ -9,7 +9,7 @@
"""Extractors for https://xfolio.jp/""" """Extractors for https://xfolio.jp/"""
from .common import Extractor, Message from .common import Extractor, Message
from .. import text, exception from .. import text
BASE_PATTERN = r"(?:https?://)?xfolio\.jp(?:/[^/?#]+)?" BASE_PATTERN = r"(?:https?://)?xfolio\.jp(?:/[^/?#]+)?"
@@ -38,7 +38,7 @@ class XfolioExtractor(Extractor):
response = Extractor.request(self, url, **kwargs) response = Extractor.request(self, url, **kwargs)
if "/system/recaptcha" in response.url: if "/system/recaptcha" in response.url:
raise exception.AbortExtraction("Bot check / CAPTCHA page") raise self.exc.AbortExtraction("Bot check / CAPTCHA page")
return response return response

View File

@@ -9,7 +9,7 @@
"""Extractors for sites supported by youtube-dl""" """Extractors for sites supported by youtube-dl"""
from .common import Extractor, Message from .common import Extractor, Message
from .. import ytdl, config, exception from .. import ytdl, config
class YoutubeDLExtractor(Extractor): class YoutubeDLExtractor(Extractor):
@@ -39,7 +39,7 @@ class YoutubeDLExtractor(Extractor):
self.ytdl_ie_key = ie.ie_key() self.ytdl_ie_key = ie.ie_key()
break break
if not generic and self.ytdl_ie_key == "Generic": if not generic and self.ytdl_ie_key == "Generic":
raise exception.NoExtractorError() raise self.exc.NoExtractorError()
self.force_generic_extractor = False self.force_generic_extractor = False
if self.ytdl_ie_key == "Generic" and config.interpolate( if self.ytdl_ie_key == "Generic" and config.interpolate(
@@ -94,9 +94,9 @@ class YoutubeDLExtractor(Extractor):
ytdl_instance.get_info_extractor(self.ytdl_ie_key), ytdl_instance.get_info_extractor(self.ytdl_ie_key),
False, {}, True) False, {}, True)
# except ytdl_module.utils.YoutubeDLError: # except ytdl_module.utils.YoutubeDLError:
# raise exception.AbortExtraction("Failed to extract video data") # raise self.exc.AbortExtraction("Failed to extract video data")
except Exception as exc: except Exception as exc:
raise exception.AbortExtraction( raise self.exc.AbortExtraction(
f"Failed to extract video data " f"Failed to extract video data "
f"({exc.__class__.__name__}: {exc})") f"({exc.__class__.__name__}: {exc})")

View File

@@ -10,7 +10,7 @@
from .booru import BooruExtractor from .booru import BooruExtractor
from ..cache import cache from ..cache import cache
from .. import text, util, exception from .. import text, util
import collections import collections
BASE_PATTERN = r"(?:https?://)?(?:www\.)?zerochan\.net" BASE_PATTERN = r"(?:https?://)?(?:www\.)?zerochan\.net"
@@ -59,7 +59,7 @@ class ZerochanExtractor(BooruExtractor):
response = self.request( response = self.request(
url, method="POST", headers=headers, data=data, expected=(500,)) url, method="POST", headers=headers, data=data, expected=(500,))
if not response.history: if not response.history:
raise exception.AuthenticationError() raise self.exc.AuthenticationError()
return response.cookies return response.cookies
@@ -196,7 +196,7 @@ class ZerochanTagExtractor(ZerochanExtractor):
try: try:
page = self.request( page = self.request(
url, params=params, expected=(500,)).text url, params=params, expected=(500,)).text
except exception.HttpError as exc: except self.exc.HttpError as exc:
if exc.status == 404: if exc.status == 404:
return return
raise raise
@@ -241,7 +241,7 @@ class ZerochanTagExtractor(ZerochanExtractor):
try: try:
response = self.request( response = self.request(
url, params=params, allow_redirects=False) url, params=params, allow_redirects=False)
except exception.HttpError as exc: except self.exc.HttpError as exc:
if exc.status == 404: if exc.status == 404:
return return
raise raise
@@ -251,7 +251,7 @@ class ZerochanTagExtractor(ZerochanExtractor):
self.log.warning("HTTP redirect to %s", url) self.log.warning("HTTP redirect to %s", url)
if self.config("redirects"): if self.config("redirects"):
continue continue
raise exception.AbortExtraction() raise self.exc.AbortExtraction()
data = response.json() data = response.json()
try: try:
@@ -293,7 +293,7 @@ class ZerochanImageExtractor(ZerochanExtractor):
try: try:
post = self._parse_entry_html(image_id) post = self._parse_entry_html(image_id)
except exception.HttpError as exc: except self.exc.HttpError as exc:
if exc.status in {404, 410}: if exc.status in {404, 410}:
if msg := text.extr(exc.response.text, "<h2>", "<"): if msg := text.extr(exc.response.text, "<h2>", "<"):
self.log.warning(f"'{msg}'") self.log.warning(f"'{msg}'")