[common] add reference to 'exception' module to Extractor class
- remove 'exception' imports
- replace with 'self.exc'
This commit is contained in:
@@ -1,6 +1,6 @@
|
|||||||
# -*- coding: utf-8 -*-
|
# -*- coding: utf-8 -*-
|
||||||
|
|
||||||
# Copyright 2024-2025 Mike Fährmann
|
# Copyright 2024-2026 Mike Fährmann
|
||||||
#
|
#
|
||||||
# This program is free software; you can redistribute it and/or modify
|
# This program is free software; you can redistribute it and/or modify
|
||||||
# it under the terms of the GNU General Public License version 2 as
|
# it under the terms of the GNU General Public License version 2 as
|
||||||
@@ -9,7 +9,7 @@
|
|||||||
"""Extractors for https://archiveofourown.org/"""
|
"""Extractors for https://archiveofourown.org/"""
|
||||||
|
|
||||||
from .common import Extractor, Message, Dispatch
|
from .common import Extractor, Message, Dispatch
|
||||||
from .. import text, util, exception
|
from .. import text, util
|
||||||
from ..cache import cache
|
from ..cache import cache
|
||||||
|
|
||||||
BASE_PATTERN = (r"(?:https?://)?(?:www\.)?"
|
BASE_PATTERN = (r"(?:https?://)?(?:www\.)?"
|
||||||
@@ -88,11 +88,11 @@ class Ao3Extractor(Extractor):
|
|||||||
|
|
||||||
response = self.request(url, method="POST", data=data)
|
response = self.request(url, method="POST", data=data)
|
||||||
if not response.history:
|
if not response.history:
|
||||||
raise exception.AuthenticationError()
|
raise self.exc.AuthenticationError()
|
||||||
|
|
||||||
remember = response.history[0].cookies.get("remember_user_token")
|
remember = response.history[0].cookies.get("remember_user_token")
|
||||||
if not remember:
|
if not remember:
|
||||||
raise exception.AuthenticationError()
|
raise self.exc.AuthenticationError()
|
||||||
|
|
||||||
return {
|
return {
|
||||||
"remember_user_token": remember,
|
"remember_user_token": remember,
|
||||||
@@ -142,12 +142,12 @@ class Ao3WorkExtractor(Ao3Extractor):
|
|||||||
response = self.request(url, notfound=True)
|
response = self.request(url, notfound=True)
|
||||||
|
|
||||||
if response.url.endswith("/users/login?restricted=true"):
|
if response.url.endswith("/users/login?restricted=true"):
|
||||||
raise exception.AuthorizationError(
|
raise self.exc.AuthorizationError(
|
||||||
"Login required to access member-only works")
|
"Login required to access member-only works")
|
||||||
page = response.text
|
page = response.text
|
||||||
if len(page) < 20000 and \
|
if len(page) < 20000 and \
|
||||||
'<h2 class="landmark heading">Adult Content Warning</' in page:
|
'<h2 class="landmark heading">Adult Content Warning</' in page:
|
||||||
raise exception.AbortExtraction("Adult Content")
|
raise self.exc.AbortExtraction("Adult Content")
|
||||||
|
|
||||||
extr = text.extract_from(page)
|
extr = text.extract_from(page)
|
||||||
|
|
||||||
|
|||||||
@@ -7,7 +7,7 @@
|
|||||||
"""Extractors for https://arca.live/"""
|
"""Extractors for https://arca.live/"""
|
||||||
|
|
||||||
from .common import Extractor, Message
|
from .common import Extractor, Message
|
||||||
from .. import text, util, exception
|
from .. import text, util
|
||||||
|
|
||||||
BASE_PATTERN = r"(?:https?://)?(?:www\.)?arca\.live"
|
BASE_PATTERN = r"(?:https?://)?(?:www\.)?arca\.live"
|
||||||
|
|
||||||
@@ -173,7 +173,7 @@ class ArcaliveAPI():
|
|||||||
msg = "API request failed: " + msg
|
msg = "API request failed: " + msg
|
||||||
else:
|
else:
|
||||||
msg = "API request failed"
|
msg = "API request failed"
|
||||||
raise exception.AbortExtraction(msg)
|
raise self.exc.AbortExtraction(msg)
|
||||||
|
|
||||||
def _pagination(self, endpoint, params, key):
|
def _pagination(self, endpoint, params, key):
|
||||||
while True:
|
while True:
|
||||||
|
|||||||
@@ -9,7 +9,7 @@
|
|||||||
"""Extractors for https://www.artstation.com/"""
|
"""Extractors for https://www.artstation.com/"""
|
||||||
|
|
||||||
from .common import Extractor, Message
|
from .common import Extractor, Message
|
||||||
from .. import text, util, exception
|
from .. import text, util
|
||||||
import itertools
|
import itertools
|
||||||
|
|
||||||
|
|
||||||
@@ -121,7 +121,7 @@ class ArtstationExtractor(Extractor):
|
|||||||
|
|
||||||
try:
|
try:
|
||||||
data = self.request_json(url)
|
data = self.request_json(url)
|
||||||
except exception.HttpError as exc:
|
except self.exc.HttpError as exc:
|
||||||
self.log.warning(exc)
|
self.log.warning(exc)
|
||||||
return
|
return
|
||||||
|
|
||||||
@@ -239,7 +239,7 @@ class ArtstationAlbumExtractor(ArtstationExtractor):
|
|||||||
if album["id"] == self.album_id:
|
if album["id"] == self.album_id:
|
||||||
break
|
break
|
||||||
else:
|
else:
|
||||||
raise exception.NotFoundError("album")
|
raise self.exc.NotFoundError("album")
|
||||||
|
|
||||||
return {
|
return {
|
||||||
"userinfo": userinfo,
|
"userinfo": userinfo,
|
||||||
|
|||||||
@@ -9,7 +9,7 @@
|
|||||||
"""Extractors for https://aryion.com/"""
|
"""Extractors for https://aryion.com/"""
|
||||||
|
|
||||||
from .common import Extractor, Message
|
from .common import Extractor, Message
|
||||||
from .. import text, util, dt, exception
|
from .. import text, util, dt
|
||||||
from ..cache import cache
|
from ..cache import cache
|
||||||
from email.utils import parsedate_tz
|
from email.utils import parsedate_tz
|
||||||
|
|
||||||
@@ -52,7 +52,7 @@ class AryionExtractor(Extractor):
|
|||||||
|
|
||||||
response = self.request(url, method="POST", data=data)
|
response = self.request(url, method="POST", data=data)
|
||||||
if b"You have been successfully logged in." not in response.content:
|
if b"You have been successfully logged in." not in response.content:
|
||||||
raise exception.AuthenticationError()
|
raise self.exc.AuthenticationError()
|
||||||
return {c: response.cookies[c] for c in self.cookies_names}
|
return {c: response.cookies[c] for c in self.cookies_names}
|
||||||
|
|
||||||
def items(self):
|
def items(self):
|
||||||
@@ -258,7 +258,7 @@ class AryionWatchExtractor(AryionExtractor):
|
|||||||
|
|
||||||
def posts(self):
|
def posts(self):
|
||||||
if not self.cookies_check(self.cookies_names):
|
if not self.cookies_check(self.cookies_names):
|
||||||
raise exception.AuthRequired(
|
raise self.exc.AuthRequired(
|
||||||
("username & password", "authenticated cookies"),
|
("username & password", "authenticated cookies"),
|
||||||
"watched Submissions")
|
"watched Submissions")
|
||||||
self.cookies.set("g4p_msgpage_style", "plain", domain="aryion.com")
|
self.cookies.set("g4p_msgpage_style", "plain", domain="aryion.com")
|
||||||
|
|||||||
@@ -9,7 +9,7 @@
|
|||||||
"""Extractors for https://www.behance.net/"""
|
"""Extractors for https://www.behance.net/"""
|
||||||
|
|
||||||
from .common import Extractor, Message
|
from .common import Extractor, Message
|
||||||
from .. import text, util, exception
|
from .. import text, util
|
||||||
|
|
||||||
|
|
||||||
class BehanceExtractor(Extractor):
|
class BehanceExtractor(Extractor):
|
||||||
@@ -139,13 +139,13 @@ class BehanceGalleryExtractor(BehanceExtractor):
|
|||||||
if not data["modules"]:
|
if not data["modules"]:
|
||||||
access = data.get("matureAccess")
|
access = data.get("matureAccess")
|
||||||
if access == "logged-out":
|
if access == "logged-out":
|
||||||
raise exception.AuthorizationError(
|
raise self.exc.AuthorizationError(
|
||||||
"Mature content galleries require logged-in cookies")
|
"Mature content galleries require logged-in cookies")
|
||||||
if access == "restricted-safe":
|
if access == "restricted-safe":
|
||||||
raise exception.AuthorizationError(
|
raise self.exc.AuthorizationError(
|
||||||
"Mature content blocked in account settings")
|
"Mature content blocked in account settings")
|
||||||
if access and access != "allowed":
|
if access and access != "allowed":
|
||||||
raise exception.AuthorizationError()
|
raise self.exc.AuthorizationError()
|
||||||
return ()
|
return ()
|
||||||
|
|
||||||
results = []
|
results = []
|
||||||
|
|||||||
@@ -9,7 +9,7 @@
|
|||||||
"""Extractors for https://www.bellazon.com/"""
|
"""Extractors for https://www.bellazon.com/"""
|
||||||
|
|
||||||
from .common import Extractor, Message
|
from .common import Extractor, Message
|
||||||
from .. import text, exception
|
from .. import text
|
||||||
|
|
||||||
BASE_PATTERN = r"(?:https?://)?(?:www\.)?bellazon\.com/main"
|
BASE_PATTERN = r"(?:https?://)?(?:www\.)?bellazon\.com/main"
|
||||||
|
|
||||||
@@ -207,7 +207,7 @@ class BellazonPostExtractor(BellazonExtractor):
|
|||||||
|
|
||||||
pos = page.find('id="elComment_' + post_id)
|
pos = page.find('id="elComment_' + post_id)
|
||||||
if pos < 0:
|
if pos < 0:
|
||||||
raise exception.NotFoundError("post")
|
raise self.exc.NotFoundError("post")
|
||||||
html = text.extract(page, "<article ", "</article>", pos-100)[0]
|
html = text.extract(page, "<article ", "</article>", pos-100)[0]
|
||||||
|
|
||||||
self.kwdict["thread"] = self._parse_thread(page)
|
self.kwdict["thread"] = self._parse_thread(page)
|
||||||
|
|||||||
@@ -7,7 +7,7 @@
|
|||||||
"""Extractors for https://www.bilibili.com/"""
|
"""Extractors for https://www.bilibili.com/"""
|
||||||
|
|
||||||
from .common import Extractor, Message
|
from .common import Extractor, Message
|
||||||
from .. import text, util, exception
|
from .. import text, util
|
||||||
|
|
||||||
|
|
||||||
class BilibiliExtractor(Extractor):
|
class BilibiliExtractor(Extractor):
|
||||||
@@ -123,7 +123,7 @@ class BilibiliAPI():
|
|||||||
|
|
||||||
if data["code"]:
|
if data["code"]:
|
||||||
self.extractor.log.debug("Server response: %s", data)
|
self.extractor.log.debug("Server response: %s", data)
|
||||||
raise exception.AbortExtraction("API request failed")
|
raise self.exc.AbortExtraction("API request failed")
|
||||||
|
|
||||||
return data
|
return data
|
||||||
|
|
||||||
@@ -151,7 +151,7 @@ class BilibiliAPI():
|
|||||||
page, "window.__INITIAL_STATE__=", "};") + "}")
|
page, "window.__INITIAL_STATE__=", "};") + "}")
|
||||||
except Exception:
|
except Exception:
|
||||||
if "window._riskdata_" not in page:
|
if "window._riskdata_" not in page:
|
||||||
raise exception.AbortExtraction(
|
raise self.exc.AbortExtraction(
|
||||||
article_id + ": Unable to extract INITIAL_STATE data")
|
article_id + ": Unable to extract INITIAL_STATE data")
|
||||||
self.extractor.wait(seconds=300)
|
self.extractor.wait(seconds=300)
|
||||||
|
|
||||||
@@ -174,9 +174,9 @@ class BilibiliAPI():
|
|||||||
|
|
||||||
if data["code"] != 0:
|
if data["code"] != 0:
|
||||||
self.extractor.log.debug("Server response: %s", data)
|
self.extractor.log.debug("Server response: %s", data)
|
||||||
raise exception.AbortExtraction(
|
raise self.exc.AbortExtraction(
|
||||||
"API request failed. Are you logges in?")
|
"API request failed. Are you logges in?")
|
||||||
try:
|
try:
|
||||||
return data["data"]["profile"]["mid"]
|
return data["data"]["profile"]["mid"]
|
||||||
except Exception:
|
except Exception:
|
||||||
raise exception.AbortExtraction("API request failed")
|
raise self.exc.AbortExtraction("API request failed")
|
||||||
|
|||||||
@@ -9,7 +9,7 @@
|
|||||||
"""Extractors for https://bsky.app/"""
|
"""Extractors for https://bsky.app/"""
|
||||||
|
|
||||||
from .common import Extractor, Message, Dispatch
|
from .common import Extractor, Message, Dispatch
|
||||||
from .. import text, util, exception
|
from .. import text, util
|
||||||
from ..cache import cache, memcache
|
from ..cache import cache, memcache
|
||||||
|
|
||||||
BASE_PATTERN = (r"(?:https?://)?"
|
BASE_PATTERN = (r"(?:https?://)?"
|
||||||
@@ -96,7 +96,7 @@ class BlueskyExtractor(Extractor):
|
|||||||
uri = record["value"]["subject"]["uri"]
|
uri = record["value"]["subject"]["uri"]
|
||||||
if "/app.bsky.feed.post/" in uri:
|
if "/app.bsky.feed.post/" in uri:
|
||||||
yield from self.api.get_post_thread_uri(uri, depth)
|
yield from self.api.get_post_thread_uri(uri, depth)
|
||||||
except exception.ControlException:
|
except self.exc.ControlException:
|
||||||
pass # deleted post
|
pass # deleted post
|
||||||
except Exception as exc:
|
except Exception as exc:
|
||||||
self.log.debug(record, exc_info=exc)
|
self.log.debug(record, exc_info=exc)
|
||||||
@@ -569,7 +569,7 @@ class BlueskyAPI():
|
|||||||
|
|
||||||
if response.status_code != 200:
|
if response.status_code != 200:
|
||||||
self.log.debug("Server response: %s", data)
|
self.log.debug("Server response: %s", data)
|
||||||
raise exception.AuthenticationError(
|
raise self.exc.AuthenticationError(
|
||||||
f"\"{data.get('error')}: {data.get('message')}\"")
|
f"\"{data.get('error')}: {data.get('message')}\"")
|
||||||
|
|
||||||
_refresh_token_cache.update(self.username, data["refreshJwt"])
|
_refresh_token_cache.update(self.username, data["refreshJwt"])
|
||||||
@@ -600,7 +600,7 @@ class BlueskyAPI():
|
|||||||
msg = f"{msg} ({response.status_code} {response.reason})"
|
msg = f"{msg} ({response.status_code} {response.reason})"
|
||||||
|
|
||||||
self.extractor.log.debug("Server response: %s", response.text)
|
self.extractor.log.debug("Server response: %s", response.text)
|
||||||
raise exception.AbortExtraction(msg)
|
raise self.exc.AbortExtraction(msg)
|
||||||
|
|
||||||
def _pagination(self, endpoint, params,
|
def _pagination(self, endpoint, params,
|
||||||
key="feed", root=None, check_empty=False):
|
key="feed", root=None, check_empty=False):
|
||||||
|
|||||||
@@ -7,7 +7,7 @@
|
|||||||
"""Extractors for https://www.boosty.to/"""
|
"""Extractors for https://www.boosty.to/"""
|
||||||
|
|
||||||
from .common import Extractor, Message
|
from .common import Extractor, Message
|
||||||
from .. import text, util, exception
|
from .. import text, util
|
||||||
import itertools
|
import itertools
|
||||||
|
|
||||||
BASE_PATTERN = r"(?:https?://)?boosty\.to"
|
BASE_PATTERN = r"(?:https?://)?boosty\.to"
|
||||||
@@ -380,14 +380,14 @@ class BoostyAPI():
|
|||||||
return response.json()
|
return response.json()
|
||||||
|
|
||||||
elif response.status_code < 400:
|
elif response.status_code < 400:
|
||||||
raise exception.AuthenticationError("Invalid API access token")
|
raise self.exc.AuthenticationError("Invalid API access token")
|
||||||
|
|
||||||
elif response.status_code == 429:
|
elif response.status_code == 429:
|
||||||
self.extractor.wait(seconds=600)
|
self.extractor.wait(seconds=600)
|
||||||
|
|
||||||
else:
|
else:
|
||||||
self.extractor.log.debug(response.text)
|
self.extractor.log.debug(response.text)
|
||||||
raise exception.AbortExtraction("API request failed")
|
raise self.exc.AbortExtraction("API request failed")
|
||||||
|
|
||||||
def _pagination(self, endpoint, params, transform=None, key=None):
|
def _pagination(self, endpoint, params, transform=None, key=None):
|
||||||
if "is_only_allowed" not in params and self.extractor.only_allowed:
|
if "is_only_allowed" not in params and self.extractor.only_allowed:
|
||||||
|
|||||||
@@ -10,7 +10,7 @@
|
|||||||
|
|
||||||
from .common import Extractor
|
from .common import Extractor
|
||||||
from .lolisafe import LolisafeAlbumExtractor
|
from .lolisafe import LolisafeAlbumExtractor
|
||||||
from .. import text, util, config, exception
|
from .. import text, util, config
|
||||||
from ..cache import memcache
|
from ..cache import memcache
|
||||||
import random
|
import random
|
||||||
|
|
||||||
@@ -110,7 +110,7 @@ class BunkrAlbumExtractor(LolisafeAlbumExtractor):
|
|||||||
self.log.debug("Redirect to known CF challenge domain '%s'",
|
self.log.debug("Redirect to known CF challenge domain '%s'",
|
||||||
root)
|
root)
|
||||||
|
|
||||||
except exception.HttpError as exc:
|
except self.exc.HttpError as exc:
|
||||||
if exc.status != 403:
|
if exc.status != 403:
|
||||||
raise
|
raise
|
||||||
|
|
||||||
@@ -125,7 +125,7 @@ class BunkrAlbumExtractor(LolisafeAlbumExtractor):
|
|||||||
pass
|
pass
|
||||||
else:
|
else:
|
||||||
if not DOMAINS:
|
if not DOMAINS:
|
||||||
raise exception.AbortExtraction(
|
raise self.exc.AbortExtraction(
|
||||||
"All Bunkr domains require solving a CF challenge")
|
"All Bunkr domains require solving a CF challenge")
|
||||||
|
|
||||||
# select alternative domain
|
# select alternative domain
|
||||||
@@ -172,15 +172,15 @@ class BunkrAlbumExtractor(LolisafeAlbumExtractor):
|
|||||||
item, 'timestamp: "', '"'), "%H:%M:%S %d/%m/%Y")
|
item, 'timestamp: "', '"'), "%H:%M:%S %d/%m/%Y")
|
||||||
|
|
||||||
yield file
|
yield file
|
||||||
except exception.ControlException:
|
except self.exc.ControlException:
|
||||||
raise
|
raise
|
||||||
except Exception as exc:
|
except Exception as exc:
|
||||||
self.log.error("%s: %s", exc.__class__.__name__, exc)
|
self.log.error("%s: %s", exc.__class__.__name__, exc)
|
||||||
self.log.debug("%s", item, exc_info=exc)
|
self.log.debug("%s", item, exc_info=exc)
|
||||||
if isinstance(exc, exception.HttpError) and \
|
if isinstance(exc, self.exc.HttpError) and \
|
||||||
exc.status == 400 and \
|
exc.status == 400 and \
|
||||||
exc.response.url.startswith(self.root_api):
|
exc.response.url.startswith(self.root_api):
|
||||||
raise exception.AbortExtraction("Album deleted")
|
raise self.exc.AbortExtraction("Album deleted")
|
||||||
|
|
||||||
def _extract_file(self, data_id):
|
def _extract_file(self, data_id):
|
||||||
referer = f"{self.root_dl}/file/{data_id}"
|
referer = f"{self.root_dl}/file/{data_id}"
|
||||||
|
|||||||
@@ -9,7 +9,7 @@
|
|||||||
"""Extractors for https://www.civitai.com/"""
|
"""Extractors for https://www.civitai.com/"""
|
||||||
|
|
||||||
from .common import Extractor, Message, Dispatch
|
from .common import Extractor, Message, Dispatch
|
||||||
from .. import text, util, exception
|
from .. import text, util
|
||||||
from ..cache import memcache
|
from ..cache import memcache
|
||||||
import itertools
|
import itertools
|
||||||
import time
|
import time
|
||||||
@@ -201,7 +201,7 @@ class CivitaiExtractor(Extractor):
|
|||||||
if "Authorization" not in self.api.headers and \
|
if "Authorization" not in self.api.headers and \
|
||||||
not self.cookies.get(
|
not self.cookies.get(
|
||||||
"__Secure-civitai-token", domain=".civitai.com"):
|
"__Secure-civitai-token", domain=".civitai.com"):
|
||||||
raise exception.AuthRequired(("api-key", "authenticated cookies"))
|
raise self.exc.AuthRequired(("api-key", "authenticated cookies"))
|
||||||
|
|
||||||
def _parse_query(self, value):
|
def _parse_query(self, value):
|
||||||
return text.parse_query_list(
|
return text.parse_query_list(
|
||||||
|
|||||||
@@ -9,7 +9,7 @@
|
|||||||
"""Extractors for https://comick.io/"""
|
"""Extractors for https://comick.io/"""
|
||||||
|
|
||||||
from .common import GalleryExtractor, ChapterExtractor, MangaExtractor, Message
|
from .common import GalleryExtractor, ChapterExtractor, MangaExtractor, Message
|
||||||
from .. import text, exception
|
from .. import text
|
||||||
from ..cache import memcache
|
from ..cache import memcache
|
||||||
|
|
||||||
BASE_PATTERN = r"(?:https?://)?(?:www\.)?comick\.io"
|
BASE_PATTERN = r"(?:https?://)?(?:www\.)?comick\.io"
|
||||||
@@ -71,7 +71,7 @@ class ComickChapterExtractor(ComickBase, ChapterExtractor):
|
|||||||
while True:
|
while True:
|
||||||
try:
|
try:
|
||||||
props = _chapter_info(self, manga, chstr)
|
props = _chapter_info(self, manga, chstr)
|
||||||
except exception.HttpError as exc:
|
except self.exc.HttpError as exc:
|
||||||
if exc.response.status_code != 404:
|
if exc.response.status_code != 404:
|
||||||
raise
|
raise
|
||||||
if exc.response.headers.get(
|
if exc.response.headers.get(
|
||||||
@@ -84,7 +84,7 @@ class ComickChapterExtractor(ComickBase, ChapterExtractor):
|
|||||||
manga = _manga_info(self, slug)
|
manga = _manga_info(self, slug)
|
||||||
continue
|
continue
|
||||||
if b'"notFound":true' in exc.response.content:
|
if b'"notFound":true' in exc.response.content:
|
||||||
raise exception.NotFoundError("chapter")
|
raise self.exc.NotFoundError("chapter")
|
||||||
raise
|
raise
|
||||||
|
|
||||||
if "__N_REDIRECT" in props:
|
if "__N_REDIRECT" in props:
|
||||||
|
|||||||
@@ -54,6 +54,7 @@ class Extractor():
|
|||||||
request_interval_429 = 60.0
|
request_interval_429 = 60.0
|
||||||
request_timestamp = 0.0
|
request_timestamp = 0.0
|
||||||
finalize = skip = None
|
finalize = skip = None
|
||||||
|
exc = exception
|
||||||
|
|
||||||
def __init__(self, match):
|
def __init__(self, match):
|
||||||
self.log = logging.getLogger(self.category)
|
self.log = logging.getLogger(self.category)
|
||||||
|
|||||||
@@ -9,7 +9,7 @@
|
|||||||
"""Extractors for https://cyberfile.me/"""
|
"""Extractors for https://cyberfile.me/"""
|
||||||
|
|
||||||
from .common import Extractor, Message
|
from .common import Extractor, Message
|
||||||
from .. import text, exception
|
from .. import text
|
||||||
|
|
||||||
BASE_PATTERN = r"(?:https?://)?(?:www\.)?cyberfile\.me"
|
BASE_PATTERN = r"(?:https?://)?(?:www\.)?cyberfile\.me"
|
||||||
|
|
||||||
@@ -39,7 +39,7 @@ class CyberfileExtractor(Extractor):
|
|||||||
resp = self.request_json(
|
resp = self.request_json(
|
||||||
url_pw, method="POST", headers=headers, data=data_pw)
|
url_pw, method="POST", headers=headers, data=data_pw)
|
||||||
if not resp.get("success"):
|
if not resp.get("success"):
|
||||||
raise exception.AuthorizationError(f"'{resp.get('msg')}'")
|
raise self.exc.AuthorizationError(f"'{resp.get('msg')}'")
|
||||||
resp = self.request_json(
|
resp = self.request_json(
|
||||||
url, method="POST", headers=headers, data=data)
|
url, method="POST", headers=headers, data=data)
|
||||||
|
|
||||||
|
|||||||
@@ -9,7 +9,7 @@
|
|||||||
"""Extractors for https://www.deviantart.com/"""
|
"""Extractors for https://www.deviantart.com/"""
|
||||||
|
|
||||||
from .common import Extractor, Message, Dispatch
|
from .common import Extractor, Message, Dispatch
|
||||||
from .. import text, util, dt, exception
|
from .. import text, util, dt
|
||||||
from ..cache import cache, memcache
|
from ..cache import cache, memcache
|
||||||
import collections
|
import collections
|
||||||
import mimetypes
|
import mimetypes
|
||||||
@@ -123,7 +123,7 @@ class DeviantartExtractor(Extractor):
|
|||||||
self.group = False
|
self.group = False
|
||||||
elif group == "skip":
|
elif group == "skip":
|
||||||
self.log.info("Skipping group '%s'", self.user)
|
self.log.info("Skipping group '%s'", self.user)
|
||||||
raise exception.AbortExtraction()
|
raise self.exc.AbortExtraction()
|
||||||
else:
|
else:
|
||||||
self.subcategory = "group-" + self.subcategory
|
self.subcategory = "group-" + self.subcategory
|
||||||
self.group = True
|
self.group = True
|
||||||
@@ -457,7 +457,7 @@ class DeviantartExtractor(Extractor):
|
|||||||
for subfolder in folder["subfolders"]:
|
for subfolder in folder["subfolders"]:
|
||||||
if subfolder["folderid"] == uuid:
|
if subfolder["folderid"] == uuid:
|
||||||
return subfolder
|
return subfolder
|
||||||
raise exception.NotFoundError("folder")
|
raise self.exc.NotFoundError("folder")
|
||||||
|
|
||||||
def _folder_urls(self, folders, category, extractor):
|
def _folder_urls(self, folders, category, extractor):
|
||||||
base = f"{self.root}/{self.user}/{category}/"
|
base = f"{self.root}/{self.user}/{category}/"
|
||||||
@@ -1027,7 +1027,7 @@ class DeviantartDeviationExtractor(DeviantartExtractor):
|
|||||||
page = self._limited_request(url, notfound=True).text
|
page = self._limited_request(url, notfound=True).text
|
||||||
uuid = text.extr(page, '"deviationUuid\\":\\"', '\\')
|
uuid = text.extr(page, '"deviationUuid\\":\\"', '\\')
|
||||||
if not uuid:
|
if not uuid:
|
||||||
raise exception.NotFoundError("deviation")
|
raise self.exc.NotFoundError("deviation")
|
||||||
|
|
||||||
deviation = self.api.deviation(uuid)
|
deviation = self.api.deviation(uuid)
|
||||||
deviation["_page"] = page
|
deviation["_page"] = page
|
||||||
@@ -1111,7 +1111,7 @@ class DeviantartSearchExtractor(DeviantartExtractor):
|
|||||||
response = self.request(url, params=params)
|
response = self.request(url, params=params)
|
||||||
|
|
||||||
if response.history and "/users/login" in response.url:
|
if response.history and "/users/login" in response.url:
|
||||||
raise exception.AbortExtraction("HTTP redirect to login page")
|
raise self.exc.AbortExtraction("HTTP redirect to login page")
|
||||||
page = response.text
|
page = response.text
|
||||||
|
|
||||||
for user, type, did in find(page)[:-3:3]:
|
for user, type, did in find(page)[:-3:3]:
|
||||||
@@ -1476,7 +1476,7 @@ class DeviantartOAuthAPI():
|
|||||||
|
|
||||||
if response.status_code != 200:
|
if response.status_code != 200:
|
||||||
self.log.debug("Server response: %s", data)
|
self.log.debug("Server response: %s", data)
|
||||||
raise exception.AuthenticationError(
|
raise self.exc.AuthenticationError(
|
||||||
f"\"{data.get('error_description')}\" ({data.get('error')})")
|
f"\"{data.get('error_description')}\" ({data.get('error')})")
|
||||||
if refresh_token_key:
|
if refresh_token_key:
|
||||||
_refresh_token_cache.update(
|
_refresh_token_cache.update(
|
||||||
@@ -1515,9 +1515,9 @@ class DeviantartOAuthAPI():
|
|||||||
|
|
||||||
error = data.get("error_description")
|
error = data.get("error_description")
|
||||||
if error == "User not found.":
|
if error == "User not found.":
|
||||||
raise exception.NotFoundError("user or group")
|
raise self.exc.NotFoundError("user or group")
|
||||||
if error == "Deviation not downloadable.":
|
if error == "Deviation not downloadable.":
|
||||||
raise exception.AuthorizationError()
|
raise self.exc.AuthorizationError()
|
||||||
|
|
||||||
self.log.debug(response.text)
|
self.log.debug(response.text)
|
||||||
msg = f"API responded with {status} {response.reason}"
|
msg = f"API responded with {status} {response.reason}"
|
||||||
@@ -1808,7 +1808,7 @@ class DeviantartEclipseAPI():
|
|||||||
|
|
||||||
pos = page.find('\\"name\\":\\"watching\\"')
|
pos = page.find('\\"name\\":\\"watching\\"')
|
||||||
if pos < 0:
|
if pos < 0:
|
||||||
raise exception.NotFoundError("'watching' module ID")
|
raise self.exc.NotFoundError("'watching' module ID")
|
||||||
module_id = text.rextr(page, '\\"id\\":', ',', pos).strip('" ')
|
module_id = text.rextr(page, '\\"id\\":', ',', pos).strip('" ')
|
||||||
|
|
||||||
self._fetch_csrf_token(page)
|
self._fetch_csrf_token(page)
|
||||||
@@ -1863,7 +1863,7 @@ def _login_impl(extr, username, password):
|
|||||||
response = extr.request(url, method="POST", data=data)
|
response = extr.request(url, method="POST", data=data)
|
||||||
|
|
||||||
if not response.history:
|
if not response.history:
|
||||||
raise exception.AuthenticationError()
|
raise extr.exc.AuthenticationError()
|
||||||
|
|
||||||
return {
|
return {
|
||||||
cookie.name: cookie.value
|
cookie.name: cookie.value
|
||||||
|
|||||||
@@ -7,7 +7,7 @@
|
|||||||
"""Extractors for https://discord.com/"""
|
"""Extractors for https://discord.com/"""
|
||||||
|
|
||||||
from .common import Extractor, Message
|
from .common import Extractor, Message
|
||||||
from .. import text, exception
|
from .. import text
|
||||||
|
|
||||||
BASE_PATTERN = r"(?:https?://)?discord\.com"
|
BASE_PATTERN = r"(?:https?://)?discord\.com"
|
||||||
|
|
||||||
@@ -167,10 +167,10 @@ class DiscordExtractor(Extractor):
|
|||||||
yield from self.extract_channel(
|
yield from self.extract_channel(
|
||||||
channel["channel_id"], safe=True)
|
channel["channel_id"], safe=True)
|
||||||
elif not safe:
|
elif not safe:
|
||||||
raise exception.AbortExtraction(
|
raise self.exc.AbortExtraction(
|
||||||
"This channel type is not supported."
|
"This channel type is not supported."
|
||||||
)
|
)
|
||||||
except exception.HttpError as exc:
|
except self.exc.HttpError as exc:
|
||||||
if not (exc.status == 403 and safe):
|
if not (exc.status == 403 and safe):
|
||||||
raise
|
raise
|
||||||
|
|
||||||
@@ -474,7 +474,7 @@ class DiscordAPI():
|
|||||||
try:
|
try:
|
||||||
response = self.extractor.request(
|
response = self.extractor.request(
|
||||||
url, params=params, headers=self.headers)
|
url, params=params, headers=self.headers)
|
||||||
except exception.HttpError as exc:
|
except self.exc.HttpError as exc:
|
||||||
if exc.status == 401:
|
if exc.status == 401:
|
||||||
self._raise_invalid_token()
|
self._raise_invalid_token()
|
||||||
raise
|
raise
|
||||||
@@ -490,7 +490,7 @@ class DiscordAPI():
|
|||||||
offset += len(data)
|
offset += len(data)
|
||||||
|
|
||||||
def _raise_invalid_token(self):
|
def _raise_invalid_token(self):
|
||||||
raise exception.AuthenticationError("""Invalid or missing token.
|
raise self.exc.AuthenticationError("""Invalid or missing token.
|
||||||
Please provide a valid token following these instructions:
|
Please provide a valid token following these instructions:
|
||||||
|
|
||||||
1) Open Discord in your browser (https://discord.com/app);
|
1) Open Discord in your browser (https://discord.com/app);
|
||||||
|
|||||||
@@ -9,7 +9,7 @@
|
|||||||
"""Extractors for https://www.erome.com/"""
|
"""Extractors for https://www.erome.com/"""
|
||||||
|
|
||||||
from .common import Extractor, Message
|
from .common import Extractor, Message
|
||||||
from .. import text, util, exception
|
from .. import text, util
|
||||||
from ..cache import cache
|
from ..cache import cache
|
||||||
import itertools
|
import itertools
|
||||||
|
|
||||||
@@ -74,12 +74,12 @@ class EromeAlbumExtractor(EromeExtractor):
|
|||||||
|
|
||||||
try:
|
try:
|
||||||
page = self.request(url).text
|
page = self.request(url).text
|
||||||
except exception.HttpError as exc:
|
except self.exc.HttpError as exc:
|
||||||
if exc.status == 410:
|
if exc.status == 410:
|
||||||
msg = text.extr(exc.response.text, "<h1>", "<")
|
msg = text.extr(exc.response.text, "<h1>", "<")
|
||||||
else:
|
else:
|
||||||
msg = "Unable to fetch album page"
|
msg = "Unable to fetch album page"
|
||||||
raise exception.AbortExtraction(
|
raise self.exc.AbortExtraction(
|
||||||
f"{album_id}: {msg} ({exc})")
|
f"{album_id}: {msg} ({exc})")
|
||||||
|
|
||||||
title, pos = text.extract(
|
title, pos = text.extract(
|
||||||
|
|||||||
@@ -9,7 +9,7 @@
|
|||||||
"""Extractors for https://e-hentai.org/ and https://exhentai.org/"""
|
"""Extractors for https://e-hentai.org/ and https://exhentai.org/"""
|
||||||
|
|
||||||
from .common import Extractor, Message
|
from .common import Extractor, Message
|
||||||
from .. import text, util, exception
|
from .. import text, util
|
||||||
from ..cache import cache
|
from ..cache import cache
|
||||||
import collections
|
import collections
|
||||||
import itertools
|
import itertools
|
||||||
@@ -53,13 +53,13 @@ class ExhentaiExtractor(Extractor):
|
|||||||
response = Extractor.request(self, url, **kwargs)
|
response = Extractor.request(self, url, **kwargs)
|
||||||
if "Cache-Control" not in response.headers and not response.content:
|
if "Cache-Control" not in response.headers and not response.content:
|
||||||
self.log.info("blank page")
|
self.log.info("blank page")
|
||||||
raise exception.AuthorizationError()
|
raise self.exc.AuthorizationError()
|
||||||
return response
|
return response
|
||||||
|
|
||||||
def login(self):
|
def login(self):
|
||||||
"""Login and set necessary cookies"""
|
"""Login and set necessary cookies"""
|
||||||
if self.LIMIT:
|
if self.LIMIT:
|
||||||
raise exception.AbortExtraction("Image limit reached!")
|
raise self.exc.AbortExtraction("Image limit reached!")
|
||||||
|
|
||||||
if self.cookies_check(self.cookies_names):
|
if self.cookies_check(self.cookies_names):
|
||||||
return
|
return
|
||||||
@@ -99,9 +99,9 @@ class ExhentaiExtractor(Extractor):
|
|||||||
content = response.content
|
content = response.content
|
||||||
if b"You are now logged in as:" not in content:
|
if b"You are now logged in as:" not in content:
|
||||||
if b"The captcha was not entered correctly" in content:
|
if b"The captcha was not entered correctly" in content:
|
||||||
raise exception.AuthenticationError(
|
raise self.exc.AuthenticationError(
|
||||||
"CAPTCHA required. Use cookies instead.")
|
"CAPTCHA required. Use cookies instead.")
|
||||||
raise exception.AuthenticationError()
|
raise self.exc.AuthenticationError()
|
||||||
|
|
||||||
# collect more cookies
|
# collect more cookies
|
||||||
url = self.root + "/favorites.php"
|
url = self.root + "/favorites.php"
|
||||||
@@ -187,7 +187,7 @@ class ExhentaiGalleryExtractor(ExhentaiExtractor):
|
|||||||
self.image_token = text.extr(gpage, 'hentai.org/s/', '"')
|
self.image_token = text.extr(gpage, 'hentai.org/s/', '"')
|
||||||
if not self.image_token:
|
if not self.image_token:
|
||||||
self.log.debug("Page content:\n%s", gpage)
|
self.log.debug("Page content:\n%s", gpage)
|
||||||
raise exception.AbortExtraction(
|
raise self.exc.AbortExtraction(
|
||||||
"Failed to extract initial image token")
|
"Failed to extract initial image token")
|
||||||
ipage = self._image_page()
|
ipage = self._image_page()
|
||||||
else:
|
else:
|
||||||
@@ -195,7 +195,7 @@ class ExhentaiGalleryExtractor(ExhentaiExtractor):
|
|||||||
part = text.extr(ipage, 'hentai.org/g/', '"')
|
part = text.extr(ipage, 'hentai.org/g/', '"')
|
||||||
if not part:
|
if not part:
|
||||||
self.log.debug("Page content:\n%s", ipage)
|
self.log.debug("Page content:\n%s", ipage)
|
||||||
raise exception.AbortExtraction(
|
raise self.exc.AbortExtraction(
|
||||||
"Failed to extract gallery token")
|
"Failed to extract gallery token")
|
||||||
self.gallery_token = part.split("/")[1]
|
self.gallery_token = part.split("/")[1]
|
||||||
gpage = self._gallery_page()
|
gpage = self._gallery_page()
|
||||||
@@ -313,7 +313,7 @@ class ExhentaiGalleryExtractor(ExhentaiExtractor):
|
|||||||
|
|
||||||
data = self.request_json(self.api_url, method="POST", json=data)
|
data = self.request_json(self.api_url, method="POST", json=data)
|
||||||
if "error" in data:
|
if "error" in data:
|
||||||
raise exception.AbortExtraction(data["error"])
|
raise self.exc.AbortExtraction(data["error"])
|
||||||
|
|
||||||
return data["gmetadata"][0]
|
return data["gmetadata"][0]
|
||||||
|
|
||||||
@@ -338,7 +338,7 @@ class ExhentaiGalleryExtractor(ExhentaiExtractor):
|
|||||||
data["_fallback"] = self._fallback_1280(nl, self.image_num)
|
data["_fallback"] = self._fallback_1280(nl, self.image_num)
|
||||||
except IndexError:
|
except IndexError:
|
||||||
self.log.debug("Page content:\n%s", page)
|
self.log.debug("Page content:\n%s", page)
|
||||||
raise exception.AbortExtraction(
|
raise self.exc.AbortExtraction(
|
||||||
f"Unable to parse image info for '{url}'")
|
f"Unable to parse image info for '{url}'")
|
||||||
|
|
||||||
data["num"] = self.image_num
|
data["num"] = self.image_num
|
||||||
@@ -389,7 +389,7 @@ class ExhentaiGalleryExtractor(ExhentaiExtractor):
|
|||||||
nl, request["page"], imgkey)
|
nl, request["page"], imgkey)
|
||||||
except IndexError:
|
except IndexError:
|
||||||
self.log.debug("Page content:\n%s", page)
|
self.log.debug("Page content:\n%s", page)
|
||||||
raise exception.AbortExtraction(
|
raise self.exc.AbortExtraction(
|
||||||
f"Unable to parse image info for '{url}'")
|
f"Unable to parse image info for '{url}'")
|
||||||
|
|
||||||
data["num"] = request["page"]
|
data["num"] = request["page"]
|
||||||
@@ -438,7 +438,7 @@ class ExhentaiGalleryExtractor(ExhentaiExtractor):
|
|||||||
data["_fallback"] = self._fallback_mpv_1280(info, request)
|
data["_fallback"] = self._fallback_mpv_1280(info, request)
|
||||||
except IndexError:
|
except IndexError:
|
||||||
self.log.debug("Page content:\n%s", info)
|
self.log.debug("Page content:\n%s", info)
|
||||||
raise exception.AbortExtraction(
|
raise self.exc.AbortExtraction(
|
||||||
f"Unable to parse image info for '{url}'")
|
f"Unable to parse image info for '{url}'")
|
||||||
|
|
||||||
data["num"] = pnum
|
data["num"] = pnum
|
||||||
@@ -465,7 +465,7 @@ class ExhentaiGalleryExtractor(ExhentaiExtractor):
|
|||||||
if " requires GP" in page:
|
if " requires GP" in page:
|
||||||
gp = self.config("gp")
|
gp = self.config("gp")
|
||||||
if gp == "stop":
|
if gp == "stop":
|
||||||
raise exception.AbortExtraction("Not enough GP")
|
raise self.exc.AbortExtraction("Not enough GP")
|
||||||
elif gp == "wait":
|
elif gp == "wait":
|
||||||
self.input("Press ENTER to continue.")
|
self.input("Press ENTER to continue.")
|
||||||
return response.url
|
return response.url
|
||||||
@@ -475,7 +475,7 @@ class ExhentaiGalleryExtractor(ExhentaiExtractor):
|
|||||||
return self.data["_url_1280"]
|
return self.data["_url_1280"]
|
||||||
|
|
||||||
if " temporarily banned " in page:
|
if " temporarily banned " in page:
|
||||||
raise exception.AuthorizationError("Temporarily Banned")
|
raise self.exc.AuthorizationError("Temporarily Banned")
|
||||||
|
|
||||||
self._limits_exceeded()
|
self._limits_exceeded()
|
||||||
return response.url
|
return response.url
|
||||||
@@ -526,7 +526,7 @@ class ExhentaiGalleryExtractor(ExhentaiExtractor):
|
|||||||
|
|
||||||
if not action or action == "stop":
|
if not action or action == "stop":
|
||||||
ExhentaiExtractor.LIMIT = True
|
ExhentaiExtractor.LIMIT = True
|
||||||
raise exception.AbortExtraction(msg)
|
raise self.exc.AbortExtraction(msg)
|
||||||
|
|
||||||
self.log.warning(msg)
|
self.log.warning(msg)
|
||||||
if action == "wait":
|
if action == "wait":
|
||||||
@@ -559,12 +559,12 @@ class ExhentaiGalleryExtractor(ExhentaiExtractor):
|
|||||||
page = response.text
|
page = response.text
|
||||||
|
|
||||||
if response.status_code == 404 and "Gallery Not Available" in page:
|
if response.status_code == 404 and "Gallery Not Available" in page:
|
||||||
raise exception.AuthorizationError()
|
raise self.exc.AuthorizationError()
|
||||||
if page.startswith(("Key missing", "Gallery not found")):
|
if page.startswith(("Key missing", "Gallery not found")):
|
||||||
raise exception.NotFoundError("gallery")
|
raise self.exc.NotFoundError("gallery")
|
||||||
if page.count("hentai.org/mpv/") > 1:
|
if page.count("hentai.org/mpv/") > 1:
|
||||||
if self.gallery_token is None:
|
if self.gallery_token is None:
|
||||||
raise exception.AbortExtraction(
|
raise self.exc.AbortExtraction(
|
||||||
"'/s/' URLs in MPV mode are not supported")
|
"'/s/' URLs in MPV mode are not supported")
|
||||||
self.mpv = True
|
self.mpv = True
|
||||||
return page
|
return page
|
||||||
@@ -575,7 +575,7 @@ class ExhentaiGalleryExtractor(ExhentaiExtractor):
|
|||||||
page = self.request(url, fatal=False).text
|
page = self.request(url, fatal=False).text
|
||||||
|
|
||||||
if page.startswith(("Invalid page", "Keep trying")):
|
if page.startswith(("Invalid page", "Keep trying")):
|
||||||
raise exception.NotFoundError("image page")
|
raise self.exc.NotFoundError("image page")
|
||||||
return page
|
return page
|
||||||
|
|
||||||
def _fallback_original(self, nl, fullimg):
|
def _fallback_original(self, nl, fullimg):
|
||||||
|
|||||||
@@ -7,7 +7,7 @@
|
|||||||
"""Extractors for https://www.facebook.com/"""
|
"""Extractors for https://www.facebook.com/"""
|
||||||
|
|
||||||
from .common import Extractor, Message, Dispatch
|
from .common import Extractor, Message, Dispatch
|
||||||
from .. import text, util, exception
|
from .. import text, util
|
||||||
from ..cache import memcache
|
from ..cache import memcache
|
||||||
|
|
||||||
BASE_PATTERN = r"(?:https?://)?(?:[\w-]+\.)?facebook\.com"
|
BASE_PATTERN = r"(?:https?://)?(?:[\w-]+\.)?facebook\.com"
|
||||||
@@ -236,12 +236,12 @@ class FacebookExtractor(Extractor):
|
|||||||
res = self.request(url, **kwargs)
|
res = self.request(url, **kwargs)
|
||||||
|
|
||||||
if res.url.startswith(self.root + "/login"):
|
if res.url.startswith(self.root + "/login"):
|
||||||
raise exception.AuthRequired(
|
raise self.exc.AuthRequired(
|
||||||
message=("You must be logged in to continue viewing images." +
|
message=("You must be logged in to continue viewing images." +
|
||||||
LEFT_OFF_TXT))
|
LEFT_OFF_TXT))
|
||||||
|
|
||||||
if b'{"__dr":"CometErrorRoot.react"}' in res.content:
|
if b'{"__dr":"CometErrorRoot.react"}' in res.content:
|
||||||
raise exception.AbortExtraction(
|
raise self.exc.AbortExtraction(
|
||||||
"You've been temporarily blocked from viewing images.\n"
|
"You've been temporarily blocked from viewing images.\n"
|
||||||
"Please try using a different account, "
|
"Please try using a different account, "
|
||||||
"using a VPN or waiting before you retry." + LEFT_OFF_TXT)
|
"using a VPN or waiting before you retry." + LEFT_OFF_TXT)
|
||||||
@@ -331,7 +331,7 @@ class FacebookExtractor(Extractor):
|
|||||||
break
|
break
|
||||||
if ('"props":{"title":"This content isn\'t available right now"' in
|
if ('"props":{"title":"This content isn\'t available right now"' in
|
||||||
page):
|
page):
|
||||||
raise exception.AuthRequired(
|
raise self.exc.AuthRequired(
|
||||||
"authenticated cookies", "profile",
|
"authenticated cookies", "profile",
|
||||||
"This content isn't available right now")
|
"This content isn't available right now")
|
||||||
|
|
||||||
|
|||||||
@@ -9,7 +9,7 @@
|
|||||||
"""Extractors for https://fansly.com/"""
|
"""Extractors for https://fansly.com/"""
|
||||||
|
|
||||||
from .common import Extractor, Message
|
from .common import Extractor, Message
|
||||||
from .. import text, util, exception
|
from .. import text, util
|
||||||
import time
|
import time
|
||||||
|
|
||||||
BASE_PATTERN = r"(?:https?://)?(?:www\.)?fansly\.com"
|
BASE_PATTERN = r"(?:https?://)?(?:www\.)?fansly\.com"
|
||||||
@@ -54,7 +54,7 @@ class FanslyExtractor(Extractor):
|
|||||||
if wall["id"] == wall_id:
|
if wall["id"] == wall_id:
|
||||||
break
|
break
|
||||||
else:
|
else:
|
||||||
raise exception.NotFoundError("wall")
|
raise self.exc.NotFoundError("wall")
|
||||||
walls = (wall,)
|
walls = (wall,)
|
||||||
|
|
||||||
for wall in walls:
|
for wall in walls:
|
||||||
|
|||||||
@@ -7,7 +7,7 @@
|
|||||||
"""Extractors for https://fapello.com/"""
|
"""Extractors for https://fapello.com/"""
|
||||||
|
|
||||||
from .common import Extractor, Message
|
from .common import Extractor, Message
|
||||||
from .. import text, exception
|
from .. import text
|
||||||
|
|
||||||
|
|
||||||
BASE_PATTERN = r"(?:https?://)?(?:www\.)?fapello\.(?:com|su)"
|
BASE_PATTERN = r"(?:https?://)?(?:www\.)?fapello\.(?:com|su)"
|
||||||
@@ -34,7 +34,7 @@ class FapelloPostExtractor(Extractor):
|
|||||||
self.request(url, allow_redirects=False).text,
|
self.request(url, allow_redirects=False).text,
|
||||||
'class="uk-align-center"', "</div>", None)
|
'class="uk-align-center"', "</div>", None)
|
||||||
if page is None:
|
if page is None:
|
||||||
raise exception.NotFoundError("post")
|
raise self.exc.NotFoundError("post")
|
||||||
|
|
||||||
data = {
|
data = {
|
||||||
"model": self.model,
|
"model": self.model,
|
||||||
|
|||||||
@@ -9,7 +9,7 @@
|
|||||||
"""Extractors for https://fikfap.com/"""
|
"""Extractors for https://fikfap.com/"""
|
||||||
|
|
||||||
from .common import Extractor, Message
|
from .common import Extractor, Message
|
||||||
from .. import text, exception
|
from .. import text
|
||||||
|
|
||||||
BASE_PATTERN = r"(?:https?://)?(?:www\.)?fikfap\.com"
|
BASE_PATTERN = r"(?:https?://)?(?:www\.)?fikfap\.com"
|
||||||
|
|
||||||
@@ -78,7 +78,7 @@ class FikfapPostExtractor(FikfapExtractor):
|
|||||||
|
|
||||||
if post["postId"] == int(pid):
|
if post["postId"] == int(pid):
|
||||||
return (post,)
|
return (post,)
|
||||||
raise exception.NotFoundError("post")
|
raise self.exc.NotFoundError("post")
|
||||||
|
|
||||||
|
|
||||||
class FikfapUserExtractor(FikfapExtractor):
|
class FikfapUserExtractor(FikfapExtractor):
|
||||||
|
|||||||
@@ -9,7 +9,7 @@
|
|||||||
"""Extractors for https://www.flickr.com/"""
|
"""Extractors for https://www.flickr.com/"""
|
||||||
|
|
||||||
from .common import Extractor, Message
|
from .common import Extractor, Message
|
||||||
from .. import text, oauth, util, exception
|
from .. import text, oauth, util
|
||||||
from ..cache import memcache
|
from ..cache import memcache
|
||||||
|
|
||||||
BASE_PATTERN = r"(?:https?://)?(?:www\.|secure\.|m\.)?flickr\.com"
|
BASE_PATTERN = r"(?:https?://)?(?:www\.|secure\.|m\.)?flickr\.com"
|
||||||
@@ -459,14 +459,14 @@ class FlickrAPI(oauth.OAuth1API):
|
|||||||
msg = data.get("message", "")
|
msg = data.get("message", "")
|
||||||
self.log.debug("Server response: %s", data)
|
self.log.debug("Server response: %s", data)
|
||||||
if data["code"] == 1:
|
if data["code"] == 1:
|
||||||
raise exception.NotFoundError(self.extractor.subcategory)
|
raise self.exc.NotFoundError(self.extractor.subcategory)
|
||||||
elif data["code"] == 2:
|
elif data["code"] == 2:
|
||||||
raise exception.AuthorizationError(msg)
|
raise self.exc.AuthorizationError(msg)
|
||||||
elif data["code"] == 98:
|
elif data["code"] == 98:
|
||||||
raise exception.AuthenticationError(msg)
|
raise self.exc.AuthenticationError(msg)
|
||||||
elif data["code"] == 99:
|
elif data["code"] == 99:
|
||||||
raise exception.AuthorizationError(msg)
|
raise self.exc.AuthorizationError(msg)
|
||||||
raise exception.AbortExtraction("API request failed: " + msg)
|
raise self.exc.AbortExtraction("API request failed: " + msg)
|
||||||
return data
|
return data
|
||||||
|
|
||||||
def _pagination(self, method, params, key="photos"):
|
def _pagination(self, method, params, key="photos"):
|
||||||
|
|||||||
@@ -10,7 +10,7 @@
|
|||||||
|
|
||||||
from .common import Extractor, Message
|
from .common import Extractor, Message
|
||||||
from . import gelbooru_v02
|
from . import gelbooru_v02
|
||||||
from .. import text, exception
|
from .. import text
|
||||||
import binascii
|
import binascii
|
||||||
|
|
||||||
BASE_PATTERN = r"(?:https?://)?(?:www\.)?gelbooru\.com/(?:index\.php)?\?"
|
BASE_PATTERN = r"(?:https?://)?(?:www\.)?gelbooru\.com/(?:index\.php)?\?"
|
||||||
@@ -33,9 +33,9 @@ class GelbooruBase():
|
|||||||
url = self.root + "/index.php?page=dapi&q=index&json=1"
|
url = self.root + "/index.php?page=dapi&q=index&json=1"
|
||||||
try:
|
try:
|
||||||
data = self.request_json(url, params=params)
|
data = self.request_json(url, params=params)
|
||||||
except exception.HttpError as exc:
|
except self.exc.HttpError as exc:
|
||||||
if exc.status == 401:
|
if exc.status == 401:
|
||||||
raise exception.AuthRequired(
|
raise self.exc.AuthRequired(
|
||||||
"'api-key' & 'user-id'", "the API")
|
"'api-key' & 'user-id'", "the API")
|
||||||
raise
|
raise
|
||||||
|
|
||||||
@@ -172,7 +172,7 @@ class GelbooruPoolExtractor(GelbooruBase,
|
|||||||
|
|
||||||
name, pos = text.extract(page, "<h3>Now Viewing: ", "</h3>")
|
name, pos = text.extract(page, "<h3>Now Viewing: ", "</h3>")
|
||||||
if not name:
|
if not name:
|
||||||
raise exception.NotFoundError("pool")
|
raise self.exc.NotFoundError("pool")
|
||||||
|
|
||||||
return {
|
return {
|
||||||
"pool": text.parse_int(self.pool_id),
|
"pool": text.parse_int(self.pool_id),
|
||||||
|
|||||||
@@ -9,7 +9,7 @@
|
|||||||
"""Extractors for Gelbooru Beta 0.2 sites"""
|
"""Extractors for Gelbooru Beta 0.2 sites"""
|
||||||
|
|
||||||
from . import booru
|
from . import booru
|
||||||
from .. import text, util, exception
|
from .. import text, util
|
||||||
import collections
|
import collections
|
||||||
|
|
||||||
|
|
||||||
@@ -38,9 +38,9 @@ class GelbooruV02Extractor(booru.BooruExtractor):
|
|||||||
if root.tag == "error":
|
if root.tag == "error":
|
||||||
msg = root.text
|
msg = root.text
|
||||||
if msg.lower().startswith("missing authentication"):
|
if msg.lower().startswith("missing authentication"):
|
||||||
raise exception.AuthRequired(
|
raise self.exc.AuthRequired(
|
||||||
"'api-key' & 'user-id'", "the API", msg)
|
"'api-key' & 'user-id'", "the API", msg)
|
||||||
raise exception.AbortExtraction(f"'{msg}'")
|
raise self.exc.AbortExtraction(f"'{msg}'")
|
||||||
|
|
||||||
return root
|
return root
|
||||||
|
|
||||||
@@ -229,7 +229,7 @@ class GelbooruV02PoolExtractor(GelbooruV02Extractor):
|
|||||||
|
|
||||||
name, pos = text.extract(page, "<h4>Pool: ", "</h4>")
|
name, pos = text.extract(page, "<h4>Pool: ", "</h4>")
|
||||||
if not name:
|
if not name:
|
||||||
raise exception.NotFoundError("pool")
|
raise self.exc.NotFoundError("pool")
|
||||||
self.post_ids = text.extract_iter(
|
self.post_ids = text.extract_iter(
|
||||||
page, 'class="thumb" id="p', '"', pos)
|
page, 'class="thumb" id="p', '"', pos)
|
||||||
|
|
||||||
|
|||||||
@@ -5,7 +5,7 @@
|
|||||||
# published by the Free Software Foundation.
|
# published by the Free Software Foundation.
|
||||||
|
|
||||||
from .common import Extractor, Message
|
from .common import Extractor, Message
|
||||||
from .. import text, exception
|
from .. import text
|
||||||
from ..cache import cache
|
from ..cache import cache
|
||||||
|
|
||||||
BASE_PATTERN = r"(?:https?://)?(?:www\.)?girlswithmuscle\.com"
|
BASE_PATTERN = r"(?:https?://)?(?:www\.)?girlswithmuscle\.com"
|
||||||
@@ -46,13 +46,13 @@ class GirlswithmuscleExtractor(Extractor):
|
|||||||
url, method="POST", headers=headers, data=data)
|
url, method="POST", headers=headers, data=data)
|
||||||
|
|
||||||
if not response.history:
|
if not response.history:
|
||||||
raise exception.AuthenticationError()
|
raise self.exc.AuthenticationError()
|
||||||
|
|
||||||
page = response.text
|
page = response.text
|
||||||
if ">Wrong username or password" in page:
|
if ">Wrong username or password" in page:
|
||||||
raise exception.AuthenticationError()
|
raise self.exc.AuthenticationError()
|
||||||
if ">Log in<" in page:
|
if ">Log in<" in page:
|
||||||
raise exception.AuthenticationError("Account data is missing")
|
raise self.exc.AuthenticationError("Account data is missing")
|
||||||
|
|
||||||
return {c.name: c.value for c in response.history[0].cookies}
|
return {c.name: c.value for c in response.history[0].cookies}
|
||||||
|
|
||||||
@@ -69,7 +69,7 @@ class GirlswithmusclePostExtractor(GirlswithmuscleExtractor):
|
|||||||
url = f"{self.root}/{self.groups[0]}/"
|
url = f"{self.root}/{self.groups[0]}/"
|
||||||
page = self.request(url).text
|
page = self.request(url).text
|
||||||
if not page:
|
if not page:
|
||||||
raise exception.NotFoundError("post")
|
raise self.exc.NotFoundError("post")
|
||||||
|
|
||||||
metadata = self.metadata(page)
|
metadata = self.metadata(page)
|
||||||
|
|
||||||
@@ -152,7 +152,7 @@ class GirlswithmuscleSearchExtractor(GirlswithmuscleExtractor):
|
|||||||
response = self.request(url)
|
response = self.request(url)
|
||||||
if response.history:
|
if response.history:
|
||||||
msg = f'Request was redirected to "{response.url}", try logging in'
|
msg = f'Request was redirected to "{response.url}", try logging in'
|
||||||
raise exception.AuthorizationError(msg)
|
raise self.exc.AuthorizationError(msg)
|
||||||
page = response.text
|
page = response.text
|
||||||
|
|
||||||
match = text.re(r"Page (\d+) of (\d+)").search(page)
|
match = text.re(r"Page (\d+) of (\d+)").search(page)
|
||||||
|
|||||||
@@ -7,7 +7,7 @@
|
|||||||
"""Extractors for https://gofile.io/"""
|
"""Extractors for https://gofile.io/"""
|
||||||
|
|
||||||
from .common import Extractor, Message
|
from .common import Extractor, Message
|
||||||
from .. import text, exception
|
from .. import text
|
||||||
from ..cache import cache, memcache
|
from ..cache import cache, memcache
|
||||||
import hashlib
|
import hashlib
|
||||||
|
|
||||||
@@ -44,7 +44,7 @@ class GofileFolderExtractor(Extractor):
|
|||||||
try:
|
try:
|
||||||
contents = folder.pop("children")
|
contents = folder.pop("children")
|
||||||
except KeyError:
|
except KeyError:
|
||||||
raise exception.AuthorizationError("Password required")
|
raise self.exc.AuthorizationError("Password required")
|
||||||
|
|
||||||
num = 0
|
num = 0
|
||||||
for content in contents.values():
|
for content in contents.values():
|
||||||
@@ -94,10 +94,10 @@ class GofileFolderExtractor(Extractor):
|
|||||||
|
|
||||||
if response["status"] != "ok":
|
if response["status"] != "ok":
|
||||||
if response["status"] == "error-notFound":
|
if response["status"] == "error-notFound":
|
||||||
raise exception.NotFoundError("content")
|
raise self.exc.NotFoundError("content")
|
||||||
if response["status"] == "error-passwordRequired":
|
if response["status"] == "error-passwordRequired":
|
||||||
raise exception.AuthorizationError("Password required")
|
raise self.exc.AuthorizationError("Password required")
|
||||||
raise exception.AbortExtraction(
|
raise self.exc.AbortExtraction(
|
||||||
f"{endpoint} failed (Status: {response['status']})")
|
f"{endpoint} failed (Status: {response['status']})")
|
||||||
|
|
||||||
return response["data"]
|
return response["data"]
|
||||||
|
|||||||
@@ -7,7 +7,7 @@
|
|||||||
"""Extractors for https://hotleak.vip/"""
|
"""Extractors for https://hotleak.vip/"""
|
||||||
|
|
||||||
from .common import Extractor, Message
|
from .common import Extractor, Message
|
||||||
from .. import text, exception
|
from .. import text
|
||||||
import binascii
|
import binascii
|
||||||
|
|
||||||
BASE_PATTERN = r"(?:https?://)?(?:www\.)?hotleak\.vip"
|
BASE_PATTERN = r"(?:https?://)?(?:www\.)?hotleak\.vip"
|
||||||
@@ -116,7 +116,7 @@ class HotleakCreatorExtractor(HotleakExtractor):
|
|||||||
try:
|
try:
|
||||||
response = self.request(
|
response = self.request(
|
||||||
url, headers=headers, params=params, notfound=True)
|
url, headers=headers, params=params, notfound=True)
|
||||||
except exception.HttpError as exc:
|
except self.exc.HttpError as exc:
|
||||||
if exc.response.status_code == 429:
|
if exc.response.status_code == 429:
|
||||||
self.wait(
|
self.wait(
|
||||||
until=exc.response.headers.get("X-RateLimit-Reset"))
|
until=exc.response.headers.get("X-RateLimit-Reset"))
|
||||||
|
|||||||
@@ -9,7 +9,7 @@
|
|||||||
"""Extractors for https://www.imagebam.com/"""
|
"""Extractors for https://www.imagebam.com/"""
|
||||||
|
|
||||||
from .common import Extractor, Message
|
from .common import Extractor, Message
|
||||||
from .. import text, exception
|
from .. import text
|
||||||
|
|
||||||
|
|
||||||
class ImagebamExtractor(Extractor):
|
class ImagebamExtractor(Extractor):
|
||||||
@@ -29,7 +29,7 @@ class ImagebamExtractor(Extractor):
|
|||||||
page = self.request(self.root + path).text
|
page = self.request(self.root + path).text
|
||||||
url, pos = text.extract(page, '<img src="https://images', '"')
|
url, pos = text.extract(page, '<img src="https://images', '"')
|
||||||
if not url:
|
if not url:
|
||||||
raise exception.NotFoundError("image")
|
raise self.exc.NotFoundError("image")
|
||||||
filename = text.unescape(text.extract(page, 'alt="', '"', pos)[0])
|
filename = text.unescape(text.extract(page, 'alt="', '"', pos)[0])
|
||||||
|
|
||||||
return text.nameext_from_name(filename, {
|
return text.nameext_from_name(filename, {
|
||||||
|
|||||||
@@ -10,7 +10,7 @@
|
|||||||
"""Extractors for https://imgchest.com/"""
|
"""Extractors for https://imgchest.com/"""
|
||||||
|
|
||||||
from .common import GalleryExtractor, Extractor, Message
|
from .common import GalleryExtractor, Extractor, Message
|
||||||
from .. import text, util, exception
|
from .. import text, util
|
||||||
|
|
||||||
BASE_PATTERN = r"(?:https?://)?(?:www\.)?imgchest\.com"
|
BASE_PATTERN = r"(?:https?://)?(?:www\.)?imgchest\.com"
|
||||||
|
|
||||||
@@ -40,7 +40,7 @@ class ImagechestGalleryExtractor(GalleryExtractor):
|
|||||||
post = data["props"]["post"]
|
post = data["props"]["post"]
|
||||||
except Exception:
|
except Exception:
|
||||||
if "<title>Not Found</title>" in page:
|
if "<title>Not Found</title>" in page:
|
||||||
raise exception.NotFoundError("gallery")
|
raise self.exc.NotFoundError("gallery")
|
||||||
self.files = ()
|
self.files = ()
|
||||||
return {}
|
return {}
|
||||||
|
|
||||||
@@ -142,11 +142,11 @@ class ImagechestAPI():
|
|||||||
return response.json()["data"]
|
return response.json()["data"]
|
||||||
|
|
||||||
elif response.status_code < 400:
|
elif response.status_code < 400:
|
||||||
raise exception.AuthenticationError("Invalid API access token")
|
raise self.exc.AuthenticationError("Invalid API access token")
|
||||||
|
|
||||||
elif response.status_code == 429:
|
elif response.status_code == 429:
|
||||||
self.extractor.wait(seconds=600)
|
self.extractor.wait(seconds=600)
|
||||||
|
|
||||||
else:
|
else:
|
||||||
self.extractor.log.debug(response.text)
|
self.extractor.log.debug(response.text)
|
||||||
raise exception.AbortExtraction("API request failed")
|
raise self.exc.AbortExtraction("API request failed")
|
||||||
|
|||||||
@@ -9,7 +9,7 @@
|
|||||||
"""Extractors for https://www.imagefap.com/"""
|
"""Extractors for https://www.imagefap.com/"""
|
||||||
|
|
||||||
from .common import Extractor, Message
|
from .common import Extractor, Message
|
||||||
from .. import text, exception
|
from .. import text
|
||||||
|
|
||||||
BASE_PATTERN = r"(?:https?://)?(?:www\.|beta\.)?imagefap\.com"
|
BASE_PATTERN = r"(?:https?://)?(?:www\.|beta\.)?imagefap\.com"
|
||||||
|
|
||||||
@@ -31,7 +31,7 @@ class ImagefapExtractor(Extractor):
|
|||||||
self.log.warning("HTTP redirect to '%s'", response.url)
|
self.log.warning("HTTP redirect to '%s'", response.url)
|
||||||
if msg := text.extr(response.text, '<div class="mt-4', '<'):
|
if msg := text.extr(response.text, '<div class="mt-4', '<'):
|
||||||
msg = " ".join(msg.partition(">")[2].split())
|
msg = " ".join(msg.partition(">")[2].split())
|
||||||
raise exception.AbortExtraction(f"'{msg}'")
|
raise self.exc.AbortExtraction(f"'{msg}'")
|
||||||
|
|
||||||
return response
|
return response
|
||||||
|
|
||||||
|
|||||||
@@ -9,7 +9,7 @@
|
|||||||
"""Collection of extractors for various imagehosts"""
|
"""Collection of extractors for various imagehosts"""
|
||||||
|
|
||||||
from .common import Extractor, Message
|
from .common import Extractor, Message
|
||||||
from .. import text, exception
|
from .. import text
|
||||||
from ..cache import memcache
|
from ..cache import memcache
|
||||||
|
|
||||||
|
|
||||||
@@ -81,7 +81,7 @@ class ImagehostImageExtractor(Extractor):
|
|||||||
return ()
|
return ()
|
||||||
|
|
||||||
def not_found(self, resource=None):
|
def not_found(self, resource=None):
|
||||||
raise exception.NotFoundError(resource or self.__class__.subcategory)
|
raise self.exc.NotFoundError(resource or self.__class__.subcategory)
|
||||||
|
|
||||||
|
|
||||||
class ImxtoImageExtractor(ImagehostImageExtractor):
|
class ImxtoImageExtractor(ImagehostImageExtractor):
|
||||||
|
|||||||
@@ -9,7 +9,7 @@
|
|||||||
"""Extractors for https://imgbb.com/"""
|
"""Extractors for https://imgbb.com/"""
|
||||||
|
|
||||||
from .common import Extractor, Message
|
from .common import Extractor, Message
|
||||||
from .. import text, util, exception
|
from .. import text, util
|
||||||
from ..cache import cache
|
from ..cache import cache
|
||||||
|
|
||||||
|
|
||||||
@@ -60,7 +60,7 @@ class ImgbbExtractor(Extractor):
|
|||||||
response = self.request(url, method="POST", headers=headers, data=data)
|
response = self.request(url, method="POST", headers=headers, data=data)
|
||||||
|
|
||||||
if not response.history:
|
if not response.history:
|
||||||
raise exception.AuthenticationError()
|
raise self.exc.AuthenticationError()
|
||||||
return self.cookies
|
return self.cookies
|
||||||
|
|
||||||
def _pagination(self, page, url, params):
|
def _pagination(self, page, url, params):
|
||||||
@@ -193,10 +193,10 @@ class ImgbbUserExtractor(ImgbbExtractor):
|
|||||||
return self._pagination(response.text, url + "json", params)
|
return self._pagination(response.text, url + "json", params)
|
||||||
|
|
||||||
if response.status_code == 301:
|
if response.status_code == 301:
|
||||||
raise exception.NotFoundError("user")
|
raise self.exc.NotFoundError("user")
|
||||||
redirect = "HTTP redirect to " + response.headers.get("Location", "")
|
redirect = "HTTP redirect to " + response.headers.get("Location", "")
|
||||||
if response.status_code == 302:
|
if response.status_code == 302:
|
||||||
raise exception.AuthRequired(
|
raise self.exc.AuthRequired(
|
||||||
("username & password", "authenticated cookies"),
|
("username & password", "authenticated cookies"),
|
||||||
"profile", redirect)
|
"profile", redirect)
|
||||||
raise exception.AbortExtraction(redirect)
|
raise self.exc.AbortExtraction(redirect)
|
||||||
|
|||||||
@@ -9,7 +9,7 @@
|
|||||||
"""Extractors for https://imgbox.com/"""
|
"""Extractors for https://imgbox.com/"""
|
||||||
|
|
||||||
from .common import Extractor, Message, AsynchronousMixin
|
from .common import Extractor, Message, AsynchronousMixin
|
||||||
from .. import text, exception
|
from .. import text
|
||||||
|
|
||||||
|
|
||||||
class ImgboxExtractor(Extractor):
|
class ImgboxExtractor(Extractor):
|
||||||
@@ -68,7 +68,7 @@ class ImgboxGalleryExtractor(AsynchronousMixin, ImgboxExtractor):
|
|||||||
def get_job_metadata(self):
|
def get_job_metadata(self):
|
||||||
page = self.request(self.root + "/g/" + self.gallery_key).text
|
page = self.request(self.root + "/g/" + self.gallery_key).text
|
||||||
if "The specified gallery could not be found." in page:
|
if "The specified gallery could not be found." in page:
|
||||||
raise exception.NotFoundError("gallery")
|
raise self.exc.NotFoundError("gallery")
|
||||||
self.image_keys = text.re(
|
self.image_keys = text.re(
|
||||||
r'<a href="/([^"]+)"><img alt="').findall(page)
|
r'<a href="/([^"]+)"><img alt="').findall(page)
|
||||||
|
|
||||||
@@ -104,5 +104,5 @@ class ImgboxImageExtractor(ImgboxExtractor):
|
|||||||
def get_image_metadata(self, page):
|
def get_image_metadata(self, page):
|
||||||
data = ImgboxExtractor.get_image_metadata(self, page)
|
data = ImgboxExtractor.get_image_metadata(self, page)
|
||||||
if not data["filename"]:
|
if not data["filename"]:
|
||||||
raise exception.NotFoundError("image")
|
raise self.exc.NotFoundError("image")
|
||||||
return data
|
return data
|
||||||
|
|||||||
@@ -9,7 +9,7 @@
|
|||||||
"""Extractors for https://imgur.com/"""
|
"""Extractors for https://imgur.com/"""
|
||||||
|
|
||||||
from .common import Extractor, Message
|
from .common import Extractor, Message
|
||||||
from .. import text, exception
|
from .. import text
|
||||||
|
|
||||||
BASE_PATTERN = r"(?:https?://)?(?:www\.|[im]\.)?imgur\.(?:com|io)"
|
BASE_PATTERN = r"(?:https?://)?(?:www\.|[im]\.)?imgur\.(?:com|io)"
|
||||||
|
|
||||||
@@ -296,7 +296,7 @@ class ImgurAPI():
|
|||||||
return self.extractor.request_json(
|
return self.extractor.request_json(
|
||||||
"https://api.imgur.com" + endpoint,
|
"https://api.imgur.com" + endpoint,
|
||||||
params=params, headers=(headers or self.headers))
|
params=params, headers=(headers or self.headers))
|
||||||
except exception.HttpError as exc:
|
except self.exc.HttpError as exc:
|
||||||
if exc.status not in (403, 429) or \
|
if exc.status not in (403, 429) or \
|
||||||
b"capacity" not in exc.response.content:
|
b"capacity" not in exc.response.content:
|
||||||
raise
|
raise
|
||||||
|
|||||||
@@ -9,7 +9,7 @@
|
|||||||
"""Extractors for https://inkbunny.net/"""
|
"""Extractors for https://inkbunny.net/"""
|
||||||
|
|
||||||
from .common import Extractor, Message
|
from .common import Extractor, Message
|
||||||
from .. import text, exception
|
from .. import text
|
||||||
from ..cache import cache
|
from ..cache import cache
|
||||||
|
|
||||||
|
|
||||||
@@ -278,7 +278,7 @@ class InkbunnyPostExtractor(InkbunnyExtractor):
|
|||||||
def posts(self):
|
def posts(self):
|
||||||
submissions = self.api.detail(({"submission_id": self.submission_id},))
|
submissions = self.api.detail(({"submission_id": self.submission_id},))
|
||||||
if submissions[0] is None:
|
if submissions[0] is None:
|
||||||
raise exception.NotFoundError("submission")
|
raise self.exc.NotFoundError("submission")
|
||||||
return submissions
|
return submissions
|
||||||
|
|
||||||
|
|
||||||
@@ -348,7 +348,7 @@ class InkbunnyAPI():
|
|||||||
self.authenticate(invalidate=True)
|
self.authenticate(invalidate=True)
|
||||||
continue
|
continue
|
||||||
|
|
||||||
raise exception.AbortExtraction(data.get("error_message"))
|
raise self.exc.AbortExtraction(data.get("error_message"))
|
||||||
|
|
||||||
def _pagination_search(self, params):
|
def _pagination_search(self, params):
|
||||||
params["page"] = 1
|
params["page"] = 1
|
||||||
@@ -379,5 +379,5 @@ def _authenticate_impl(api, username, password):
|
|||||||
data = api.extractor.request_json(url, method="POST", data=data)
|
data = api.extractor.request_json(url, method="POST", data=data)
|
||||||
|
|
||||||
if "sid" not in data:
|
if "sid" not in data:
|
||||||
raise exception.AuthenticationError(data.get("error_message"))
|
raise Extractor.exc.AuthenticationError(data.get("error_message"))
|
||||||
return data["sid"]
|
return data["sid"]
|
||||||
|
|||||||
@@ -10,7 +10,7 @@
|
|||||||
"""Extractors for https://www.instagram.com/"""
|
"""Extractors for https://www.instagram.com/"""
|
||||||
|
|
||||||
from .common import Extractor, Message, Dispatch
|
from .common import Extractor, Message, Dispatch
|
||||||
from .. import text, util, exception
|
from .. import text, util
|
||||||
from ..cache import cache, memcache
|
from ..cache import cache, memcache
|
||||||
import itertools
|
import itertools
|
||||||
import binascii
|
import binascii
|
||||||
@@ -143,7 +143,7 @@ class InstagramExtractor(Extractor):
|
|||||||
page = None
|
page = None
|
||||||
|
|
||||||
if page is not None:
|
if page is not None:
|
||||||
raise exception.AbortExtraction(
|
raise self.exc.AbortExtraction(
|
||||||
f"HTTP redirect to {page} page ({url.partition('?')[0]})")
|
f"HTTP redirect to {page} page ({url.partition('?')[0]})")
|
||||||
|
|
||||||
www_claim = response.headers.get("x-ig-set-www-claim")
|
www_claim = response.headers.get("x-ig-set-www-claim")
|
||||||
@@ -678,7 +678,7 @@ class InstagramStoriesExtractor(InstagramExtractor):
|
|||||||
reel["items"] = (item,)
|
reel["items"] = (item,)
|
||||||
break
|
break
|
||||||
else:
|
else:
|
||||||
raise exception.NotFoundError("story")
|
raise self.exc.NotFoundError("story")
|
||||||
|
|
||||||
elif self.config("split"):
|
elif self.config("split"):
|
||||||
reel = reels[0]
|
reel = reels[0]
|
||||||
@@ -860,7 +860,7 @@ class InstagramRestAPI():
|
|||||||
try:
|
try:
|
||||||
return self._call(endpoint, params=params)["reels_media"]
|
return self._call(endpoint, params=params)["reels_media"]
|
||||||
except KeyError:
|
except KeyError:
|
||||||
raise exception.AuthRequired("authenticated cookies")
|
raise self.exc.AuthRequired("authenticated cookies")
|
||||||
|
|
||||||
def reels_tray(self):
|
def reels_tray(self):
|
||||||
endpoint = "/v1/feed/reels_tray/"
|
endpoint = "/v1/feed/reels_tray/"
|
||||||
@@ -893,7 +893,7 @@ class InstagramRestAPI():
|
|||||||
return self._call(
|
return self._call(
|
||||||
endpoint, params=params, notfound="user")["data"]["user"]
|
endpoint, params=params, notfound="user")["data"]["user"]
|
||||||
except KeyError:
|
except KeyError:
|
||||||
raise exception.NotFoundError("user")
|
raise self.exc.NotFoundError("user")
|
||||||
|
|
||||||
def user_by_search(self, username):
|
def user_by_search(self, username):
|
||||||
url = "https://www.instagram.com/web/search/topsearch/"
|
url = "https://www.instagram.com/web/search/topsearch/"
|
||||||
@@ -914,7 +914,7 @@ class InstagramRestAPI():
|
|||||||
if user := self.user_by_name(screen_name):
|
if user := self.user_by_name(screen_name):
|
||||||
return user
|
return user
|
||||||
self.user_by_name.invalidate(screen_name)
|
self.user_by_name.invalidate(screen_name)
|
||||||
raise exception.NotFoundError("user")
|
raise self.exc.NotFoundError("user")
|
||||||
|
|
||||||
def user_id(self, screen_name, check_private=True):
|
def user_id(self, screen_name, check_private=True):
|
||||||
if screen_name.startswith("id:"):
|
if screen_name.startswith("id:"):
|
||||||
@@ -1087,7 +1087,7 @@ class InstagramGraphqlAPI():
|
|||||||
self.user_id = api.user_id
|
self.user_id = api.user_id
|
||||||
|
|
||||||
def _unsupported(self, _=None):
|
def _unsupported(self, _=None):
|
||||||
raise exception.AbortExtraction("Unsupported with GraphQL API")
|
raise self.exc.AbortExtraction("Unsupported with GraphQL API")
|
||||||
|
|
||||||
def highlights_tray(self, user_id):
|
def highlights_tray(self, user_id):
|
||||||
query_hash = "d4d88dc1500312af6f937f7b804c68c3"
|
query_hash = "d4d88dc1500312af6f937f7b804c68c3"
|
||||||
@@ -1175,7 +1175,7 @@ class InstagramGraphqlAPI():
|
|||||||
elif not data["edges"]:
|
elif not data["edges"]:
|
||||||
user = self.extractor.item
|
user = self.extractor.item
|
||||||
s = "" if user.endswith("s") else "s"
|
s = "" if user.endswith("s") else "s"
|
||||||
raise exception.AbortExtraction(
|
raise self.exc.AbortExtraction(
|
||||||
f"{user}'{s} posts are private")
|
f"{user}'{s} posts are private")
|
||||||
|
|
||||||
variables["after"] = extr._update_cursor(info["end_cursor"])
|
variables["after"] = extr._update_cursor(info["end_cursor"])
|
||||||
|
|||||||
@@ -7,7 +7,7 @@
|
|||||||
"""Extractors for https://www.iwara.tv/"""
|
"""Extractors for https://www.iwara.tv/"""
|
||||||
|
|
||||||
from .common import Extractor, Message, Dispatch
|
from .common import Extractor, Message, Dispatch
|
||||||
from .. import text, util, exception
|
from .. import text, util
|
||||||
from ..cache import cache, memcache
|
from ..cache import cache, memcache
|
||||||
import hashlib
|
import hashlib
|
||||||
|
|
||||||
@@ -100,7 +100,7 @@ class IwaraExtractor(Extractor):
|
|||||||
if type == "user":
|
if type == "user":
|
||||||
return self.items_user(results)
|
return self.items_user(results)
|
||||||
|
|
||||||
raise exception.AbortExtraction(f"Unsupported result type '{type}'")
|
raise self.exc.AbortExtraction(f"Unsupported result type '{type}'")
|
||||||
|
|
||||||
def extract_media_info(self, item, key, include_file_info=True):
|
def extract_media_info(self, item, key, include_file_info=True):
|
||||||
info = {
|
info = {
|
||||||
@@ -344,7 +344,7 @@ class IwaraAPI():
|
|||||||
|
|
||||||
def favorites(self, type):
|
def favorites(self, type):
|
||||||
if not self.username:
|
if not self.username:
|
||||||
raise exception.AuthRequired(
|
raise self.exc.AuthRequired(
|
||||||
"username & password", "your favorites")
|
"username & password", "your favorites")
|
||||||
endpoint = f"/favorites/{type}s"
|
endpoint = f"/favorites/{type}s"
|
||||||
return self._pagination(endpoint)
|
return self._pagination(endpoint)
|
||||||
@@ -398,7 +398,7 @@ class IwaraAPI():
|
|||||||
|
|
||||||
if not (refresh_token := data.get("token")):
|
if not (refresh_token := data.get("token")):
|
||||||
self.extractor.log.debug(data)
|
self.extractor.log.debug(data)
|
||||||
raise exception.AuthenticationError(data.get("message"))
|
raise self.exc.AuthenticationError(data.get("message"))
|
||||||
_refresh_token_cache.update(username, refresh_token)
|
_refresh_token_cache.update(username, refresh_token)
|
||||||
|
|
||||||
self.extractor.log.info("Refreshing access token for %s", username)
|
self.extractor.log.info("Refreshing access token for %s", username)
|
||||||
@@ -410,7 +410,7 @@ class IwaraAPI():
|
|||||||
|
|
||||||
if not (access_token := data.get("accessToken")):
|
if not (access_token := data.get("accessToken")):
|
||||||
self.extractor.log.debug(data)
|
self.extractor.log.debug(data)
|
||||||
raise exception.AuthenticationError(data.get("message"))
|
raise self.exc.AuthenticationError(data.get("message"))
|
||||||
return "Bearer " + access_token
|
return "Bearer " + access_token
|
||||||
|
|
||||||
def _call(self, endpoint, params=None, headers=None):
|
def _call(self, endpoint, params=None, headers=None):
|
||||||
|
|||||||
@@ -9,7 +9,7 @@
|
|||||||
"""Extractors for https://kabe-uchiroom.com/"""
|
"""Extractors for https://kabe-uchiroom.com/"""
|
||||||
|
|
||||||
from .common import Extractor, Message
|
from .common import Extractor, Message
|
||||||
from .. import text, exception
|
from .. import text
|
||||||
|
|
||||||
|
|
||||||
class KabeuchiUserExtractor(Extractor):
|
class KabeuchiUserExtractor(Extractor):
|
||||||
@@ -47,7 +47,7 @@ class KabeuchiUserExtractor(Extractor):
|
|||||||
url = f"{self.root}/mypage/?id={uid}"
|
url = f"{self.root}/mypage/?id={uid}"
|
||||||
response = self.request(url)
|
response = self.request(url)
|
||||||
if response.history and response.url == self.root + "/":
|
if response.history and response.url == self.root + "/":
|
||||||
raise exception.NotFoundError("user")
|
raise self.exc.NotFoundError("user")
|
||||||
target_id = text.extr(response.text, 'user_friend_id = "', '"')
|
target_id = text.extr(response.text, 'user_friend_id = "', '"')
|
||||||
return self._pagination(target_id)
|
return self._pagination(target_id)
|
||||||
|
|
||||||
|
|||||||
@@ -9,7 +9,7 @@
|
|||||||
"""Extractors for https://kemono.cr/"""
|
"""Extractors for https://kemono.cr/"""
|
||||||
|
|
||||||
from .common import Extractor, Message
|
from .common import Extractor, Message
|
||||||
from .. import text, util, exception
|
from .. import text, util
|
||||||
from ..cache import cache, memcache
|
from ..cache import cache, memcache
|
||||||
import itertools
|
import itertools
|
||||||
import json
|
import json
|
||||||
@@ -98,7 +98,7 @@ class KemonoExtractor(Extractor):
|
|||||||
try:
|
try:
|
||||||
creator = creator_info[key] = self.api.creator_profile(
|
creator = creator_info[key] = self.api.creator_profile(
|
||||||
service, creator_id)
|
service, creator_id)
|
||||||
except exception.HttpError:
|
except self.exc.HttpError:
|
||||||
self.log.warning("%s/%s/%s: 'Creator not found'",
|
self.log.warning("%s/%s/%s: 'Creator not found'",
|
||||||
service, creator_id, post["id"])
|
service, creator_id, post["id"])
|
||||||
creator = creator_info[key] = util.NONE
|
creator = creator_info[key] = util.NONE
|
||||||
@@ -211,7 +211,7 @@ class KemonoExtractor(Extractor):
|
|||||||
msg = f'"{response.json()["error"]}"'
|
msg = f'"{response.json()["error"]}"'
|
||||||
except Exception:
|
except Exception:
|
||||||
msg = '"Username or password is incorrect"'
|
msg = '"Username or password is incorrect"'
|
||||||
raise exception.AuthenticationError(msg)
|
raise self.exc.AuthenticationError(msg)
|
||||||
|
|
||||||
return {c.name: c.value for c in response.cookies}
|
return {c.name: c.value for c in response.cookies}
|
||||||
|
|
||||||
@@ -399,7 +399,7 @@ class KemonoPostExtractor(KemonoExtractor):
|
|||||||
if str(rev["revision_id"]) == revision_id:
|
if str(rev["revision_id"]) == revision_id:
|
||||||
return (rev,)
|
return (rev,)
|
||||||
|
|
||||||
raise exception.NotFoundError("revision")
|
raise self.exc.NotFoundError("revision")
|
||||||
|
|
||||||
|
|
||||||
class KemonoDiscordExtractor(KemonoExtractor):
|
class KemonoDiscordExtractor(KemonoExtractor):
|
||||||
@@ -419,7 +419,7 @@ class KemonoDiscordExtractor(KemonoExtractor):
|
|||||||
server, channels = discord_server_info(self, server_id)
|
server, channels = discord_server_info(self, server_id)
|
||||||
channel = channels[channel_id]
|
channel = channels[channel_id]
|
||||||
except Exception:
|
except Exception:
|
||||||
raise exception.NotFoundError("channel")
|
raise self.exc.NotFoundError("channel")
|
||||||
|
|
||||||
data = {
|
data = {
|
||||||
"server" : server["name"],
|
"server" : server["name"],
|
||||||
|
|||||||
@@ -9,7 +9,7 @@
|
|||||||
"""Extractors for https://downloads.khinsider.com/"""
|
"""Extractors for https://downloads.khinsider.com/"""
|
||||||
|
|
||||||
from .common import Extractor, Message, AsynchronousMixin
|
from .common import Extractor, Message, AsynchronousMixin
|
||||||
from .. import text, exception
|
from .. import text
|
||||||
|
|
||||||
|
|
||||||
class KhinsiderSoundtrackExtractor(AsynchronousMixin, Extractor):
|
class KhinsiderSoundtrackExtractor(AsynchronousMixin, Extractor):
|
||||||
@@ -32,7 +32,7 @@ class KhinsiderSoundtrackExtractor(AsynchronousMixin, Extractor):
|
|||||||
url = self.root + "/game-soundtracks/album/" + self.album
|
url = self.root + "/game-soundtracks/album/" + self.album
|
||||||
page = self.request(url, encoding="utf-8").text
|
page = self.request(url, encoding="utf-8").text
|
||||||
if "Download all songs at once:" not in page:
|
if "Download all songs at once:" not in page:
|
||||||
raise exception.NotFoundError("soundtrack")
|
raise self.exc.NotFoundError("soundtrack")
|
||||||
|
|
||||||
data = self.metadata(page)
|
data = self.metadata(page)
|
||||||
yield Message.Directory, "", data
|
yield Message.Directory, "", data
|
||||||
|
|||||||
@@ -7,7 +7,7 @@
|
|||||||
"""Extractors for https://www.lofter.com/"""
|
"""Extractors for https://www.lofter.com/"""
|
||||||
|
|
||||||
from .common import Extractor, Message
|
from .common import Extractor, Message
|
||||||
from .. import text, util, exception
|
from .. import text, util
|
||||||
|
|
||||||
|
|
||||||
class LofterExtractor(Extractor):
|
class LofterExtractor(Extractor):
|
||||||
@@ -132,11 +132,11 @@ class LofterAPI():
|
|||||||
info = response.json()
|
info = response.json()
|
||||||
|
|
||||||
if info["meta"]["status"] == 4200:
|
if info["meta"]["status"] == 4200:
|
||||||
raise exception.NotFoundError("blog")
|
raise self.exc.NotFoundError("blog")
|
||||||
|
|
||||||
if info["meta"]["status"] != 200:
|
if info["meta"]["status"] != 200:
|
||||||
self.extractor.log.debug("Server response: %s", info)
|
self.extractor.log.debug("Server response: %s", info)
|
||||||
raise exception.AbortExtraction("API request failed")
|
raise self.exc.AbortExtraction("API request failed")
|
||||||
|
|
||||||
return info["response"]
|
return info["response"]
|
||||||
|
|
||||||
|
|||||||
@@ -9,7 +9,7 @@
|
|||||||
"""Extractors for https://members.luscious.net/"""
|
"""Extractors for https://members.luscious.net/"""
|
||||||
|
|
||||||
from .common import Extractor, Message
|
from .common import Extractor, Message
|
||||||
from .. import text, exception
|
from .. import text
|
||||||
|
|
||||||
|
|
||||||
class LusciousExtractor(Extractor):
|
class LusciousExtractor(Extractor):
|
||||||
@@ -32,7 +32,7 @@ class LusciousExtractor(Extractor):
|
|||||||
|
|
||||||
if response.status_code >= 400:
|
if response.status_code >= 400:
|
||||||
self.log.debug("Server response: %s", response.text)
|
self.log.debug("Server response: %s", response.text)
|
||||||
raise exception.AbortExtraction(
|
raise self.exc.AbortExtraction(
|
||||||
f"GraphQL query failed "
|
f"GraphQL query failed "
|
||||||
f"('{response.status_code} {response.reason}')")
|
f"('{response.status_code} {response.reason}')")
|
||||||
|
|
||||||
@@ -82,7 +82,7 @@ class LusciousAlbumExtractor(LusciousExtractor):
|
|||||||
|
|
||||||
album = self._request_graphql("AlbumGet", variables)["album"]["get"]
|
album = self._request_graphql("AlbumGet", variables)["album"]["get"]
|
||||||
if "errors" in album:
|
if "errors" in album:
|
||||||
raise exception.NotFoundError("album")
|
raise self.exc.NotFoundError("album")
|
||||||
|
|
||||||
album["audiences"] = [item["title"] for item in album["audiences"]]
|
album["audiences"] = [item["title"] for item in album["audiences"]]
|
||||||
album["genres"] = [item["title"] for item in album["genres"]]
|
album["genres"] = [item["title"] for item in album["genres"]]
|
||||||
|
|||||||
@@ -9,7 +9,7 @@
|
|||||||
"""Extractors for https://manga.madokami.al/"""
|
"""Extractors for https://manga.madokami.al/"""
|
||||||
|
|
||||||
from .common import Extractor, Message
|
from .common import Extractor, Message
|
||||||
from .. import text, util, exception
|
from .. import text, util
|
||||||
|
|
||||||
BASE_PATTERN = r"(?:https?://)?manga\.madokami\.al"
|
BASE_PATTERN = r"(?:https?://)?manga\.madokami\.al"
|
||||||
|
|
||||||
@@ -31,7 +31,7 @@ class MadokamiMangaExtractor(MadokamiExtractor):
|
|||||||
def items(self):
|
def items(self):
|
||||||
username, password = self._get_auth_info()
|
username, password = self._get_auth_info()
|
||||||
if not username:
|
if not username:
|
||||||
raise exception.AuthRequired("username & password")
|
raise self.exc.AuthRequired("username & password")
|
||||||
self.session.auth = util.HTTPBasicAuth(username, password)
|
self.session.auth = util.HTTPBasicAuth(username, password)
|
||||||
|
|
||||||
url = f"{self.root}/Manga/{self.groups[0]}"
|
url = f"{self.root}/Manga/{self.groups[0]}"
|
||||||
|
|||||||
@@ -9,7 +9,7 @@
|
|||||||
"""Extractors for https://mangadex.org/"""
|
"""Extractors for https://mangadex.org/"""
|
||||||
|
|
||||||
from .common import Extractor, Message
|
from .common import Extractor, Message
|
||||||
from .. import text, util, exception
|
from .. import text, util
|
||||||
from ..cache import cache, memcache
|
from ..cache import cache, memcache
|
||||||
from collections import defaultdict
|
from collections import defaultdict
|
||||||
|
|
||||||
@@ -129,7 +129,7 @@ class MangadexChapterExtractor(MangadexExtractor):
|
|||||||
data = self._transform(chapter)
|
data = self._transform(chapter)
|
||||||
|
|
||||||
if data.get("_external_url") and not data["count"]:
|
if data.get("_external_url") and not data["count"]:
|
||||||
raise exception.AbortExtraction(
|
raise self.exc.AbortExtraction(
|
||||||
f"Chapter {data['chapter']}{data['chapter_minor']} is not "
|
f"Chapter {data['chapter']}{data['chapter_minor']} is not "
|
||||||
f"available on MangaDex and can instead be read on the "
|
f"available on MangaDex and can instead be read on the "
|
||||||
f"official publisher's website at {data['_external_url']}.")
|
f"official publisher's website at {data['_external_url']}.")
|
||||||
@@ -333,7 +333,7 @@ class MangadexAPI():
|
|||||||
try:
|
try:
|
||||||
access_token = data["access_token"]
|
access_token = data["access_token"]
|
||||||
except Exception:
|
except Exception:
|
||||||
raise exception.AuthenticationError(data.get("error_description"))
|
raise self.exc.AuthenticationError(data.get("error_description"))
|
||||||
|
|
||||||
if refresh_token != data.get("refresh_token"):
|
if refresh_token != data.get("refresh_token"):
|
||||||
_refresh_token_cache.update(
|
_refresh_token_cache.update(
|
||||||
@@ -356,7 +356,7 @@ class MangadexAPI():
|
|||||||
data = self.extractor.request_json(
|
data = self.extractor.request_json(
|
||||||
url, method="POST", json=json, fatal=None)
|
url, method="POST", json=json, fatal=None)
|
||||||
if data.get("result") != "ok":
|
if data.get("result") != "ok":
|
||||||
raise exception.AuthenticationError()
|
raise self.exc.AuthenticationError()
|
||||||
|
|
||||||
if refresh_token != data["token"]["refresh"]:
|
if refresh_token != data["token"]["refresh"]:
|
||||||
_refresh_token_cache.update(username, data["token"]["refresh"])
|
_refresh_token_cache.update(username, data["token"]["refresh"])
|
||||||
@@ -381,7 +381,7 @@ class MangadexAPI():
|
|||||||
|
|
||||||
msg = ", ".join(f'{error["title"]}: "{error["detail"]}"'
|
msg = ", ".join(f'{error["title"]}: "{error["detail"]}"'
|
||||||
for error in response.json()["errors"])
|
for error in response.json()["errors"])
|
||||||
raise exception.AbortExtraction(
|
raise self.exc.AbortExtraction(
|
||||||
f"{response.status_code} {response.reason} ({msg})")
|
f"{response.status_code} {response.reason} ({msg})")
|
||||||
|
|
||||||
def _pagination_chapters(self, endpoint, params=None, auth=False):
|
def _pagination_chapters(self, endpoint, params=None, auth=False):
|
||||||
|
|||||||
@@ -9,7 +9,7 @@
|
|||||||
"""Extractors for https://mangafire.to/"""
|
"""Extractors for https://mangafire.to/"""
|
||||||
|
|
||||||
from .common import ChapterExtractor, MangaExtractor
|
from .common import ChapterExtractor, MangaExtractor
|
||||||
from .. import text, exception
|
from .. import text
|
||||||
from ..cache import memcache
|
from ..cache import memcache
|
||||||
|
|
||||||
BASE_PATTERN = r"(?:https?://)?(?:www\.)?mangafire\.to"
|
BASE_PATTERN = r"(?:https?://)?(?:www\.)?mangafire\.to"
|
||||||
@@ -42,7 +42,7 @@ class MangafireChapterExtractor(MangafireBase, ChapterExtractor):
|
|||||||
chapters = _manga_chapters(self, (manga_id, self.type, lang))
|
chapters = _manga_chapters(self, (manga_id, self.type, lang))
|
||||||
anchor = chapters[chapter_info]
|
anchor = chapters[chapter_info]
|
||||||
except KeyError:
|
except KeyError:
|
||||||
raise exception.NotFoundError("chapter")
|
raise self.exc.NotFoundError("chapter")
|
||||||
self.chapter_id = text.extr(anchor, 'data-id="', '"')
|
self.chapter_id = text.extr(anchor, 'data-id="', '"')
|
||||||
|
|
||||||
return {
|
return {
|
||||||
|
|||||||
@@ -10,7 +10,7 @@
|
|||||||
"""Extractors for https://www.mangakakalot.gg/ and mirror sites"""
|
"""Extractors for https://www.mangakakalot.gg/ and mirror sites"""
|
||||||
|
|
||||||
from .common import BaseExtractor, ChapterExtractor, MangaExtractor, Message
|
from .common import BaseExtractor, ChapterExtractor, MangaExtractor, Message
|
||||||
from .. import text, util, exception
|
from .. import text, util
|
||||||
|
|
||||||
|
|
||||||
class ManganeloExtractor(BaseExtractor):
|
class ManganeloExtractor(BaseExtractor):
|
||||||
@@ -144,7 +144,7 @@ class ManganeloBookmarkExtractor(ManganeloExtractor):
|
|||||||
|
|
||||||
response = self.request(url, params=params)
|
response = self.request(url, params=params)
|
||||||
if response.history:
|
if response.history:
|
||||||
raise exception.AuthRequired(
|
raise self.exc.AuthRequired(
|
||||||
"authenticated cookies", "your bookmarks")
|
"authenticated cookies", "your bookmarks")
|
||||||
page = response.text
|
page = response.text
|
||||||
last = text.parse_int(text.extr(page, ">Last(", ")"))
|
last = text.parse_int(text.extr(page, ">Last(", ")"))
|
||||||
|
|||||||
@@ -9,7 +9,7 @@
|
|||||||
"""Extractors for https://mangapark.net/"""
|
"""Extractors for https://mangapark.net/"""
|
||||||
|
|
||||||
from .common import ChapterExtractor, Extractor, Message
|
from .common import ChapterExtractor, Extractor, Message
|
||||||
from .. import text, util, exception
|
from .. import text, util
|
||||||
from ..cache import memcache
|
from ..cache import memcache
|
||||||
|
|
||||||
BASE_PATTERN = (r"(?:https?://)?(?:www\.)?(?:"
|
BASE_PATTERN = (r"(?:https?://)?(?:www\.)?(?:"
|
||||||
@@ -175,5 +175,5 @@ class MangaparkMangaExtractor(MangaparkBase, Extractor):
|
|||||||
not lang or data["lang"] == lang):
|
not lang or data["lang"] == lang):
|
||||||
return data["id"]
|
return data["id"]
|
||||||
|
|
||||||
raise exception.AbortExtraction(
|
raise self.exc.AbortExtraction(
|
||||||
f"'{source}' does not match any available source")
|
f"'{source}' does not match any available source")
|
||||||
|
|||||||
@@ -7,7 +7,7 @@
|
|||||||
"""Extractors for https://mangaread.org/"""
|
"""Extractors for https://mangaread.org/"""
|
||||||
|
|
||||||
from .common import ChapterExtractor, MangaExtractor
|
from .common import ChapterExtractor, MangaExtractor
|
||||||
from .. import text, exception
|
from .. import text
|
||||||
|
|
||||||
|
|
||||||
class MangareadBase():
|
class MangareadBase():
|
||||||
@@ -40,7 +40,7 @@ class MangareadChapterExtractor(MangareadBase, ChapterExtractor):
|
|||||||
data = {"tags": list(text.split_html(tags)[::2])}
|
data = {"tags": list(text.split_html(tags)[::2])}
|
||||||
info = text.extr(page, '<h1 id="chapter-heading">', "</h1>")
|
info = text.extr(page, '<h1 id="chapter-heading">', "</h1>")
|
||||||
if not info:
|
if not info:
|
||||||
raise exception.NotFoundError("chapter")
|
raise self.exc.NotFoundError("chapter")
|
||||||
self.parse_chapter_string(info, data)
|
self.parse_chapter_string(info, data)
|
||||||
return data
|
return data
|
||||||
|
|
||||||
@@ -61,7 +61,7 @@ class MangareadMangaExtractor(MangareadBase, MangaExtractor):
|
|||||||
|
|
||||||
def chapters(self, page):
|
def chapters(self, page):
|
||||||
if 'class="error404' in page:
|
if 'class="error404' in page:
|
||||||
raise exception.NotFoundError("manga")
|
raise self.exc.NotFoundError("manga")
|
||||||
data = self.metadata(page)
|
data = self.metadata(page)
|
||||||
results = []
|
results = []
|
||||||
for chapter in text.extract_iter(
|
for chapter in text.extract_iter(
|
||||||
|
|||||||
@@ -9,7 +9,7 @@
|
|||||||
"""Extractors for https://www.mangoxo.com/"""
|
"""Extractors for https://www.mangoxo.com/"""
|
||||||
|
|
||||||
from .common import Extractor, Message
|
from .common import Extractor, Message
|
||||||
from .. import text, exception
|
from .. import text
|
||||||
from ..cache import cache
|
from ..cache import cache
|
||||||
import hashlib
|
import hashlib
|
||||||
import time
|
import time
|
||||||
@@ -50,7 +50,7 @@ class MangoxoExtractor(Extractor):
|
|||||||
|
|
||||||
data = response.json()
|
data = response.json()
|
||||||
if str(data.get("result")) != "1":
|
if str(data.get("result")) != "1":
|
||||||
raise exception.AuthenticationError(data.get("msg"))
|
raise self.exc.AuthenticationError(data.get("msg"))
|
||||||
return {"SESSION": self.cookies.get("SESSION")}
|
return {"SESSION": self.cookies.get("SESSION")}
|
||||||
|
|
||||||
def _sign_by_md5(self, username, password, token):
|
def _sign_by_md5(self, username, password, token):
|
||||||
|
|||||||
@@ -9,7 +9,7 @@
|
|||||||
"""Extractors for Mastodon instances"""
|
"""Extractors for Mastodon instances"""
|
||||||
|
|
||||||
from .common import BaseExtractor, Message
|
from .common import BaseExtractor, Message
|
||||||
from .. import text, exception
|
from .. import text
|
||||||
from ..cache import cache
|
from ..cache import cache
|
||||||
|
|
||||||
|
|
||||||
@@ -246,7 +246,7 @@ class MastodonAPI():
|
|||||||
if account["acct"] == username:
|
if account["acct"] == username:
|
||||||
self.extractor._check_moved(account)
|
self.extractor._check_moved(account)
|
||||||
return account["id"]
|
return account["id"]
|
||||||
raise exception.NotFoundError("account")
|
raise self.exc.NotFoundError("account")
|
||||||
|
|
||||||
def account_bookmarks(self):
|
def account_bookmarks(self):
|
||||||
"""Statuses the user has bookmarked"""
|
"""Statuses the user has bookmarked"""
|
||||||
@@ -312,16 +312,16 @@ class MastodonAPI():
|
|||||||
if code < 400:
|
if code < 400:
|
||||||
return response
|
return response
|
||||||
if code == 401:
|
if code == 401:
|
||||||
raise exception.AbortExtraction(
|
raise self.exc.AbortExtraction(
|
||||||
f"Invalid or missing access token.\nRun 'gallery-dl oauth:"
|
f"Invalid or missing access token.\nRun 'gallery-dl oauth:"
|
||||||
f"mastodon:{self.extractor.instance}' to obtain one.")
|
f"mastodon:{self.extractor.instance}' to obtain one.")
|
||||||
if code == 404:
|
if code == 404:
|
||||||
raise exception.NotFoundError()
|
raise self.exc.NotFoundError()
|
||||||
if code == 429:
|
if code == 429:
|
||||||
self.extractor.wait(until=self.extractor.parse_datetime_iso(
|
self.extractor.wait(until=self.extractor.parse_datetime_iso(
|
||||||
response.headers["x-ratelimit-reset"]))
|
response.headers["x-ratelimit-reset"]))
|
||||||
continue
|
continue
|
||||||
raise exception.AbortExtraction(response.json().get("error"))
|
raise self.exc.AbortExtraction(response.json().get("error"))
|
||||||
|
|
||||||
def _pagination(self, endpoint, params):
|
def _pagination(self, endpoint, params):
|
||||||
url = endpoint
|
url = endpoint
|
||||||
|
|||||||
@@ -7,7 +7,7 @@
|
|||||||
"""Extractors for Misskey instances"""
|
"""Extractors for Misskey instances"""
|
||||||
|
|
||||||
from .common import BaseExtractor, Message, Dispatch
|
from .common import BaseExtractor, Message, Dispatch
|
||||||
from .. import text, dt, exception
|
from .. import text, dt
|
||||||
from ..cache import memcache
|
from ..cache import memcache
|
||||||
|
|
||||||
|
|
||||||
@@ -239,7 +239,7 @@ class MisskeyAPI():
|
|||||||
def i_favorites(self):
|
def i_favorites(self):
|
||||||
endpoint = "/i/favorites"
|
endpoint = "/i/favorites"
|
||||||
if not self.access_token:
|
if not self.access_token:
|
||||||
raise exception.AuthenticationError()
|
raise self.exc.AuthenticationError()
|
||||||
data = {"i": self.access_token}
|
data = {"i": self.access_token}
|
||||||
return self._pagination(endpoint, data)
|
return self._pagination(endpoint, data)
|
||||||
|
|
||||||
|
|||||||
@@ -9,7 +9,7 @@
|
|||||||
"""Extractors for https://motherless.com/"""
|
"""Extractors for https://motherless.com/"""
|
||||||
|
|
||||||
from .common import Extractor, Message
|
from .common import Extractor, Message
|
||||||
from .. import text, dt, exception
|
from .. import text, dt
|
||||||
from ..cache import memcache
|
from ..cache import memcache
|
||||||
|
|
||||||
BASE_PATTERN = r"(?:https?://)?motherless\.com"
|
BASE_PATTERN = r"(?:https?://)?motherless\.com"
|
||||||
@@ -28,7 +28,7 @@ class MotherlessExtractor(Extractor):
|
|||||||
content = response.content
|
content = response.content
|
||||||
if (b'<div class="error-page' in content or
|
if (b'<div class="error-page' in content or
|
||||||
b">The page you're looking for cannot be found.<" in content):
|
b">The page you're looking for cannot be found.<" in content):
|
||||||
raise exception.NotFoundError("page")
|
raise self.exc.NotFoundError("page")
|
||||||
|
|
||||||
self.request = Extractor.request.__get__(self)
|
self.request = Extractor.request.__get__(self)
|
||||||
return response
|
return response
|
||||||
|
|||||||
@@ -7,7 +7,7 @@
|
|||||||
"""Extractors for https://myhentaigallery.com/"""
|
"""Extractors for https://myhentaigallery.com/"""
|
||||||
|
|
||||||
from .common import Extractor, GalleryExtractor, Message
|
from .common import Extractor, GalleryExtractor, Message
|
||||||
from .. import text, exception
|
from .. import text
|
||||||
|
|
||||||
BASE_PATTERN = r"(?:https?://)?myhentaigallery\.com"
|
BASE_PATTERN = r"(?:https?://)?myhentaigallery\.com"
|
||||||
|
|
||||||
@@ -40,7 +40,7 @@ class MyhentaigalleryGalleryExtractor(MyhentaigalleryBase, GalleryExtractor):
|
|||||||
title = title[4:]
|
title = title[4:]
|
||||||
|
|
||||||
if not title:
|
if not title:
|
||||||
raise exception.NotFoundError("gallery")
|
raise self.exc.NotFoundError("gallery")
|
||||||
|
|
||||||
return {
|
return {
|
||||||
"title" : text.unescape(title),
|
"title" : text.unescape(title),
|
||||||
|
|||||||
@@ -9,7 +9,7 @@
|
|||||||
"""Extractors for https://www.myportfolio.com/"""
|
"""Extractors for https://www.myportfolio.com/"""
|
||||||
|
|
||||||
from .common import Extractor, Message
|
from .common import Extractor, Message
|
||||||
from .. import text, exception
|
from .. import text
|
||||||
|
|
||||||
|
|
||||||
class MyportfolioGalleryExtractor(Extractor):
|
class MyportfolioGalleryExtractor(Extractor):
|
||||||
@@ -34,7 +34,7 @@ class MyportfolioGalleryExtractor(Extractor):
|
|||||||
url = "https://" + self.domain + (self.path or "")
|
url = "https://" + self.domain + (self.path or "")
|
||||||
response = self.request(url)
|
response = self.request(url)
|
||||||
if response.history and response.url.endswith(".adobe.com/missing"):
|
if response.history and response.url.endswith(".adobe.com/missing"):
|
||||||
raise exception.NotFoundError()
|
raise self.exc.NotFoundError()
|
||||||
page = response.text
|
page = response.text
|
||||||
|
|
||||||
projects = text.extr(
|
projects = text.extr(
|
||||||
@@ -72,7 +72,7 @@ class MyportfolioGalleryExtractor(Extractor):
|
|||||||
elif user:
|
elif user:
|
||||||
user, _, title = user.partition(" - ")
|
user, _, title = user.partition(" - ")
|
||||||
else:
|
else:
|
||||||
raise exception.NotFoundError()
|
raise self.exc.NotFoundError()
|
||||||
|
|
||||||
return {
|
return {
|
||||||
"user": text.unescape(user),
|
"user": text.unescape(user),
|
||||||
|
|||||||
@@ -9,7 +9,7 @@
|
|||||||
"""Extractors for https://www.newgrounds.com/"""
|
"""Extractors for https://www.newgrounds.com/"""
|
||||||
|
|
||||||
from .common import Extractor, Message, Dispatch
|
from .common import Extractor, Message, Dispatch
|
||||||
from .. import text, util, dt, exception
|
from .. import text, util, dt
|
||||||
from ..cache import cache
|
from ..cache import cache
|
||||||
import itertools
|
import itertools
|
||||||
|
|
||||||
@@ -143,7 +143,7 @@ class NewgroundsExtractor(Extractor):
|
|||||||
if result.get("success"):
|
if result.get("success"):
|
||||||
break
|
break
|
||||||
if "errors" in result:
|
if "errors" in result:
|
||||||
raise exception.AuthenticationError(
|
raise self.exc.AuthenticationError(
|
||||||
'"' + '", "'.join(result["errors"]) + '"')
|
'"' + '", "'.join(result["errors"]) + '"')
|
||||||
|
|
||||||
if result.get("requiresMfa"):
|
if result.get("requiresMfa"):
|
||||||
@@ -370,7 +370,7 @@ class NewgroundsExtractor(Extractor):
|
|||||||
return
|
return
|
||||||
if "errors" in data:
|
if "errors" in data:
|
||||||
msg = ", ".join(text.unescape(e) for e in data["errors"])
|
msg = ", ".join(text.unescape(e) for e in data["errors"])
|
||||||
raise exception.AbortExtraction(msg)
|
raise self.exc.AbortExtraction(msg)
|
||||||
|
|
||||||
items = data.get("items")
|
items = data.get("items")
|
||||||
if not items:
|
if not items:
|
||||||
|
|||||||
@@ -9,7 +9,7 @@
|
|||||||
"""Extractors for nijie instances"""
|
"""Extractors for nijie instances"""
|
||||||
|
|
||||||
from .common import BaseExtractor, Message, Dispatch, AsynchronousMixin
|
from .common import BaseExtractor, Message, Dispatch, AsynchronousMixin
|
||||||
from .. import text, dt, exception
|
from .. import text, dt
|
||||||
from ..cache import cache
|
from ..cache import cache
|
||||||
|
|
||||||
|
|
||||||
@@ -134,7 +134,7 @@ class NijieExtractor(AsynchronousMixin, BaseExtractor):
|
|||||||
if username:
|
if username:
|
||||||
return self.cookies_update(self._login_impl(username, password))
|
return self.cookies_update(self._login_impl(username, password))
|
||||||
|
|
||||||
raise exception.AuthenticationError("Username and password required")
|
raise self.exc.AuthenticationError("Username and password required")
|
||||||
|
|
||||||
@cache(maxage=90*86400, keyarg=1)
|
@cache(maxage=90*86400, keyarg=1)
|
||||||
def _login_impl(self, username, password):
|
def _login_impl(self, username, password):
|
||||||
@@ -145,7 +145,7 @@ class NijieExtractor(AsynchronousMixin, BaseExtractor):
|
|||||||
|
|
||||||
response = self.request(url, method="POST", data=data)
|
response = self.request(url, method="POST", data=data)
|
||||||
if "/login.php" in response.text:
|
if "/login.php" in response.text:
|
||||||
raise exception.AuthenticationError()
|
raise self.exc.AuthenticationError()
|
||||||
return self.cookies
|
return self.cookies
|
||||||
|
|
||||||
def _pagination(self, path):
|
def _pagination(self, path):
|
||||||
|
|||||||
@@ -9,7 +9,7 @@
|
|||||||
"""Utility classes to setup OAuth and link accounts to gallery-dl"""
|
"""Utility classes to setup OAuth and link accounts to gallery-dl"""
|
||||||
|
|
||||||
from .common import Extractor
|
from .common import Extractor
|
||||||
from .. import text, oauth, util, config, exception
|
from .. import text, oauth, util, config
|
||||||
from ..output import stdout_write
|
from ..output import stdout_write
|
||||||
from ..cache import cache, memcache
|
from ..cache import cache, memcache
|
||||||
|
|
||||||
@@ -74,7 +74,7 @@ class OAuthBase(Extractor):
|
|||||||
msg = "Received invalid"
|
msg = "Received invalid"
|
||||||
if exc:
|
if exc:
|
||||||
exc = f" ({exc.__class__.__name__}: {exc})"
|
exc = f" ({exc.__class__.__name__}: {exc})"
|
||||||
raise exception.AbortExtraction(f"{msg} OAuth response{exc}")
|
raise self.exc.AbortExtraction(f"{msg} OAuth response{exc}")
|
||||||
|
|
||||||
def send(self, msg):
|
def send(self, msg):
|
||||||
"""Send 'msg' to the socket opened in 'recv()'"""
|
"""Send 'msg' to the socket opened in 'recv()'"""
|
||||||
@@ -396,7 +396,7 @@ class OAuthMastodon(OAuthBase):
|
|||||||
data = self.request_json(url, method="POST", data=data)
|
data = self.request_json(url, method="POST", data=data)
|
||||||
|
|
||||||
if "client_id" not in data or "client_secret" not in data:
|
if "client_id" not in data or "client_secret" not in data:
|
||||||
raise exception.AbortExtraction(
|
raise self.exc.AbortExtraction(
|
||||||
f"Failed to register new application: '{data}'")
|
f"Failed to register new application: '{data}'")
|
||||||
|
|
||||||
data["client-id"] = data.pop("client_id")
|
data["client-id"] = data.pop("client_id")
|
||||||
|
|||||||
@@ -9,7 +9,7 @@
|
|||||||
"""Extractors for https://rule34.paheal.net/"""
|
"""Extractors for https://rule34.paheal.net/"""
|
||||||
|
|
||||||
from .common import Extractor, Message
|
from .common import Extractor, Message
|
||||||
from .. import text, exception
|
from .. import text
|
||||||
|
|
||||||
|
|
||||||
class PahealExtractor(Extractor):
|
class PahealExtractor(Extractor):
|
||||||
@@ -98,7 +98,7 @@ class PahealTagExtractor(PahealExtractor):
|
|||||||
while True:
|
while True:
|
||||||
try:
|
try:
|
||||||
page = self.request(base + str(pnum)).text
|
page = self.request(base + str(pnum)).text
|
||||||
except exception.HttpError as exc:
|
except self.exc.HttpError as exc:
|
||||||
if exc.status == 404:
|
if exc.status == 404:
|
||||||
return
|
return
|
||||||
raise
|
raise
|
||||||
@@ -152,7 +152,7 @@ class PahealPostExtractor(PahealExtractor):
|
|||||||
def get_posts(self):
|
def get_posts(self):
|
||||||
try:
|
try:
|
||||||
return (self._extract_post(self.groups[0]),)
|
return (self._extract_post(self.groups[0]),)
|
||||||
except exception.HttpError as exc:
|
except self.exc.HttpError as exc:
|
||||||
if exc.status == 404:
|
if exc.status == 404:
|
||||||
return ()
|
return ()
|
||||||
raise
|
raise
|
||||||
|
|||||||
@@ -9,7 +9,7 @@
|
|||||||
"""Extractors for https://www.patreon.com/"""
|
"""Extractors for https://www.patreon.com/"""
|
||||||
|
|
||||||
from .common import Extractor, Message
|
from .common import Extractor, Message
|
||||||
from .. import text, util, dt, exception
|
from .. import text, util, dt
|
||||||
from ..cache import memcache
|
from ..cache import memcache
|
||||||
import collections
|
import collections
|
||||||
import itertools
|
import itertools
|
||||||
@@ -347,7 +347,7 @@ class PatreonExtractor(Extractor):
|
|||||||
except Exception:
|
except Exception:
|
||||||
pass
|
pass
|
||||||
|
|
||||||
raise exception.AbortExtraction("Unable to extract bootstrap data")
|
raise self.exc.AbortExtraction("Unable to extract bootstrap data")
|
||||||
|
|
||||||
|
|
||||||
class PatreonCollectionExtractor(PatreonExtractor):
|
class PatreonCollectionExtractor(PatreonExtractor):
|
||||||
@@ -428,12 +428,12 @@ class PatreonCreatorExtractor(PatreonExtractor):
|
|||||||
data = None
|
data = None
|
||||||
data = self._extract_bootstrap(page)
|
data = self._extract_bootstrap(page)
|
||||||
return data["campaign"]["data"]["id"]
|
return data["campaign"]["data"]["id"]
|
||||||
except exception.ControlException:
|
except self.exc.ControlException:
|
||||||
pass
|
pass
|
||||||
except Exception as exc:
|
except Exception as exc:
|
||||||
if data:
|
if data:
|
||||||
self.log.debug(data)
|
self.log.debug(data)
|
||||||
raise exception.AbortExtraction(
|
raise self.exc.AbortExtraction(
|
||||||
f"Unable to extract campaign ID "
|
f"Unable to extract campaign ID "
|
||||||
f"({exc.__class__.__name__}: {exc})")
|
f"({exc.__class__.__name__}: {exc})")
|
||||||
|
|
||||||
@@ -442,7 +442,7 @@ class PatreonCreatorExtractor(PatreonExtractor):
|
|||||||
page, r'{\"value\":{\"campaign\":{\"data\":{\"id\":\"', '\\"'):
|
page, r'{\"value\":{\"campaign\":{\"data\":{\"id\":\"', '\\"'):
|
||||||
return cid
|
return cid
|
||||||
|
|
||||||
raise exception.AbortExtraction("Failed to extract campaign ID")
|
raise self.exc.AbortExtraction("Failed to extract campaign ID")
|
||||||
|
|
||||||
def _get_filters(self, params):
|
def _get_filters(self, params):
|
||||||
return "".join(
|
return "".join(
|
||||||
|
|||||||
@@ -9,7 +9,7 @@
|
|||||||
"""Extractors for https://pexels.com/"""
|
"""Extractors for https://pexels.com/"""
|
||||||
|
|
||||||
from .common import Extractor, Message
|
from .common import Extractor, Message
|
||||||
from .. import text, exception
|
from .. import text
|
||||||
|
|
||||||
BASE_PATTERN = r"(?:https?://)?(?:www\.)?pexels\.com"
|
BASE_PATTERN = r"(?:https?://)?(?:www\.)?pexels\.com"
|
||||||
|
|
||||||
@@ -174,7 +174,7 @@ class PexelsAPI():
|
|||||||
|
|
||||||
else:
|
else:
|
||||||
self.extractor.log.debug(response.text)
|
self.extractor.log.debug(response.text)
|
||||||
raise exception.AbortExtraction("API request failed")
|
raise self.exc.AbortExtraction("API request failed")
|
||||||
|
|
||||||
def _pagination(self, endpoint, params):
|
def _pagination(self, endpoint, params):
|
||||||
while True:
|
while True:
|
||||||
|
|||||||
@@ -9,7 +9,7 @@
|
|||||||
"""Extractors for Philomena sites"""
|
"""Extractors for Philomena sites"""
|
||||||
|
|
||||||
from .booru import BooruExtractor
|
from .booru import BooruExtractor
|
||||||
from .. import text, exception
|
from .. import text
|
||||||
|
|
||||||
|
|
||||||
class PhilomenaExtractor(BooruExtractor):
|
class PhilomenaExtractor(BooruExtractor):
|
||||||
@@ -113,7 +113,7 @@ class PhilomenaGalleryExtractor(PhilomenaExtractor):
|
|||||||
try:
|
try:
|
||||||
return {"gallery": self.api.gallery(self.groups[-1])}
|
return {"gallery": self.api.gallery(self.groups[-1])}
|
||||||
except IndexError:
|
except IndexError:
|
||||||
raise exception.NotFoundError("gallery")
|
raise self.exc.NotFoundError("gallery")
|
||||||
|
|
||||||
def posts(self):
|
def posts(self):
|
||||||
gallery_id = "gallery_id:" + self.groups[-1]
|
gallery_id = "gallery_id:" + self.groups[-1]
|
||||||
@@ -159,7 +159,7 @@ class PhilomenaAPI():
|
|||||||
|
|
||||||
# error
|
# error
|
||||||
self.extractor.log.debug(response.content)
|
self.extractor.log.debug(response.content)
|
||||||
raise exception.HttpError("", response)
|
raise self.exc.HttpError("", response)
|
||||||
|
|
||||||
def _pagination(self, endpoint, params):
|
def _pagination(self, endpoint, params):
|
||||||
extr = self.extractor
|
extr = self.extractor
|
||||||
|
|||||||
@@ -7,7 +7,7 @@
|
|||||||
"""Extractors for https://pholder.com/"""
|
"""Extractors for https://pholder.com/"""
|
||||||
|
|
||||||
from .common import Extractor, Message
|
from .common import Extractor, Message
|
||||||
from .. import text, util, exception
|
from .. import text, util
|
||||||
|
|
||||||
BASE_PATTERN = r"(?:https?://)?(?:www\.)?pholder\.com"
|
BASE_PATTERN = r"(?:https?://)?(?:www\.)?pholder\.com"
|
||||||
|
|
||||||
@@ -50,7 +50,7 @@ class PholderExtractor(Extractor):
|
|||||||
except ValueError:
|
except ValueError:
|
||||||
pass
|
pass
|
||||||
|
|
||||||
raise exception.AbortExtraction("Could not locate window.data JSON.")
|
raise self.exc.AbortExtraction("Could not locate window.data JSON.")
|
||||||
|
|
||||||
def _posts(self, page_url):
|
def _posts(self, page_url):
|
||||||
params = {"page": 1}
|
params = {"page": 1}
|
||||||
|
|||||||
@@ -10,7 +10,7 @@
|
|||||||
|
|
||||||
from .common import Extractor, Message
|
from .common import Extractor, Message
|
||||||
from ..cache import cache
|
from ..cache import cache
|
||||||
from .. import text, exception
|
from .. import text
|
||||||
|
|
||||||
BASE_PATTERN = r"(?:https?://)?www\.pillowfort\.social"
|
BASE_PATTERN = r"(?:https?://)?www\.pillowfort\.social"
|
||||||
|
|
||||||
@@ -108,7 +108,7 @@ class PillowfortExtractor(Extractor):
|
|||||||
response = self.request(url, method="POST", headers=headers, data=data)
|
response = self.request(url, method="POST", headers=headers, data=data)
|
||||||
|
|
||||||
if not response.history:
|
if not response.history:
|
||||||
raise exception.AuthenticationError()
|
raise self.exc.AuthenticationError()
|
||||||
|
|
||||||
return {
|
return {
|
||||||
cookie.name: cookie.value
|
cookie.name: cookie.value
|
||||||
|
|||||||
@@ -9,7 +9,7 @@
|
|||||||
"""Extractors for https://www.pinterest.com/"""
|
"""Extractors for https://www.pinterest.com/"""
|
||||||
|
|
||||||
from .common import Extractor, Message
|
from .common import Extractor, Message
|
||||||
from .. import text, util, exception
|
from .. import text, util
|
||||||
import itertools
|
import itertools
|
||||||
|
|
||||||
BASE_PATTERN = r"(?:https?://)?(?:\w+\.)?pinterest\.[\w.]+"
|
BASE_PATTERN = r"(?:https?://)?(?:\w+\.)?pinterest\.[\w.]+"
|
||||||
@@ -394,7 +394,7 @@ class PinterestPinitExtractor(PinterestExtractor):
|
|||||||
f"/{self.groups[0]}/redirect/")
|
f"/{self.groups[0]}/redirect/")
|
||||||
location = self.request_location(url)
|
location = self.request_location(url)
|
||||||
if not location:
|
if not location:
|
||||||
raise exception.NotFoundError("pin")
|
raise self.exc.NotFoundError("pin")
|
||||||
elif PinterestPinExtractor.pattern.match(location):
|
elif PinterestPinExtractor.pattern.match(location):
|
||||||
yield Message.Queue, location, {
|
yield Message.Queue, location, {
|
||||||
"_extractor": PinterestPinExtractor}
|
"_extractor": PinterestPinExtractor}
|
||||||
@@ -402,7 +402,7 @@ class PinterestPinitExtractor(PinterestExtractor):
|
|||||||
yield Message.Queue, location, {
|
yield Message.Queue, location, {
|
||||||
"_extractor": PinterestBoardExtractor}
|
"_extractor": PinterestBoardExtractor}
|
||||||
else:
|
else:
|
||||||
raise exception.NotFoundError("pin")
|
raise self.exc.NotFoundError("pin")
|
||||||
|
|
||||||
|
|
||||||
class PinterestAPI():
|
class PinterestAPI():
|
||||||
@@ -545,9 +545,9 @@ class PinterestAPI():
|
|||||||
return data
|
return data
|
||||||
if response.status_code == 404:
|
if response.status_code == 404:
|
||||||
resource = self.extractor.subcategory.rpartition("-")[2]
|
resource = self.extractor.subcategory.rpartition("-")[2]
|
||||||
raise exception.NotFoundError(resource)
|
raise self.exc.NotFoundError(resource)
|
||||||
self.extractor.log.debug("Server response: %s", response.text)
|
self.extractor.log.debug("Server response: %s", response.text)
|
||||||
raise exception.AbortExtraction("API request failed")
|
raise self.exc.AbortExtraction("API request failed")
|
||||||
|
|
||||||
def _pagination(self, resource, options):
|
def _pagination(self, resource, options):
|
||||||
while True:
|
while True:
|
||||||
|
|||||||
@@ -9,7 +9,7 @@
|
|||||||
"""Extractors for https://www.pixiv.net/"""
|
"""Extractors for https://www.pixiv.net/"""
|
||||||
|
|
||||||
from .common import Extractor, Message, Dispatch
|
from .common import Extractor, Message, Dispatch
|
||||||
from .. import text, util, dt, exception
|
from .. import text, util, dt
|
||||||
from ..cache import cache, memcache
|
from ..cache import cache, memcache
|
||||||
import itertools
|
import itertools
|
||||||
import hashlib
|
import hashlib
|
||||||
@@ -205,7 +205,7 @@ class PixivExtractor(Extractor):
|
|||||||
url = f"{base}0.{ext}"
|
url = f"{base}0.{ext}"
|
||||||
self.request(url, method="HEAD")
|
self.request(url, method="HEAD")
|
||||||
break
|
break
|
||||||
except exception.HttpError:
|
except self.exc.HttpError:
|
||||||
pass
|
pass
|
||||||
else:
|
else:
|
||||||
self.log.warning(
|
self.log.warning(
|
||||||
@@ -329,7 +329,7 @@ class PixivExtractor(Extractor):
|
|||||||
url = f"{base}_p0.{ext}"
|
url = f"{base}_p0.{ext}"
|
||||||
self.request(url, method="HEAD")
|
self.request(url, method="HEAD")
|
||||||
return url
|
return url
|
||||||
except exception.HttpError:
|
except self.exc.HttpError:
|
||||||
pass
|
pass
|
||||||
|
|
||||||
def _sanitize_ajax_caption(self, caption):
|
def _sanitize_ajax_caption(self, caption):
|
||||||
@@ -721,7 +721,7 @@ class PixivRankingExtractor(PixivExtractor):
|
|||||||
try:
|
try:
|
||||||
self.mode = mode = mode_map[mode]
|
self.mode = mode = mode_map[mode]
|
||||||
except KeyError:
|
except KeyError:
|
||||||
raise exception.AbortExtraction(f"Invalid mode '{mode}'")
|
raise self.exc.AbortExtraction(f"Invalid mode '{mode}'")
|
||||||
|
|
||||||
if date := query.get("date"):
|
if date := query.get("date"):
|
||||||
if len(date) == 8 and date.isdecimal():
|
if len(date) == 8 and date.isdecimal():
|
||||||
@@ -772,7 +772,7 @@ class PixivSearchExtractor(PixivExtractor):
|
|||||||
try:
|
try:
|
||||||
self.word = query["word"]
|
self.word = query["word"]
|
||||||
except KeyError:
|
except KeyError:
|
||||||
raise exception.AbortExtraction("Missing search term")
|
raise self.exc.AbortExtraction("Missing search term")
|
||||||
|
|
||||||
sort = query.get("order", "date_d")
|
sort = query.get("order", "date_d")
|
||||||
sort_map = {
|
sort_map = {
|
||||||
@@ -785,7 +785,7 @@ class PixivSearchExtractor(PixivExtractor):
|
|||||||
try:
|
try:
|
||||||
self.sort = sort = sort_map[sort]
|
self.sort = sort = sort_map[sort]
|
||||||
except KeyError:
|
except KeyError:
|
||||||
raise exception.AbortExtraction(f"Invalid search order '{sort}'")
|
raise self.exc.AbortExtraction(f"Invalid search order '{sort}'")
|
||||||
|
|
||||||
target = query.get("s_mode", "s_tag_full")
|
target = query.get("s_mode", "s_tag_full")
|
||||||
target_map = {
|
target_map = {
|
||||||
@@ -796,7 +796,7 @@ class PixivSearchExtractor(PixivExtractor):
|
|||||||
try:
|
try:
|
||||||
self.target = target = target_map[target]
|
self.target = target = target_map[target]
|
||||||
except KeyError:
|
except KeyError:
|
||||||
raise exception.AbortExtraction(f"Invalid search mode '{target}'")
|
raise self.exc.AbortExtraction(f"Invalid search mode '{target}'")
|
||||||
|
|
||||||
self.date_start = query.get("scd")
|
self.date_start = query.get("scd")
|
||||||
self.date_end = query.get("ecd")
|
self.date_end = query.get("ecd")
|
||||||
@@ -1153,7 +1153,7 @@ class PixivAppAPI():
|
|||||||
@cache(maxage=3600, keyarg=1)
|
@cache(maxage=3600, keyarg=1)
|
||||||
def _login_impl(self, username):
|
def _login_impl(self, username):
|
||||||
if not self.refresh_token:
|
if not self.refresh_token:
|
||||||
raise exception.AuthenticationError(
|
raise self.exc.AuthenticationError(
|
||||||
"'refresh-token' required.\n"
|
"'refresh-token' required.\n"
|
||||||
"Run `gallery-dl oauth:pixiv` to get one.")
|
"Run `gallery-dl oauth:pixiv` to get one.")
|
||||||
|
|
||||||
@@ -1178,7 +1178,7 @@ class PixivAppAPI():
|
|||||||
url, method="POST", headers=headers, data=data, fatal=False)
|
url, method="POST", headers=headers, data=data, fatal=False)
|
||||||
if response.status_code >= 400:
|
if response.status_code >= 400:
|
||||||
self.log.debug(response.text)
|
self.log.debug(response.text)
|
||||||
raise exception.AuthenticationError("Invalid refresh token")
|
raise self.exc.AuthenticationError("Invalid refresh token")
|
||||||
|
|
||||||
data = response.json()["response"]
|
data = response.json()["response"]
|
||||||
return data["user"], "Bearer " + data["access_token"]
|
return data["user"], "Bearer " + data["access_token"]
|
||||||
@@ -1305,7 +1305,7 @@ class PixivAppAPI():
|
|||||||
self.log.debug(data)
|
self.log.debug(data)
|
||||||
|
|
||||||
if response.status_code == 404:
|
if response.status_code == 404:
|
||||||
raise exception.NotFoundError()
|
raise self.exc.NotFoundError()
|
||||||
|
|
||||||
error = data["error"]
|
error = data["error"]
|
||||||
if "rate limit" in (error.get("message") or "").lower():
|
if "rate limit" in (error.get("message") or "").lower():
|
||||||
@@ -1315,7 +1315,7 @@ class PixivAppAPI():
|
|||||||
msg = (f"'{msg}'" if (msg := error.get("user_message")) else
|
msg = (f"'{msg}'" if (msg := error.get("user_message")) else
|
||||||
f"'{msg}'" if (msg := error.get("message")) else
|
f"'{msg}'" if (msg := error.get("message")) else
|
||||||
error)
|
error)
|
||||||
raise exception.AbortExtraction("API request failed: " + msg)
|
raise self.exc.AbortExtraction("API request failed: " + msg)
|
||||||
|
|
||||||
def _pagination(self, endpoint, params,
|
def _pagination(self, endpoint, params,
|
||||||
key_items="illusts", key_data=None, key_user=None):
|
key_items="illusts", key_data=None, key_user=None):
|
||||||
@@ -1326,7 +1326,7 @@ class PixivAppAPI():
|
|||||||
if key_user is not None and not data[key_user].get("id"):
|
if key_user is not None and not data[key_user].get("id"):
|
||||||
user = self.user_detail(self.extractor.user_id, fatal=False)
|
user = self.user_detail(self.extractor.user_id, fatal=False)
|
||||||
if user.get("error"):
|
if user.get("error"):
|
||||||
raise exception.NotFoundError("user")
|
raise self.exc.NotFoundError("user")
|
||||||
return
|
return
|
||||||
|
|
||||||
while True:
|
while True:
|
||||||
|
|||||||
@@ -9,7 +9,7 @@
|
|||||||
"""Extractors for https://www.pixnet.net/"""
|
"""Extractors for https://www.pixnet.net/"""
|
||||||
|
|
||||||
from .common import Extractor, Message
|
from .common import Extractor, Message
|
||||||
from .. import text, exception
|
from .. import text
|
||||||
|
|
||||||
BASE_PATTERN = r"(?:https?://)?(?!www\.)([\w-]+)\.pixnet.net"
|
BASE_PATTERN = r"(?:https?://)?(?!www\.)([\w-]+)\.pixnet.net"
|
||||||
|
|
||||||
@@ -52,7 +52,7 @@ class PixnetExtractor(Extractor):
|
|||||||
|
|
||||||
pnext = text.extr(page, 'class="nextBtn"', '>')
|
pnext = text.extr(page, 'class="nextBtn"', '>')
|
||||||
if pnext is None and 'name="albumpass">' in page:
|
if pnext is None and 'name="albumpass">' in page:
|
||||||
raise exception.AbortExtraction(
|
raise self.exc.AbortExtraction(
|
||||||
f"Album {self.item_id} is password-protected.")
|
f"Album {self.item_id} is password-protected.")
|
||||||
if "href" not in pnext:
|
if "href" not in pnext:
|
||||||
return
|
return
|
||||||
|
|||||||
@@ -9,7 +9,7 @@
|
|||||||
"""Extractors for https://www.plurk.com/"""
|
"""Extractors for https://www.plurk.com/"""
|
||||||
|
|
||||||
from .common import Extractor, Message
|
from .common import Extractor, Message
|
||||||
from .. import text, util, dt, exception
|
from .. import text, util, dt
|
||||||
|
|
||||||
|
|
||||||
class PlurkExtractor(Extractor):
|
class PlurkExtractor(Extractor):
|
||||||
@@ -59,7 +59,7 @@ class PlurkExtractor(Extractor):
|
|||||||
|
|
||||||
def _load(self, data):
|
def _load(self, data):
|
||||||
if not data:
|
if not data:
|
||||||
raise exception.NotFoundError("user")
|
raise self.exc.NotFoundError("user")
|
||||||
return util.json_loads(
|
return util.json_loads(
|
||||||
text.re(r"new Date\(([^)]+)\)").sub(r"\1", data))
|
text.re(r"new Date\(([^)]+)\)").sub(r"\1", data))
|
||||||
|
|
||||||
|
|||||||
@@ -7,7 +7,7 @@
|
|||||||
"""Extractors for http://www.poringa.net/"""
|
"""Extractors for http://www.poringa.net/"""
|
||||||
|
|
||||||
from .common import Extractor, Message
|
from .common import Extractor, Message
|
||||||
from .. import text, exception
|
from .. import text
|
||||||
from ..cache import cache
|
from ..cache import cache
|
||||||
import itertools
|
import itertools
|
||||||
|
|
||||||
@@ -32,7 +32,7 @@ class PoringaExtractor(Extractor):
|
|||||||
|
|
||||||
try:
|
try:
|
||||||
response = self.request(url)
|
response = self.request(url)
|
||||||
except exception.HttpError as exc:
|
except self.exc.HttpError as exc:
|
||||||
self.log.warning(
|
self.log.warning(
|
||||||
"Unable to fetch posts for '%s' (%s)", post_id, exc)
|
"Unable to fetch posts for '%s' (%s)", post_id, exc)
|
||||||
continue
|
continue
|
||||||
|
|||||||
@@ -9,7 +9,7 @@
|
|||||||
"""Extractors for https://www.pornhub.com/"""
|
"""Extractors for https://www.pornhub.com/"""
|
||||||
|
|
||||||
from .common import Extractor, Message, Dispatch
|
from .common import Extractor, Message, Dispatch
|
||||||
from .. import text, exception
|
from .. import text
|
||||||
|
|
||||||
BASE_PATTERN = r"(?:https?://)?(?:[\w-]+\.)?pornhub\.com"
|
BASE_PATTERN = r"(?:https?://)?(?:[\w-]+\.)?pornhub\.com"
|
||||||
|
|
||||||
@@ -109,7 +109,7 @@ class PornhubGalleryExtractor(PornhubExtractor):
|
|||||||
data = self.request_json(url, params=params)
|
data = self.request_json(url, params=params)
|
||||||
|
|
||||||
if not (images := data.get("photos")):
|
if not (images := data.get("photos")):
|
||||||
raise exception.AuthorizationError()
|
raise self.exc.AuthorizationError()
|
||||||
key = end = self._first
|
key = end = self._first
|
||||||
|
|
||||||
results = []
|
results = []
|
||||||
|
|||||||
@@ -7,7 +7,7 @@
|
|||||||
"""Extractors for Postmill instances"""
|
"""Extractors for Postmill instances"""
|
||||||
|
|
||||||
from .common import BaseExtractor, Message
|
from .common import BaseExtractor, Message
|
||||||
from .. import text, exception
|
from .. import text
|
||||||
|
|
||||||
|
|
||||||
class PostmillExtractor(BaseExtractor):
|
class PostmillExtractor(BaseExtractor):
|
||||||
@@ -102,7 +102,7 @@ class PostmillSubmissionsExtractor(PostmillExtractor):
|
|||||||
if response.history:
|
if response.history:
|
||||||
redirect_url = response.url
|
redirect_url = response.url
|
||||||
if redirect_url == self.root + "/login":
|
if redirect_url == self.root + "/login":
|
||||||
raise exception.AbortExtraction(
|
raise self.exc.AbortExtraction(
|
||||||
f"HTTP redirect to login page ({redirect_url})")
|
f"HTTP redirect to login page ({redirect_url})")
|
||||||
page = response.text
|
page = response.text
|
||||||
|
|
||||||
|
|||||||
@@ -9,7 +9,7 @@
|
|||||||
"""Extractors for https://readcomiconline.li/"""
|
"""Extractors for https://readcomiconline.li/"""
|
||||||
|
|
||||||
from .common import Extractor, ChapterExtractor, MangaExtractor
|
from .common import Extractor, ChapterExtractor, MangaExtractor
|
||||||
from .. import text, exception
|
from .. import text
|
||||||
import binascii
|
import binascii
|
||||||
|
|
||||||
BASE_PATTERN = r"(?i)(?:https?://)?(?:www\.)?readcomiconline\.(?:li|to)"
|
BASE_PATTERN = r"(?i)(?:https?://)?(?:www\.)?readcomiconline\.(?:li|to)"
|
||||||
@@ -36,7 +36,7 @@ class ReadcomiconlineBase():
|
|||||||
"the CAPTCHA, and press ENTER to continue", response.url)
|
"the CAPTCHA, and press ENTER to continue", response.url)
|
||||||
self.input()
|
self.input()
|
||||||
else:
|
else:
|
||||||
raise exception.AbortExtraction(
|
raise self.exc.AbortExtraction(
|
||||||
f"Redirect to \n{response.url}\nVisit this URL in your "
|
f"Redirect to \n{response.url}\nVisit this URL in your "
|
||||||
f"browser and solve the CAPTCHA to continue")
|
f"browser and solve the CAPTCHA to continue")
|
||||||
|
|
||||||
|
|||||||
@@ -9,7 +9,7 @@
|
|||||||
"""Extractors for https://www.reddit.com/"""
|
"""Extractors for https://www.reddit.com/"""
|
||||||
|
|
||||||
from .common import Extractor, Message
|
from .common import Extractor, Message
|
||||||
from .. import text, util, exception
|
from .. import text, util
|
||||||
from ..cache import cache
|
from ..cache import cache
|
||||||
|
|
||||||
|
|
||||||
@@ -525,7 +525,7 @@ class RedditAPI():
|
|||||||
|
|
||||||
if response.status_code != 200:
|
if response.status_code != 200:
|
||||||
self.log.debug("Server response: %s", data)
|
self.log.debug("Server response: %s", data)
|
||||||
raise exception.AuthenticationError(
|
raise self.exc.AuthenticationError(
|
||||||
f"\"{data.get('error')}: {data.get('message')}\"")
|
f"\"{data.get('error')}: {data.get('message')}\"")
|
||||||
return "Bearer " + data["access_token"]
|
return "Bearer " + data["access_token"]
|
||||||
|
|
||||||
@@ -555,16 +555,16 @@ class RedditAPI():
|
|||||||
try:
|
try:
|
||||||
data = response.json()
|
data = response.json()
|
||||||
except ValueError:
|
except ValueError:
|
||||||
raise exception.AbortExtraction(
|
raise self.exc.AbortExtraction(
|
||||||
text.remove_html(response.text))
|
text.remove_html(response.text))
|
||||||
|
|
||||||
if "error" in data:
|
if "error" in data:
|
||||||
if data["error"] == 403:
|
if data["error"] == 403:
|
||||||
raise exception.AuthorizationError()
|
raise self.exc.AuthorizationError()
|
||||||
if data["error"] == 404:
|
if data["error"] == 404:
|
||||||
raise exception.NotFoundError()
|
raise self.exc.NotFoundError()
|
||||||
self.log.debug(data)
|
self.log.debug(data)
|
||||||
raise exception.AbortExtraction(data.get("message"))
|
raise self.exc.AbortExtraction(data.get("message"))
|
||||||
return data
|
return data
|
||||||
|
|
||||||
def _pagination(self, endpoint, params):
|
def _pagination(self, endpoint, params):
|
||||||
@@ -592,7 +592,7 @@ class RedditAPI():
|
|||||||
if post["num_comments"] and self.comments:
|
if post["num_comments"] and self.comments:
|
||||||
try:
|
try:
|
||||||
yield self.submission(post["id"])
|
yield self.submission(post["id"])
|
||||||
except exception.AuthorizationError:
|
except self.exc.AuthorizationError:
|
||||||
pass
|
pass
|
||||||
else:
|
else:
|
||||||
yield post, ()
|
yield post, ()
|
||||||
|
|||||||
@@ -9,7 +9,7 @@
|
|||||||
"""Extractors for https://rule34.xyz/"""
|
"""Extractors for https://rule34.xyz/"""
|
||||||
|
|
||||||
from .booru import BooruExtractor
|
from .booru import BooruExtractor
|
||||||
from .. import text, exception
|
from .. import text
|
||||||
from ..cache import cache
|
from ..cache import cache
|
||||||
import collections
|
import collections
|
||||||
|
|
||||||
@@ -129,7 +129,7 @@ class Rule34xyzExtractor(BooruExtractor):
|
|||||||
|
|
||||||
if jwt := response.get("jwt"):
|
if jwt := response.get("jwt"):
|
||||||
return "Bearer " + jwt
|
return "Bearer " + jwt
|
||||||
raise exception.AuthenticationError(
|
raise self.exc.AuthenticationError(
|
||||||
(msg := response.get("message")) and f'"{msg}"')
|
(msg := response.get("message")) and f'"{msg}"')
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -10,7 +10,7 @@
|
|||||||
|
|
||||||
from .booru import BooruExtractor
|
from .booru import BooruExtractor
|
||||||
from .common import Message
|
from .common import Message
|
||||||
from .. import text, util, exception
|
from .. import text, util
|
||||||
from ..cache import cache
|
from ..cache import cache
|
||||||
import collections
|
import collections
|
||||||
|
|
||||||
@@ -284,7 +284,7 @@ class SankakuAPI():
|
|||||||
if response.status_code == 429:
|
if response.status_code == 429:
|
||||||
until = response.headers.get("X-RateLimit-Reset")
|
until = response.headers.get("X-RateLimit-Reset")
|
||||||
if not until and b"_tags-explicit-limit" in response.content:
|
if not until and b"_tags-explicit-limit" in response.content:
|
||||||
raise exception.AuthorizationError(
|
raise self.exc.AuthorizationError(
|
||||||
"Search tag limit exceeded")
|
"Search tag limit exceeded")
|
||||||
seconds = None if until else 600
|
seconds = None if until else 600
|
||||||
self.extractor.wait(until=until, seconds=seconds)
|
self.extractor.wait(until=until, seconds=seconds)
|
||||||
@@ -305,7 +305,7 @@ class SankakuAPI():
|
|||||||
code = f"'{code.rpartition('__')[2].replace('-', ' ')}'"
|
code = f"'{code.rpartition('__')[2].replace('-', ' ')}'"
|
||||||
except Exception:
|
except Exception:
|
||||||
pass
|
pass
|
||||||
raise exception.AbortExtraction(code)
|
raise self.exc.AbortExtraction(code)
|
||||||
return data
|
return data
|
||||||
|
|
||||||
def _pagination(self, endpoint, params):
|
def _pagination(self, endpoint, params):
|
||||||
@@ -365,5 +365,5 @@ def _authenticate_impl(extr, username, password):
|
|||||||
data = response.json()
|
data = response.json()
|
||||||
|
|
||||||
if response.status_code >= 400 or not data.get("success"):
|
if response.status_code >= 400 or not data.get("success"):
|
||||||
raise exception.AuthenticationError(data.get("error"))
|
raise extr.exc.AuthenticationError(data.get("error"))
|
||||||
return "Bearer " + data["access_token"]
|
return "Bearer " + data["access_token"]
|
||||||
|
|||||||
@@ -9,7 +9,7 @@
|
|||||||
"""Extractors for https://niyaniya.moe/"""
|
"""Extractors for https://niyaniya.moe/"""
|
||||||
|
|
||||||
from .common import GalleryExtractor, Extractor, Message
|
from .common import GalleryExtractor, Extractor, Message
|
||||||
from .. import text, exception
|
from .. import text
|
||||||
import collections
|
import collections
|
||||||
|
|
||||||
BASE_PATTERN = (
|
BASE_PATTERN = (
|
||||||
@@ -66,7 +66,7 @@ class SchalenetworkExtractor(Extractor):
|
|||||||
if token := self.config("token"):
|
if token := self.config("token"):
|
||||||
return "Bearer " + token.rpartition(' ')[2]
|
return "Bearer " + token.rpartition(' ')[2]
|
||||||
if required:
|
if required:
|
||||||
raise exception.AuthRequired("'token'", "your favorites")
|
raise self.exc.AuthRequired("'token'", "your favorites")
|
||||||
|
|
||||||
def _crt(self):
|
def _crt(self):
|
||||||
crt = self.config("crt")
|
crt = self.config("crt")
|
||||||
@@ -88,7 +88,7 @@ class SchalenetworkExtractor(Extractor):
|
|||||||
msg = None
|
msg = None
|
||||||
else:
|
else:
|
||||||
msg = f"{exc.status} {exc.response.reason}"
|
msg = f"{exc.status} {exc.response.reason}"
|
||||||
raise exception.AuthRequired(
|
raise self.exc.AuthRequired(
|
||||||
"'crt' query parameter & matching 'user-agent'", None, msg)
|
"'crt' query parameter & matching 'user-agent'", None, msg)
|
||||||
|
|
||||||
|
|
||||||
@@ -153,7 +153,7 @@ class SchalenetworkGalleryExtractor(SchalenetworkExtractor, GalleryExtractor):
|
|||||||
try:
|
try:
|
||||||
data_fmt = self.request_json(
|
data_fmt = self.request_json(
|
||||||
url, method="POST", headers=headers)
|
url, method="POST", headers=headers)
|
||||||
except exception.HttpError as exc:
|
except self.exc.HttpError as exc:
|
||||||
self._require_auth(exc)
|
self._require_auth(exc)
|
||||||
|
|
||||||
self.fmt = self._select_format(data_fmt["data"])
|
self.fmt = self._select_format(data_fmt["data"])
|
||||||
@@ -217,7 +217,7 @@ class SchalenetworkGalleryExtractor(SchalenetworkExtractor, GalleryExtractor):
|
|||||||
self.log.debug("%s: Format %s is not available",
|
self.log.debug("%s: Format %s is not available",
|
||||||
self.groups[1], fmtid)
|
self.groups[1], fmtid)
|
||||||
else:
|
else:
|
||||||
raise exception.NotFoundError("format")
|
raise self.exc.NotFoundError("format")
|
||||||
|
|
||||||
self.log.debug("%s: Selected format %s", self.groups[1], fmtid)
|
self.log.debug("%s: Selected format %s", self.groups[1], fmtid)
|
||||||
fmt["w"] = fmtid
|
fmt["w"] = fmtid
|
||||||
|
|||||||
@@ -9,7 +9,7 @@
|
|||||||
"""Extractors for https://scrolller.com/"""
|
"""Extractors for https://scrolller.com/"""
|
||||||
|
|
||||||
from .common import Extractor, Message
|
from .common import Extractor, Message
|
||||||
from .. import text, util, exception
|
from .. import text, util
|
||||||
from ..cache import cache
|
from ..cache import cache
|
||||||
|
|
||||||
BASE_PATTERN = r"(?:https?://)?(?:www\.)?scrolller\.com"
|
BASE_PATTERN = r"(?:https?://)?(?:www\.)?scrolller\.com"
|
||||||
@@ -82,9 +82,9 @@ class ScrolllerExtractor(Extractor):
|
|||||||
|
|
||||||
try:
|
try:
|
||||||
data = self._request_graphql("LoginQuery", variables, False)
|
data = self._request_graphql("LoginQuery", variables, False)
|
||||||
except exception.HttpError as exc:
|
except self.exc.HttpError as exc:
|
||||||
if exc.status == 403:
|
if exc.status == 403:
|
||||||
raise exception.AuthenticationError()
|
raise self.exc.AuthenticationError()
|
||||||
raise
|
raise
|
||||||
|
|
||||||
return data["login"]["token"]
|
return data["login"]["token"]
|
||||||
@@ -206,7 +206,7 @@ class ScrolllerFollowingExtractor(ScrolllerExtractor):
|
|||||||
self.login()
|
self.login()
|
||||||
|
|
||||||
if not self.auth_token:
|
if not self.auth_token:
|
||||||
raise exception.AuthorizationError("Login required")
|
raise self.exc.AuthorizationError("Login required")
|
||||||
|
|
||||||
variables = {
|
variables = {
|
||||||
"iterator": None,
|
"iterator": None,
|
||||||
|
|||||||
@@ -9,7 +9,7 @@
|
|||||||
"""Extractors for https://seiga.nicovideo.jp/"""
|
"""Extractors for https://seiga.nicovideo.jp/"""
|
||||||
|
|
||||||
from .common import Extractor, Message
|
from .common import Extractor, Message
|
||||||
from .. import text, util, exception
|
from .. import text, util
|
||||||
from ..cache import cache
|
from ..cache import cache
|
||||||
|
|
||||||
|
|
||||||
@@ -45,7 +45,7 @@ class SeigaExtractor(Extractor):
|
|||||||
url = f"{self.root}/image/source/{image_id}"
|
url = f"{self.root}/image/source/{image_id}"
|
||||||
location = self.request_location(url, notfound="image")
|
location = self.request_location(url, notfound="image")
|
||||||
if "nicovideo.jp/login" in location:
|
if "nicovideo.jp/login" in location:
|
||||||
raise exception.AbortExtraction(
|
raise self.exc.AbortExtraction(
|
||||||
f"HTTP redirect to login page ({location.partition('?')[0]})")
|
f"HTTP redirect to login page ({location.partition('?')[0]})")
|
||||||
return location.replace("/o/", "/priv/", 1)
|
return location.replace("/o/", "/priv/", 1)
|
||||||
|
|
||||||
@@ -57,7 +57,7 @@ class SeigaExtractor(Extractor):
|
|||||||
if username:
|
if username:
|
||||||
return self.cookies_update(self._login_impl(username, password))
|
return self.cookies_update(self._login_impl(username, password))
|
||||||
|
|
||||||
raise exception.AuthorizationError(
|
raise self.exc.AuthorizationError(
|
||||||
"username & password or 'user_session' cookie required")
|
"username & password or 'user_session' cookie required")
|
||||||
|
|
||||||
@cache(maxage=365*86400, keyarg=1)
|
@cache(maxage=365*86400, keyarg=1)
|
||||||
@@ -76,7 +76,7 @@ class SeigaExtractor(Extractor):
|
|||||||
response = self.request(url, method="POST", data=data)
|
response = self.request(url, method="POST", data=data)
|
||||||
|
|
||||||
if "message=cant_login" in response.url:
|
if "message=cant_login" in response.url:
|
||||||
raise exception.AuthenticationError()
|
raise self.exc.AuthenticationError()
|
||||||
|
|
||||||
if "/mfa" in response.url:
|
if "/mfa" in response.url:
|
||||||
page = response.text
|
page = response.text
|
||||||
@@ -93,7 +93,7 @@ class SeigaExtractor(Extractor):
|
|||||||
|
|
||||||
if not response.history and \
|
if not response.history and \
|
||||||
b"Confirmation code is incorrect" in response.content:
|
b"Confirmation code is incorrect" in response.content:
|
||||||
raise exception.AuthenticationError(
|
raise self.exc.AuthenticationError(
|
||||||
"Incorrect Confirmation Code")
|
"Incorrect Confirmation Code")
|
||||||
|
|
||||||
return {
|
return {
|
||||||
@@ -133,7 +133,7 @@ class SeigaUserExtractor(SeigaExtractor):
|
|||||||
))[0]
|
))[0]
|
||||||
|
|
||||||
if not data["name"] and "ユーザー情報が取得出来ませんでした" in page:
|
if not data["name"] and "ユーザー情報が取得出来ませんでした" in page:
|
||||||
raise exception.NotFoundError("user")
|
raise self.exc.NotFoundError("user")
|
||||||
|
|
||||||
return {
|
return {
|
||||||
"user": {
|
"user": {
|
||||||
|
|||||||
@@ -9,7 +9,7 @@
|
|||||||
"""Extract hentai-manga from https://www.simply-hentai.com/"""
|
"""Extract hentai-manga from https://www.simply-hentai.com/"""
|
||||||
|
|
||||||
from .common import GalleryExtractor, Extractor, Message
|
from .common import GalleryExtractor, Extractor, Message
|
||||||
from .. import text, util, exception
|
from .. import text, util
|
||||||
|
|
||||||
|
|
||||||
class SimplyhentaiGalleryExtractor(GalleryExtractor):
|
class SimplyhentaiGalleryExtractor(GalleryExtractor):
|
||||||
@@ -38,7 +38,7 @@ class SimplyhentaiGalleryExtractor(GalleryExtractor):
|
|||||||
title = extr('<meta property="og:title" content="', '"')
|
title = extr('<meta property="og:title" content="', '"')
|
||||||
image = extr('<meta property="og:image" content="', '"')
|
image = extr('<meta property="og:image" content="', '"')
|
||||||
if not title:
|
if not title:
|
||||||
raise exception.NotFoundError("gallery")
|
raise self.exc.NotFoundError("gallery")
|
||||||
data = {
|
data = {
|
||||||
"title" : text.unescape(title),
|
"title" : text.unescape(title),
|
||||||
"gallery_id": text.parse_int(image.split("/")[-2]),
|
"gallery_id": text.parse_int(image.split("/")[-2]),
|
||||||
|
|||||||
@@ -9,7 +9,7 @@
|
|||||||
"""Extractors for https://www.smugmug.com/"""
|
"""Extractors for https://www.smugmug.com/"""
|
||||||
|
|
||||||
from .common import Extractor, Message
|
from .common import Extractor, Message
|
||||||
from .. import text, oauth, exception
|
from .. import text, oauth
|
||||||
|
|
||||||
BASE_PATTERN = (
|
BASE_PATTERN = (
|
||||||
r"(?:smugmug:(?!album:)(?:https?://)?([^/]+)|"
|
r"(?:smugmug:(?!album:)(?:https?://)?([^/]+)|"
|
||||||
@@ -209,17 +209,17 @@ class SmugmugAPI(oauth.OAuth1API):
|
|||||||
if 200 <= data["Code"] < 400:
|
if 200 <= data["Code"] < 400:
|
||||||
return data
|
return data
|
||||||
if data["Code"] == 404:
|
if data["Code"] == 404:
|
||||||
raise exception.NotFoundError()
|
raise self.exc.NotFoundError()
|
||||||
if data["Code"] == 429:
|
if data["Code"] == 429:
|
||||||
raise exception.AbortExtraction("Rate limit reached")
|
raise self.exc.AbortExtraction("Rate limit reached")
|
||||||
self.log.debug(data)
|
self.log.debug(data)
|
||||||
raise exception.AbortExtraction("API request failed")
|
raise self.exc.AbortExtraction("API request failed")
|
||||||
|
|
||||||
def _expansion(self, endpoint, expands, params=None):
|
def _expansion(self, endpoint, expands, params=None):
|
||||||
endpoint = self._extend(endpoint, expands)
|
endpoint = self._extend(endpoint, expands)
|
||||||
result = self._apply_expansions(self._call(endpoint, params), expands)
|
result = self._apply_expansions(self._call(endpoint, params), expands)
|
||||||
if not result:
|
if not result:
|
||||||
raise exception.NotFoundError()
|
raise self.exc.NotFoundError()
|
||||||
return result[0]
|
return result[0]
|
||||||
|
|
||||||
def _pagination(self, endpoint, expands=None):
|
def _pagination(self, endpoint, expands=None):
|
||||||
|
|||||||
@@ -7,7 +7,7 @@
|
|||||||
"""Extractors for https://www.steamgriddb.com"""
|
"""Extractors for https://www.steamgriddb.com"""
|
||||||
|
|
||||||
from .common import Extractor, Message
|
from .common import Extractor, Message
|
||||||
from .. import text, exception
|
from .. import text
|
||||||
|
|
||||||
|
|
||||||
BASE_PATTERN = r"(?:https?://)?(?:www\.)?steamgriddb\.com"
|
BASE_PATTERN = r"(?:https?://)?(?:www\.)?steamgriddb\.com"
|
||||||
@@ -74,7 +74,7 @@ class SteamgriddbExtractor(Extractor):
|
|||||||
def _call(self, endpoint, **kwargs):
|
def _call(self, endpoint, **kwargs):
|
||||||
data = self.request_json(self.root + endpoint, **kwargs)
|
data = self.request_json(self.root + endpoint, **kwargs)
|
||||||
if not data["success"]:
|
if not data["success"]:
|
||||||
raise exception.AbortExtraction(data["error"])
|
raise self.exc.AbortExtraction(data["error"])
|
||||||
return data["data"]
|
return data["data"]
|
||||||
|
|
||||||
|
|
||||||
@@ -96,7 +96,7 @@ class SteamgriddbAssetsExtractor(SteamgriddbExtractor):
|
|||||||
sort = self.config("sort", "score_desc")
|
sort = self.config("sort", "score_desc")
|
||||||
if sort not in ("score_desc", "score_asc", "score_old_desc",
|
if sort not in ("score_desc", "score_asc", "score_old_desc",
|
||||||
"score_old_asc", "age_desc", "age_asc"):
|
"score_old_asc", "age_desc", "age_asc"):
|
||||||
raise exception.AbortExtraction(f"Invalid sort '{sort}'")
|
raise self.exc.AbortExtraction(f"Invalid sort '{sort}'")
|
||||||
|
|
||||||
json = {
|
json = {
|
||||||
"static" : self.config("static", True),
|
"static" : self.config("static", True),
|
||||||
@@ -149,7 +149,7 @@ class SteamgriddbAssetsExtractor(SteamgriddbExtractor):
|
|||||||
|
|
||||||
for i in value:
|
for i in value:
|
||||||
if i not in valid_values:
|
if i not in valid_values:
|
||||||
raise exception.AbortExtraction(f"Invalid {type_name} '{i}'")
|
raise self.exc.AbortExtraction(f"Invalid {type_name} '{i}'")
|
||||||
|
|
||||||
return value
|
return value
|
||||||
|
|
||||||
@@ -169,7 +169,7 @@ class SteamgriddbAssetExtractor(SteamgriddbExtractor):
|
|||||||
endpoint = "/api/public/asset/" + self.asset_type + "/" + self.asset_id
|
endpoint = "/api/public/asset/" + self.asset_type + "/" + self.asset_id
|
||||||
asset = self._call(endpoint)["asset"]
|
asset = self._call(endpoint)["asset"]
|
||||||
if asset is None:
|
if asset is None:
|
||||||
raise exception.NotFoundError(
|
raise self.exc.NotFoundError(
|
||||||
f"asset ({self.asset_type}:{self.asset_id})")
|
f"asset ({self.asset_type}:{self.asset_id})")
|
||||||
return (asset,)
|
return (asset,)
|
||||||
|
|
||||||
|
|||||||
@@ -9,7 +9,7 @@
|
|||||||
"""Extractors for https://www.subscribestar.com/"""
|
"""Extractors for https://www.subscribestar.com/"""
|
||||||
|
|
||||||
from .common import Extractor, Message
|
from .common import Extractor, Message
|
||||||
from .. import text, util, exception
|
from .. import text, util
|
||||||
from ..cache import cache
|
from ..cache import cache
|
||||||
|
|
||||||
BASE_PATTERN = r"(?:https?://)?(?:www\.)?subscribestar\.(com|adult)"
|
BASE_PATTERN = r"(?:https?://)?(?:www\.)?subscribestar\.(com|adult)"
|
||||||
@@ -71,7 +71,7 @@ class SubscribestarExtractor(Extractor):
|
|||||||
if response.history and (
|
if response.history and (
|
||||||
"/verify_subscriber" in response.url or
|
"/verify_subscriber" in response.url or
|
||||||
"/age_confirmation_warning" in response.url):
|
"/age_confirmation_warning" in response.url):
|
||||||
raise exception.AbortExtraction(
|
raise self.exc.AbortExtraction(
|
||||||
"HTTP redirect to " + response.url)
|
"HTTP redirect to " + response.url)
|
||||||
|
|
||||||
content = response.content
|
content = response.content
|
||||||
@@ -127,7 +127,7 @@ class SubscribestarExtractor(Extractor):
|
|||||||
msg = f'"{errors.popitem()[1]}"'
|
msg = f'"{errors.popitem()[1]}"'
|
||||||
except Exception:
|
except Exception:
|
||||||
msg = None
|
msg = None
|
||||||
raise exception.AuthenticationError(msg)
|
raise self.exc.AuthenticationError(msg)
|
||||||
return response
|
return response
|
||||||
|
|
||||||
# submit username / email
|
# submit username / email
|
||||||
|
|||||||
@@ -9,7 +9,7 @@
|
|||||||
"""Extractors for https://tapas.io/"""
|
"""Extractors for https://tapas.io/"""
|
||||||
|
|
||||||
from .common import Extractor, Message
|
from .common import Extractor, Message
|
||||||
from .. import text, exception
|
from .. import text
|
||||||
from ..cache import cache
|
from ..cache import cache
|
||||||
|
|
||||||
BASE_PATTERN = r"(?:https?://)?tapas\.io"
|
BASE_PATTERN = r"(?:https?://)?tapas\.io"
|
||||||
@@ -61,7 +61,7 @@ class TapasExtractor(Extractor):
|
|||||||
|
|
||||||
if not response.history or \
|
if not response.history or \
|
||||||
"/account/signin_fail" in response.history[-1].url:
|
"/account/signin_fail" in response.history[-1].url:
|
||||||
raise exception.AuthenticationError()
|
raise self.exc.AuthenticationError()
|
||||||
|
|
||||||
return {"_cpc_": response.history[0].cookies.get("_cpc_")}
|
return {"_cpc_": response.history[0].cookies.get("_cpc_")}
|
||||||
|
|
||||||
@@ -84,7 +84,7 @@ class TapasEpisodeExtractor(TapasExtractor):
|
|||||||
|
|
||||||
episode = data["episode"]
|
episode = data["episode"]
|
||||||
if not episode.get("free") and not episode.get("unlocked"):
|
if not episode.get("free") and not episode.get("unlocked"):
|
||||||
raise exception.AuthorizationError(
|
raise self.exc.AuthorizationError(
|
||||||
f"{episode_id}: Episode '{episode['title']}' not unlocked")
|
f"{episode_id}: Episode '{episode['title']}' not unlocked")
|
||||||
|
|
||||||
html = data["html"]
|
html = data["html"]
|
||||||
|
|||||||
@@ -7,7 +7,7 @@
|
|||||||
"""Extractors for https://thefap.net/"""
|
"""Extractors for https://thefap.net/"""
|
||||||
|
|
||||||
from .common import Extractor, Message
|
from .common import Extractor, Message
|
||||||
from .. import text, exception
|
from .. import text
|
||||||
|
|
||||||
BASE_PATTERN = r"(?:https?://)?(?:www\.)?thefap\.net"
|
BASE_PATTERN = r"(?:https?://)?(?:www\.)?thefap\.net"
|
||||||
|
|
||||||
@@ -47,7 +47,7 @@ class ThefapPostExtractor(ThefapExtractor):
|
|||||||
|
|
||||||
page = self.request(self.root + path).text
|
page = self.request(self.root + path).text
|
||||||
if "Not Found" in page:
|
if "Not Found" in page:
|
||||||
raise exception.NotFoundError("post")
|
raise self.exc.NotFoundError("post")
|
||||||
|
|
||||||
if model_name := text.extr(page, "<title>", " / "):
|
if model_name := text.extr(page, "<title>", " / "):
|
||||||
model_name = text.unescape(model_name)
|
model_name = text.unescape(model_name)
|
||||||
@@ -86,7 +86,7 @@ class ThefapModelExtractor(ThefapExtractor):
|
|||||||
page = self.request(url).text
|
page = self.request(url).text
|
||||||
|
|
||||||
if 'id="content"' not in page:
|
if 'id="content"' not in page:
|
||||||
raise exception.NotFoundError("model")
|
raise self.exc.NotFoundError("model")
|
||||||
|
|
||||||
if model_name := text.extr(page, "<h2", "</h2>"):
|
if model_name := text.extr(page, "<h2", "</h2>"):
|
||||||
model_name = text.unescape(model_name[model_name.find(">")+1:])
|
model_name = text.unescape(model_name[model_name.find(">")+1:])
|
||||||
|
|||||||
@@ -7,7 +7,7 @@
|
|||||||
"""Extractors for https://www.tiktok.com/"""
|
"""Extractors for https://www.tiktok.com/"""
|
||||||
|
|
||||||
from .common import Extractor, Message, Dispatch
|
from .common import Extractor, Message, Dispatch
|
||||||
from .. import text, util, ytdl, exception
|
from .. import text, util, ytdl
|
||||||
import functools
|
import functools
|
||||||
import itertools
|
import itertools
|
||||||
import binascii
|
import binascii
|
||||||
@@ -166,7 +166,7 @@ class TiktokExtractor(Extractor):
|
|||||||
try:
|
try:
|
||||||
response = self.request(url)
|
response = self.request(url)
|
||||||
if response.history and "/login" in response.url:
|
if response.history and "/login" in response.url:
|
||||||
raise exception.AuthorizationError(
|
raise self.exc.AuthorizationError(
|
||||||
"HTTP redirect to login page "
|
"HTTP redirect to login page "
|
||||||
f"('{response.url.partition('?')[0]}')")
|
f"('{response.url.partition('?')[0]}')")
|
||||||
html = response.text
|
html = response.text
|
||||||
@@ -227,14 +227,14 @@ class TiktokExtractor(Extractor):
|
|||||||
data["webapp.app-context"]
|
data["webapp.app-context"]
|
||||||
data = data["webapp.user-detail"]
|
data = data["webapp.user-detail"]
|
||||||
if not self._check_status_code(data, profile_url, "profile"):
|
if not self._check_status_code(data, profile_url, "profile"):
|
||||||
raise exception.ExtractionError(
|
raise self.exc.ExtractionError(
|
||||||
f"{profile_url}: could not extract rehydration data")
|
f"{profile_url}: could not extract rehydration data")
|
||||||
try:
|
try:
|
||||||
for key in additional_keys:
|
for key in additional_keys:
|
||||||
data = data[key]
|
data = data[key]
|
||||||
except KeyError as exc:
|
except KeyError as exc:
|
||||||
self.log.traceback(exc)
|
self.log.traceback(exc)
|
||||||
raise exception.ExtractionError(
|
raise self.exc.ExtractionError(
|
||||||
"%s: could not extract rehydration data (%s)",
|
"%s: could not extract rehydration data (%s)",
|
||||||
profile_url, ", ".join(additional_keys))
|
profile_url, ", ".join(additional_keys))
|
||||||
return data
|
return data
|
||||||
@@ -258,7 +258,7 @@ class TiktokExtractor(Extractor):
|
|||||||
if test.digest() == expected:
|
if test.digest() == expected:
|
||||||
break
|
break
|
||||||
else:
|
else:
|
||||||
raise exception.ExtractionError("failed to find matching digest")
|
raise self.exc.ExtractionError("failed to find matching digest")
|
||||||
|
|
||||||
# extract cookie names
|
# extract cookie names
|
||||||
wci = text.extr(text.extr(html, 'id="wci"', '>'), 'class="', '"')
|
wci = text.extr(text.extr(html, 'id="wci"', '>'), 'class="', '"')
|
||||||
@@ -278,7 +278,7 @@ class TiktokExtractor(Extractor):
|
|||||||
sec_uid = self._extract_id(
|
sec_uid = self._extract_id(
|
||||||
profile_url, user_name, r"MS4wLjABAAAA[\w-]{64}", "secUid")
|
profile_url, user_name, r"MS4wLjABAAAA[\w-]{64}", "secUid")
|
||||||
if sec_uid is None:
|
if sec_uid is None:
|
||||||
raise exception.AbortExtraction(
|
raise self.exc.AbortExtraction(
|
||||||
f"{user_name}: unable to extract secondary user ID")
|
f"{user_name}: unable to extract secondary user ID")
|
||||||
return sec_uid
|
return sec_uid
|
||||||
|
|
||||||
@@ -286,7 +286,7 @@ class TiktokExtractor(Extractor):
|
|||||||
author_id = self._extract_id(
|
author_id = self._extract_id(
|
||||||
profile_url, user_name, r"[0-9]+", "id")
|
profile_url, user_name, r"[0-9]+", "id")
|
||||||
if author_id is None:
|
if author_id is None:
|
||||||
raise exception.AbortExtraction(
|
raise self.exc.AbortExtraction(
|
||||||
f"{user_name}: unable to extract user ID")
|
f"{user_name}: unable to extract user ID")
|
||||||
return author_id
|
return author_id
|
||||||
|
|
||||||
@@ -306,7 +306,7 @@ class TiktokExtractor(Extractor):
|
|||||||
video = post["video"]
|
video = post["video"]
|
||||||
urls = self._extract_video_urls(video)
|
urls = self._extract_video_urls(video)
|
||||||
if not urls:
|
if not urls:
|
||||||
raise exception.ExtractionError(
|
raise self.exc.ExtractionError(
|
||||||
f"{post['id']}: Failed to extract video URLs. "
|
f"{post['id']}: Failed to extract video URLs. "
|
||||||
f"You may need cookies to continue.")
|
f"You may need cookies to continue.")
|
||||||
|
|
||||||
@@ -533,7 +533,7 @@ class TiktokVmpostExtractor(TiktokExtractor):
|
|||||||
url = self.request_location(url, headers=headers, notfound="post")
|
url = self.request_location(url, headers=headers, notfound="post")
|
||||||
if not url or len(url) <= 28:
|
if not url or len(url) <= 28:
|
||||||
# https://www.tiktok.com/?_r=1
|
# https://www.tiktok.com/?_r=1
|
||||||
raise exception.NotFoundError("post")
|
raise self.exc.NotFoundError("post")
|
||||||
|
|
||||||
data = {"_extractor": TiktokPostExtractor}
|
data = {"_extractor": TiktokPostExtractor}
|
||||||
yield Message.Queue, url.partition("?")[0], data
|
yield Message.Queue, url.partition("?")[0], data
|
||||||
@@ -944,7 +944,7 @@ class TiktokTimeCursor(TiktokPaginationCursor):
|
|||||||
elif not self.reverse and (new_cursor < self.cursor or no_cursor):
|
elif not self.reverse and (new_cursor < self.cursor or no_cursor):
|
||||||
new_cursor = self.fallback_cursor(data)
|
new_cursor = self.fallback_cursor(data)
|
||||||
elif no_cursor:
|
elif no_cursor:
|
||||||
raise exception.ExtractionError("Could not extract next cursor")
|
raise self.exc.ExtractionError("Could not extract next cursor")
|
||||||
self.cursor = new_cursor
|
self.cursor = new_cursor
|
||||||
return not data.get(self.has_more_key, False)
|
return not data.get(self.has_more_key, False)
|
||||||
|
|
||||||
@@ -1273,7 +1273,7 @@ class TiktokPaginationRequest:
|
|||||||
extractor.log.warning("%s: TikTok API keeps sending the same "
|
extractor.log.warning("%s: TikTok API keeps sending the same "
|
||||||
"page. Taking measures to avoid an infinite "
|
"page. Taking measures to avoid an infinite "
|
||||||
"loop", url)
|
"loop", url)
|
||||||
raise exception.ExtractionError(
|
raise self.exc.ExtractionError(
|
||||||
"TikTok API keeps sending the same page")
|
"TikTok API keeps sending the same page")
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -9,7 +9,7 @@
|
|||||||
"""Extractors for https://www.tumblr.com/"""
|
"""Extractors for https://www.tumblr.com/"""
|
||||||
|
|
||||||
from .common import Extractor, Message
|
from .common import Extractor, Message
|
||||||
from .. import text, util, dt, oauth, exception
|
from .. import text, util, dt, oauth
|
||||||
|
|
||||||
|
|
||||||
BASE_PATTERN = (
|
BASE_PATTERN = (
|
||||||
@@ -473,7 +473,7 @@ class TumblrAPI(oauth.OAuth1API):
|
|||||||
self.log.debug(data)
|
self.log.debug(data)
|
||||||
|
|
||||||
if status == 403:
|
if status == 403:
|
||||||
raise exception.AuthorizationError()
|
raise self.exc.AuthorizationError()
|
||||||
|
|
||||||
elif status == 404:
|
elif status == 404:
|
||||||
try:
|
try:
|
||||||
@@ -492,8 +492,8 @@ class TumblrAPI(oauth.OAuth1API):
|
|||||||
else:
|
else:
|
||||||
self.log.info("Run 'gallery-dl oauth:tumblr' "
|
self.log.info("Run 'gallery-dl oauth:tumblr' "
|
||||||
"to access dashboard-only blogs")
|
"to access dashboard-only blogs")
|
||||||
raise exception.AuthorizationError(error)
|
raise self.exc.AuthorizationError(error)
|
||||||
raise exception.NotFoundError("user or post")
|
raise self.exc.NotFoundError("user or post")
|
||||||
|
|
||||||
elif status == 429:
|
elif status == 429:
|
||||||
# daily rate limit
|
# daily rate limit
|
||||||
@@ -514,7 +514,7 @@ class TumblrAPI(oauth.OAuth1API):
|
|||||||
continue
|
continue
|
||||||
|
|
||||||
t = (dt.now() + dt.timedelta(0, float(reset))).time()
|
t = (dt.now() + dt.timedelta(0, float(reset))).time()
|
||||||
raise exception.AbortExtraction(
|
raise self.exc.AbortExtraction(
|
||||||
f"Aborting - Rate limit will reset at "
|
f"Aborting - Rate limit will reset at "
|
||||||
f"{t.hour:02}:{t.minute:02}:{t.second:02}")
|
f"{t.hour:02}:{t.minute:02}:{t.second:02}")
|
||||||
|
|
||||||
@@ -524,7 +524,7 @@ class TumblrAPI(oauth.OAuth1API):
|
|||||||
self.extractor.wait(seconds=reset)
|
self.extractor.wait(seconds=reset)
|
||||||
continue
|
continue
|
||||||
|
|
||||||
raise exception.AbortExtraction(data)
|
raise self.exc.AbortExtraction(data)
|
||||||
|
|
||||||
def _pagination(self, endpoint, params,
|
def _pagination(self, endpoint, params,
|
||||||
blog=None, key="posts", cache=False):
|
blog=None, key="posts", cache=False):
|
||||||
|
|||||||
@@ -9,7 +9,7 @@
|
|||||||
"""Extractors for https://twibooru.org/"""
|
"""Extractors for https://twibooru.org/"""
|
||||||
|
|
||||||
from .booru import BooruExtractor
|
from .booru import BooruExtractor
|
||||||
from .. import text, exception
|
from .. import text
|
||||||
import operator
|
import operator
|
||||||
|
|
||||||
BASE_PATTERN = r"(?:https?://)?(?:www\.)?twibooru\.org"
|
BASE_PATTERN = r"(?:https?://)?(?:www\.)?twibooru\.org"
|
||||||
@@ -153,7 +153,7 @@ class TwibooruAPI():
|
|||||||
|
|
||||||
# error
|
# error
|
||||||
self.extractor.log.debug(response.content)
|
self.extractor.log.debug(response.content)
|
||||||
raise exception.HttpError("", response)
|
raise self.exc.HttpError("", response)
|
||||||
|
|
||||||
def _pagination(self, endpoint, params):
|
def _pagination(self, endpoint, params):
|
||||||
extr = self.extractor
|
extr = self.extractor
|
||||||
|
|||||||
@@ -9,7 +9,7 @@
|
|||||||
"""Extractors for https://x.com/"""
|
"""Extractors for https://x.com/"""
|
||||||
|
|
||||||
from .common import Extractor, Message, Dispatch
|
from .common import Extractor, Message, Dispatch
|
||||||
from .. import text, util, dt, exception
|
from .. import text, util, dt
|
||||||
from ..cache import cache, memcache
|
from ..cache import cache, memcache
|
||||||
import itertools
|
import itertools
|
||||||
import random
|
import random
|
||||||
@@ -906,7 +906,7 @@ class TwitterTimelineExtractor(TwitterExtractor):
|
|||||||
return self.api.user_media
|
return self.api.user_media
|
||||||
if strategy == "with_replies":
|
if strategy == "with_replies":
|
||||||
return self.api.user_tweets_and_replies
|
return self.api.user_tweets_and_replies
|
||||||
raise exception.AbortExtraction(f"Invalid strategy '{strategy}'")
|
raise self.exc.AbortExtraction(f"Invalid strategy '{strategy}'")
|
||||||
|
|
||||||
|
|
||||||
class TwitterTweetsExtractor(TwitterExtractor):
|
class TwitterTweetsExtractor(TwitterExtractor):
|
||||||
@@ -1092,7 +1092,7 @@ class TwitterTweetExtractor(TwitterExtractor):
|
|||||||
try:
|
try:
|
||||||
self._assign_user(tweet["core"]["user_results"]["result"])
|
self._assign_user(tweet["core"]["user_results"]["result"])
|
||||||
except KeyError:
|
except KeyError:
|
||||||
raise exception.AbortExtraction(
|
raise self.exc.AbortExtraction(
|
||||||
f"'{tweet.get('reason') or 'Unavailable'}'")
|
f"'{tweet.get('reason') or 'Unavailable'}'")
|
||||||
|
|
||||||
yield tweet
|
yield tweet
|
||||||
@@ -1403,10 +1403,10 @@ class TwitterAPI():
|
|||||||
if tweet.get("__typename") == "TweetUnavailable":
|
if tweet.get("__typename") == "TweetUnavailable":
|
||||||
reason = tweet.get("reason")
|
reason = tweet.get("reason")
|
||||||
if reason in {"NsfwViewerHasNoStatedAge", "NsfwLoggedOut"}:
|
if reason in {"NsfwViewerHasNoStatedAge", "NsfwLoggedOut"}:
|
||||||
raise exception.AuthRequired(message="NSFW Tweet")
|
raise self.exc.AuthRequired(message="NSFW Tweet")
|
||||||
if reason == "Protected":
|
if reason == "Protected":
|
||||||
raise exception.AuthRequired(message="Protected Tweet")
|
raise self.exc.AuthRequired(message="Protected Tweet")
|
||||||
raise exception.AbortExtraction(f"Tweet unavailable ('{reason}')")
|
raise self.exc.AbortExtraction(f"Tweet unavailable ('{reason}')")
|
||||||
|
|
||||||
return tweet
|
return tweet
|
||||||
|
|
||||||
@@ -1754,9 +1754,9 @@ class TwitterAPI():
|
|||||||
return user["rest_id"]
|
return user["rest_id"]
|
||||||
except KeyError:
|
except KeyError:
|
||||||
if user and user.get("__typename") == "UserUnavailable":
|
if user and user.get("__typename") == "UserUnavailable":
|
||||||
raise exception.NotFoundError(user["message"], False)
|
raise self.exc.NotFoundError(user["message"], False)
|
||||||
else:
|
else:
|
||||||
raise exception.NotFoundError("user")
|
raise self.exc.NotFoundError("user")
|
||||||
|
|
||||||
@cache(maxage=3600)
|
@cache(maxage=3600)
|
||||||
def _guest_token(self):
|
def _guest_token(self):
|
||||||
@@ -1835,13 +1835,13 @@ class TwitterAPI():
|
|||||||
if "this account is temporarily locked" in msg:
|
if "this account is temporarily locked" in msg:
|
||||||
msg = "Account temporarily locked"
|
msg = "Account temporarily locked"
|
||||||
if self.extractor.config("locked") != "wait":
|
if self.extractor.config("locked") != "wait":
|
||||||
raise exception.AuthorizationError(msg)
|
raise self.exc.AuthorizationError(msg)
|
||||||
self.log.warning(msg)
|
self.log.warning(msg)
|
||||||
self.extractor.input("Press ENTER to retry.")
|
self.extractor.input("Press ENTER to retry.")
|
||||||
retry = True
|
retry = True
|
||||||
|
|
||||||
elif "Could not authenticate you" in msg:
|
elif "Could not authenticate you" in msg:
|
||||||
raise exception.AbortExtraction(f"'{msg}'")
|
raise self.exc.AbortExtraction(f"'{msg}'")
|
||||||
|
|
||||||
elif msg.lower().startswith("timeout"):
|
elif msg.lower().startswith("timeout"):
|
||||||
retry = True
|
retry = True
|
||||||
@@ -1858,7 +1858,7 @@ class TwitterAPI():
|
|||||||
return data
|
return data
|
||||||
elif response.status_code in {403, 404} and \
|
elif response.status_code in {403, 404} and \
|
||||||
not self.headers["x-twitter-auth-type"]:
|
not self.headers["x-twitter-auth-type"]:
|
||||||
raise exception.AuthRequired(
|
raise self.exc.AuthRequired(
|
||||||
"authenticated cookies", "timeline")
|
"authenticated cookies", "timeline")
|
||||||
elif response.status_code == 429:
|
elif response.status_code == 429:
|
||||||
self._handle_ratelimit(response)
|
self._handle_ratelimit(response)
|
||||||
@@ -1870,7 +1870,7 @@ class TwitterAPI():
|
|||||||
except Exception:
|
except Exception:
|
||||||
pass
|
pass
|
||||||
|
|
||||||
raise exception.AbortExtraction(
|
raise self.exc.AbortExtraction(
|
||||||
f"{response.status_code} {response.reason} ({errors})")
|
f"{response.status_code} {response.reason} ({errors})")
|
||||||
|
|
||||||
def _pagination_rest(self, endpoint, params):
|
def _pagination_rest(self, endpoint, params):
|
||||||
@@ -2065,13 +2065,13 @@ class TwitterAPI():
|
|||||||
self.headers["x-twitter-auth-type"] = None
|
self.headers["x-twitter-auth-type"] = None
|
||||||
extr.log.info("Retrying API request as guest")
|
extr.log.info("Retrying API request as guest")
|
||||||
continue
|
continue
|
||||||
raise exception.AuthorizationError(
|
raise self.exc.AuthorizationError(
|
||||||
user["screen_name"] + " blocked your account")
|
user["screen_name"] + " blocked your account")
|
||||||
elif user.get("protected"):
|
elif user.get("protected"):
|
||||||
raise exception.AuthorizationError(
|
raise self.exc.AuthorizationError(
|
||||||
user["screen_name"] + "'s Tweets are protected")
|
user["screen_name"] + "'s Tweets are protected")
|
||||||
|
|
||||||
raise exception.AbortExtraction(
|
raise self.exc.AbortExtraction(
|
||||||
"Unable to retrieve Tweets from this timeline")
|
"Unable to retrieve Tweets from this timeline")
|
||||||
|
|
||||||
tweets = []
|
tweets = []
|
||||||
@@ -2301,7 +2301,7 @@ class TwitterAPI():
|
|||||||
def _handle_ratelimit(self, response):
|
def _handle_ratelimit(self, response):
|
||||||
rl = self.extractor.config("ratelimit")
|
rl = self.extractor.config("ratelimit")
|
||||||
if rl == "abort":
|
if rl == "abort":
|
||||||
raise exception.AbortExtraction("Rate limit exceeded")
|
raise self.exc.AbortExtraction("Rate limit exceeded")
|
||||||
|
|
||||||
until = response.headers.get("x-rate-limit-reset")
|
until = response.headers.get("x-rate-limit-reset")
|
||||||
seconds = None if until else 60.0
|
seconds = None if until else 60.0
|
||||||
@@ -2313,7 +2313,7 @@ class TwitterAPI():
|
|||||||
num = text.parse_int(num)
|
num = text.parse_int(num)
|
||||||
msg = f"Rate limit exceeded ({amt}/{num})"
|
msg = f"Rate limit exceeded ({amt}/{num})"
|
||||||
if amt >= num:
|
if amt >= num:
|
||||||
raise exception.AbortExtraction(msg)
|
raise self.exc.AbortExtraction(msg)
|
||||||
self.log.warning(msg)
|
self.log.warning(msg)
|
||||||
self._ratelimit_amt = amt + 1
|
self._ratelimit_amt = amt + 1
|
||||||
elif rl == "wait":
|
elif rl == "wait":
|
||||||
|
|||||||
@@ -7,7 +7,7 @@
|
|||||||
"""Extractors for https://urlgalleries.net/"""
|
"""Extractors for https://urlgalleries.net/"""
|
||||||
|
|
||||||
from .common import GalleryExtractor, Message
|
from .common import GalleryExtractor, Message
|
||||||
from .. import text, exception
|
from .. import text
|
||||||
|
|
||||||
|
|
||||||
class UrlgalleriesGalleryExtractor(GalleryExtractor):
|
class UrlgalleriesGalleryExtractor(GalleryExtractor):
|
||||||
@@ -29,8 +29,8 @@ class UrlgalleriesGalleryExtractor(GalleryExtractor):
|
|||||||
if 300 <= response.status_code < 500:
|
if 300 <= response.status_code < 500:
|
||||||
if response.headers.get("location", "").endswith(
|
if response.headers.get("location", "").endswith(
|
||||||
"/not_found_adult.php"):
|
"/not_found_adult.php"):
|
||||||
raise exception.NotFoundError("gallery")
|
raise self.exc.NotFoundError("gallery")
|
||||||
raise exception.HttpError(None, response)
|
raise self.exc.HttpError(None, response)
|
||||||
page = response.text
|
page = response.text
|
||||||
|
|
||||||
imgs = self.images(page)
|
imgs = self.images(page)
|
||||||
|
|||||||
@@ -7,7 +7,6 @@
|
|||||||
"""Extractors for general-purpose URL shorteners"""
|
"""Extractors for general-purpose URL shorteners"""
|
||||||
|
|
||||||
from .common import BaseExtractor, Message
|
from .common import BaseExtractor, Message
|
||||||
from .. import exception
|
|
||||||
|
|
||||||
|
|
||||||
class UrlshortenerExtractor(BaseExtractor):
|
class UrlshortenerExtractor(BaseExtractor):
|
||||||
@@ -40,5 +39,5 @@ class UrlshortenerLinkExtractor(UrlshortenerExtractor):
|
|||||||
location = self.request_location(
|
location = self.request_location(
|
||||||
url, headers=self.config_instance("headers"), notfound="URL")
|
url, headers=self.config_instance("headers"), notfound="URL")
|
||||||
if not location:
|
if not location:
|
||||||
raise exception.AbortExtraction("Unable to resolve short URL")
|
raise self.exc.AbortExtraction("Unable to resolve short URL")
|
||||||
yield Message.Queue, location, {}
|
yield Message.Queue, location, {}
|
||||||
|
|||||||
@@ -9,7 +9,7 @@
|
|||||||
"""Extractors for https://vipergirls.to/"""
|
"""Extractors for https://vipergirls.to/"""
|
||||||
|
|
||||||
from .common import Extractor, Message
|
from .common import Extractor, Message
|
||||||
from .. import text, util, exception
|
from .. import text, util
|
||||||
from ..cache import cache
|
from ..cache import cache
|
||||||
|
|
||||||
BASE_PATTERN = r"(?:https?://)?(?:www\.)?vipergirls\.to"
|
BASE_PATTERN = r"(?:https?://)?(?:www\.)?vipergirls\.to"
|
||||||
@@ -104,7 +104,7 @@ class VipergirlsExtractor(Extractor):
|
|||||||
|
|
||||||
response = self.request(url, method="POST", data=data)
|
response = self.request(url, method="POST", data=data)
|
||||||
if not response.cookies.get("vg_password"):
|
if not response.cookies.get("vg_password"):
|
||||||
raise exception.AuthenticationError()
|
raise self.exc.AuthenticationError()
|
||||||
|
|
||||||
return {cookie.name: cookie.value
|
return {cookie.name: cookie.value
|
||||||
for cookie in response.cookies}
|
for cookie in response.cookies}
|
||||||
|
|||||||
@@ -9,7 +9,7 @@
|
|||||||
"""Extractors for https://vk.com/"""
|
"""Extractors for https://vk.com/"""
|
||||||
|
|
||||||
from .common import Extractor, Message
|
from .common import Extractor, Message
|
||||||
from .. import text, exception
|
from .. import text
|
||||||
|
|
||||||
BASE_PATTERN = r"(?:https://)?(?:www\.|m\.)?vk\.com"
|
BASE_PATTERN = r"(?:https://)?(?:www\.|m\.)?vk\.com"
|
||||||
|
|
||||||
@@ -100,13 +100,13 @@ class VkExtractor(Extractor):
|
|||||||
response = self.request(
|
response = self.request(
|
||||||
url, method="POST", headers=headers, data=data)
|
url, method="POST", headers=headers, data=data)
|
||||||
if response.history and "/challenge.html" in response.url:
|
if response.history and "/challenge.html" in response.url:
|
||||||
raise exception.AbortExtraction(
|
raise self.exc.AbortExtraction(
|
||||||
"HTTP redirect to 'challenge' page:\n" + response.url)
|
"HTTP redirect to 'challenge' page:\n" + response.url)
|
||||||
|
|
||||||
payload = response.json()["payload"][1]
|
payload = response.json()["payload"][1]
|
||||||
if len(payload) < 4:
|
if len(payload) < 4:
|
||||||
self.log.debug(payload)
|
self.log.debug(payload)
|
||||||
raise exception.AuthorizationError(
|
raise self.exc.AuthorizationError(
|
||||||
text.unescape(payload[0]) if payload[0] else None)
|
text.unescape(payload[0]) if payload[0] else None)
|
||||||
|
|
||||||
total = payload[1]
|
total = payload[1]
|
||||||
|
|||||||
@@ -9,7 +9,7 @@
|
|||||||
"""Extractors for https://wallhaven.cc/"""
|
"""Extractors for https://wallhaven.cc/"""
|
||||||
|
|
||||||
from .common import Extractor, Message, Dispatch
|
from .common import Extractor, Message, Dispatch
|
||||||
from .. import text, exception
|
from .. import text
|
||||||
|
|
||||||
|
|
||||||
class WallhavenExtractor(Extractor):
|
class WallhavenExtractor(Extractor):
|
||||||
@@ -199,7 +199,7 @@ class WallhavenAPI():
|
|||||||
continue
|
continue
|
||||||
|
|
||||||
self.extractor.log.debug("Server response: %s", response.text)
|
self.extractor.log.debug("Server response: %s", response.text)
|
||||||
raise exception.AbortExtraction(
|
raise self.exc.AbortExtraction(
|
||||||
f"API request failed "
|
f"API request failed "
|
||||||
f"({response.status_code} {response.reason})")
|
f"({response.status_code} {response.reason})")
|
||||||
|
|
||||||
|
|||||||
@@ -10,7 +10,7 @@
|
|||||||
"""Extractors for https://www.webtoons.com/"""
|
"""Extractors for https://www.webtoons.com/"""
|
||||||
|
|
||||||
from .common import GalleryExtractor, Extractor, Message
|
from .common import GalleryExtractor, Extractor, Message
|
||||||
from .. import exception, text, util
|
from .. import text, util
|
||||||
|
|
||||||
BASE_PATTERN = r"(?:https?://)?(?:www\.)?webtoons\.com"
|
BASE_PATTERN = r"(?:https?://)?(?:www\.)?webtoons\.com"
|
||||||
LANG_PATTERN = BASE_PATTERN + r"/(([^/?#]+)"
|
LANG_PATTERN = BASE_PATTERN + r"/(([^/?#]+)"
|
||||||
@@ -40,7 +40,7 @@ class WebtoonsBase():
|
|||||||
def request(self, url, **kwargs):
|
def request(self, url, **kwargs):
|
||||||
response = Extractor.request(self, url, **kwargs)
|
response = Extractor.request(self, url, **kwargs)
|
||||||
if response.history and "/ageGate" in response.url:
|
if response.history and "/ageGate" in response.url:
|
||||||
raise exception.AbortExtraction(
|
raise self.exc.AbortExtraction(
|
||||||
f"HTTP redirect to age gate check ('{response.url}')")
|
f"HTTP redirect to age gate check ('{response.url}')")
|
||||||
return response
|
return response
|
||||||
|
|
||||||
|
|||||||
@@ -9,7 +9,7 @@
|
|||||||
"""Extractors for https://www.weibo.com/"""
|
"""Extractors for https://www.weibo.com/"""
|
||||||
|
|
||||||
from .common import Extractor, Message, Dispatch
|
from .common import Extractor, Message, Dispatch
|
||||||
from .. import text, util, exception
|
from .. import text, util
|
||||||
from ..cache import cache
|
from ..cache import cache
|
||||||
import random
|
import random
|
||||||
|
|
||||||
@@ -65,7 +65,7 @@ class WeiboExtractor(Extractor):
|
|||||||
|
|
||||||
if response.history:
|
if response.history:
|
||||||
if "login.sina.com" in response.url:
|
if "login.sina.com" in response.url:
|
||||||
raise exception.AbortExtraction(
|
raise self.exc.AbortExtraction(
|
||||||
f"HTTP redirect to login page "
|
f"HTTP redirect to login page "
|
||||||
f"({response.url.partition('?')[0]})")
|
f"({response.url.partition('?')[0]})")
|
||||||
if "passport.weibo.com" in response.url:
|
if "passport.weibo.com" in response.url:
|
||||||
@@ -189,7 +189,7 @@ class WeiboExtractor(Extractor):
|
|||||||
not text.ext_from_url(video["url"]):
|
not text.ext_from_url(video["url"]):
|
||||||
try:
|
try:
|
||||||
video["url"] = self.request_location(video["url"])
|
video["url"] = self.request_location(video["url"])
|
||||||
except exception.HttpError as exc:
|
except self.exc.HttpError as exc:
|
||||||
self.log.warning("%s: %s", exc.__class__.__name__, exc)
|
self.log.warning("%s: %s", exc.__class__.__name__, exc)
|
||||||
video["url"] = ""
|
video["url"] = ""
|
||||||
|
|
||||||
@@ -230,7 +230,7 @@ class WeiboExtractor(Extractor):
|
|||||||
if not data.get("ok"):
|
if not data.get("ok"):
|
||||||
self.log.debug(response.content)
|
self.log.debug(response.content)
|
||||||
if "since_id" not in params: # first iteration
|
if "since_id" not in params: # first iteration
|
||||||
raise exception.AbortExtraction(
|
raise self.exc.AbortExtraction(
|
||||||
f'"{data.get("msg") or "unknown error"}"')
|
f'"{data.get("msg") or "unknown error"}"')
|
||||||
|
|
||||||
try:
|
try:
|
||||||
@@ -479,14 +479,14 @@ class WeiboAlbumExtractor(WeiboExtractor):
|
|||||||
try:
|
try:
|
||||||
sub = subalbums[int(subalbum)-1]
|
sub = subalbums[int(subalbum)-1]
|
||||||
except Exception:
|
except Exception:
|
||||||
raise exception.NotFoundError("subalbum")
|
raise self.exc.NotFoundError("subalbum")
|
||||||
else:
|
else:
|
||||||
subalbum = text.unquote(subalbum)
|
subalbum = text.unquote(subalbum)
|
||||||
for sub in subalbums:
|
for sub in subalbums:
|
||||||
if sub["pic_title"] == subalbum:
|
if sub["pic_title"] == subalbum:
|
||||||
break
|
break
|
||||||
else:
|
else:
|
||||||
raise exception.NotFoundError("subalbum")
|
raise self.exc.NotFoundError("subalbum")
|
||||||
return ((sub, self._pagination_subalbum(uid, sub)),)
|
return ((sub, self._pagination_subalbum(uid, sub)),)
|
||||||
|
|
||||||
def _pagination_subalbum(self, uid, sub):
|
def _pagination_subalbum(self, uid, sub):
|
||||||
@@ -504,7 +504,7 @@ class WeiboStatusExtractor(WeiboExtractor):
|
|||||||
status = self._status_by_id(self.user)
|
status = self._status_by_id(self.user)
|
||||||
if status.get("ok") != 1:
|
if status.get("ok") != 1:
|
||||||
self.log.debug(status)
|
self.log.debug(status)
|
||||||
raise exception.NotFoundError("status")
|
raise self.exc.NotFoundError("status")
|
||||||
return (status,)
|
return (status,)
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -10,7 +10,7 @@
|
|||||||
"""Extractors for Wikimedia sites"""
|
"""Extractors for Wikimedia sites"""
|
||||||
|
|
||||||
from .common import BaseExtractor, Message
|
from .common import BaseExtractor, Message
|
||||||
from .. import text, util, exception
|
from .. import text, util
|
||||||
from ..cache import cache
|
from ..cache import cache
|
||||||
|
|
||||||
|
|
||||||
@@ -71,7 +71,7 @@ class WikimediaExtractor(BaseExtractor):
|
|||||||
response = self.request(url, method="HEAD", fatal=None)
|
response = self.request(url, method="HEAD", fatal=None)
|
||||||
if response.status_code < 400:
|
if response.status_code < 400:
|
||||||
return url
|
return url
|
||||||
raise exception.AbortExtraction("Unable to find API endpoint")
|
raise self.exc.AbortExtraction("Unable to find API endpoint")
|
||||||
|
|
||||||
def prepare_info(self, info):
|
def prepare_info(self, info):
|
||||||
"""Adjust the content of an image info object"""
|
"""Adjust the content of an image info object"""
|
||||||
|
|||||||
@@ -9,7 +9,7 @@
|
|||||||
"""Extractors for XenForo forums"""
|
"""Extractors for XenForo forums"""
|
||||||
|
|
||||||
from .common import BaseExtractor, Message
|
from .common import BaseExtractor, Message
|
||||||
from .. import text, util, exception
|
from .. import text, util
|
||||||
from ..cache import cache
|
from ..cache import cache
|
||||||
import binascii
|
import binascii
|
||||||
|
|
||||||
@@ -165,7 +165,7 @@ class XenforoExtractor(BaseExtractor):
|
|||||||
def request_page(self, url):
|
def request_page(self, url):
|
||||||
try:
|
try:
|
||||||
return self.request(url)
|
return self.request(url)
|
||||||
except exception.HttpError as exc:
|
except self.exc.HttpError as exc:
|
||||||
if exc.status == 403 and b">Log in<" in exc.response.content:
|
if exc.status == 403 and b">Log in<" in exc.response.content:
|
||||||
self._require_auth(exc.response)
|
self._require_auth(exc.response)
|
||||||
raise
|
raise
|
||||||
@@ -197,7 +197,7 @@ class XenforoExtractor(BaseExtractor):
|
|||||||
if not response.history:
|
if not response.history:
|
||||||
err = self._extract_error(response.text)
|
err = self._extract_error(response.text)
|
||||||
err = f'"{err}"' if err else None
|
err = f'"{err}"' if err else None
|
||||||
raise exception.AuthenticationError(err)
|
raise self.exc.AuthenticationError(err)
|
||||||
|
|
||||||
return {
|
return {
|
||||||
cookie.name: cookie.value
|
cookie.name: cookie.value
|
||||||
@@ -420,7 +420,7 @@ class XenforoExtractor(BaseExtractor):
|
|||||||
return main["contentUrl"], media
|
return main["contentUrl"], media
|
||||||
|
|
||||||
def _require_auth(self, response=None):
|
def _require_auth(self, response=None):
|
||||||
raise exception.AuthRequired(
|
raise self.exc.AuthRequired(
|
||||||
("username & password", "authenticated cookies"), None,
|
("username & password", "authenticated cookies"), None,
|
||||||
None if response is None else self._extract_error(response.text))
|
None if response is None else self._extract_error(response.text))
|
||||||
|
|
||||||
@@ -473,7 +473,7 @@ class XenforoPostExtractor(XenforoExtractor):
|
|||||||
|
|
||||||
pos = page.find(f'data-content="post-{post_id}"')
|
pos = page.find(f'data-content="post-{post_id}"')
|
||||||
if pos < 0:
|
if pos < 0:
|
||||||
raise exception.NotFoundError("post")
|
raise self.exc.NotFoundError("post")
|
||||||
html = text.extract(page, "<article ", "<footer", pos-200)[0]
|
html = text.extract(page, "<article ", "<footer", pos-200)[0]
|
||||||
|
|
||||||
self._parse_thread(page)
|
self._parse_thread(page)
|
||||||
|
|||||||
@@ -9,7 +9,7 @@
|
|||||||
"""Extractors for https://xfolio.jp/"""
|
"""Extractors for https://xfolio.jp/"""
|
||||||
|
|
||||||
from .common import Extractor, Message
|
from .common import Extractor, Message
|
||||||
from .. import text, exception
|
from .. import text
|
||||||
|
|
||||||
BASE_PATTERN = r"(?:https?://)?xfolio\.jp(?:/[^/?#]+)?"
|
BASE_PATTERN = r"(?:https?://)?xfolio\.jp(?:/[^/?#]+)?"
|
||||||
|
|
||||||
@@ -38,7 +38,7 @@ class XfolioExtractor(Extractor):
|
|||||||
response = Extractor.request(self, url, **kwargs)
|
response = Extractor.request(self, url, **kwargs)
|
||||||
|
|
||||||
if "/system/recaptcha" in response.url:
|
if "/system/recaptcha" in response.url:
|
||||||
raise exception.AbortExtraction("Bot check / CAPTCHA page")
|
raise self.exc.AbortExtraction("Bot check / CAPTCHA page")
|
||||||
|
|
||||||
return response
|
return response
|
||||||
|
|
||||||
|
|||||||
@@ -9,7 +9,7 @@
|
|||||||
"""Extractors for sites supported by youtube-dl"""
|
"""Extractors for sites supported by youtube-dl"""
|
||||||
|
|
||||||
from .common import Extractor, Message
|
from .common import Extractor, Message
|
||||||
from .. import ytdl, config, exception
|
from .. import ytdl, config
|
||||||
|
|
||||||
|
|
||||||
class YoutubeDLExtractor(Extractor):
|
class YoutubeDLExtractor(Extractor):
|
||||||
@@ -39,7 +39,7 @@ class YoutubeDLExtractor(Extractor):
|
|||||||
self.ytdl_ie_key = ie.ie_key()
|
self.ytdl_ie_key = ie.ie_key()
|
||||||
break
|
break
|
||||||
if not generic and self.ytdl_ie_key == "Generic":
|
if not generic and self.ytdl_ie_key == "Generic":
|
||||||
raise exception.NoExtractorError()
|
raise self.exc.NoExtractorError()
|
||||||
self.force_generic_extractor = False
|
self.force_generic_extractor = False
|
||||||
|
|
||||||
if self.ytdl_ie_key == "Generic" and config.interpolate(
|
if self.ytdl_ie_key == "Generic" and config.interpolate(
|
||||||
@@ -94,9 +94,9 @@ class YoutubeDLExtractor(Extractor):
|
|||||||
ytdl_instance.get_info_extractor(self.ytdl_ie_key),
|
ytdl_instance.get_info_extractor(self.ytdl_ie_key),
|
||||||
False, {}, True)
|
False, {}, True)
|
||||||
# except ytdl_module.utils.YoutubeDLError:
|
# except ytdl_module.utils.YoutubeDLError:
|
||||||
# raise exception.AbortExtraction("Failed to extract video data")
|
# raise self.exc.AbortExtraction("Failed to extract video data")
|
||||||
except Exception as exc:
|
except Exception as exc:
|
||||||
raise exception.AbortExtraction(
|
raise self.exc.AbortExtraction(
|
||||||
f"Failed to extract video data "
|
f"Failed to extract video data "
|
||||||
f"({exc.__class__.__name__}: {exc})")
|
f"({exc.__class__.__name__}: {exc})")
|
||||||
|
|
||||||
|
|||||||
@@ -10,7 +10,7 @@
|
|||||||
|
|
||||||
from .booru import BooruExtractor
|
from .booru import BooruExtractor
|
||||||
from ..cache import cache
|
from ..cache import cache
|
||||||
from .. import text, util, exception
|
from .. import text, util
|
||||||
import collections
|
import collections
|
||||||
|
|
||||||
BASE_PATTERN = r"(?:https?://)?(?:www\.)?zerochan\.net"
|
BASE_PATTERN = r"(?:https?://)?(?:www\.)?zerochan\.net"
|
||||||
@@ -59,7 +59,7 @@ class ZerochanExtractor(BooruExtractor):
|
|||||||
response = self.request(
|
response = self.request(
|
||||||
url, method="POST", headers=headers, data=data, expected=(500,))
|
url, method="POST", headers=headers, data=data, expected=(500,))
|
||||||
if not response.history:
|
if not response.history:
|
||||||
raise exception.AuthenticationError()
|
raise self.exc.AuthenticationError()
|
||||||
|
|
||||||
return response.cookies
|
return response.cookies
|
||||||
|
|
||||||
@@ -196,7 +196,7 @@ class ZerochanTagExtractor(ZerochanExtractor):
|
|||||||
try:
|
try:
|
||||||
page = self.request(
|
page = self.request(
|
||||||
url, params=params, expected=(500,)).text
|
url, params=params, expected=(500,)).text
|
||||||
except exception.HttpError as exc:
|
except self.exc.HttpError as exc:
|
||||||
if exc.status == 404:
|
if exc.status == 404:
|
||||||
return
|
return
|
||||||
raise
|
raise
|
||||||
@@ -241,7 +241,7 @@ class ZerochanTagExtractor(ZerochanExtractor):
|
|||||||
try:
|
try:
|
||||||
response = self.request(
|
response = self.request(
|
||||||
url, params=params, allow_redirects=False)
|
url, params=params, allow_redirects=False)
|
||||||
except exception.HttpError as exc:
|
except self.exc.HttpError as exc:
|
||||||
if exc.status == 404:
|
if exc.status == 404:
|
||||||
return
|
return
|
||||||
raise
|
raise
|
||||||
@@ -251,7 +251,7 @@ class ZerochanTagExtractor(ZerochanExtractor):
|
|||||||
self.log.warning("HTTP redirect to %s", url)
|
self.log.warning("HTTP redirect to %s", url)
|
||||||
if self.config("redirects"):
|
if self.config("redirects"):
|
||||||
continue
|
continue
|
||||||
raise exception.AbortExtraction()
|
raise self.exc.AbortExtraction()
|
||||||
|
|
||||||
data = response.json()
|
data = response.json()
|
||||||
try:
|
try:
|
||||||
@@ -293,7 +293,7 @@ class ZerochanImageExtractor(ZerochanExtractor):
|
|||||||
|
|
||||||
try:
|
try:
|
||||||
post = self._parse_entry_html(image_id)
|
post = self._parse_entry_html(image_id)
|
||||||
except exception.HttpError as exc:
|
except self.exc.HttpError as exc:
|
||||||
if exc.status in {404, 410}:
|
if exc.status in {404, 410}:
|
||||||
if msg := text.extr(exc.response.text, "<h2>", "<"):
|
if msg := text.extr(exc.response.text, "<h2>", "<"):
|
||||||
self.log.warning(f"'{msg}'")
|
self.log.warning(f"'{msg}'")
|
||||||
|
|||||||
Reference in New Issue
Block a user