improve 'extractor.request'
- add 'fatal' argument
- improve internal logic and flow
- raise known exception on error
- update exception hierarchy
This commit is contained in:
@@ -6,22 +6,49 @@
|
||||
# it under the terms of the GNU General Public License version 2 as
|
||||
# published by the Free Software Foundation.
|
||||
|
||||
"""Exception classes used by gallery-dl
|
||||
|
||||
class NoExtractorError(Exception):
|
||||
"""No extractor can handle the given URL"""
|
||||
Class Hierarchy:

Exception
 +-- GalleryDLException
      +-- ExtractionError
      |    +-- AuthenticationError
      |    +-- AuthorizationError
      |    +-- NotFoundError
      |    +-- HttpError
      +-- NoExtractorError
      +-- StopExtraction
|
||||
"""
|
||||
|
||||
|
||||
class AuthenticationError(Exception):
|
||||
class GalleryDLException(Exception):
|
||||
"""Base class for GalleryDL exceptions"""
|
||||
|
||||
|
||||
class ExtractionError(GalleryDLException):
|
||||
"""Base class for exceptions during information extraction"""
|
||||
|
||||
|
||||
class AuthenticationError(ExtractionError):
|
||||
"""Invalid or missing login information"""
|
||||
|
||||
|
||||
class AuthorizationError(Exception):
|
||||
class AuthorizationError(ExtractionError):
|
||||
"""Insufficient privileges to access a resource"""
|
||||
|
||||
|
||||
class NotFoundError(Exception):
|
||||
class NotFoundError(ExtractionError):
|
||||
"""Requested resource (gallery/image) does not exist"""
|
||||
|
||||
|
||||
class StopExtraction(Exception):
|
||||
class HttpError(ExtractionError):
|
||||
"""HTTP request during extraction failed"""
|
||||
|
||||
|
||||
class NoExtractorError(GalleryDLException):
|
||||
"""No extractor can handle the given URL"""
|
||||
|
||||
|
||||
class StopExtraction(GalleryDLException):
|
||||
"""Extraction should stop"""
|
||||
|
||||
@@ -107,7 +107,7 @@ class BatotoChapterExtractor(BatotoExtractor, AsynchronousExtractor):
|
||||
"p": 1,
|
||||
"supress_webtoon": "t",
|
||||
}
|
||||
response = self.session.get(self.reader_url, params=params)
|
||||
response = self.request(self.reader_url, params=params, fatal=False)
|
||||
if response.status_code == 405:
|
||||
error = text.extract(response.text, "ERROR [", "]")[0]
|
||||
if error == "10030":
|
||||
|
||||
@@ -18,7 +18,7 @@ import requests
|
||||
import threading
|
||||
import http.cookiejar
|
||||
from .message import Message
|
||||
from .. import config
|
||||
from .. import config, exception
|
||||
|
||||
|
||||
class Extractor():
|
||||
@@ -47,11 +47,22 @@ class Extractor():
|
||||
return config.interpolate(
|
||||
("extractor", self.category, self.subcategory, key), default)
|
||||
|
||||
def request(self, url, encoding=None, *args, **kwargs):
|
||||
response = safe_request(self.session, url, *args, **kwargs)
|
||||
if encoding:
|
||||
response.encoding = encoding
|
||||
return response
|
||||
def request(self, url, method="GET", encoding=None, fatal=True, retries=3,
            *args, **kwargs):
    """Send an HTTP request through this extractor's session.

    Arguments:
        url: address to request
        method: HTTP method name handed to 'session.request'
        encoding: if truthy, assigned to 'response.encoding' before the
            response is returned
        fatal: if truthy, 'response.raise_for_status()' is called, so an
            error status counts as a failed attempt
        retries: number of attempts before giving up; values below 1
            still result in exactly one attempt
        *args, **kwargs: forwarded unchanged to 'session.request'

    Returns the response object of the first successful attempt.

    Raises 'exception.HttpError', chained to the underlying requests
    exception, once all attempts have failed.
    """
    while True:
        try:
            response = self.session.request(method, url, *args, **kwargs)
            if fatal:
                response.raise_for_status()
            if encoding:
                response.encoding = encoding
            return response
        except requests.exceptions.RequestException as exc:
            retries -= 1
            # '<= 0' rather than 'not retries': a caller passing
            # retries=0 (or a negative number) would otherwise skip past
            # zero and never leave this loop
            if retries <= 0:
                raise exception.HttpError(exc) from exc
        # short pause before the next attempt
        time.sleep(1)
|
||||
|
||||
def _get_auth_info(self):
|
||||
"""Return authentication information as (username, password) tuple"""
|
||||
@@ -164,33 +175,8 @@ class MangaExtractor(Extractor):
|
||||
return []
|
||||
|
||||
|
||||
def safe_request(session, url, method="GET", *args, **kwargs):
|
||||
tries = 0
|
||||
while True:
|
||||
# try to connect to remote source
|
||||
try:
|
||||
r = session.request(method, url, *args, **kwargs)
|
||||
except requests.exceptions.ConnectionError:
|
||||
tries += 1
|
||||
time.sleep(1)
|
||||
if tries == 5:
|
||||
raise
|
||||
continue
|
||||
|
||||
# reject error-status-codes
|
||||
if r.status_code != requests.codes.ok:
|
||||
tries += 1
|
||||
time.sleep(1)
|
||||
if tries == 5:
|
||||
r.raise_for_status()
|
||||
continue
|
||||
|
||||
# everything ok -- proceed to download
|
||||
return r
|
||||
|
||||
|
||||
# Reduce strictness of the expected magic string in cookie jar files.
|
||||
# (This allows the use of Wget-generated cookiejar files without modification)
|
||||
# Reduce strictness of the expected magic string in cookiejar files.
|
||||
# (This allows the use of Wget-generated cookiejars without modification)
|
||||
|
||||
http.cookiejar.MozillaCookieJar.magic_re = re.compile(
|
||||
"#( Netscape)? HTTP Cookie File", re.IGNORECASE)
|
||||
|
||||
@@ -242,7 +242,7 @@ class DeviantartDeviationExtractor(DeviantartExtractor):
|
||||
self.url = "https://" + match.group(1)
|
||||
|
||||
def deviations(self):
|
||||
response = self.session.get(self.url)
|
||||
response = self.request(self.url, fatal=False)
|
||||
deviation_id = text.extract(response.text, '//deviation/', '"')[0]
|
||||
if response.status_code != 200 or not deviation_id:
|
||||
raise exception.NotFoundError("image")
|
||||
|
||||
@@ -56,7 +56,7 @@ class ExhentaiGalleryExtractor(Extractor):
|
||||
yield Message.Version, 1
|
||||
|
||||
url = "{}/g/{}/{}/".format(self.root, self.gid, self.token)
|
||||
response = self.session.get(url)
|
||||
response = self.request(url, fatal=False)
|
||||
page = response.text
|
||||
if response.status_code == 404 and "Gallery Not Available" in page:
|
||||
raise exception.AuthorizationError()
|
||||
@@ -196,7 +196,7 @@ class ExhentaiGalleryExtractor(Extractor):
|
||||
"""Actual login implementation"""
|
||||
self.log.info("Logging in as %s", username)
|
||||
url = "https://forums.e-hentai.org/index.php?act=Login&CODE=01"
|
||||
params = {
|
||||
data = {
|
||||
"CookieDate": "1",
|
||||
"b": "d",
|
||||
"bt": "1-1",
|
||||
@@ -206,7 +206,7 @@ class ExhentaiGalleryExtractor(Extractor):
|
||||
}
|
||||
referer = "https://e-hentai.org/bounce_login.php?b=d&bt=1-1"
|
||||
self.session.headers["Referer"] = referer
|
||||
response = self.session.post(url, data=params)
|
||||
response = self.request(url, method="POST", data=data)
|
||||
|
||||
if "You are now logged in as:" not in response.text:
|
||||
raise exception.AuthenticationError()
|
||||
|
||||
@@ -65,7 +65,7 @@ class GfycatImageExtractor(GfycatExtractor):
|
||||
|
||||
def _get_info(self, gfycat_id):
|
||||
url = "https://gfycat.com/cajax/get/" + gfycat_id
|
||||
data = self.session.get(url).json()
|
||||
data = self.request(url).json()
|
||||
if "error" in data:
|
||||
raise exception.NotFoundError()
|
||||
raise exception.NotFoundError("animation")
|
||||
return data["gfyItem"]
|
||||
|
||||
@@ -62,7 +62,7 @@ class HentaifoundryUserExtractor(Extractor):
|
||||
def get_job_metadata(self):
|
||||
"""Collect metadata for extractor-job"""
|
||||
url = self.url_base + self.artist + "?enterAgree=1"
|
||||
response = self.session.get(url)
|
||||
response = self.request(url, fatal=False)
|
||||
if response.status_code == 404:
|
||||
raise exception.NotFoundError("user")
|
||||
page = response.text
|
||||
@@ -150,7 +150,7 @@ class HentaifoundryImageExtractor(Extractor):
|
||||
"""Collect metadata for an image"""
|
||||
url = "https://www.hentai-foundry.com/pictures/user/{}/{}".format(
|
||||
self.artist, self.index)
|
||||
response = self.session.get(url + "?enterAgree=1")
|
||||
response = self.request(url + "?enterAgree=1", fatal=False)
|
||||
if response.status_code == 404:
|
||||
raise exception.NotFoundError("image")
|
||||
extr = text.extract
|
||||
|
||||
@@ -23,7 +23,7 @@ class ImgurExtractor(Extractor):
|
||||
self.mp4 = self.config("mp4", True)
|
||||
|
||||
def _get_data(self, urlpart):
|
||||
response = self.session.get("https://imgur.com/" + urlpart)
|
||||
response = self.request("https://imgur.com/" + urlpart, fatal=False)
|
||||
if response.status_code == 404:
|
||||
raise exception.NotFoundError(self.subcategory)
|
||||
data = text.extract(response.text, "image : ", ",\n")[0]
|
||||
|
||||
@@ -71,9 +71,9 @@ class NijieExtractor(AsynchronousExtractor):
|
||||
def _login_impl(self, username, password):
|
||||
"""Actual login implementation"""
|
||||
self.log.info("Logging in as %s", username)
|
||||
params = {"email": username, "password": password}
|
||||
page = self.session.post("https://nijie.info/login_int.php",
|
||||
data=params).text
|
||||
data = {"email": username, "password": password}
|
||||
page = self.request("https://nijie.info/login_int.php",
|
||||
method="POST", data=data).text
|
||||
if "//nijie.info/login.php" in page:
|
||||
raise exception.AuthenticationError()
|
||||
return self.session.cookies
|
||||
@@ -102,7 +102,7 @@ class NijieUserExtractor(NijieExtractor):
|
||||
params = {"id": self.artist_id, "p": 1}
|
||||
url = "https://nijie.info/members_illust.php"
|
||||
while True:
|
||||
response = self.session.get(url, params=params)
|
||||
response = self.request(url, params=params, fatal=False)
|
||||
if response.status_code == 404:
|
||||
raise exception.NotFoundError("artist")
|
||||
ids = list(text.extract_iter(response.text, ' illust_id="', '"'))
|
||||
@@ -133,8 +133,8 @@ class NijieImageExtractor(NijieExtractor):
|
||||
self.page = ""
|
||||
|
||||
def get_job_metadata(self):
|
||||
response = self.session.get(self.popup_url + self.image_id,
|
||||
allow_redirects=False)
|
||||
response = self.request(self.popup_url + self.image_id,
|
||||
allow_redirects=False)
|
||||
if 300 <= response.status_code < 400:
|
||||
raise exception.NotFoundError("image")
|
||||
self.page = response.text
|
||||
|
||||
@@ -135,12 +135,10 @@ class MastodonAPI():
|
||||
"""Get an account's statuses"""
|
||||
url = "{}/api/v1/accounts/{}/statuses?only_media=1".format(
|
||||
self.root, account_id)
|
||||
while True:
|
||||
while url:
|
||||
response = self.session.get(url)
|
||||
yield from self._parse(response)
|
||||
url = response.links.get("next", {}).get("url")
|
||||
if not url:
|
||||
break
|
||||
|
||||
@staticmethod
|
||||
def _parse(response):
|
||||
|
||||
@@ -57,8 +57,8 @@ class SeigaExtractor(Extractor):
|
||||
"""Actual login implementation"""
|
||||
self.log.info("Logging in as %s", username)
|
||||
url = "https://account.nicovideo.jp/api/v1/login"
|
||||
params = {"mail_tel": username, "password": password}
|
||||
self.session.post(url, data=params).close()
|
||||
data = {"mail_tel": username, "password": password}
|
||||
self.request(url, method="POST", data=data)
|
||||
if "user_session" not in self.session.cookies:
|
||||
raise exception.AuthenticationError()
|
||||
del self.session.cookies["nicosid"]
|
||||
|
||||
@@ -49,9 +49,11 @@ class Job():
|
||||
except exception.AuthorizationError:
|
||||
log.error("You do not have permission to access the resource "
|
||||
"at '%s'", self.url)
|
||||
except exception.NotFoundError as err:
|
||||
res = str(err) or "resource (gallery/image/user)"
|
||||
except exception.NotFoundError as exc:
|
||||
res = str(exc) or "resource (gallery/image/user)"
|
||||
log.error("The %s at '%s' does not exist", res, self.url)
|
||||
except exception.HttpError as exc:
|
||||
log.error("HTTP request failed:\n%s", exc)
|
||||
except exception.StopExtraction:
|
||||
pass
|
||||
except Exception as exc:
|
||||
|
||||
Reference in New Issue
Block a user