improve 'extractor.request'
- add 'fatal' argument
- improve internal logic and flow
- raise known exception on error
- update exception hierarchy
This commit is contained in:
@@ -6,22 +6,49 @@
|
|||||||
# it under the terms of the GNU General Public License version 2 as
|
# it under the terms of the GNU General Public License version 2 as
|
||||||
# published by the Free Software Foundation.
|
# published by the Free Software Foundation.
|
||||||
|
|
||||||
|
"""Exception classes used by gallery-dl
|
||||||
|
|
||||||
class NoExtractorError(Exception):
|
Class Hierarchy:
|
||||||
"""No extractor can handle the given URL"""
|
|
||||||
|
Exception
|
||||||
|
 +-- GalleryDLException
|
||||||
|
      +-- ExtractionError
|
||||||
|
      |    +-- AuthenticationError
|
||||||
|
      |    +-- AuthorizationError
|
||||||
|
      |    +-- NotFoundError
|
||||||
|
      |    +-- HttpError
|
||||||
|
      +-- NoExtractorError
|
||||||
|
      +-- StopExtraction
|
||||||
|
"""
|
||||||
|
|
||||||
|
|
||||||
class AuthenticationError(Exception):
|
class GalleryDLException(Exception):
|
||||||
|
"""Base class for GalleryDL exceptions"""
|
||||||
|
|
||||||
|
|
||||||
|
class ExtractionError(GalleryDLException):
|
||||||
|
"""Base class for exceptions during information extraction"""
|
||||||
|
|
||||||
|
|
||||||
|
class AuthenticationError(ExtractionError):
|
||||||
"""Invalid or missing login information"""
|
"""Invalid or missing login information"""
|
||||||
|
|
||||||
|
|
||||||
class AuthorizationError(Exception):
|
class AuthorizationError(ExtractionError):
|
||||||
"""Insufficient privileges to access a resource"""
|
"""Insufficient privileges to access a resource"""
|
||||||
|
|
||||||
|
|
||||||
class NotFoundError(Exception):
|
class NotFoundError(ExtractionError):
|
||||||
"""Requested resource (gallery/image) does not exist"""
|
"""Requested resource (gallery/image) does not exist"""
|
||||||
|
|
||||||
|
|
||||||
class StopExtraction(Exception):
|
class HttpError(ExtractionError):
|
||||||
|
"""HTTP request during extraction failed"""
|
||||||
|
|
||||||
|
|
||||||
|
class NoExtractorError(GalleryDLException):
|
||||||
|
"""No extractor can handle the given URL"""
|
||||||
|
|
||||||
|
|
||||||
|
class StopExtraction(GalleryDLException):
|
||||||
"""Extraction should stop"""
|
"""Extraction should stop"""
|
||||||
|
|||||||
@@ -107,7 +107,7 @@ class BatotoChapterExtractor(BatotoExtractor, AsynchronousExtractor):
|
|||||||
"p": 1,
|
"p": 1,
|
||||||
"supress_webtoon": "t",
|
"supress_webtoon": "t",
|
||||||
}
|
}
|
||||||
response = self.session.get(self.reader_url, params=params)
|
response = self.request(self.reader_url, params=params, fatal=False)
|
||||||
if response.status_code == 405:
|
if response.status_code == 405:
|
||||||
error = text.extract(response.text, "ERROR [", "]")[0]
|
error = text.extract(response.text, "ERROR [", "]")[0]
|
||||||
if error == "10030":
|
if error == "10030":
|
||||||
|
|||||||
@@ -18,7 +18,7 @@ import requests
|
|||||||
import threading
|
import threading
|
||||||
import http.cookiejar
|
import http.cookiejar
|
||||||
from .message import Message
|
from .message import Message
|
||||||
from .. import config
|
from .. import config, exception
|
||||||
|
|
||||||
|
|
||||||
class Extractor():
|
class Extractor():
|
||||||
@@ -47,11 +47,22 @@ class Extractor():
|
|||||||
return config.interpolate(
|
return config.interpolate(
|
||||||
("extractor", self.category, self.subcategory, key), default)
|
("extractor", self.category, self.subcategory, key), default)
|
||||||
|
|
||||||
def request(self, url, encoding=None, *args, **kwargs):
|
def request(self, url, method="GET", encoding=None, fatal=True, retries=3,
|
||||||
response = safe_request(self.session, url, *args, **kwargs)
|
*args, **kwargs):
|
||||||
if encoding:
|
while True:
|
||||||
response.encoding = encoding
|
try:
|
||||||
return response
|
response = self.session.request(method, url, *args, **kwargs)
|
||||||
|
if fatal:
|
||||||
|
response.raise_for_status()
|
||||||
|
if encoding:
|
||||||
|
response.encoding = encoding
|
||||||
|
return response
|
||||||
|
except requests.exceptions.RequestException as exc:
|
||||||
|
msg = exc
|
||||||
|
retries -= 1
|
||||||
|
if not retries:
|
||||||
|
raise exception.HttpError(msg)
|
||||||
|
time.sleep(1)
|
||||||
|
|
||||||
def _get_auth_info(self):
|
def _get_auth_info(self):
|
||||||
"""Return authentication information as (username, password) tuple"""
|
"""Return authentication information as (username, password) tuple"""
|
||||||
@@ -164,33 +175,8 @@ class MangaExtractor(Extractor):
|
|||||||
return []
|
return []
|
||||||
|
|
||||||
|
|
||||||
def safe_request(session, url, method="GET", *args, **kwargs):
|
# Reduce strictness of the expected magic string in cookiejar files.
|
||||||
tries = 0
|
# (This allows the use of Wget-generated cookiejars without modification)
|
||||||
while True:
|
|
||||||
# try to connect to remote source
|
|
||||||
try:
|
|
||||||
r = session.request(method, url, *args, **kwargs)
|
|
||||||
except requests.exceptions.ConnectionError:
|
|
||||||
tries += 1
|
|
||||||
time.sleep(1)
|
|
||||||
if tries == 5:
|
|
||||||
raise
|
|
||||||
continue
|
|
||||||
|
|
||||||
# reject error-status-codes
|
|
||||||
if r.status_code != requests.codes.ok:
|
|
||||||
tries += 1
|
|
||||||
time.sleep(1)
|
|
||||||
if tries == 5:
|
|
||||||
r.raise_for_status()
|
|
||||||
continue
|
|
||||||
|
|
||||||
# everything ok -- proceed to download
|
|
||||||
return r
|
|
||||||
|
|
||||||
|
|
||||||
# Reduce strictness of the expected magic string in cookie jar files.
|
|
||||||
# (This allows the use of Wget-generated cookiejar files without modification)
|
|
||||||
|
|
||||||
http.cookiejar.MozillaCookieJar.magic_re = re.compile(
|
http.cookiejar.MozillaCookieJar.magic_re = re.compile(
|
||||||
"#( Netscape)? HTTP Cookie File", re.IGNORECASE)
|
"#( Netscape)? HTTP Cookie File", re.IGNORECASE)
|
||||||
|
|||||||
@@ -242,7 +242,7 @@ class DeviantartDeviationExtractor(DeviantartExtractor):
|
|||||||
self.url = "https://" + match.group(1)
|
self.url = "https://" + match.group(1)
|
||||||
|
|
||||||
def deviations(self):
|
def deviations(self):
|
||||||
response = self.session.get(self.url)
|
response = self.request(self.url, fatal=False)
|
||||||
deviation_id = text.extract(response.text, '//deviation/', '"')[0]
|
deviation_id = text.extract(response.text, '//deviation/', '"')[0]
|
||||||
if response.status_code != 200 or not deviation_id:
|
if response.status_code != 200 or not deviation_id:
|
||||||
raise exception.NotFoundError("image")
|
raise exception.NotFoundError("image")
|
||||||
|
|||||||
@@ -56,7 +56,7 @@ class ExhentaiGalleryExtractor(Extractor):
|
|||||||
yield Message.Version, 1
|
yield Message.Version, 1
|
||||||
|
|
||||||
url = "{}/g/{}/{}/".format(self.root, self.gid, self.token)
|
url = "{}/g/{}/{}/".format(self.root, self.gid, self.token)
|
||||||
response = self.session.get(url)
|
response = self.request(url, fatal=False)
|
||||||
page = response.text
|
page = response.text
|
||||||
if response.status_code == 404 and "Gallery Not Available" in page:
|
if response.status_code == 404 and "Gallery Not Available" in page:
|
||||||
raise exception.AuthorizationError()
|
raise exception.AuthorizationError()
|
||||||
@@ -196,7 +196,7 @@ class ExhentaiGalleryExtractor(Extractor):
|
|||||||
"""Actual login implementation"""
|
"""Actual login implementation"""
|
||||||
self.log.info("Logging in as %s", username)
|
self.log.info("Logging in as %s", username)
|
||||||
url = "https://forums.e-hentai.org/index.php?act=Login&CODE=01"
|
url = "https://forums.e-hentai.org/index.php?act=Login&CODE=01"
|
||||||
params = {
|
data = {
|
||||||
"CookieDate": "1",
|
"CookieDate": "1",
|
||||||
"b": "d",
|
"b": "d",
|
||||||
"bt": "1-1",
|
"bt": "1-1",
|
||||||
@@ -206,7 +206,7 @@ class ExhentaiGalleryExtractor(Extractor):
|
|||||||
}
|
}
|
||||||
referer = "https://e-hentai.org/bounce_login.php?b=d&bt=1-1"
|
referer = "https://e-hentai.org/bounce_login.php?b=d&bt=1-1"
|
||||||
self.session.headers["Referer"] = referer
|
self.session.headers["Referer"] = referer
|
||||||
response = self.session.post(url, data=params)
|
response = self.request(url, method="POST", data=data)
|
||||||
|
|
||||||
if "You are now logged in as:" not in response.text:
|
if "You are now logged in as:" not in response.text:
|
||||||
raise exception.AuthenticationError()
|
raise exception.AuthenticationError()
|
||||||
|
|||||||
@@ -65,7 +65,7 @@ class GfycatImageExtractor(GfycatExtractor):
|
|||||||
|
|
||||||
def _get_info(self, gfycat_id):
|
def _get_info(self, gfycat_id):
|
||||||
url = "https://gfycat.com/cajax/get/" + gfycat_id
|
url = "https://gfycat.com/cajax/get/" + gfycat_id
|
||||||
data = self.session.get(url).json()
|
data = self.request(url).json()
|
||||||
if "error" in data:
|
if "error" in data:
|
||||||
raise exception.NotFoundError()
|
raise exception.NotFoundError("animation")
|
||||||
return data["gfyItem"]
|
return data["gfyItem"]
|
||||||
|
|||||||
@@ -62,7 +62,7 @@ class HentaifoundryUserExtractor(Extractor):
|
|||||||
def get_job_metadata(self):
|
def get_job_metadata(self):
|
||||||
"""Collect metadata for extractor-job"""
|
"""Collect metadata for extractor-job"""
|
||||||
url = self.url_base + self.artist + "?enterAgree=1"
|
url = self.url_base + self.artist + "?enterAgree=1"
|
||||||
response = self.session.get(url)
|
response = self.request(url, fatal=False)
|
||||||
if response.status_code == 404:
|
if response.status_code == 404:
|
||||||
raise exception.NotFoundError("user")
|
raise exception.NotFoundError("user")
|
||||||
page = response.text
|
page = response.text
|
||||||
@@ -150,7 +150,7 @@ class HentaifoundryImageExtractor(Extractor):
|
|||||||
"""Collect metadata for an image"""
|
"""Collect metadata for an image"""
|
||||||
url = "https://www.hentai-foundry.com/pictures/user/{}/{}".format(
|
url = "https://www.hentai-foundry.com/pictures/user/{}/{}".format(
|
||||||
self.artist, self.index)
|
self.artist, self.index)
|
||||||
response = self.session.get(url + "?enterAgree=1")
|
response = self.request(url + "?enterAgree=1", fatal=False)
|
||||||
if response.status_code == 404:
|
if response.status_code == 404:
|
||||||
raise exception.NotFoundError("image")
|
raise exception.NotFoundError("image")
|
||||||
extr = text.extract
|
extr = text.extract
|
||||||
|
|||||||
@@ -23,7 +23,7 @@ class ImgurExtractor(Extractor):
|
|||||||
self.mp4 = self.config("mp4", True)
|
self.mp4 = self.config("mp4", True)
|
||||||
|
|
||||||
def _get_data(self, urlpart):
|
def _get_data(self, urlpart):
|
||||||
response = self.session.get("https://imgur.com/" + urlpart)
|
response = self.request("https://imgur.com/" + urlpart, fatal=False)
|
||||||
if response.status_code == 404:
|
if response.status_code == 404:
|
||||||
raise exception.NotFoundError(self.subcategory)
|
raise exception.NotFoundError(self.subcategory)
|
||||||
data = text.extract(response.text, "image : ", ",\n")[0]
|
data = text.extract(response.text, "image : ", ",\n")[0]
|
||||||
|
|||||||
@@ -71,9 +71,9 @@ class NijieExtractor(AsynchronousExtractor):
|
|||||||
def _login_impl(self, username, password):
|
def _login_impl(self, username, password):
|
||||||
"""Actual login implementation"""
|
"""Actual login implementation"""
|
||||||
self.log.info("Logging in as %s", username)
|
self.log.info("Logging in as %s", username)
|
||||||
params = {"email": username, "password": password}
|
data = {"email": username, "password": password}
|
||||||
page = self.session.post("https://nijie.info/login_int.php",
|
page = self.request("https://nijie.info/login_int.php",
|
||||||
data=params).text
|
method="POST", data=data).text
|
||||||
if "//nijie.info/login.php" in page:
|
if "//nijie.info/login.php" in page:
|
||||||
raise exception.AuthenticationError()
|
raise exception.AuthenticationError()
|
||||||
return self.session.cookies
|
return self.session.cookies
|
||||||
@@ -102,7 +102,7 @@ class NijieUserExtractor(NijieExtractor):
|
|||||||
params = {"id": self.artist_id, "p": 1}
|
params = {"id": self.artist_id, "p": 1}
|
||||||
url = "https://nijie.info/members_illust.php"
|
url = "https://nijie.info/members_illust.php"
|
||||||
while True:
|
while True:
|
||||||
response = self.session.get(url, params=params)
|
response = self.request(url, params=params, fatal=False)
|
||||||
if response.status_code == 404:
|
if response.status_code == 404:
|
||||||
raise exception.NotFoundError("artist")
|
raise exception.NotFoundError("artist")
|
||||||
ids = list(text.extract_iter(response.text, ' illust_id="', '"'))
|
ids = list(text.extract_iter(response.text, ' illust_id="', '"'))
|
||||||
@@ -133,8 +133,8 @@ class NijieImageExtractor(NijieExtractor):
|
|||||||
self.page = ""
|
self.page = ""
|
||||||
|
|
||||||
def get_job_metadata(self):
|
def get_job_metadata(self):
|
||||||
response = self.session.get(self.popup_url + self.image_id,
|
response = self.request(self.popup_url + self.image_id,
|
||||||
allow_redirects=False)
|
allow_redirects=False)
|
||||||
if 300 <= response.status_code < 400:
|
if 300 <= response.status_code < 400:
|
||||||
raise exception.NotFoundError("image")
|
raise exception.NotFoundError("image")
|
||||||
self.page = response.text
|
self.page = response.text
|
||||||
|
|||||||
@@ -135,12 +135,10 @@ class MastodonAPI():
|
|||||||
"""Get an account's statuses"""
|
"""Get an account's statuses"""
|
||||||
url = "{}/api/v1/accounts/{}/statuses?only_media=1".format(
|
url = "{}/api/v1/accounts/{}/statuses?only_media=1".format(
|
||||||
self.root, account_id)
|
self.root, account_id)
|
||||||
while True:
|
while url:
|
||||||
response = self.session.get(url)
|
response = self.session.get(url)
|
||||||
yield from self._parse(response)
|
yield from self._parse(response)
|
||||||
url = response.links.get("next", {}).get("url")
|
url = response.links.get("next", {}).get("url")
|
||||||
if not url:
|
|
||||||
break
|
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def _parse(response):
|
def _parse(response):
|
||||||
|
|||||||
@@ -57,8 +57,8 @@ class SeigaExtractor(Extractor):
|
|||||||
"""Actual login implementation"""
|
"""Actual login implementation"""
|
||||||
self.log.info("Logging in as %s", username)
|
self.log.info("Logging in as %s", username)
|
||||||
url = "https://account.nicovideo.jp/api/v1/login"
|
url = "https://account.nicovideo.jp/api/v1/login"
|
||||||
params = {"mail_tel": username, "password": password}
|
data = {"mail_tel": username, "password": password}
|
||||||
self.session.post(url, data=params).close()
|
self.request(url, method="POST", data=data)
|
||||||
if "user_session" not in self.session.cookies:
|
if "user_session" not in self.session.cookies:
|
||||||
raise exception.AuthenticationError()
|
raise exception.AuthenticationError()
|
||||||
del self.session.cookies["nicosid"]
|
del self.session.cookies["nicosid"]
|
||||||
|
|||||||
@@ -49,9 +49,11 @@ class Job():
|
|||||||
except exception.AuthorizationError:
|
except exception.AuthorizationError:
|
||||||
log.error("You do not have permission to access the resource "
|
log.error("You do not have permission to access the resource "
|
||||||
"at '%s'", self.url)
|
"at '%s'", self.url)
|
||||||
except exception.NotFoundError as err:
|
except exception.NotFoundError as exc:
|
||||||
res = str(err) or "resource (gallery/image/user)"
|
res = str(exc) or "resource (gallery/image/user)"
|
||||||
log.error("The %s at '%s' does not exist", res, self.url)
|
log.error("The %s at '%s' does not exist", res, self.url)
|
||||||
|
except exception.HttpError as exc:
|
||||||
|
log.error("HTTP request failed:\n%s", exc)
|
||||||
except exception.StopExtraction:
|
except exception.StopExtraction:
|
||||||
pass
|
pass
|
||||||
except Exception as exc:
|
except Exception as exc:
|
||||||
|
|||||||
Reference in New Issue
Block a user