diff --git a/gallery_dl/exception.py b/gallery_dl/exception.py
index 24769064..251c97a8 100644
--- a/gallery_dl/exception.py
+++ b/gallery_dl/exception.py
@@ -6,22 +6,49 @@
 # it under the terms of the GNU General Public License version 2 as
 # published by the Free Software Foundation.
 
+"""Exception classes used by gallery-dl
 
-class NoExtractorError(Exception):
-    """No extractor can handle the given URL"""
+Class Hierarchy:
+
+Exception
+ +-- GalleryDLException
+      +-- ExtractionError
+      |    +-- AuthenticationError
+      |    +-- AuthorizationError
+      |    +-- NotFoundError
+      |    +-- HttpError
+      +-- NoExtractorError
+      +-- StopExtraction
+"""
 
 
-class AuthenticationError(Exception):
+class GalleryDLException(Exception):
+    """Base class for GalleryDL exceptions"""
+
+
+class ExtractionError(GalleryDLException):
+    """Base class for exceptions during information extraction"""
+
+
+class AuthenticationError(ExtractionError):
     """Invalid or missing login information"""
 
 
-class AuthorizationError(Exception):
+class AuthorizationError(ExtractionError):
     """Insufficient privileges to access a resource"""
 
 
-class NotFoundError(Exception):
+class NotFoundError(ExtractionError):
     """Requested resource (gallery/image) does not exist"""
 
 
-class StopExtraction(Exception):
+class HttpError(ExtractionError):
+    """HTTP request during extraction failed"""
+
+
+class NoExtractorError(GalleryDLException):
+    """No extractor can handle the given URL"""
+
+
+class StopExtraction(GalleryDLException):
     """Extraction should stop"""
diff --git a/gallery_dl/extractor/batoto.py b/gallery_dl/extractor/batoto.py
index d90de328..ba3c9290 100644
--- a/gallery_dl/extractor/batoto.py
+++ b/gallery_dl/extractor/batoto.py
@@ -107,7 +107,7 @@ class BatotoChapterExtractor(BatotoExtractor, AsynchronousExtractor):
             "p": 1,
             "supress_webtoon": "t",
         }
-        response = self.session.get(self.reader_url, params=params)
+        response = self.request(self.reader_url, params=params, fatal=False)
         if response.status_code == 405:
             error = text.extract(response.text, "ERROR [", "]")[0]
             if error == "10030":
diff --git a/gallery_dl/extractor/common.py b/gallery_dl/extractor/common.py
index 31e32c05..865a6fa4 100644
--- a/gallery_dl/extractor/common.py
+++ b/gallery_dl/extractor/common.py
@@ -18,7 +18,7 @@ import requests
 import threading
 import http.cookiejar
 from .message import Message
-from .. import config
+from .. import config, exception
 
 
 class Extractor():
@@ -47,11 +47,30 @@ class Extractor():
         return config.interpolate(
             ("extractor", self.category, self.subcategory, key), default)
 
-    def request(self, url, encoding=None, *args, **kwargs):
-        response = safe_request(self.session, url, *args, **kwargs)
-        if encoding:
-            response.encoding = encoding
-        return response
+    def request(self, url, method="GET", encoding=None, fatal=True, retries=3,
+                *args, **kwargs):
+        """Send an HTTP request and return its response
+
+        A failed request is repeated after a one second pause until
+        'retries' attempts have been made; the last error is then
+        re-raised as HttpError. If 'fatal' is true, a 4xx or 5xx
+        status code counts as a failed request as well.
+        """
+        while True:
+            try:
+                response = self.session.request(method, url, *args, **kwargs)
+                if fatal:
+                    response.raise_for_status()
+                if encoding:
+                    response.encoding = encoding
+                return response
+            except requests.exceptions.RequestException as exc:
+                retries -= 1
+                # '<= 0' instead of '== 0': a 'retries' value below 1 must
+                # not turn this into an endless loop
+                if retries <= 0:
+                    raise exception.HttpError(exc) from exc
+            time.sleep(1)
 
     def _get_auth_info(self):
         """Return authentication information as (username, password) tuple"""
@@ -164,33 +183,8 @@ class MangaExtractor(Extractor):
         return []
 
 
-def safe_request(session, url, method="GET", *args, **kwargs):
-    tries = 0
-    while True:
-        # try to connect to remote source
-        try:
-            r = session.request(method, url, *args, **kwargs)
-        except requests.exceptions.ConnectionError:
-            tries += 1
-            time.sleep(1)
-            if tries == 5:
-                raise
-            continue
-
-        # reject error-status-codes
-        if r.status_code != requests.codes.ok:
-            tries += 1
-            time.sleep(1)
-            if tries == 5:
-                r.raise_for_status()
-            continue
-
-        # everything ok -- proceed to download
-        return r
-
-
-# Reduce strictness of the expected magic string in cookie jar files.
-# (This allows the use of Wget-generated cookiejar files without modification)
+# Reduce strictness of the expected magic string in cookiejar files.
+# (This allows the use of Wget-generated cookiejars without modification)
 
 http.cookiejar.MozillaCookieJar.magic_re = re.compile(
     "#( Netscape)? HTTP Cookie File", re.IGNORECASE)
diff --git a/gallery_dl/extractor/deviantart.py b/gallery_dl/extractor/deviantart.py
index cac276f3..4f65127d 100644
--- a/gallery_dl/extractor/deviantart.py
+++ b/gallery_dl/extractor/deviantart.py
@@ -242,7 +242,7 @@ class DeviantartDeviationExtractor(DeviantartExtractor):
         self.url = "https://" + match.group(1)
 
     def deviations(self):
-        response = self.session.get(self.url)
+        response = self.request(self.url, fatal=False)
         deviation_id = text.extract(response.text, '//deviation/', '"')[0]
         if response.status_code != 200 or not deviation_id:
             raise exception.NotFoundError("image")
diff --git a/gallery_dl/extractor/exhentai.py b/gallery_dl/extractor/exhentai.py
index 1c0257d4..bfdc01ad 100644
--- a/gallery_dl/extractor/exhentai.py
+++ b/gallery_dl/extractor/exhentai.py
@@ -56,7 +56,7 @@ class ExhentaiGalleryExtractor(Extractor):
         yield Message.Version, 1
 
         url = "{}/g/{}/{}/".format(self.root, self.gid, self.token)
-        response = self.session.get(url)
+        response = self.request(url, fatal=False)
         page = response.text
         if response.status_code == 404 and "Gallery Not Available" in page:
             raise exception.AuthorizationError()
@@ -196,7 +196,7 @@ class ExhentaiGalleryExtractor(Extractor):
         """Actual login implementation"""
         self.log.info("Logging in as %s", username)
         url = "https://forums.e-hentai.org/index.php?act=Login&CODE=01"
-        params = {
+        data = {
             "CookieDate": "1",
             "b": "d",
             "bt": "1-1",
@@ -206,7 +206,7 @@ class ExhentaiGalleryExtractor(Extractor):
         }
         referer = "https://e-hentai.org/bounce_login.php?b=d&bt=1-1"
         self.session.headers["Referer"] = referer
-        response = self.session.post(url, data=params)
+        response = self.request(url, method="POST", data=data)
 
         if "You are now logged in as:" not in response.text:
             raise exception.AuthenticationError()
diff --git a/gallery_dl/extractor/gfycat.py b/gallery_dl/extractor/gfycat.py
index 10e02492..467e453a 100644
--- a/gallery_dl/extractor/gfycat.py
+++ b/gallery_dl/extractor/gfycat.py
@@ -65,7 +65,7 @@ class GfycatImageExtractor(GfycatExtractor):
 
     def _get_info(self, gfycat_id):
         url = "https://gfycat.com/cajax/get/" + gfycat_id
-        data = self.session.get(url).json()
+        data = self.request(url).json()
         if "error" in data:
-            raise exception.NotFoundError()
+            raise exception.NotFoundError("animation")
         return data["gfyItem"]
diff --git a/gallery_dl/extractor/hentaifoundry.py b/gallery_dl/extractor/hentaifoundry.py
index 498c5e17..30d6e47f 100644
--- a/gallery_dl/extractor/hentaifoundry.py
+++ b/gallery_dl/extractor/hentaifoundry.py
@@ -62,7 +62,7 @@ class HentaifoundryUserExtractor(Extractor):
     def get_job_metadata(self):
         """Collect metadata for extractor-job"""
         url = self.url_base + self.artist + "?enterAgree=1"
-        response = self.session.get(url)
+        response = self.request(url, fatal=False)
         if response.status_code == 404:
             raise exception.NotFoundError("user")
         page = response.text
@@ -150,7 +150,7 @@ class HentaifoundryImageExtractor(Extractor):
         """Collect metadata for an image"""
         url = "https://www.hentai-foundry.com/pictures/user/{}/{}".format(
             self.artist, self.index)
-        response = self.session.get(url + "?enterAgree=1")
+        response = self.request(url + "?enterAgree=1", fatal=False)
         if response.status_code == 404:
             raise exception.NotFoundError("image")
         extr = text.extract
diff --git a/gallery_dl/extractor/imgur.py b/gallery_dl/extractor/imgur.py
index aa4e46d9..e36a6ff4 100644
--- a/gallery_dl/extractor/imgur.py
+++ b/gallery_dl/extractor/imgur.py
@@ -23,7 +23,7 @@ class ImgurExtractor(Extractor):
         self.mp4 = self.config("mp4", True)
 
     def _get_data(self, urlpart):
-        response = self.session.get("https://imgur.com/" + urlpart)
+        response = self.request("https://imgur.com/" + urlpart, fatal=False)
         if response.status_code == 404:
             raise exception.NotFoundError(self.subcategory)
         data = text.extract(response.text, "image : ", ",\n")[0]
diff --git a/gallery_dl/extractor/nijie.py b/gallery_dl/extractor/nijie.py
index 007a6ac5..fe25bddf 100644
--- a/gallery_dl/extractor/nijie.py
+++ b/gallery_dl/extractor/nijie.py
@@ -71,9 +71,9 @@ class NijieExtractor(AsynchronousExtractor):
     def _login_impl(self, username, password):
         """Actual login implementation"""
         self.log.info("Logging in as %s", username)
-        params = {"email": username, "password": password}
-        page = self.session.post("https://nijie.info/login_int.php",
-                                 data=params).text
+        data = {"email": username, "password": password}
+        page = self.request("https://nijie.info/login_int.php",
+                            method="POST", data=data).text
         if "//nijie.info/login.php" in page:
             raise exception.AuthenticationError()
         return self.session.cookies
@@ -102,7 +102,7 @@ class NijieUserExtractor(NijieExtractor):
         params = {"id": self.artist_id, "p": 1}
         url = "https://nijie.info/members_illust.php"
         while True:
-            response = self.session.get(url, params=params)
+            response = self.request(url, params=params, fatal=False)
             if response.status_code == 404:
                 raise exception.NotFoundError("artist")
             ids = list(text.extract_iter(response.text, ' illust_id="', '"'))
@@ -133,8 +133,8 @@ class NijieImageExtractor(NijieExtractor):
         self.page = ""
 
     def get_job_metadata(self):
-        response = self.session.get(self.popup_url + self.image_id,
-                                    allow_redirects=False)
+        response = self.request(self.popup_url + self.image_id,
+                                allow_redirects=False)
         if 300 <= response.status_code < 400:
             raise exception.NotFoundError("image")
         self.page = response.text
diff --git a/gallery_dl/extractor/pawoo.py b/gallery_dl/extractor/pawoo.py
index cd43477a..c6069bb3 100644
--- a/gallery_dl/extractor/pawoo.py
+++ b/gallery_dl/extractor/pawoo.py
@@ -135,12 +135,10 @@ class MastodonAPI():
         """Get an account's statuses"""
         url = "{}/api/v1/accounts/{}/statuses?only_media=1".format(
             self.root, account_id)
-        while True:
+        while url:
             response = self.session.get(url)
             yield from self._parse(response)
             url = response.links.get("next", {}).get("url")
-            if not url:
-                break
 
     @staticmethod
     def _parse(response):
diff --git a/gallery_dl/extractor/seiga.py b/gallery_dl/extractor/seiga.py
index 8e0ba1cb..f48dc38c 100644
--- a/gallery_dl/extractor/seiga.py
+++ b/gallery_dl/extractor/seiga.py
@@ -57,8 +57,8 @@ class SeigaExtractor(Extractor):
         """Actual login implementation"""
         self.log.info("Logging in as %s", username)
         url = "https://account.nicovideo.jp/api/v1/login"
-        params = {"mail_tel": username, "password": password}
-        self.session.post(url, data=params).close()
+        data = {"mail_tel": username, "password": password}
+        self.request(url, method="POST", data=data)
         if "user_session" not in self.session.cookies:
             raise exception.AuthenticationError()
         del self.session.cookies["nicosid"]
diff --git a/gallery_dl/job.py b/gallery_dl/job.py
index d6c92ce5..fef93b73 100644
--- a/gallery_dl/job.py
+++ b/gallery_dl/job.py
@@ -49,9 +49,11 @@ class Job():
         except exception.AuthorizationError:
             log.error("You do not have permission to access the resource "
                       "at '%s'", self.url)
-        except exception.NotFoundError as err:
-            res = str(err) or "resource (gallery/image/user)"
+        except exception.NotFoundError as exc:
+            res = str(exc) or "resource (gallery/image/user)"
             log.error("The %s at '%s' does not exist", res, self.url)
+        except exception.HttpError as exc:
+            log.error("HTTP request failed:\n%s", exc)
         except exception.StopExtraction:
             pass
         except Exception as exc: