[exhentai] provide fallback URLs (#1021, #4745)

This commit is contained in:
Mike Fährmann
2023-11-04 17:06:46 +01:00
parent f4e61fd1d5
commit 69b931b9bb

View File

@@ -275,15 +275,19 @@ class ExhentaiGalleryExtractor(ExhentaiExtractor):
self.key_next = extr("'", "'") self.key_next = extr("'", "'")
iurl = extr('<img id="img" src="', '"') iurl = extr('<img id="img" src="', '"')
nl = extr(" nl(", ")").strip("\"'")
orig = extr('hentai.org/fullimg', '"') orig = extr('hentai.org/fullimg', '"')
try: try:
if self.original and orig: if self.original and orig:
url = self.root + "/fullimg" + text.unescape(orig) url = self.root + "/fullimg" + text.unescape(orig)
data = self._parse_original_info(extr('ownload original', '<')) data = self._parse_original_info(extr('ownload original', '<'))
data["_fallback"] = ("{}?nl={}".format(url, nl),)
else: else:
url = iurl url = iurl
data = self._parse_image_info(url) data = self._parse_image_info(url)
data["_fallback"] = self._fallback(
None, self.image_num, nl)
except IndexError: except IndexError:
self.log.debug("Page content:\n%s", page) self.log.debug("Page content:\n%s", page)
raise exception.StopExtraction( raise exception.StopExtraction(
@@ -317,6 +321,8 @@ class ExhentaiGalleryExtractor(ExhentaiExtractor):
imgkey = nextkey imgkey = nextkey
nextkey, pos = text.extract(i3, "'", "'") nextkey, pos = text.extract(i3, "'", "'")
imgurl , pos = text.extract(i3, 'id="img" src="', '"', pos) imgurl , pos = text.extract(i3, 'id="img" src="', '"', pos)
nl , pos = text.extract(i3, " nl(", ")", pos)
nl = (nl or "").strip("\"'")
try: try:
pos = i6.find("hentai.org/fullimg") pos = i6.find("hentai.org/fullimg")
@@ -325,9 +331,12 @@ class ExhentaiGalleryExtractor(ExhentaiExtractor):
url = text.unescape(origurl) url = text.unescape(origurl)
data = self._parse_original_info(text.extract( data = self._parse_original_info(text.extract(
i6, "ownload original", "<", pos)[0]) i6, "ownload original", "<", pos)[0])
data["_fallback"] = ("{}?nl={}".format(url, nl),)
else: else:
url = imgurl url = imgurl
data = self._parse_image_info(url) data = self._parse_image_info(url)
data["_fallback"] = self._fallback(
imgkey, request["page"], nl)
except IndexError: except IndexError:
self.log.debug("Page content:\n%s", page) self.log.debug("Page content:\n%s", page)
raise exception.StopExtraction( raise exception.StopExtraction(
@@ -401,6 +410,14 @@ class ExhentaiGalleryExtractor(ExhentaiExtractor):
raise exception.NotFoundError("image page") raise exception.NotFoundError("image page")
return page return page
def _fallback(self, imgkey, num, nl):
    """Lazily produce at most one fallback entry for an image page.

    The image page is requested again with the given 'nl' value appended
    as a query parameter. Nothing is requested until the returned
    generator is first iterated.

    imgkey -- page token used in the '/s/<token>/...' URL;
              'self.key_start' is used when this is falsy
    num    -- page number placed after the gallery ID in the URL
    nl     -- value sent as the 'nl' query parameter

    Yields the first element of 'self.image_from_page(page)'
    (presumably the image URL -- confirm against image_from_page),
    or nothing at all when the response text begins with
    "Invalid page" or "Keep trying".
    """
    token = imgkey or self.key_start
    url = "{}/s/{}/{}-{}?nl={}".format(
        self.root, token, self.gallery_id, num, nl)

    # fatal=False: the response body is inspected below regardless of
    # how the request turned out
    response = self.request(url, fatal=False)
    page = response.text

    # these prefixes mark a response that is not a usable image page
    if not page.startswith(("Invalid page", "Keep trying")):
        yield self.image_from_page(page)[0]
@staticmethod @staticmethod
def _parse_image_info(url): def _parse_image_info(url):
for part in url.split("/")[4:]: for part in url.split("/")[4:]: