From 17bd053d94f816b5fbf3afd6d3d700b5c6e582b2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mike=20F=C3=A4hrmann?= Date: Wed, 15 Mar 2023 14:28:03 +0100 Subject: [PATCH] [hiperdex] fix extraction (#3768) --- docs/supportedsites.md | 2 +- gallery_dl/extractor/hiperdex.py | 34 +++++++++++--------------------- 2 files changed, 13 insertions(+), 23 deletions(-) diff --git a/docs/supportedsites.md b/docs/supportedsites.md index 0397ff59..22e848a5 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -309,7 +309,7 @@ Consider all sites to be NSFW unless otherwise known. Hiperdex - https://1sthiperdex.com/ + https://hiperdex.com/ Artists, Chapters, Manga diff --git a/gallery_dl/extractor/hiperdex.py b/gallery_dl/extractor/hiperdex.py index d61c139b..98641110 100644 --- a/gallery_dl/extractor/hiperdex.py +++ b/gallery_dl/extractor/hiperdex.py @@ -6,7 +6,7 @@ # it under the terms of the GNU General Public License version 2 as # published by the Free Software Foundation. -"""Extractors for https://1sthiperdex.com/""" +"""Extractors for https://hiperdex.com/""" from .common import ChapterExtractor, MangaExtractor from .. import text @@ -20,7 +20,7 @@ BASE_PATTERN = (r"((?:https?://)?(?:www\.)?" class HiperdexBase(): """Base class for hiperdex extractors""" category = "hiperdex" - root = "https://1sthiperdex.com" + root = "https://hiperdex.com" @memcache(keyarg=1) def manga_data(self, manga, page=None): @@ -31,7 +31,7 @@ class HiperdexBase(): return { "manga" : text.unescape(extr( - "", "<").rpartition("&")[0].strip()), + "<title>", "<").rpartition(" - ")[0].strip()), "score" : text.parse_float(extr( 'id="averagerate">', '<')), "author" : text.remove_html(extr( @@ -65,10 +65,10 @@ class HiperdexBase(): class HiperdexChapterExtractor(HiperdexBase, ChapterExtractor): - """Extractor for manga chapters from 1sthiperdex.com""" + """Extractor for manga chapters from hiperdex.com""" pattern = BASE_PATTERN + r"(/manga/([^/?#]+)/([^/?#]+))" test = ( - ("https://1sthiperdex.com/manga/domestic-na-kanojo/154-5/", { + ("https://hiperdex.com/manga/domestic-na-kanojo/154-5/", { "pattern": r"https://(1st)?hiperdex\d?.(com|net|info)" r"/wp-content/uploads/WP-manga/data" r"/manga_\w+/[0-9a-f]{32}/\d+\.webp", @@ -86,7 +86,7 @@ class HiperdexChapterExtractor(HiperdexBase, ChapterExtractor): "type" : "Manga", }, }), - ("https://hiperdex.com/manga/domestic-na-kanojo/154-5/"), + ("https://1sthiperdex.com/manga/domestic-na-kanojo/154-5/"), ("https://hiperdex2.com/manga/domestic-na-kanojo/154-5/"), ("https://hiperdex.net/manga/domestic-na-kanojo/154-5/"), ("https://hiperdex.info/manga/domestic-na-kanojo/154-5/"), @@ -109,11 +109,11 @@ class HiperdexChapterExtractor(HiperdexBase, ChapterExtractor): class HiperdexMangaExtractor(HiperdexBase, MangaExtractor): - """Extractor for manga from 1sthiperdex.com""" + """Extractor for manga from hiperdex.com""" chapterclass = HiperdexChapterExtractor pattern = BASE_PATTERN + r"(/manga/([^/?#]+))/?$" test = ( - ("https://1sthiperdex.com/manga/youre-not-that-special/", { + ("https://hiperdex.com/manga/youre-not-that-special/", { "count": 51, "pattern": HiperdexChapterExtractor.pattern, "keyword": { @@ -130,7 +130,7 @@ class HiperdexMangaExtractor(HiperdexBase, MangaExtractor): "type" : "Manhwa", }, }), - ("https://hiperdex.com/manga/youre-not-that-special/"), + ("https://1sthiperdex.com/manga/youre-not-that-special/"), ("https://hiperdex2.com/manga/youre-not-that-special/"), ("https://hiperdex.net/manga/youre-not-that-special/"), ("https://hiperdex.info/manga/youre-not-that-special/"), @@ -145,19 +145,9 @@ class HiperdexMangaExtractor(HiperdexBase, MangaExtractor): self.manga_data(self.manga, page) results = [] - shortlink = text.extr(page, "rel='shortlink' href='", "'") - data = { - "action" : "manga_get_reading_nav", - "manga" : shortlink.rpartition("=")[2], - "chapter" : "", - "volume_id": "", - "style" : "list", - "type" : "manga", - } - url = self.root + "/wp-admin/admin-ajax.php" - page = self.request(url, method="POST", data=data).text - - for url in text.extract_iter(page, 'data-redirect="', '"'): + for html in text.extract_iter( + page, '<li class="wp-manga-chapter', '</li>'): + url = text.extr(html, 'href="', '"') chapter = url.rpartition("/")[2] results.append((url, self.chapter_data(chapter)))