From feacdd5d8771ed90d5f063fd39248b9d2458912d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mike=20F=C3=A4hrmann?= Date: Sat, 12 Apr 2025 20:14:31 +0200 Subject: [PATCH] [hentai2read] fix exception for chapters without artist (#7355) --- gallery_dl/extractor/hentai2read.py | 24 ++++++++++++++---------- test/results/hentai2read.py | 25 +++++++++++++++++++++++++ 2 files changed, 39 insertions(+), 10 deletions(-) diff --git a/gallery_dl/extractor/hentai2read.py b/gallery_dl/extractor/hentai2read.py index 9ab1411f..1317ce98 100644 --- a/gallery_dl/extractor/hentai2read.py +++ b/gallery_dl/extractor/hentai2read.py @@ -25,26 +25,30 @@ class Hentai2readChapterExtractor(Hentai2readBase, ChapterExtractor): pattern = r"(?:https?://)?(?:www\.)?hentai2read\.com(/[^/?#]+/([^/?#]+))" example = "https://hentai2read.com/TITLE/1/" - def __init__(self, match): - self.chapter = match.group(2) - ChapterExtractor.__init__(self, match) - def metadata(self, page): title, pos = text.extract(page, "", "") manga_id, pos = text.extract(page, 'data-mid="', '"', pos) chapter_id, pos = text.extract(page, 'data-cid="', '"', pos) - chapter, sep, minor = self.chapter.partition(".") - match = re.match(r"Reading (.+) \(([^)]+)\) Hentai(?: by (.+))? - " + chapter, sep, minor = self.groups[1].partition(".") + + match = re.match(r"Reading (.+) \(([^)]+)\) Hentai(?: by (.*))? - " r"([^:]+): (.+) . Page 1 ", title) + if match: + manga, type, author, _, title = match.groups() + else: + self.log.warning("Failed to extract 'manga', 'type', 'author', " + "and 'title' metadata") + manga = type = author = title = "" + return { - "manga": match.group(1), + "manga": manga, "manga_id": text.parse_int(manga_id), "chapter": text.parse_int(chapter), "chapter_minor": sep + minor, "chapter_id": text.parse_int(chapter_id), - "type": match.group(2), - "author": match.group(3), - "title": match.group(5), + "type": type, + "author": author, + "title": title, "lang": "en", "language": "English", } diff --git a/test/results/hentai2read.py b/test/results/hentai2read.py index 01349c2e..0b46e10a 100644 --- a/test/results/hentai2read.py +++ b/test/results/hentai2read.py @@ -37,6 +37,31 @@ __tests__ = ( "type" : "Original", }, +{ + "#url" : "https://hentai2read.com/nozoki_ana/1/", + "#category": ("", "hentai2read", "chapter"), + "#class" : hentai2read.Hentai2readChapterExtractor, + "#pattern" : r"https://hentaicdn\.com/hentai/2720/1/hcdn00\d+\.jpg", + "#count" : 203, + + "author" : "", + "chapter" : 1, + "chapter_id" : 2965, + "chapter_minor": "", + "count" : 203, + "extension" : "jpg", + "filename" : str, + "lang" : "en", + "language" : "English", + "manga" : "Nozoki Ana [Ecchi]", + "manga_id" : 2720, + "page" : range(1, 203), + "subcategory" : "chapter", + "title" : "Nozoki Ana 1", + "type" : "Original", + +}, + { "#url" : "https://hentai2read.com/amazon_elixir/", "#category": ("", "hentai2read", "manga"),