From 6be7df53da30d6a9105c3db7dbc9441b7e029cc3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mike=20F=C3=A4hrmann?= Date: Sat, 13 Mar 2021 17:52:53 +0100 Subject: [PATCH] [hentaifox] improve metadata extraction (fixes #1378) --- gallery_dl/extractor/hentaifox.py | 51 ++++++++++++++++++++++++------- 1 file changed, 40 insertions(+), 11 deletions(-) diff --git a/gallery_dl/extractor/hentaifox.py b/gallery_dl/extractor/hentaifox.py index df18a1bd..a5bebdde 100644 --- a/gallery_dl/extractor/hentaifox.py +++ b/gallery_dl/extractor/hentaifox.py @@ -22,27 +22,56 @@ class HentaifoxBase(): class HentaifoxGalleryExtractor(HentaifoxBase, GalleryExtractor): """Extractor for image galleries on hentaifox.com""" pattern = r"(?:https?://)?(?:www\.)?hentaifox\.com(/gallery/(\d+))" - test = ("https://hentaifox.com/gallery/56622/", { - "pattern": r"https://i\d*\.hentaifox\.com/\d+/\d+/\d+\.jpg", - "keyword": "bcd6b67284f378e5cc30b89b761140e3e60fcd92", - "count": 24, - }) + test = ( + ("https://hentaifox.com/gallery/56622/", { + "pattern": r"https://i\d*\.hentaifox\.com/\d+/\d+/\d+\.jpg", + "keyword": "bcd6b67284f378e5cc30b89b761140e3e60fcd92", + "count": 24, + }), + # 'split_tag' element (#1378) + ("https://hentaifox.com/gallery/630/", { + "keyword": { + "artist": ["beti", "betty", "magi", "mimikaki"], + "characters": [ + "aerith gainsborough", + "tifa lockhart", + "yuffie kisaragi" + ], + "count": 32, + "gallery_id": 630, + "group": ["cu-little2"], + "parody": ["darkstalkers | vampire", "final fantasy vii"], + "tags": ["femdom", "fingering", "masturbation", "yuri"], + "title": "Cu-Little Bakanya~", + "type": "doujinshi", + }, + }), + ) def __init__(self, match): GalleryExtractor.__init__(self, match) self.gallery_id = match.group(2) - def metadata(self, page, split=text.split_html): + @staticmethod + def _split(txt): + return [ + text.remove_html(tag.partition(">")[2], "", "") + for tag in text.extract_iter( + txt, "class='tag_btn", "