From 37d584a9b2dce9a225d4ae88de808048fffb4a29 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mike=20F=C3=A4hrmann?= Date: Sat, 26 Mar 2022 12:46:18 +0100 Subject: [PATCH] [hitomi] update metadata extraction (fixes #2444) remove 'hitomi.metadata' option, as it is no longer necessary to make additional HTTP requests to fetch all metadata. --- docs/configuration.rst | 11 -------- gallery_dl/extractor/hitomi.py | 49 +++++++--------------------------- 2 files changed, 9 insertions(+), 51 deletions(-) diff --git a/docs/configuration.rst b/docs/configuration.rst index 49bf83d4..8472bac9 100644 --- a/docs/configuration.rst +++ b/docs/configuration.rst @@ -1336,17 +1336,6 @@ Description but is most likely going to fail with ``403 Forbidden`` errors. -extractor.hitomi.metadata -------------------------- -Type - ``bool`` -Default - ``false`` -Description - Try to extract - ``artist``, ``group``, ``parody``, and ``characters`` metadata. - - extractor.imgur.mp4 ------------------- Type diff --git a/gallery_dl/extractor/hitomi.py b/gallery_dl/extractor/hitomi.py index 34eaaaba..ca7e6927 100644 --- a/gallery_dl/extractor/hitomi.py +++ b/gallery_dl/extractor/hitomi.py @@ -28,8 +28,7 @@ class HitomiGalleryExtractor(GalleryExtractor): ("https://hitomi.la/galleries/867789.html", { "pattern": r"https://[a-c]a\.hitomi\.la/webp/\d+/\d+" r"/[0-9a-f]{64}\.webp", - "keyword": "4b584d09d535694d7d757c47daf5c15d116420d2", - "options": (("metadata", True),), + "keyword": "86af5371f38117a07407f11af689bdd460b09710", "count": 16, }), # download test @@ -77,23 +76,18 @@ class HitomiGalleryExtractor(GalleryExtractor): def metadata(self, page): self.info = info = json.loads(page.partition("=")[2]) + iget = info.get - data = self._data_from_gallery_info(info) - if self.config("metadata", False): - data.update(self._data_from_gallery_page(info)) - return data - - def _data_from_gallery_info(self, info): - language = info.get("language") + language = iget("language") if language: language = language.capitalize() - date = info.get("date") + date = iget("date") if date: date += ":00" tags = [] - for tinfo in info.get("tags") or (): + for tinfo in iget("tags") or (): tag = string.capwords(tinfo["tag"]) if tinfo.get("female"): tag += " ♀" @@ -109,35 +103,10 @@ class HitomiGalleryExtractor(GalleryExtractor): "lang" : util.language_to_code(language), "date" : text.parse_datetime(date, "%Y-%m-%d %H:%M:%S%z"), "tags" : tags, - } - - def _data_from_gallery_page(self, info): - url = "{}/galleries/{}.html".format(self.root, info["id"]) - - # follow redirects - while True: - response = self.request(url, fatal=False) - if b"Redirect" not in response.content: - break - url = text.extract( - response.text, 'http-equiv="refresh" content="', '"', - )[0].partition("=")[2] - - if response.status_code >= 400: - return {} - - def prep(value): - return [ - text.unescape(string.capwords(v)) - for v in text.extract_iter(value or "", '.html">', '<') - ] - - extr = text.extract_from(response.text) - return { - "artist" : prep(extr('

', '

')), - "group" : prep(extr('Group', '')), - "parody" : prep(extr('Series', '')), - "characters": prep(extr('Characters', '')), + "artist" : [o["artist"] for o in iget("artists") or ()], + "group" : [o["group"] for o in iget("groups") or ()], + "parody" : [o["parody"] for o in iget("parodys") or ()], + "characters": [o["character"] for o in iget("characters") or ()] } def images(self, _):