From 2ab74dfbf1197e4ac5628013ac74498433b426d6 Mon Sep 17 00:00:00 2001 From: icelit4 <248354500+icelit4@users.noreply.github.com> Date: Mon, 8 Dec 2025 01:15:59 -0800 Subject: [PATCH] [hentaifoundry] extract 'categories' metadata (#8656) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * [hentaifoundry] Add support for categories * rename 'hf_categories' to 'categories' * move code into '_extract_categories()' method * update tests --------- Co-authored-by: Mike Fährmann --- gallery_dl/extractor/hentaifoundry.py | 7 +++++++ test/results/hentaifoundry.py | 13 +++++++++---- 2 files changed, 16 insertions(+), 4 deletions(-) diff --git a/gallery_dl/extractor/hentaifoundry.py b/gallery_dl/extractor/hentaifoundry.py index cd0106bc..882183b9 100644 --- a/gallery_dl/extractor/hentaifoundry.py +++ b/gallery_dl/extractor/hentaifoundry.py @@ -86,6 +86,7 @@ class HentaifoundryExtractor(Extractor): .replace("\r\n", "\n")), "ratings" : [text.unescape(r) for r in text.extract_iter(extr( "class='ratings_box'", ""), "title='", "'")], + "categories" : self._extract_categories(extr), "date" : self.parse_datetime_iso(extr("datetime='", "'")), "views" : text.parse_int(extr(">Views", "<")), "score" : text.parse_int(extr(">Vote Score", "<")), @@ -141,11 +142,17 @@ class HentaifoundryExtractor(Extractor): path = extr('class="pdfLink" href="', '"') data["src"] = self.root + path data["index"] = text.parse_int(path.rsplit("/", 2)[1]) + data["categories"] = self._extract_categories(extr) data["ratings"] = [text.unescape(r) for r in text.extract_iter(extr( "class='ratings_box'", ""), "title='", "'")] return text.nameext_from_url(data["src"], data) + def _extract_categories(self, extr): + return [text.unescape(text.extr(c, ">", "<")) + for c in extr('class="categoryBreadcrumbs">', "") + .split("»")] + def _request_check(self, url, **kwargs): self.request = self._request_original diff --git a/test/results/hentaifoundry.py b/test/results/hentaifoundry.py index 65aaafcf..7c4229cf 100644 --- a/test/results/hentaifoundry.py +++ b/test/results/hentaifoundry.py @@ -90,6 +90,7 @@ __tests__ = ( "#sha1_content": "91bf01497c39254b6dfb234a18e8f01629c77fd1", "artist" : "Tenpura", + "categories" : ["Anime & Manga"], "date" : "dt:2016-02-22 14:41:19", "description": "Thank you!", "height" : 700, @@ -120,6 +121,7 @@ __tests__ = ( "#results" : "https://pictures.hentai-foundry.com/s/Soloid/186714/Soloid-186714-Osaloop.swf", "artist" : "Soloid", + "categories" : ["Misc"], "date" : "dt:2013-02-07 17:25:54", "description": "It took me ages.\nI hope you'll like it.\nSorry for the bad quality, I made it on after effect because Flash works like shit when you have 44 layers to animate, and the final ae SWF file is 55mo big.", "extension" : "swf", @@ -204,22 +206,24 @@ Sorry for the bad quality, I made it on after effect because Flash works like sh { "#url" : "https://www.hentai-foundry.com/stories/user/Likelymouse", "#class" : hentaifoundry.HentaifoundryStoriesExtractor, + "#range" : "2", "#results" : "https://www.hentai-foundry.com/stories/user/Likelymouse/77892/The-Sweater-and-Scarf-Public-Investigators-Part-1.pdf", "author" : "Likelymouse", - "chapters" : 4, + "categories" : ["Original", "Neko Shoujo & Kemonomimi"], + "chapters" : 11, "comments" : 0, - "date" : "dt:2025-09-01 00:00:00", + "date" : "dt:2025-10-04 00:00:00", "extension" : "pdf", "filename" : "The-Sweater-and-Scarf-Public-Investigators-Part-1", "index" : 77892, "rating" : 0, "src" : "https://www.hentai-foundry.com/stories/user/Likelymouse/77892/The-Sweater-and-Scarf-Public-Investigators-Part-1.pdf", - "status" : "Incomplete", + "status" : "Complete", "title" : "The Sweater and Scarf Public Investigators, Part 1", "user" : "Likelymouse", "views" : range(100, 10_000), - "words" : 10661, + "words" : 47031, "description": """\
135x240

@@ -250,6 +254,7 @@ Follow 22 y/o Puffy Penelope as she descends into degeneracy, mastering her new "#sha1_url": "5a67cfa8c3bf7634c8af8485dd07c1ea74ee0ae8", "title": "Overwatch High Chapter Voting Location", + "categories": ["Games", "Overwatch"], }, )