From e75a4138c1787f349e9e05b66efed752b768c251 Mon Sep 17 00:00:00 2001 From: sarma-tyrant <192052862+sarma-tyrant@users.noreply.github.com> Date: Fri, 1 Aug 2025 16:08:43 -0400 Subject: [PATCH 1/4] [hentai-foundry] Html description option --- gallery_dl/extractor/hentaifoundry.py | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) diff --git a/gallery_dl/extractor/hentaifoundry.py b/gallery_dl/extractor/hentaifoundry.py index e5299400..6e3f8360 100644 --- a/gallery_dl/extractor/hentaifoundry.py +++ b/gallery_dl/extractor/hentaifoundry.py @@ -32,6 +32,10 @@ class HentaifoundryExtractor(Extractor): self.start_post = 0 self.start_page = 1 + def _init(self): + if self.config("descriptions") == "html": + self._process_description = self._process_html_description + def items(self): self._init_site_filters() data = self.metadata() @@ -77,9 +81,9 @@ class HentaifoundryExtractor(Extractor): "artist" : text.unescape(extr('/profile">', '<')), "_body" : extr( '
Description
', '') - .replace("\r\n", "\n"), "", "")), + "_description": extr( + "
", '') + .replace("\r\n", "\n"), "ratings" : [text.unescape(r) for r in text.extract_iter(extr( "class='ratings_box'", "
"), "title='", "'")], "date" : text.parse_datetime(extr("datetime='", "'")), @@ -90,6 +94,7 @@ class HentaifoundryExtractor(Extractor): ">Tags ", "")), } + data["description"] = self._process_description(data["_description"]) body = data["_body"] if " Date: Fri, 1 Aug 2025 16:29:33 -0400 Subject: [PATCH 2/4] Linting --- gallery_dl/extractor/hentaifoundry.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/gallery_dl/extractor/hentaifoundry.py b/gallery_dl/extractor/hentaifoundry.py index 6e3f8360..c9e146ea 100644 --- a/gallery_dl/extractor/hentaifoundry.py +++ b/gallery_dl/extractor/hentaifoundry.py @@ -83,7 +83,7 @@ class HentaifoundryExtractor(Extractor): '
", '') - .replace("\r\n", "\n"), + .replace("\r\n", "\n"), "ratings" : [text.unescape(r) for r in text.extract_iter(extr( "class='ratings_box'", "
"), "title='", "'")], "date" : text.parse_datetime(extr("datetime='", "'")), @@ -112,8 +112,8 @@ class HentaifoundryExtractor(Extractor): return text.nameext_from_url(data["src"], data) def _process_html_description(self, description: str): - pos1 = description.rfind(' Date: Fri, 1 Aug 2025 16:36:20 -0400 Subject: [PATCH 3/4] Added to docs --- docs/configuration.rst | 13 +++++++++++++ docs/gallery-dl.conf | 1 + 2 files changed, 14 insertions(+) diff --git a/docs/configuration.rst b/docs/configuration.rst index 0cff476e..bace3837 100644 --- a/docs/configuration.rst +++ b/docs/configuration.rst @@ -2969,6 +2969,19 @@ Description Recursively download files from subfolders. +extractor.hentaifoundry.descriptions +---------------------------------- +Type + ``string`` +Default + ``"text"`` +Description + Controls the format of ``description`` metadata fields. + + * ``"text"``: Plain text with HTML tags removed + * ``"html"``: Raw HTML content + + extractor.hentaifoundry.include ------------------------------- Type diff --git a/docs/gallery-dl.conf b/docs/gallery-dl.conf index 1c4a463f..8deca8c9 100644 --- a/docs/gallery-dl.conf +++ b/docs/gallery-dl.conf @@ -364,6 +364,7 @@ }, "hentaifoundry": { + "descriptions": "text", "include": ["pictures"] }, "hitomi": From afa720d3e045f2014e6a089c9a0f44e5f47d801f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mike=20F=C3=A4hrmann?= Date: Sat, 2 Aug 2025 17:29:07 +0200 Subject: [PATCH 4/4] [hentaifoundry] add 'html' description test case --- docs/configuration.rst | 2 +- gallery_dl/extractor/hentaifoundry.py | 15 +++++++-------- test/results/hentaifoundry.py | 18 ++++++++++++++++++ 3 files changed, 26 insertions(+), 9 deletions(-) diff --git a/docs/configuration.rst b/docs/configuration.rst index bace3837..da95fb2e 100644 --- a/docs/configuration.rst +++ b/docs/configuration.rst @@ -2970,7 +2970,7 @@ Description extractor.hentaifoundry.descriptions ----------------------------------- +------------------------------------ Type ``string`` Default diff --git a/gallery_dl/extractor/hentaifoundry.py b/gallery_dl/extractor/hentaifoundry.py index c9e146ea..91bcd389 100644 --- a/gallery_dl/extractor/hentaifoundry.py +++ b/gallery_dl/extractor/hentaifoundry.py @@ -34,7 +34,7 @@ class HentaifoundryExtractor(Extractor): def _init(self): if self.config("descriptions") == "html": - self._process_description = self._process_html_description + self._process_description = self._process_description_html def items(self): self._init_site_filters() @@ -81,9 +81,9 @@ class HentaifoundryExtractor(Extractor): "artist" : text.unescape(extr('/profile">', '<')), "_body" : extr( '
", '') - .replace("\r\n", "\n"), + .replace("\r\n", "\n")), "ratings" : [text.unescape(r) for r in text.extract_iter(extr( "class='ratings_box'", "
"), "title='", "'")], "date" : text.parse_datetime(extr("datetime='", "'")), @@ -94,7 +94,6 @@ class HentaifoundryExtractor(Extractor): ">Tags ", "")), } - data["description"] = self._process_description(data["_description"]) body = data["_body"] if " +I hope you'll like it.
+Sorry for the bad quality, I made it on after effect because Flash works like shit when you have 44 layers to animate, and the final ae SWF file is 55mo big.\ +""", + "extension" : "swf", + "index" : 186714, + "tags" : ["soloid"], + "title" : "Osaloop", +}, + { "#url" : "http://www.hentai-foundry.com/pictures/user/Tenpura/407501/", "#category": ("", "hentaifoundry", "image"),