diff --git a/docs/configuration.rst b/docs/configuration.rst index f2537ec0..e28fd976 100644 --- a/docs/configuration.rst +++ b/docs/configuration.rst @@ -3000,6 +3000,19 @@ Description Recursively download files from subfolders. +extractor.hentaifoundry.descriptions +------------------------------------ +Type + ``string`` +Default + ``"text"`` +Description + Controls the format of ``description`` metadata fields. + + * ``"text"``: Plain text with HTML tags removed + * ``"html"``: Raw HTML content + + extractor.hentaifoundry.include ------------------------------- Type diff --git a/docs/gallery-dl.conf b/docs/gallery-dl.conf index ce0bad65..715ec7ea 100644 --- a/docs/gallery-dl.conf +++ b/docs/gallery-dl.conf @@ -370,6 +370,7 @@ }, "hentaifoundry": { + "descriptions": "text", "include": ["pictures"] }, "hitomi": diff --git a/gallery_dl/extractor/hentaifoundry.py b/gallery_dl/extractor/hentaifoundry.py index e5299400..91bcd389 100644 --- a/gallery_dl/extractor/hentaifoundry.py +++ b/gallery_dl/extractor/hentaifoundry.py @@ -32,6 +32,10 @@ class HentaifoundryExtractor(Extractor): self.start_post = 0 self.start_page = 1 + def _init(self): + if self.config("descriptions") == "html": + self._process_description = self._process_description_html + def items(self): self._init_site_filters() data = self.metadata() @@ -77,9 +81,9 @@ class HentaifoundryExtractor(Extractor): "artist" : text.unescape(extr('/profile">', '<')), "_body" : extr( '
Description
', '') - .replace("\r\n", "\n"), "", "")), + "description": self._process_description(extr( + "
", '') + .replace("\r\n", "\n")), "ratings" : [text.unescape(r) for r in text.extract_iter(extr( "class='ratings_box'", "
"), "title='", "'")], "date" : text.parse_datetime(extr("datetime='", "'")), @@ -106,6 +110,14 @@ class HentaifoundryExtractor(Extractor): return text.nameext_from_url(data["src"], data) + def _process_description(self, description): + return text.unescape(text.remove_html(description, "", "")) + + def _process_description_html(self, description): + pos1 = description.rfind(' +I hope you'll like it.
+Sorry for the bad quality, I made it on after effect because Flash works like shit when you have 44 layers to animate, and the final ae SWF file is 55mo big.\ +""", + "extension" : "swf", + "index" : 186714, + "tags" : ["soloid"], + "title" : "Osaloop", +}, + { "#url" : "http://www.hentai-foundry.com/pictures/user/Tenpura/407501/", "#category": ("", "hentaifoundry", "image"),