merge #7952: [hentaifoundry] add 'descriptions' option
This commit is contained in:
@@ -3000,6 +3000,19 @@ Description
|
|||||||
Recursively download files from subfolders.
|
Recursively download files from subfolders.
|
||||||
|
|
||||||
|
|
||||||
|
extractor.hentaifoundry.descriptions
|
||||||
|
------------------------------------
|
||||||
|
Type
|
||||||
|
``string``
|
||||||
|
Default
|
||||||
|
``"text"``
|
||||||
|
Description
|
||||||
|
Controls the format of ``description`` metadata fields.
|
||||||
|
|
||||||
|
* ``"text"``: Plain text with HTML tags removed
|
||||||
|
* ``"html"``: Raw HTML content
|
||||||
|
|
||||||
|
|
||||||
extractor.hentaifoundry.include
|
extractor.hentaifoundry.include
|
||||||
-------------------------------
|
-------------------------------
|
||||||
Type
|
Type
|
||||||
|
|||||||
@@ -370,6 +370,7 @@
|
|||||||
},
|
},
|
||||||
"hentaifoundry":
|
"hentaifoundry":
|
||||||
{
|
{
|
||||||
|
"descriptions": "text",
|
||||||
"include": ["pictures"]
|
"include": ["pictures"]
|
||||||
},
|
},
|
||||||
"hitomi":
|
"hitomi":
|
||||||
|
|||||||
@@ -32,6 +32,10 @@ class HentaifoundryExtractor(Extractor):
|
|||||||
self.start_post = 0
|
self.start_post = 0
|
||||||
self.start_page = 1
|
self.start_page = 1
|
||||||
|
|
||||||
|
def _init(self):
|
||||||
|
if self.config("descriptions") == "html":
|
||||||
|
self._process_description = self._process_description_html
|
||||||
|
|
||||||
def items(self):
|
def items(self):
|
||||||
self._init_site_filters()
|
self._init_site_filters()
|
||||||
data = self.metadata()
|
data = self.metadata()
|
||||||
@@ -77,9 +81,9 @@ class HentaifoundryExtractor(Extractor):
|
|||||||
"artist" : text.unescape(extr('/profile">', '<')),
|
"artist" : text.unescape(extr('/profile">', '<')),
|
||||||
"_body" : extr(
|
"_body" : extr(
|
||||||
'<div class="boxbody"', '<div class="boxfooter"'),
|
'<div class="boxbody"', '<div class="boxfooter"'),
|
||||||
"description": text.unescape(text.remove_html(extr(
|
"description": self._process_description(extr(
|
||||||
'>Description</div>', '</section>')
|
"<div class='picDescript'>", '</section>')
|
||||||
.replace("\r\n", "\n"), "", "")),
|
.replace("\r\n", "\n")),
|
||||||
"ratings" : [text.unescape(r) for r in text.extract_iter(extr(
|
"ratings" : [text.unescape(r) for r in text.extract_iter(extr(
|
||||||
"class='ratings_box'", "</div>"), "title='", "'")],
|
"class='ratings_box'", "</div>"), "title='", "'")],
|
||||||
"date" : text.parse_datetime(extr("datetime='", "'")),
|
"date" : text.parse_datetime(extr("datetime='", "'")),
|
||||||
@@ -106,6 +110,14 @@ class HentaifoundryExtractor(Extractor):
|
|||||||
|
|
||||||
return text.nameext_from_url(data["src"], data)
|
return text.nameext_from_url(data["src"], data)
|
||||||
|
|
||||||
|
def _process_description(self, description):
|
||||||
|
return text.unescape(text.remove_html(description, "", ""))
|
||||||
|
|
||||||
|
def _process_description_html(self, description):
|
||||||
|
pos1 = description.rfind('</div') # picDescript
|
||||||
|
pos2 = description.rfind('</div', None, pos1) # boxBody
|
||||||
|
return str.strip(description[0:pos2])
|
||||||
|
|
||||||
def _parse_story(self, html):
|
def _parse_story(self, html):
|
||||||
"""Collect url and metadata for a story"""
|
"""Collect url and metadata for a story"""
|
||||||
extr = text.extract_from(html)
|
extr = text.extract_from(html)
|
||||||
|
|||||||
@@ -144,6 +144,24 @@ __tests__ = (
|
|||||||
"width" : 613,
|
"width" : 613,
|
||||||
},
|
},
|
||||||
|
|
||||||
|
{
|
||||||
|
"#url" : "https://www.hentai-foundry.com/pictures/user/Soloid/186714/Osaloop",
|
||||||
|
"#comment" : "HTML 'description'",
|
||||||
|
"#class" : hentaifoundry.HentaifoundryImageExtractor,
|
||||||
|
"#options" : {"descriptions": "html"},
|
||||||
|
"#results" : "https://pictures.hentai-foundry.com/s/Soloid/186714/Soloid-186714-Osaloop.swf",
|
||||||
|
|
||||||
|
"description": """\
|
||||||
|
It took me ages.<br />
|
||||||
|
I hope you'll like it.<br />
|
||||||
|
Sorry for the bad quality, I made it on after effect because Flash works like shit when you have 44 layers to animate, and the final ae SWF file is 55mo big.\
|
||||||
|
""",
|
||||||
|
"extension" : "swf",
|
||||||
|
"index" : 186714,
|
||||||
|
"tags" : ["soloid"],
|
||||||
|
"title" : "Osaloop",
|
||||||
|
},
|
||||||
|
|
||||||
{
|
{
|
||||||
"#url" : "http://www.hentai-foundry.com/pictures/user/Tenpura/407501/",
|
"#url" : "http://www.hentai-foundry.com/pictures/user/Tenpura/407501/",
|
||||||
"#category": ("", "hentaifoundry", "image"),
|
"#category": ("", "hentaifoundry", "image"),
|
||||||
|
|||||||
Reference in New Issue
Block a user