[hentaifoundry] extract 'categories' metadata (#8656)
* [hentaifoundry] Add support for categories
* rename 'hf_categories' to 'categories'
* move code into '_extract_categories()' method
* update tests

---------

Co-authored-by: Mike Fährmann <mike_faehrmann@web.de>
This commit is contained in:
@@ -86,6 +86,7 @@ class HentaifoundryExtractor(Extractor):
|
||||
.replace("\r\n", "\n")),
|
||||
"ratings" : [text.unescape(r) for r in text.extract_iter(extr(
|
||||
"class='ratings_box'", "</div>"), "title='", "'")],
|
||||
"categories" : self._extract_categories(extr),
|
||||
"date" : self.parse_datetime_iso(extr("datetime='", "'")),
|
||||
"views" : text.parse_int(extr(">Views</span>", "<")),
|
||||
"score" : text.parse_int(extr(">Vote Score</span>", "<")),
|
||||
@@ -141,11 +142,17 @@ class HentaifoundryExtractor(Extractor):
|
||||
path = extr('class="pdfLink" href="', '"')
|
||||
data["src"] = self.root + path
|
||||
data["index"] = text.parse_int(path.rsplit("/", 2)[1])
|
||||
data["categories"] = self._extract_categories(extr)
|
||||
data["ratings"] = [text.unescape(r) for r in text.extract_iter(extr(
|
||||
"class='ratings_box'", "</div>"), "title='", "'")]
|
||||
|
||||
return text.nameext_from_url(data["src"], data)
|
||||
|
||||
def _extract_categories(self, extr):
    """Return the category names listed in the page's breadcrumb trail.

    'extr' is called once to fetch the text between the
    'class="categoryBreadcrumbs">' marker and the following "</span>".
    That text is split on "»", and for each piece the text between the
    first ">" and the next "<" is taken and HTML-unescaped.

    Returns a list of category names, or an empty list when no
    breadcrumb text was extracted.
    """
    breadcrumbs = extr('class="categoryBreadcrumbs">', "</span>")
    if not breadcrumbs:
        # str.split() with an explicit separator turns "" into [""],
        # which would be reported as a single empty category name.
        # NOTE(review): assumes 'extr' yields an empty/falsy value when
        # the markers are not found - confirm against its definition.
        return []
    return [
        text.unescape(text.extr(entry, ">", "<"))
        for entry in breadcrumbs.split("»")
    ]
|
||||
|
||||
def _request_check(self, url, **kwargs):
|
||||
self.request = self._request_original
|
||||
|
||||
|
||||
@@ -90,6 +90,7 @@ __tests__ = (
|
||||
"#sha1_content": "91bf01497c39254b6dfb234a18e8f01629c77fd1",
|
||||
|
||||
"artist" : "Tenpura",
|
||||
"categories" : ["Anime & Manga"],
|
||||
"date" : "dt:2016-02-22 14:41:19",
|
||||
"description": "Thank you!",
|
||||
"height" : 700,
|
||||
@@ -120,6 +121,7 @@ __tests__ = (
|
||||
"#results" : "https://pictures.hentai-foundry.com/s/Soloid/186714/Soloid-186714-Osaloop.swf",
|
||||
|
||||
"artist" : "Soloid",
|
||||
"categories" : ["Misc"],
|
||||
"date" : "dt:2013-02-07 17:25:54",
|
||||
"description": "It took me ages.\nI hope you'll like it.\nSorry for the bad quality, I made it on after effect because Flash works like shit when you have 44 layers to animate, and the final ae SWF file is 55mo big.",
|
||||
"extension" : "swf",
|
||||
@@ -204,22 +206,24 @@ Sorry for the bad quality, I made it on after effect because Flash works like sh
|
||||
{
|
||||
"#url" : "https://www.hentai-foundry.com/stories/user/Likelymouse",
|
||||
"#class" : hentaifoundry.HentaifoundryStoriesExtractor,
|
||||
"#range" : "2",
|
||||
"#results" : "https://www.hentai-foundry.com/stories/user/Likelymouse/77892/The-Sweater-and-Scarf-Public-Investigators-Part-1.pdf",
|
||||
|
||||
"author" : "Likelymouse",
|
||||
"chapters" : 4,
|
||||
"categories" : ["Original", "Neko Shoujo & Kemonomimi"],
|
||||
"chapters" : 11,
|
||||
"comments" : 0,
|
||||
"date" : "dt:2025-09-01 00:00:00",
|
||||
"date" : "dt:2025-10-04 00:00:00",
|
||||
"extension" : "pdf",
|
||||
"filename" : "The-Sweater-and-Scarf-Public-Investigators-Part-1",
|
||||
"index" : 77892,
|
||||
"rating" : 0,
|
||||
"src" : "https://www.hentai-foundry.com/stories/user/Likelymouse/77892/The-Sweater-and-Scarf-Public-Investigators-Part-1.pdf",
|
||||
"status" : "Incomplete",
|
||||
"status" : "Complete",
|
||||
"title" : "The Sweater and Scarf Public Investigators, Part 1",
|
||||
"user" : "Likelymouse",
|
||||
"views" : range(100, 10_000),
|
||||
"words" : 10661,
|
||||
"words" : 47031,
|
||||
"description": """\
|
||||
<div style="text-align:center"><a href="https://imgur.com/a/uRDss5c"><img src="https://i.imgur.com/SCWI09e.jpeg" alt="135x240"/></a><br />
|
||||
<br />
|
||||
@@ -250,6 +254,7 @@ Follow 22 y/o Puffy Penelope as she descends into degeneracy, mastering her new
|
||||
"#sha1_url": "5a67cfa8c3bf7634c8af8485dd07c1ea74ee0ae8",
|
||||
|
||||
"title": "Overwatch High Chapter Voting Location",
|
||||
"categories": ["Games", "Overwatch"],
|
||||
},
|
||||
|
||||
)
|
||||
|
||||
Reference in New Issue
Block a user