[hentaifoundry] add support for stories (closes #734)
This commit is contained in:
@@ -962,13 +962,13 @@ Type
|
|||||||
Default
|
Default
|
||||||
``"gallery"``
|
``"gallery"``
|
||||||
Example
|
Example
|
||||||
``"scraps,favorite"`` or ``["scraps", "favorite"]``
|
``"scraps,stories"`` or ``["scraps", "stories"]``
|
||||||
Description
|
Description
|
||||||
A (comma-separated) list of subcategories to include
|
A (comma-separated) list of subcategories to include
|
||||||
when processing a user profile.
|
when processing a user profile.
|
||||||
|
|
||||||
Possible values are
|
Possible values are
|
||||||
``"gallery"``, ``"scraps"``, ``"favorite"``.
|
``"gallery"``, ``"scraps"``, ``"stories"``, ``"favorite"``.
|
||||||
|
|
||||||
You can use ``"all"`` instead of listing all values separately.
|
You can use ``"all"`` instead of listing all values separately.
|
||||||
|
|
||||||
|
|||||||
@@ -155,7 +155,7 @@ Turboimagehost https://www.turboimagehost.com/ individual Images
|
|||||||
.. |deviantart-C| replace:: Collections, Deviations, Favorites, Folders, Galleries, Journals, Popular Images, Scraps, Sta.sh, User Profiles
|
.. |deviantart-C| replace:: Collections, Deviations, Favorites, Folders, Galleries, Journals, Popular Images, Scraps, Sta.sh, User Profiles
|
||||||
.. |flickr-C| replace:: Albums, Favorites, Galleries, Groups, individual Images, Search Results, User Profiles
|
.. |flickr-C| replace:: Albums, Favorites, Galleries, Groups, individual Images, Search Results, User Profiles
|
||||||
.. |furaffinity-C| replace:: Favorites, Galleries, Posts, Scraps, Search Results, User Profiles
|
.. |furaffinity-C| replace:: Favorites, Galleries, Posts, Scraps, Search Results, User Profiles
|
||||||
.. |hentaifoundry-C| replace:: Favorites, Galleries, individual Images, Popular Images, Recent Images, Scraps, User Profiles
|
.. |hentaifoundry-C| replace:: Favorites, Galleries, individual Images, Popular Images, Recent Images, Scraps, Stories, User Profiles
|
||||||
.. |imgur-C| replace:: Albums, Favorites, Galleries, individual Images, Search Results, Subreddits, Tag Searches, User Profiles
|
.. |imgur-C| replace:: Albums, Favorites, Galleries, individual Images, Search Results, Subreddits, Tag Searches, User Profiles
|
||||||
.. |instagram-C| replace:: Channels, individual Images, Saved Posts, Stories, Tag Searches, User Profiles
|
.. |instagram-C| replace:: Channels, individual Images, Saved Posts, Stories, Tag Searches, User Profiles
|
||||||
.. |newgrounds-C| replace:: Art, Audio, Favorites, individual Images, Media Files, Movies, User Profiles
|
.. |newgrounds-C| replace:: Art, Audio, Favorites, individual Images, Media Files, Movies, User Profiles
|
||||||
|
|||||||
@@ -34,7 +34,7 @@ class HentaifoundryExtractor(Extractor):
|
|||||||
yield Message.Directory, data
|
yield Message.Directory, data
|
||||||
|
|
||||||
self.set_filters()
|
self.set_filters()
|
||||||
for page_url in util.advance(self.get_image_pages(), self.start_post):
|
for page_url in util.advance(self._pagination(), self.start_post):
|
||||||
image = self.get_image_metadata(page_url)
|
image = self.get_image_metadata(page_url)
|
||||||
image.update(data)
|
image.update(data)
|
||||||
yield Message.Url, image["src"], image
|
yield Message.Url, image["src"], image
|
||||||
@@ -50,13 +50,12 @@ class HentaifoundryExtractor(Extractor):
|
|||||||
self.request(self.root + "/?enterAgree=1")
|
self.request(self.root + "/?enterAgree=1")
|
||||||
return {"user": self.user}
|
return {"user": self.user}
|
||||||
|
|
||||||
def get_image_pages(self):
|
def _pagination(self, begin='thumbTitle"><a href="', end='"'):
|
||||||
"""Yield urls of all relevant image pages"""
|
|
||||||
num = self.start_page
|
num = self.start_page
|
||||||
|
|
||||||
while True:
|
while True:
|
||||||
page = self.request("{}/page/{}".format(self.page_url, num)).text
|
page = self.request("{}/page/{}".format(self.page_url, num)).text
|
||||||
yield from text.extract_iter(page, 'thumbTitle"><a href="', '"')
|
yield from text.extract_iter(page, begin, end)
|
||||||
|
|
||||||
if 'class="pager"' not in page or 'class="last hidden"' in page:
|
if 'class="pager"' not in page or 'class="last hidden"' in page:
|
||||||
return
|
return
|
||||||
@@ -90,6 +89,33 @@ class HentaifoundryExtractor(Extractor):
|
|||||||
|
|
||||||
return text.nameext_from_url(data["src"], data)
|
return text.nameext_from_url(data["src"], data)
|
||||||
|
|
||||||
|
def get_story_metadata(self, html):
|
||||||
|
"""Collect url and metadata for a story"""
|
||||||
|
extr = text.extract_from(html)
|
||||||
|
data = {
|
||||||
|
"user" : self.user,
|
||||||
|
"title" : text.unescape(extr(
|
||||||
|
"<div class='titlebar'>", "</a>").rpartition(">")[2]),
|
||||||
|
"author" : text.unescape(extr('alt="', '"')),
|
||||||
|
"date" : text.parse_datetime(extr(
|
||||||
|
">Updated<", "</span>").rpartition(">")[2], "%B %d, %Y"),
|
||||||
|
"status" : extr("class='indent'>", "<"),
|
||||||
|
}
|
||||||
|
|
||||||
|
for c in ("Chapters", "Words", "Comments", "Views", "Rating"):
|
||||||
|
data[c.lower()] = text.parse_int(extr(
|
||||||
|
">" + c + ":</span>", "<").replace(",", ""))
|
||||||
|
|
||||||
|
data["description"] = text.unescape(extr(
|
||||||
|
"class='storyDescript'>", "<div"))
|
||||||
|
path = extr('href="', '"')
|
||||||
|
data["src"] = self.root + path
|
||||||
|
data["index"] = text.parse_int(path.rsplit("/", 2)[1])
|
||||||
|
data["ratings"] = [text.unescape(r) for r in text.extract_iter(extr(
|
||||||
|
"class='ratings_box'", "</div>"), "title='", "'")]
|
||||||
|
|
||||||
|
return text.nameext_from_url(data["src"], data)
|
||||||
|
|
||||||
def set_filters(self):
|
def set_filters(self):
|
||||||
"""Set site-internal filters to show all images"""
|
"""Set site-internal filters to show all images"""
|
||||||
token = text.unquote(text.extract(
|
token = text.unquote(text.extract(
|
||||||
@@ -134,14 +160,16 @@ class HentaifoundryUserExtractor(HentaifoundryExtractor):
|
|||||||
HentaifoundryExtractor.__init__(self, match, match.group(1))
|
HentaifoundryExtractor.__init__(self, match, match.group(1))
|
||||||
|
|
||||||
def items(self):
|
def items(self):
|
||||||
user = "/user/" + self.user + "/"
|
user = "/user/" + self.user
|
||||||
return self._dispatch_extractors((
|
return self._dispatch_extractors((
|
||||||
(HentaifoundryGalleryExtractor ,
|
(HentaifoundryGalleryExtractor ,
|
||||||
self.root + "/pictures" + user),
|
self.root + "/pictures" + user),
|
||||||
(HentaifoundryScrapsExtractor ,
|
(HentaifoundryScrapsExtractor,
|
||||||
self.root + "/pictures" + user + "scraps"),
|
self.root + "/pictures" + user + "/scraps"),
|
||||||
|
(HentaifoundryStoriesExtractor,
|
||||||
|
self.root + "/stories" + user),
|
||||||
(HentaifoundryFavoriteExtractor,
|
(HentaifoundryFavoriteExtractor,
|
||||||
self.root + user + "faves/pictures"),
|
self.root + user + "/faves/pictures"),
|
||||||
), ("gallery",))
|
), ("gallery",))
|
||||||
|
|
||||||
|
|
||||||
@@ -303,3 +331,68 @@ class HentaifoundryImageExtractor(HentaifoundryExtractor):
|
|||||||
|
|
||||||
def skip(self, _):
|
def skip(self, _):
|
||||||
return 0
|
return 0
|
||||||
|
|
||||||
|
|
||||||
|
class HentaifoundryStoriesExtractor(HentaifoundryExtractor):
|
||||||
|
"""Extractor for stories of a hentai-foundry user"""
|
||||||
|
subcategory = "stories"
|
||||||
|
pattern = (r"(?:https?://)?(?:www\.)?hentai-foundry\.com"
|
||||||
|
r"/stories/user/([^/]+)(?:/page/(\d+))?/?$")
|
||||||
|
test = ("https://www.hentai-foundry.com/stories/user/SnowWolf35", {
|
||||||
|
"count": ">= 35",
|
||||||
|
"keyword": {
|
||||||
|
"author" : "SnowWolf35",
|
||||||
|
"chapters" : int,
|
||||||
|
"comments" : int,
|
||||||
|
"date" : "type:datetime",
|
||||||
|
"description": str,
|
||||||
|
"index" : int,
|
||||||
|
"rating" : int,
|
||||||
|
"ratings" : list,
|
||||||
|
"status" : "re:(Inc|C)omplete",
|
||||||
|
"title" : str,
|
||||||
|
"user" : "SnowWolf35",
|
||||||
|
"views" : int,
|
||||||
|
"words" : int,
|
||||||
|
},
|
||||||
|
})
|
||||||
|
|
||||||
|
def __init__(self, match):
|
||||||
|
HentaifoundryExtractor.__init__(self, match, match.group(1))
|
||||||
|
self.page_url = "{}/stories/user/{}".format(self.root, self.user)
|
||||||
|
|
||||||
|
def items(self):
|
||||||
|
self.get_job_metadata()
|
||||||
|
self.set_filters()
|
||||||
|
stories = self._pagination('<div class="storyRow">', '</tr></table>')
|
||||||
|
for story_html in util.advance(stories, self.start_post):
|
||||||
|
story = self.get_story_metadata(story_html)
|
||||||
|
yield Message.Directory, story
|
||||||
|
yield Message.Url, story["src"], story
|
||||||
|
|
||||||
|
|
||||||
|
class HentaifoundryStoryExtractor(HentaifoundryExtractor):
|
||||||
|
"""Extractor for a hentaifoundry story"""
|
||||||
|
subcategory = "story"
|
||||||
|
pattern = (r"(?:https?://)?(?:www\.)?hentai-foundry\.com"
|
||||||
|
r"/stories/user/([^/]+)/(\d+)")
|
||||||
|
test = (("https://www.hentai-foundry.com/stories/user/SnowWolf35"
|
||||||
|
"/26416/Overwatch-High-Chapter-Voting-Location"), {
|
||||||
|
"url": "5a67cfa8c3bf7634c8af8485dd07c1ea74ee0ae8",
|
||||||
|
"keyword": {"title": "Overwatch High Chapter Voting Location"},
|
||||||
|
})
|
||||||
|
|
||||||
|
def __init__(self, match):
|
||||||
|
HentaifoundryExtractor.__init__(self, match, match.group(1))
|
||||||
|
self.index = match.group(2)
|
||||||
|
|
||||||
|
def items(self):
|
||||||
|
story_url = "{}/stories/user/{}/{}/x?enterAgree=1".format(
|
||||||
|
self.root, self.user, self.index)
|
||||||
|
page = self.request(story_url).text
|
||||||
|
story = self.get_story_metadata(page)
|
||||||
|
yield Message.Directory, story
|
||||||
|
yield Message.Url, story["src"], story
|
||||||
|
|
||||||
|
def skip(self, _):
|
||||||
|
return 0
|
||||||
|
|||||||
@@ -119,6 +119,9 @@ SUBCATEGORY_MAP = {
|
|||||||
"deviantart": {
|
"deviantart": {
|
||||||
"stash": "Sta.sh",
|
"stash": "Sta.sh",
|
||||||
},
|
},
|
||||||
|
"hentaifoundry": {
|
||||||
|
"story": "",
|
||||||
|
},
|
||||||
"instagram": {
|
"instagram": {
|
||||||
"saved": "Saved Posts",
|
"saved": "Saved Posts",
|
||||||
},
|
},
|
||||||
|
|||||||
Reference in New Issue
Block a user