[furaffinity] support classic layout (#284)

This commit is contained in:
Mike Fährmann
2020-02-12 21:39:43 +01:00
parent 138135c190
commit c7cf9dd111
3 changed files with 65 additions and 30 deletions

View File

@@ -738,6 +738,22 @@ Description Sets the maximum allowed size for downloaded images.
=========== =====
extractor.furaffinity.include
-----------------------------
=========== =====
Type ``string`` or ``list`` of ``strings``
Default ``"gallery"``
Example ``"scraps,favorite"`` or ``["scraps", "favorite"]``
Description A list of subcategories to include when processing a
user profile, given either as a comma-separated string or as a list of strings.
Possible values are
``"gallery"``, ``"scraps"``, and ``"favorite"``.
You can use ``"all"`` instead of listing all values separately.
=========== =====
extractor.gelbooru.api
----------------------
=========== =====

View File

@@ -21,6 +21,7 @@ class FuraffinityExtractor(Extractor):
directory_fmt = ("{category}", "{user!l}")
filename_fmt = "{id} {title}.{extension}"
archive_fmt = "{id}"
cookiedomain = ".furaffinity.net"
root = "https://www.furaffinity.net"
def __init__(self, match):
@@ -33,7 +34,6 @@ class FuraffinityExtractor(Extractor):
post = self._parse_post(post_id)
if post:
yield Message.Directory, post
text.nameext_from_url(post["url"], post)
yield Message.Url, post["url"], post
def posts(self):
@@ -46,27 +46,41 @@ class FuraffinityExtractor(Extractor):
def _parse_post(self, post_id):
url = "{}/view/{}/".format(self.root, post_id)
extr = text.extract_from(self.request(url).text)
title, _, artist = text.unescape(extr(
'property="og:title" content="', '"')).rpartition(" by ")
if not extr('class="download', '>'):
path = extr('href="//d.facdn.net/', '"')
if not path:
self.log.warning(
"Unable to download post %s (\"%s\")", post_id,
text.remove_html(extr('class="link-override">', '</p>')))
"Unable to download post %s (\"%s\")",
post_id, text.remove_html(
extr('System Message', '</section>') or
extr('System Message', '</table>')
)
)
return None
return {
data = text.nameext_from_url(path, {
"id" : text.parse_int(post_id),
"title" : title,
"artist": artist,
"user" : self.user or artist,
"url" : "https:" + extr('href="', '"'),
"tags" : text.split_html(extr('class="tags-row">', '</section>')),
"date" : text.parse_datetime(extr(
'<strong><span title="', '"'), "%b %d, %Y %I:%M %p"),
"description": text.unescape(text.remove_html(extr(
'<div class="submission-description">', '</div>'), "", "")),
}
"url" : "https://d.facdn.net/" + path
})
tags = extr('class="tags-row">', '</section>')
if tags:
data["tags"] = text.split_html(tags)
data["description"] = text.unescape(text.remove_html(extr(
'class="section-body">', '</div>'), "", ""))
else:
data["tags"] = text.split_html(extr(
'id="keywords">', '</div>'))[::2]
data["description"] = text.unescape(text.remove_html(extr(
"</table>", "</table>"), "", ""))
data["date"] = text.parse_timestamp(data["filename"].partition(".")[0])
return data
def _pagination(self):
num = 1
@@ -90,7 +104,7 @@ class FuraffinityExtractor(Extractor):
while path:
page = self.request(self.root + path).text
yield from text.extract_iter(page, 'id="sid-', '"')
path = text.extract(page, 'button standard right" href="', '"')[0]
path = text.extract(page, 'right" href="', '"')[0]
class FuraffinityGalleryExtractor(FuraffinityExtractor):
@@ -133,22 +147,25 @@ class FuraffinityFavoriteExtractor(FuraffinityExtractor):
class FuraffinityPostExtractor(FuraffinityExtractor):
"""Extractor for individual posts on furaffinity"""
subcategory = "post"
pattern = BASE_PATTERN + r"/view/(\d+)"
test = ("https://www.furaffinity.net/view/21835115/", {
"url": "eae4ef93d99365c69b31a37561bd800c03d336ad",
"keyword": {
"artist" : "mirlinthloth",
"date" : "type:datetime",
"description": "A Song made playing the game Cosmic DJ.",
"extension" : "mp3",
"filename" : r"re:\d+\.mirlinthloth_dj_fennmink_-_bude_s_4_ever",
"id" : 21835115,
"tags" : list,
"title" : "Bude's 4 Ever",
"url" : "re:https://d.facdn.net/art/mirlinthloth/music/",
"user" : "mirlinthloth",
},
})
pattern = BASE_PATTERN + r"/(?:view|full)/(\d+)"
test = (
("https://www.furaffinity.net/view/21835115/", {
"url": "eae4ef93d99365c69b31a37561bd800c03d336ad",
"keyword": {
"artist" : "mirlinthloth",
"date" : "type:datetime",
"description": "A Song made playing the game Cosmic DJ.",
"extension" : "mp3",
"filename" : r"re:\d+\.\w+_dj_fennmink_-_bude_s_4_ever",
"id" : 21835115,
"tags" : list,
"title" : "Bude's 4 Ever",
"url" : "re:https://d.facdn.net/art/mirlinthloth/music",
"user" : "mirlinthloth",
},
}),
("https://www.furaffinity.net/full/21835115/"),
)
def posts(self):
post_id = self.user
@@ -159,6 +176,7 @@ class FuraffinityPostExtractor(FuraffinityExtractor):
class FuraffinityUserExtractor(FuraffinityExtractor):
"""Extractor for furaffinity user profiles"""
subcategory = "user"
cookiedomain = None
pattern = BASE_PATTERN + r"/user/([^/?&#]+)"
test = (
("https://www.furaffinity.net/user/mirlinthloth/", {

View File

@@ -26,6 +26,7 @@ TRAVIS_SKIP = {
# temporary issues, etc.
BROKEN = {
"35photo",
"mangapark",
"photobucket",
}