diff --git a/docs/configuration.rst b/docs/configuration.rst index b450a15f..5444affd 100644 --- a/docs/configuration.rst +++ b/docs/configuration.rst @@ -738,6 +738,22 @@ Description Sets the maximum allowed size for downloaded images. =========== ===== +extractor.furaffinity.include +----------------------------- +=========== ===== +Type ``string`` or ``list`` of ``strings`` +Default ``"gallery"`` +Example ``"scraps,favorite"`` or ``["scraps", "favorite"]`` +Description A (comma-separated) list of subcategories to include + when processing a user profile. + + Possible values are + ``"gallery"``, ``"scraps"``, ``"favorite"``. + + You can use ``"all"`` instead of listing all values separately. +=========== ===== + + extractor.gelbooru.api ---------------------- =========== ===== diff --git a/gallery_dl/extractor/furaffinity.py b/gallery_dl/extractor/furaffinity.py index 8a1e6b32..d5465661 100644 --- a/gallery_dl/extractor/furaffinity.py +++ b/gallery_dl/extractor/furaffinity.py @@ -21,6 +21,7 @@ class FuraffinityExtractor(Extractor): directory_fmt = ("{category}", "{user!l}") filename_fmt = "{id} {title}.{extension}" archive_fmt = "{id}" + cookiedomain = ".furaffinity.net" root = "https://www.furaffinity.net" def __init__(self, match): @@ -33,7 +34,6 @@ class FuraffinityExtractor(Extractor): post = self._parse_post(post_id) if post: yield Message.Directory, post - text.nameext_from_url(post["url"], post) yield Message.Url, post["url"], post def posts(self): @@ -46,27 +46,41 @@ class FuraffinityExtractor(Extractor): def _parse_post(self, post_id): url = "{}/view/{}/".format(self.root, post_id) extr = text.extract_from(self.request(url).text) - title, _, artist = text.unescape(extr( 'property="og:title" content="', '"')).rpartition(" by ") - if not extr('class="download', '>'): + path = extr('href="//d.facdn.net/', '"') + + if not path: self.log.warning( - "Unable to download post %s (\"%s\")", post_id, - text.remove_html(extr('class="link-override">', '
'))) + "Unable to download post %s (\"%s\")", + post_id, text.remove_html( + extr('System Message', '') or + extr('System Message', '') + ) + ) return None - return { + data = text.nameext_from_url(path, { "id" : text.parse_int(post_id), "title" : title, "artist": artist, "user" : self.user or artist, - "url" : "https:" + extr('href="', '"'), - "tags" : text.split_html(extr('class="tags-row">', '')), - "date" : text.parse_datetime(extr( - '', ''), "", "")), - } + "url" : "https://d.facdn.net/" + path + }) + + tags = extr('class="tags-row">', '') + if tags: + data["tags"] = text.split_html(tags) + data["description"] = text.unescape(text.remove_html(extr( + 'class="section-body">', ''), "", "")) + else: + data["tags"] = text.split_html(extr( + 'id="keywords">', ''))[::2] + data["description"] = text.unescape(text.remove_html(extr( + "", ""), "", "")) + data["date"] = text.parse_timestamp(data["filename"].partition(".")[0]) + + return data def _pagination(self): num = 1 @@ -90,7 +104,7 @@ class FuraffinityExtractor(Extractor): while path: page = self.request(self.root + path).text yield from text.extract_iter(page, 'id="sid-', '"') - path = text.extract(page, 'button standard right" href="', '"')[0] + path = text.extract(page, 'right" href="', '"')[0] class FuraffinityGalleryExtractor(FuraffinityExtractor): @@ -133,22 +147,25 @@ class FuraffinityFavoriteExtractor(FuraffinityExtractor): class FuraffinityPostExtractor(FuraffinityExtractor): """Extractor for individual posts on furaffinity""" subcategory = "post" - pattern = BASE_PATTERN + r"/view/(\d+)" - test = ("https://www.furaffinity.net/view/21835115/", { - "url": "eae4ef93d99365c69b31a37561bd800c03d336ad", - "keyword": { - "artist" : "mirlinthloth", - "date" : "type:datetime", - "description": "A Song made playing the game Cosmic DJ.", - "extension" : "mp3", - "filename" : r"re:\d+\.mirlinthloth_dj_fennmink_-_bude_s_4_ever", - "id" : 21835115, - "tags" : list, - "title" : "Bude's 4 Ever", - "url" : "re:https://d.facdn.net/art/mirlinthloth/music/", - "user" : "mirlinthloth", - }, - }) + pattern = BASE_PATTERN + r"/(?:view|full)/(\d+)" + test = ( + ("https://www.furaffinity.net/view/21835115/", { + "url": "eae4ef93d99365c69b31a37561bd800c03d336ad", + "keyword": { + "artist" : "mirlinthloth", + "date" : "type:datetime", + "description": "A Song made playing the game Cosmic DJ.", + "extension" : "mp3", + "filename" : r"re:\d+\.\w+_dj_fennmink_-_bude_s_4_ever", + "id" : 21835115, + "tags" : list, + "title" : "Bude's 4 Ever", + "url" : "re:https://d.facdn.net/art/mirlinthloth/music", + "user" : "mirlinthloth", + }, + }), + ("https://www.furaffinity.net/full/21835115/"), + ) def posts(self): post_id = self.user @@ -159,6 +176,7 @@ class FuraffinityPostExtractor(FuraffinityExtractor): class FuraffinityUserExtractor(FuraffinityExtractor): """Extractor for furaffinity user profiles""" subcategory = "user" + cookiedomain = None pattern = BASE_PATTERN + r"/user/([^/?]+)" test = ( ("https://www.furaffinity.net/user/mirlinthloth/", { diff --git a/test/test_results.py b/test/test_results.py index 8d1a1e56..e0a7c353 100644 --- a/test/test_results.py +++ b/test/test_results.py @@ -26,6 +26,7 @@ TRAVIS_SKIP = { # temporary issues, etc. BROKEN = { + "35photo", "mangapark", "photobucket", }