From 859f1e7d040eb8782cf2369d06a8654e42ba6d52 Mon Sep 17 00:00:00 2001 From: Deer-Spangle Date: Tue, 11 Mar 2025 20:26:15 +0000 Subject: [PATCH] [furaffinity] Adding a FuraffinityFolderExtractor, which extracts a single folder - Ensure FuraffinityGalleryExtractor doesn't detect folder links - Fix example URL for folder extractor - Reordering classes a bit - Another tweak of the regex - One more go at the regex.. - cleanup --- docs/supportedsites.md | 2 +- gallery_dl/extractor/furaffinity.py | 27 +++++++++++++++++++++++---- test/results/furaffinity.py | 17 +++++++++++++++++ 3 files changed, 41 insertions(+), 5 deletions(-) diff --git a/docs/supportedsites.md b/docs/supportedsites.md index df533243..4046e49c 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -286,7 +286,7 @@ Consider all listed sites to potentially be NSFW. Fur Affinity https://www.furaffinity.net/ - Favorites, Followed Users, Galleries, Posts, Scraps, Search Results, New Submissions, User Profiles + Favorites, Folders, Followed Users, Galleries, Posts, Scraps, Search Results, New Submissions, User Profiles Cookies diff --git a/gallery_dl/extractor/furaffinity.py b/gallery_dl/extractor/furaffinity.py index 216aeb14..b6ae1b1d 100644 --- a/gallery_dl/extractor/furaffinity.py +++ b/gallery_dl/extractor/furaffinity.py @@ -153,12 +153,13 @@ class FuraffinityExtractor(Extractor): def _process_description(description): return text.unescape(text.remove_html(description, "", "")) - def _pagination(self, path): + def _pagination(self, path, folder=None): num = 1 + folder = "" if folder is None else "/folder/{}/a".format(folder) while True: - url = "{}/{}/{}/{}/".format( - self.root, path, self.user, num) + url = "{}/{}/{}{}/{}/".format( + self.root, path, self.user, folder, num) page = self.request(url).text post_id = None @@ -232,13 +233,31 @@ class FuraffinityExtractor(Extractor): class FuraffinityGalleryExtractor(FuraffinityExtractor): """Extractor for a furaffinity user's gallery""" 
subcategory = "gallery" - pattern = BASE_PATTERN + r"/gallery/([^/?#]+)" + pattern = BASE_PATTERN + r"/gallery/([^/?#]+)(?:$|/(?!folder/))" example = "https://www.furaffinity.net/gallery/USER/" def posts(self): return self._pagination("gallery") +class FuraffinityFolderExtractor(FuraffinityExtractor): + """Extractor for a FurAffinity folder""" + subcategory = "folder" + directory_fmt = ("{category}", "{user!l}", + "Folders", "{folder_id}{folder_name:? //}") + pattern = BASE_PATTERN + r"/gallery/([^/?#]+)/folder/(\d+)(?:/([^/?#]+))?" + example = "https://www.furaffinity.net/gallery/USER/folder/12345/FOLDER" + + def metadata(self): + return { + "folder_id" : self.groups[1], + "folder_name": self.groups[2] or "", + } + + def posts(self): + return self._pagination("gallery", self.groups[1]) + + class FuraffinityScrapsExtractor(FuraffinityExtractor): """Extractor for a furaffinity user's scraps""" subcategory = "scraps" diff --git a/test/results/furaffinity.py b/test/results/furaffinity.py index 29db8e82..a25609fe 100644 --- a/test/results/furaffinity.py +++ b/test/results/furaffinity.py @@ -17,6 +17,23 @@ __tests__ = ( "#count" : 6, }, +{ + "#url" : "https://www.furaffinity.net/gallery/markrun15/folder/173240/Inanimate/?", + "#category": ("", "furaffinity", "folder"), + "#class" : furaffinity.FuraffinityFolderExtractor, + "#range" : "46-50", + "#urls" : ( + "https://d.furaffinity.net/art/markrun15/1598704240/1598704240.markrun15_20200829_dusknoir_flat3.jpg", + "https://d.furaffinity.net/art/markrun15/1598704109/1598704109.markrun15_20200829_dusknoir_flat1.jpg", + "https://d.furaffinity.net/art/markrun15/1588674514/1588674514.markrun15_20200504_cubemorgana.jpg", + "https://d.furaffinity.net/art/markrun15/1588501280/1588501280.markrun15_20200427_inanimate_animal3.jpg", + "https://d.furaffinity.net/art/markrun15/1588501161/1588501161.markrun15_20200427_inanimate_animal.jpg", + ), + + "folder_id" : "173240", + "folder_name": "Inanimate", +}, + { "#url" : 
"https://www.furaffinity.net/scraps/mirlinthloth/", "#category": ("", "furaffinity", "scraps"),