From b1b01f56afdf2c805769613feafbd4e845a1398a Mon Sep 17 00:00:00 2001 From: Deer-Spangle Date: Sat, 31 Aug 2024 12:56:50 +0100 Subject: [PATCH 1/6] Add support for the readable URL format of Weasyl favourites page --- gallery_dl/extractor/weasyl.py | 8 +++++++- test/results/weasyl.py | 7 +++++++ 2 files changed, 14 insertions(+), 1 deletion(-) diff --git a/gallery_dl/extractor/weasyl.py b/gallery_dl/extractor/weasyl.py index ddbfaa06..8ea448cd 100644 --- a/gallery_dl/extractor/weasyl.py +++ b/gallery_dl/extractor/weasyl.py @@ -160,14 +160,20 @@ class WeasylJournalsExtractor(WeasylExtractor): class WeasylFavoriteExtractor(WeasylExtractor): subcategory = "favorite" directory_fmt = ("{category}", "{owner_login}", "Favorites") - pattern = BASE_PATTERN + r"favorites\?userid=(\d+)" + pattern = BASE_PATTERN + r"favorites(?:\?userid=(\d+)|\/([\w~-]+))" example = "https://www.weasyl.com/favorites?userid=12345" def __init__(self, match): WeasylExtractor.__init__(self, match) self.userid = match.group(1) + self.username = match.group(2) def items(self): + if self.userid is None and self.username is not None: + new_url = self.root + f"/favorites/{self.username}" + page = self.request(new_url).text + self.userid = text.extr(page, '= 5", }, +{ + "#url" : "https://www.weasyl.com/favorites/furoferre", + "#category": ("", "weasyl", "favorite"), + "#class" : weasyl.WeasylFavoriteExtractor, + "#count" : ">= 5", +} + ) From 311272f55a0bfb922277c8d14fb1ebfcc7e5e9f5 Mon Sep 17 00:00:00 2001 From: Deer-Spangle Date: Sat, 31 Aug 2024 15:10:02 +0100 Subject: [PATCH 2/6] Linting --- gallery_dl/extractor/weasyl.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/gallery_dl/extractor/weasyl.py b/gallery_dl/extractor/weasyl.py index 8ea448cd..92117f9b 100644 --- a/gallery_dl/extractor/weasyl.py +++ b/gallery_dl/extractor/weasyl.py @@ -172,7 +172,11 @@ class WeasylFavoriteExtractor(WeasylExtractor): if self.userid is None and self.username is not None: new_url = self.root 
+ f"/favorites/{self.username}" page = self.request(new_url).text - self.userid = text.extr(page, ' Date: Sat, 31 Aug 2024 15:22:39 +0100 Subject: [PATCH 3/6] Fixing syntax on python 3.5, the sad days before f-strings --- gallery_dl/extractor/weasyl.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gallery_dl/extractor/weasyl.py b/gallery_dl/extractor/weasyl.py index 92117f9b..7e26c5d3 100644 --- a/gallery_dl/extractor/weasyl.py +++ b/gallery_dl/extractor/weasyl.py @@ -170,7 +170,7 @@ class WeasylFavoriteExtractor(WeasylExtractor): def items(self): if self.userid is None and self.username is not None: - new_url = self.root + f"/favorites/{self.username}" + new_url = self.root + "/favorites/{}".format(self.username) page = self.request(new_url).text self.userid = text.extr( page, From 10543c64c6bb7d2fadd348329e31d6ae5e29a901 Mon Sep 17 00:00:00 2001 From: Deer-Spangle Date: Sat, 31 Aug 2024 15:54:04 +0100 Subject: [PATCH 4/6] Store favourites in the Favorites directory of the user's favs being downloaded, rather than in different directories for each submission --- gallery_dl/extractor/weasyl.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gallery_dl/extractor/weasyl.py b/gallery_dl/extractor/weasyl.py index 7e26c5d3..1ece749a 100644 --- a/gallery_dl/extractor/weasyl.py +++ b/gallery_dl/extractor/weasyl.py @@ -159,7 +159,7 @@ class WeasylJournalsExtractor(WeasylExtractor): class WeasylFavoriteExtractor(WeasylExtractor): subcategory = "favorite" - directory_fmt = ("{category}", "{owner_login}", "Favorites") + directory_fmt = ("{category}", "{user}", "Favorites") pattern = BASE_PATTERN + r"favorites(?:\?userid=(\d+)|\/([\w~-]+))" example = "https://www.weasyl.com/favorites?userid=12345" From cbecaecc437a6591ecdf1b9ac1e03d0c5521f80f Mon Sep 17 00:00:00 2001 From: Deer-Spangle Date: Sun, 22 Sep 2024 11:12:34 +0100 Subject: [PATCH 5/6] Fix infinite loop in Weasyl Favs downloader --- gallery_dl/extractor/weasyl.py | 4 +++- 1 file 
changed, 3 insertions(+), 1 deletion(-) diff --git a/gallery_dl/extractor/weasyl.py b/gallery_dl/extractor/weasyl.py index 1ece749a..5b4b20b7 100644 --- a/gallery_dl/extractor/weasyl.py +++ b/gallery_dl/extractor/weasyl.py @@ -192,9 +192,11 @@ class WeasylFavoriteExtractor(WeasylExtractor): if not owner_login: owner_login = text.extr(page, ' Date: Wed, 25 Sep 2024 20:02:01 +0200 Subject: [PATCH 6/6] [weasyl:favorite] update - use 'self.groups' to access userid and username - save one request by not doing an explicit username -> userid lookup - save one request by following the 'Next' link instead of detecting an empty page --- gallery_dl/extractor/weasyl.py | 38 ++++++++++++++-------------------- 1 file changed, 16 insertions(+), 22 deletions(-) diff --git a/gallery_dl/extractor/weasyl.py b/gallery_dl/extractor/weasyl.py index 5b4b20b7..13b05206 100644 --- a/gallery_dl/extractor/weasyl.py +++ b/gallery_dl/extractor/weasyl.py @@ -160,50 +160,44 @@ class WeasylJournalsExtractor(WeasylExtractor): class WeasylFavoriteExtractor(WeasylExtractor): subcategory = "favorite" directory_fmt = ("{category}", "{user}", "Favorites") - pattern = BASE_PATTERN + r"favorites(?:\?userid=(\d+)|\/([\w~-]+))" + pattern = BASE_PATTERN + r"favorites(?:\?userid=(\d+)|/([^/?#]+))" example = "https://www.weasyl.com/favorites?userid=12345" - def __init__(self, match): - WeasylExtractor.__init__(self, match) - self.userid = match.group(1) - self.username = match.group(2) - def items(self): - if self.userid is None and self.username is not None: - new_url = self.root + "/favorites/{}".format(self.username) - page = self.request(new_url).text - self.userid = text.extr( - page, - 'Next (', pos) + except ValueError: return - params["nextid"] = submitid + path = text.unescape(text.rextract(page, 'href="', '"', pos)[0]) + params = None