From 952c03bc9ef633b6ee4a67d5cf09e286f2813eea Mon Sep 17 00:00:00 2001 From: Bad Manners Date: Fri, 2 Jun 2023 19:53:47 -0300 Subject: [PATCH 1/2] Add fav_id data to FuraffinityFavoriteExtractor An extra field is collected when paginating favorites, and saved to a temporary cache variable. This field is identical for both the old and the new page layouts for FurAffinity, but can only be collected during pagination, hence the cache variable. Other FurAffinity extractors should be unaffected by this change. --- gallery_dl/extractor/furaffinity.py | 22 +++++++++++++++++++++- 1 file changed, 21 insertions(+), 1 deletion(-) diff --git a/gallery_dl/extractor/furaffinity.py b/gallery_dl/extractor/furaffinity.py index cc43cec9..51ae0941 100644 --- a/gallery_dl/extractor/furaffinity.py +++ b/gallery_dl/extractor/furaffinity.py @@ -63,6 +63,9 @@ class FuraffinityExtractor(Extractor): def metadata(self): return None + def _fa_extra_post_data(self, post_id): + return None + def skip(self, num): self.offset += num return num @@ -132,6 +135,10 @@ class FuraffinityExtractor(Extractor): data["date"] = text.parse_timestamp(data["filename"].partition(".")[0]) data["description"] = self._process_description(data["_description"]) + extra_data = self._fa_extra_post_data(post_id) + if extra_data: + data.update(extra_data) + return data @staticmethod @@ -159,7 +166,16 @@ class FuraffinityExtractor(Extractor): while path: page = self.request(self.root + path).text - yield from text.extract_iter(page, 'id="sid-', '"') + extr = text.extract_from(page) + while True: + post_id = extr('id="sid-', '"') + if not post_id: + break + if hasattr(self, '_fa_extra_data_fav_dict'): + self._fa_extra_data_fav_dict[post_id] = { + 'fav_id': text.parse_int(extr('data-fav-id="', '"')), + } + yield post_id path = text.extr(page, 'right" href="', '"') def _pagination_search(self, query): @@ -238,6 +254,7 @@ class FuraffinityFavoriteExtractor(FuraffinityExtractor): subcategory = "favorite" directory_fmt = ("{category}", "{user!l}", "Favorites") pattern = BASE_PATTERN + r"/favorites/([^/?#]+)" + _fa_extra_data_fav_dict = {} test = ("https://www.furaffinity.net/favorites/mirlinthloth/", { "pattern": r"https://d\d?\.f(uraffinity|acdn)\.net" r"/art/[^/]+/\d+/\d+.\w+\.\w+", @@ -248,6 +265,9 @@ class FuraffinityFavoriteExtractor(FuraffinityExtractor): def posts(self): return self._pagination_favorites() + def _fa_extra_post_data(self, post_id): + return self._fa_extra_data_fav_dict.pop(post_id, None) + class FuraffinitySearchExtractor(FuraffinityExtractor): """Extractor for furaffinity search results""" From 5e3a1749c8833f9db8a7ef180dd4576f4ccc4d5e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mike=20F=C3=A4hrmann?= Date: Fri, 9 Jun 2023 16:30:49 +0200 Subject: [PATCH 2/2] [furaffinity] simplify 'favorite_id' assignment --- gallery_dl/extractor/furaffinity.py | 21 +++++++-------------- 1 file changed, 7 insertions(+), 14 deletions(-) diff --git a/gallery_dl/extractor/furaffinity.py b/gallery_dl/extractor/furaffinity.py index 51ae0941..9f5cbbae 100644 --- a/gallery_dl/extractor/furaffinity.py +++ b/gallery_dl/extractor/furaffinity.py @@ -63,9 +63,6 @@ class FuraffinityExtractor(Extractor): def metadata(self): return None - def _fa_extra_post_data(self, post_id): - return None - def skip(self, num): self.offset += num return num @@ -135,10 +132,6 @@ class FuraffinityExtractor(Extractor): data["date"] = text.parse_timestamp(data["filename"].partition(".")[0]) data["description"] = self._process_description(data["_description"]) - extra_data = self._fa_extra_post_data(post_id) - if extra_data: - data.update(extra_data) - return data @staticmethod @@ -171,10 +164,7 @@ class FuraffinityExtractor(Extractor): post_id = extr('id="sid-', '"') if not post_id: break - if hasattr(self, '_fa_extra_data_fav_dict'): - self._fa_extra_data_fav_dict[post_id] = { - 'fav_id': text.parse_int(extr('data-fav-id="', '"')), - } + self._favorite_id = text.parse_int(extr('data-fav-id="', '"')) yield post_id path = text.extr(page, 'right" href="', '"') @@ -254,10 +244,10 @@ class FuraffinityFavoriteExtractor(FuraffinityExtractor): subcategory = "favorite" directory_fmt = ("{category}", "{user!l}", "Favorites") pattern = BASE_PATTERN + r"/favorites/([^/?#]+)" - _fa_extra_data_fav_dict = {} test = ("https://www.furaffinity.net/favorites/mirlinthloth/", { "pattern": r"https://d\d?\.f(uraffinity|acdn)\.net" r"/art/[^/]+/\d+/\d+.\w+\.\w+", + "keyword": {"favorite_id": int}, "range": "45-50", "count": 6, }) @@ -265,8 +255,11 @@ class FuraffinityFavoriteExtractor(FuraffinityExtractor): def posts(self): return self._pagination_favorites() - def _fa_extra_post_data(self, post_id): - return self._fa_extra_data_fav_dict.pop(post_id, None) + def _parse_post(self, post_id): + post = FuraffinityExtractor._parse_post(self, post_id) + if post: + post["favorite_id"] = self._favorite_id + return post class FuraffinitySearchExtractor(FuraffinityExtractor):