From 952c03bc9ef633b6ee4a67d5cf09e286f2813eea Mon Sep 17 00:00:00 2001
From: Bad Manners
Date: Fri, 2 Jun 2023 19:53:47 -0300
Subject: [PATCH] Add fav_id data to FuraffinityFavoriteExtractor

An extra field is collected when paginating favorites, and saved to a
temporary cache variable. This field is identical for both the old and
the new page layouts for FurAffinity, but can only be collected during
pagination, hence the cache variable. Other FurAffinity extractors
should be unaffected by this change.
---
 gallery_dl/extractor/furaffinity.py | 22 +++++++++++++++++++++-
 1 file changed, 21 insertions(+), 1 deletion(-)

diff --git a/gallery_dl/extractor/furaffinity.py b/gallery_dl/extractor/furaffinity.py
index cc43cec9..51ae0941 100644
--- a/gallery_dl/extractor/furaffinity.py
+++ b/gallery_dl/extractor/furaffinity.py
@@ -63,6 +63,9 @@ class FuraffinityExtractor(Extractor):
     def metadata(self):
         return None
 
+    def _fa_extra_post_data(self, post_id):
+        return None
+
     def skip(self, num):
         self.offset += num
         return num
@@ -132,6 +135,10 @@ class FuraffinityExtractor(Extractor):
         data["date"] = text.parse_timestamp(data["filename"].partition(".")[0])
         data["description"] = self._process_description(data["_description"])
 
+        extra_data = self._fa_extra_post_data(post_id)
+        if extra_data:
+            data.update(extra_data)
+
         return data
 
     @staticmethod
@@ -159,7 +166,16 @@ class FuraffinityExtractor(Extractor):
 
         while path:
             page = self.request(self.root + path).text
-            yield from text.extract_iter(page, 'id="sid-', '"')
+            extr = text.extract_from(page)
+            while True:
+                post_id = extr('id="sid-', '"')
+                if not post_id:
+                    break
+                if hasattr(self, '_fa_extra_data_fav_dict'):
+                    self._fa_extra_data_fav_dict[post_id] = {
+                        'fav_id': text.parse_int(extr('data-fav-id="', '"')),
+                    }
+                yield post_id
             path = text.extr(page, 'right" href="', '"')
 
     def _pagination_search(self, query):
@@ -238,6 +254,7 @@ class FuraffinityFavoriteExtractor(FuraffinityExtractor):
     subcategory = "favorite"
     directory_fmt = ("{category}", "{user!l}", "Favorites")
     pattern = BASE_PATTERN + r"/favorites/([^/?#]+)"
+    _fa_extra_data_fav_dict = {}
     test = ("https://www.furaffinity.net/favorites/mirlinthloth/", {
         "pattern": r"https://d\d?\.f(uraffinity|acdn)\.net"
                    r"/art/[^/]+/\d+/\d+.\w+\.\w+",
@@ -248,6 +265,9 @@ class FuraffinityFavoriteExtractor(FuraffinityExtractor):
     def posts(self):
         return self._pagination_favorites()
 
+    def _fa_extra_post_data(self, post_id):
+        return self._fa_extra_data_fav_dict.pop(post_id, None)
+
 
 class FuraffinitySearchExtractor(FuraffinityExtractor):
     """Extractor for furaffinity search results"""