Add fav_id data to FuraffinityFavoriteExtractor

An extra field, `fav_id` (read from the `data-fav-id` attribute), is
collected when paginating favorites and saved to a temporary cache
dictionary keyed by post ID. This field is identical for both the old
and the new page layouts for FurAffinity, but can only be collected
during pagination, hence the cache; each cached entry is removed once
it has been merged into the post's metadata. Other FurAffinity
extractors should be unaffected by this change.
This commit is contained in:
Bad Manners
2023-06-02 19:53:47 -03:00
parent 54cf1fa3e7
commit 952c03bc9e

View File

@@ -63,6 +63,9 @@ class FuraffinityExtractor(Extractor):
def metadata(self): def metadata(self):
return None return None
def _fa_extra_post_data(self, post_id):
return None
def skip(self, num): def skip(self, num):
self.offset += num self.offset += num
return num return num
@@ -132,6 +135,10 @@ class FuraffinityExtractor(Extractor):
data["date"] = text.parse_timestamp(data["filename"].partition(".")[0]) data["date"] = text.parse_timestamp(data["filename"].partition(".")[0])
data["description"] = self._process_description(data["_description"]) data["description"] = self._process_description(data["_description"])
extra_data = self._fa_extra_post_data(post_id)
if extra_data:
data.update(extra_data)
return data return data
@staticmethod @staticmethod
@@ -159,7 +166,16 @@ class FuraffinityExtractor(Extractor):
while path: while path:
page = self.request(self.root + path).text page = self.request(self.root + path).text
yield from text.extract_iter(page, 'id="sid-', '"') extr = text.extract_from(page)
while True:
post_id = extr('id="sid-', '"')
if not post_id:
break
if hasattr(self, '_fa_extra_data_fav_dict'):
self._fa_extra_data_fav_dict[post_id] = {
'fav_id': text.parse_int(extr('data-fav-id="', '"')),
}
yield post_id
path = text.extr(page, 'right" href="', '"') path = text.extr(page, 'right" href="', '"')
def _pagination_search(self, query): def _pagination_search(self, query):
@@ -238,6 +254,7 @@ class FuraffinityFavoriteExtractor(FuraffinityExtractor):
subcategory = "favorite" subcategory = "favorite"
directory_fmt = ("{category}", "{user!l}", "Favorites") directory_fmt = ("{category}", "{user!l}", "Favorites")
pattern = BASE_PATTERN + r"/favorites/([^/?#]+)" pattern = BASE_PATTERN + r"/favorites/([^/?#]+)"
_fa_extra_data_fav_dict = {}
test = ("https://www.furaffinity.net/favorites/mirlinthloth/", { test = ("https://www.furaffinity.net/favorites/mirlinthloth/", {
"pattern": r"https://d\d?\.f(uraffinity|acdn)\.net" "pattern": r"https://d\d?\.f(uraffinity|acdn)\.net"
r"/art/[^/]+/\d+/\d+.\w+\.\w+", r"/art/[^/]+/\d+/\d+.\w+\.\w+",
@@ -248,6 +265,9 @@ class FuraffinityFavoriteExtractor(FuraffinityExtractor):
def posts(self): def posts(self):
return self._pagination_favorites() return self._pagination_favorites()
def _fa_extra_post_data(self, post_id):
return self._fa_extra_data_fav_dict.pop(post_id, None)
class FuraffinitySearchExtractor(FuraffinityExtractor): class FuraffinitySearchExtractor(FuraffinityExtractor):
"""Extractor for furaffinity search results""" """Extractor for furaffinity search results"""