Add fav_id data to FuraffinityFavoriteExtractor
An extra field is collected when paginating favorites, and saved to a temporary cache variable. This field is identical for both the old and the new page layouts for FurAffinity, but can only be collected during pagination, hence the cache variable. Other FurAffinity extractors should be unaffected by this change.
This commit is contained in:
@@ -63,6 +63,9 @@ class FuraffinityExtractor(Extractor):
|
||||
def metadata(self):
|
||||
return None
|
||||
|
||||
def _fa_extra_post_data(self, post_id):
|
||||
return None
|
||||
|
||||
def skip(self, num):
|
||||
self.offset += num
|
||||
return num
|
||||
@@ -132,6 +135,10 @@ class FuraffinityExtractor(Extractor):
|
||||
data["date"] = text.parse_timestamp(data["filename"].partition(".")[0])
|
||||
data["description"] = self._process_description(data["_description"])
|
||||
|
||||
extra_data = self._fa_extra_post_data(post_id)
|
||||
if extra_data:
|
||||
data.update(extra_data)
|
||||
|
||||
return data
|
||||
|
||||
@staticmethod
|
||||
@@ -159,7 +166,16 @@ class FuraffinityExtractor(Extractor):
|
||||
|
||||
while path:
|
||||
page = self.request(self.root + path).text
|
||||
yield from text.extract_iter(page, 'id="sid-', '"')
|
||||
extr = text.extract_from(page)
|
||||
while True:
|
||||
post_id = extr('id="sid-', '"')
|
||||
if not post_id:
|
||||
break
|
||||
if hasattr(self, '_fa_extra_data_fav_dict'):
|
||||
self._fa_extra_data_fav_dict[post_id] = {
|
||||
'fav_id': text.parse_int(extr('data-fav-id="', '"')),
|
||||
}
|
||||
yield post_id
|
||||
path = text.extr(page, 'right" href="', '"')
|
||||
|
||||
def _pagination_search(self, query):
|
||||
@@ -238,6 +254,7 @@ class FuraffinityFavoriteExtractor(FuraffinityExtractor):
|
||||
subcategory = "favorite"
|
||||
directory_fmt = ("{category}", "{user!l}", "Favorites")
|
||||
pattern = BASE_PATTERN + r"/favorites/([^/?#]+)"
|
||||
_fa_extra_data_fav_dict = {}
|
||||
test = ("https://www.furaffinity.net/favorites/mirlinthloth/", {
|
||||
"pattern": r"https://d\d?\.f(uraffinity|acdn)\.net"
|
||||
r"/art/[^/]+/\d+/\d+.\w+\.\w+",
|
||||
@@ -248,6 +265,9 @@ class FuraffinityFavoriteExtractor(FuraffinityExtractor):
|
||||
def posts(self):
|
||||
return self._pagination_favorites()
|
||||
|
||||
def _fa_extra_post_data(self, post_id):
|
||||
return self._fa_extra_data_fav_dict.pop(post_id, None)
|
||||
|
||||
|
||||
class FuraffinitySearchExtractor(FuraffinityExtractor):
|
||||
"""Extractor for furaffinity search results"""
|
||||
|
||||
Reference in New Issue
Block a user