[facebook] use main profile page for avatar & info results

This commit is contained in:
Mike Fährmann
2025-08-10 23:04:22 +02:00
parent 294557fc57
commit 4b3b2ad111
2 changed files with 46 additions and 41 deletions

View File

@@ -310,51 +310,33 @@ class FacebookExtractor(Extractor):
i += 1 i += 1
def _extract_profile(self, profile, set_id=False):
    """Return the extracted profile data for 'profile'

    With a true 'set_id' the '/photos_by' sub-page is requested,
    otherwise the main profile page is used.
    The result is whatever _extract_profile_page() returns for that URL.
    """
    if set_id:
        url = f"{self.root}/{profile}/photos_by"
    else:
        url = f"{self.root}/{profile}"
    # Cache per URL instead of per profile name: both page variants of
    # the same profile must not share a single cache entry, or the
    # variant requested first would be returned for the other one too.
    return self._extract_profile_cached(url)


# NOTE(review): assumes memcache(keyarg=1) keys its cache on the
# positional argument at index 1 (here: 'url') -- confirm against the
# cache module.
@memcache(keyarg=1)
def _extract_profile_cached(self, url):
    """Cached wrapper around _extract_profile_page(), keyed by 'url'"""
    return self._extract_profile_page(url)
def _extract_profile_page(self, url):
for _ in range(self.fallback_retries + 1): for _ in range(self.fallback_retries + 1):
profile_photos_page = self.request(profile_photos_url).text page = self.request(url).text
if page.find('>Page Not Found</title>', 0, 3000) > 0:
break
if ('"props":{"title":"This content isn\'t available right now"' in if ('"props":{"title":"This content isn\'t available right now"' in
profile_photos_page): page):
raise exception.AuthRequired( raise exception.AuthRequired(
"authenticated cookies", "profile", "authenticated cookies", "profile",
"This content isn't available right now") "This content isn't available right now")
set_id = self._extract_profile_set_id( set_id = self._extract_profile_set_id(page)
profile_photos_page) user = self._extract_profile_user(page)
user_data = text.extr( if set_id or user:
profile_photos_page, '","user":{"', '},"viewer":{') user["set_id"] = set_id
return user
if set_id or user_data:
break
self.log.debug("Got empty profile photos page, retrying...") self.log.debug("Got empty profile photos page, retrying...")
else: return {}
raise exception.AbortExtraction("Failed to extract profile data")
try:
data = util.json_loads(f'{{"{user_data}}}')
except Exception:
data = {}
self.log.debug(user_data)
try:
data["profile_tabs"] = [
edge["node"]
for edge in (data["profile_tabs"]["profile_user"]
["timeline_nav_app_sections"]["edges"])
]
except Exception:
pass
data["set_id"] = set_id
data["vanity"] = (
text.extr(profile_photos_page, '"userVanity":"', '"') or
text.extr(profile_photos_page, '"vanity":"', '"')
)
return data
def _extract_profile_set_id(self, profile_photos_page): def _extract_profile_set_id(self, profile_photos_page):
set_ids_raw = text.extr( set_ids_raw = text.extr(
@@ -369,6 +351,28 @@ class FacebookExtractor(Extractor):
return set_id return set_id
def _extract_profile_user(self, page):
    """Extract the '"user"' object embedded in 'page' as a dict

    Returns an empty dict when the embedded data cannot be parsed.
    Otherwise the parsed object is returned with these additions:

    - "user_pfbid": the original "id" when it starts with "pfbid";
      "id" is then replaced by the page's '"userID"' value
    - "username": the page's '"userVanity"' or '"vanity"' value
    - "profile_tabs": flattened to the list of edge nodes

    The additions are independent of one another, so a missing field
    in one of them does not prevent the others from being applied.
    """
    data = text.extr(page, '","user":{"', '},"viewer":{')

    try:
        user = util.json_loads(f'{{"{data}}}')
    except Exception:
        self.log.debug("Failed to extract user data: %s", data)
        return {}

    try:
        if user["id"].startswith("pfbid"):
            user["user_pfbid"] = user["id"]
            user["id"] = text.extr(page, '"userID":"', '"')
    except (KeyError, TypeError, AttributeError):
        # no usable "id" value; leave the object as parsed
        pass

    user["username"] = (text.extr(page, '"userVanity":"', '"') or
                        text.extr(page, '"vanity":"', '"'))

    try:
        user["profile_tabs"] = [
            edge["node"]
            for edge in (user["profile_tabs"]["profile_user"]
                         ["timeline_nav_app_sections"]["edges"])
        ]
    except (KeyError, TypeError):
        # tab data missing or shaped differently; keep the parsed value
        pass

    return user
class FacebookSetExtractor(FacebookExtractor): class FacebookSetExtractor(FacebookExtractor):
"""Base class for Facebook Set extractors""" """Base class for Facebook Set extractors"""
@@ -465,11 +469,12 @@ class FacebookVideoExtractor(FacebookExtractor):
class FacebookInfoExtractor(FacebookExtractor):
    """Extractor for Facebook Profile data"""
    subcategory = "info"
    directory_fmt = ("{category}", "{username}")
    pattern = USER_PATTERN + r"/info"
    example = "https://www.facebook.com/USERNAME/info"

    def items(self):
        """Return a single Directory message carrying the profile data"""
        profile = self.groups[0]
        message = (Message.Directory, self._extract_profile(profile))
        return iter((message,))
@@ -513,7 +518,7 @@ class FacebookPhotosExtractor(FacebookExtractor):
example = "https://www.facebook.com/USERNAME/photos" example = "https://www.facebook.com/USERNAME/photos"
def items(self): def items(self):
set_id = self._extract_profile_photos_page(self.groups[0])["set_id"] set_id = self._extract_profile(self.groups[0], True)["set_id"]
if not set_id: if not set_id:
return iter(()) return iter(())
@@ -530,7 +535,7 @@ class FacebookAvatarExtractor(FacebookExtractor):
example = "https://www.facebook.com/USERNAME/avatar" example = "https://www.facebook.com/USERNAME/avatar"
def items(self): def items(self):
user = self._extract_profile_photos_page(self.groups[0]) user = self._extract_profile(self.groups[0])
avatar_page_url = user["profilePhoto"]["url"] avatar_page_url = user["profilePhoto"]["url"]
avatar_page = self.photo_page_request_wrapper(avatar_page_url).text avatar_page = self.photo_page_request_wrapper(avatar_page_url).text

View File

@@ -63,7 +63,7 @@ __tests__ = (
"#range" : "1", "#range" : "1",
"user_id" : "100074229772340", "user_id" : "100074229772340",
"user_pfbid": r"re:pfbid\w{66}", "user_pfbid": r"re:pfbid\w{64}",
}, },
{ {
@@ -116,7 +116,7 @@ __tests__ = (
"set_id" : "a.104622317759666", "set_id" : "a.104622317759666",
"type" : "avatar", "type" : "avatar",
"user_id" : "100046356937542", "user_id" : "100046356937542",
"user_pfbid": r"re:pfbid\w{66}", "user_pfbid": r"re:pfbid\w{64}",
"username" : "Throwaway Idk", "username" : "Throwaway Idk",
}, },
@@ -210,7 +210,7 @@ __tests__ = (
"id" : "221820450302279", "id" : "221820450302279",
"set_id" : "a.109762038174788", "set_id" : "a.109762038174788",
"user_id" : "100074229772340", "user_id" : "100074229772340",
"user_pfbid": r"re:pfbid\w{66}", "user_pfbid": r"re:pfbid\w{64}",
"username": "Throwaway Kwon", "username": "Throwaway Kwon",
}, },