diff --git a/docs/configuration.rst b/docs/configuration.rst
index b548fb5c..543a8a31 100644
--- a/docs/configuration.rst
+++ b/docs/configuration.rst
@@ -2678,6 +2678,7 @@ Description
Supported values are
+ * ``info``
* ``avatar``
* ``photos``
* ``albums``
diff --git a/docs/supportedsites.md b/docs/supportedsites.md
index f9ac6467..ae94ba77 100644
--- a/docs/supportedsites.md
+++ b/docs/supportedsites.md
@@ -280,7 +280,7 @@ Consider all listed sites to potentially be NSFW.
| Facebook |
https://www.facebook.com/ |
- Albums, Avatars, Photos, Profile Photos, Sets, User Profiles, Videos |
+ Albums, Avatars, User Profile Information, Photos, Profile Photos, Sets, User Profiles, Videos |
Cookies |
diff --git a/gallery_dl/extractor/facebook.py b/gallery_dl/extractor/facebook.py
index d90d44b1..c646d1b8 100644
--- a/gallery_dl/extractor/facebook.py
+++ b/gallery_dl/extractor/facebook.py
@@ -322,17 +322,39 @@ class FacebookExtractor(Extractor):
"authenticated cookies", "profile",
"This content isn't available right now")
- set_id = self._extract_profile_set_id(profile_photos_page)
- avatar_page_url = text.extr(
- profile_photos_page, ',"profilePhoto":{"url":"', '"')
+ set_id = self._extract_profile_set_id(
+ profile_photos_page)
+ user_data = text.extr(
+ profile_photos_page, '","user":{"', '},"viewer":{')
- if set_id or avatar_page_url:
+ if set_id or user_data:
break
self.log.debug("Got empty profile photos page, retrying...")
else:
raise exception.AbortExtraction("Failed to extract profile data")
- return set_id, avatar_page_url.replace("\\/", "/")
+ try:
+ data = util.json_loads(f'{{"{user_data}}}')
+ except Exception:
+ data = {}
+ self.log.debug(user_data)
+
+ try:
+ data["profile_tabs"] = [
+ edge["node"]
+ for edge in (data["profile_tabs"]["profile_user"]
+ ["timeline_nav_app_sections"]["edges"])
+ ]
+ except Exception:
+ pass
+
+ data["set_id"] = set_id
+ data["vanity"] = (
+ text.extr(profile_photos_page, '"userVanity":"', '"') or
+ text.extr(profile_photos_page, '"vanity":"', '"')
+ )
+
+ return data
def _extract_profile_set_id(self, profile_photos_page):
set_ids_raw = text.extr(
@@ -440,6 +462,17 @@ class FacebookVideoExtractor(FacebookExtractor):
yield Message.Url, audio["url"], audio
+class FacebookInfoExtractor(FacebookExtractor):
+    """Extractor emitting the metadata of a Facebook profile"""
+    subcategory = "info"
+    pattern = USER_PATTERN + r"/info"
+    example = "https://www.facebook.com/USERNAME/info"
+
+    def items(self):
+        profile = self._extract_profile_photos_page(self.groups[0])
+        return iter([(Message.Directory, profile)])
+
+
class FacebookAlbumsExtractor(FacebookExtractor):
"""Extractor for Facebook Profile albums"""
subcategory = "albums"
@@ -480,7 +513,7 @@ class FacebookPhotosExtractor(FacebookExtractor):
example = "https://www.facebook.com/USERNAME/photos"
def items(self):
- set_id = self._extract_profile_photos_page(self.groups[0])[0]
+ set_id = self._extract_profile_photos_page(self.groups[0])["set_id"]
if not set_id:
return iter(())
@@ -497,7 +530,8 @@ class FacebookAvatarExtractor(FacebookExtractor):
example = "https://www.facebook.com/USERNAME/avatar"
def items(self):
- avatar_page_url = self._extract_profile_photos_page(self.groups[0])[1]
+ user = self._extract_profile_photos_page(self.groups[0])
+ avatar_page_url = user["profilePhoto"]["url"]
avatar_page = self.photo_page_request_wrapper(avatar_page_url).text
avatar = self.parse_photo_page(avatar_page)
@@ -520,6 +554,7 @@ class FacebookUserExtractor(Dispatch, FacebookExtractor):
def items(self):
base = f"{self.root}/{self.groups[0]}/"
return self._dispatch_extractors((
+ (FacebookInfoExtractor , base + "info"),
(FacebookAvatarExtractor, base + "avatar"),
(FacebookPhotosExtractor, base + "photos"),
(FacebookAlbumsExtractor, base + "photos_albums"),
diff --git a/test/results/facebook.py b/test/results/facebook.py
index b4c97fa4..c54fa753 100644
--- a/test/results/facebook.py
+++ b/test/results/facebook.py
@@ -28,6 +28,18 @@ __tests__ = (
"#results" : "https://www.facebook.com/100064860875397/photos"
},
+{
+ "#url" : "https://www.facebook.com/facebook",
+ "#class" : facebook.FacebookUserExtractor,
+ "#options" : {"include": "all"},
+ "#results" : [
+ "https://www.facebook.com/facebook/info",
+ "https://www.facebook.com/facebook/avatar",
+ "https://www.facebook.com/facebook/photos",
+ "https://www.facebook.com/facebook/photos_albums",
+ ],
+},
+
{
"#url" : "https://www.facebook.com/facebook/photos",
"#class" : facebook.FacebookPhotosExtractor,
@@ -51,7 +63,7 @@ __tests__ = (
"#range" : "1",
"user_id" : "100074229772340",
- "user_pfbid": r"re:pfbid0x\w{64}",
+ "user_pfbid": r"re:pfbid\w{66}",
},
{
@@ -104,7 +116,7 @@ __tests__ = (
"set_id" : "a.104622317759666",
"type" : "avatar",
"user_id" : "100046356937542",
- "user_pfbid": r"re:pfbid0x\w{64}",
+ "user_pfbid": r"re:pfbid\w{66}",
"username" : "Throwaway Idk",
},
@@ -198,7 +210,7 @@ __tests__ = (
"id" : "221820450302279",
"set_id" : "a.109762038174788",
"user_id" : "100074229772340",
- "user_pfbid": r"re:pfbid0x\w{64}",
+ "user_pfbid": r"re:pfbid\w{66}",
"username": "Throwaway Kwon",
},
@@ -278,4 +290,9 @@ __tests__ = (
"url" : "https://www.facebook.com/media/set/?set=a.736550611850295&type=3",
},
+{
+ "#url" : "https://www.facebook.com/brando.cha.3/info",
+ "#class" : facebook.FacebookInfoExtractor,
+},
+
)