diff --git a/docs/configuration.rst b/docs/configuration.rst index b548fb5c..543a8a31 100644 --- a/docs/configuration.rst +++ b/docs/configuration.rst @@ -2678,6 +2678,7 @@ Description Supported values are + * ``info`` * ``avatar`` * ``photos`` * ``albums`` diff --git a/docs/supportedsites.md b/docs/supportedsites.md index f9ac6467..ae94ba77 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -280,7 +280,7 @@ Consider all listed sites to potentially be NSFW. Facebook https://www.facebook.com/ - Albums, Avatars, Photos, Profile Photos, Sets, User Profiles, Videos + Albums, Avatars, User Profile Information, Photos, Profile Photos, Sets, User Profiles, Videos Cookies diff --git a/gallery_dl/extractor/facebook.py b/gallery_dl/extractor/facebook.py index d90d44b1..c646d1b8 100644 --- a/gallery_dl/extractor/facebook.py +++ b/gallery_dl/extractor/facebook.py @@ -322,17 +322,39 @@ class FacebookExtractor(Extractor): "authenticated cookies", "profile", "This content isn't available right now") - set_id = self._extract_profile_set_id(profile_photos_page) - avatar_page_url = text.extr( - profile_photos_page, ',"profilePhoto":{"url":"', '"') + set_id = self._extract_profile_set_id( + profile_photos_page) + user_data = text.extr( + profile_photos_page, '","user":{"', '},"viewer":{') - if set_id or avatar_page_url: + if set_id or user_data: break self.log.debug("Got empty profile photos page, retrying...") else: raise exception.AbortExtraction("Failed to extract profile data") - return set_id, avatar_page_url.replace("\\/", "/") + try: + data = util.json_loads(f'{{"{user_data}}}') + except Exception: + data = {} + self.log.debug(user_data) + + try: + data["profile_tabs"] = [ + edge["node"] + for edge in (data["profile_tabs"]["profile_user"] + ["timeline_nav_app_sections"]["edges"]) + ] + except Exception: + pass + + data["set_id"] = set_id + data["vanity"] = ( + text.extr(profile_photos_page, '"userVanity":"', '"') or + text.extr(profile_photos_page, '"vanity":"', '"') + ) + + return data def _extract_profile_set_id(self, profile_photos_page): set_ids_raw = text.extr( @@ -440,6 +462,17 @@ class FacebookVideoExtractor(FacebookExtractor): yield Message.Url, audio["url"], audio +class FacebookInfoExtractor(FacebookExtractor): + """Extractor for Facebook Profile data""" + subcategory = "info" + pattern = USER_PATTERN + r"/info" + example = "https://www.facebook.com/USERNAME/info" + + def items(self): + user = self._extract_profile_photos_page(self.groups[0]) + return iter(((Message.Directory, user),)) + + class FacebookAlbumsExtractor(FacebookExtractor): """Extractor for Facebook Profile albums""" subcategory = "albums" @@ -480,7 +513,7 @@ class FacebookPhotosExtractor(FacebookExtractor): example = "https://www.facebook.com/USERNAME/photos" def items(self): - set_id = self._extract_profile_photos_page(self.groups[0])[0] + set_id = self._extract_profile_photos_page(self.groups[0])["set_id"] if not set_id: return iter(()) @@ -497,7 +530,8 @@ class FacebookAvatarExtractor(FacebookExtractor): example = "https://www.facebook.com/USERNAME/avatar" def items(self): - avatar_page_url = self._extract_profile_photos_page(self.groups[0])[1] + user = self._extract_profile_photos_page(self.groups[0]) + avatar_page_url = user["profilePhoto"]["url"] avatar_page = self.photo_page_request_wrapper(avatar_page_url).text avatar = self.parse_photo_page(avatar_page) @@ -520,6 +554,7 @@ class FacebookUserExtractor(Dispatch, FacebookExtractor): def items(self): base = f"{self.root}/{self.groups[0]}/" return self._dispatch_extractors(( + (FacebookInfoExtractor , base + "info"), (FacebookAvatarExtractor, base + "avatar"), (FacebookPhotosExtractor, base + "photos"), (FacebookAlbumsExtractor, base + "photos_albums"), diff --git a/test/results/facebook.py b/test/results/facebook.py index b4c97fa4..c54fa753 100644 --- a/test/results/facebook.py +++ b/test/results/facebook.py @@ -28,6 +28,18 @@ __tests__ = ( "#results" : "https://www.facebook.com/100064860875397/photos" }, +{ + "#url" : "https://www.facebook.com/facebook", + "#class" : facebook.FacebookUserExtractor, + "#options" : {"include": "all"}, + "#results" : [ + "https://www.facebook.com/facebook/info", + "https://www.facebook.com/facebook/avatar", + "https://www.facebook.com/facebook/photos", + "https://www.facebook.com/facebook/photos_albums", + ], +}, + { "#url" : "https://www.facebook.com/facebook/photos", "#class" : facebook.FacebookPhotosExtractor, @@ -51,7 +63,7 @@ __tests__ = ( "#range" : "1", "user_id" : "100074229772340", - "user_pfbid": r"re:pfbid0x\w{64}", + "user_pfbid": r"re:pfbid\w{66}", }, { @@ -104,7 +116,7 @@ __tests__ = ( "set_id" : "a.104622317759666", "type" : "avatar", "user_id" : "100046356937542", - "user_pfbid": r"re:pfbid0x\w{64}", + "user_pfbid": r"re:pfbid\w{66}", "username" : "Throwaway Idk", }, @@ -198,7 +210,7 @@ __tests__ = ( "id" : "221820450302279", "set_id" : "a.109762038174788", "user_id" : "100074229772340", - "user_pfbid": r"re:pfbid0x\w{64}", + "user_pfbid": r"re:pfbid\w{66}", "username": "Throwaway Kwon", }, @@ -278,4 +290,9 @@ __tests__ = ( "url" : "https://www.facebook.com/media/set/?set=a.736550611850295&type=3", }, +{ + "#url" : "https://www.facebook.com/brando.cha.3/info", + "#class" : facebook.FacebookInfoExtractor, +}, + )