diff --git a/docs/configuration.rst b/docs/configuration.rst index cf840c04..b548fb5c 100644 --- a/docs/configuration.rst +++ b/docs/configuration.rst @@ -2680,6 +2680,7 @@ Description * ``avatar`` * ``photos`` + * ``albums`` It is possible to use ``"all"`` instead of listing all values separately. diff --git a/docs/supportedsites.md b/docs/supportedsites.md index a028d638..f9ac6467 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -280,7 +280,7 @@ Consider all listed sites to potentially be NSFW. Facebook https://www.facebook.com/ - Avatars, Photos, Profile Photos, Sets, User Profiles, Videos + Albums, Avatars, Photos, Profile Photos, Sets, User Profiles, Videos Cookies diff --git a/gallery_dl/extractor/facebook.py b/gallery_dl/extractor/facebook.py index 173321d5..9cc44215 100644 --- a/gallery_dl/extractor/facebook.py +++ b/gallery_dl/extractor/facebook.py @@ -7,7 +7,7 @@ """Extractors for https://www.facebook.com/""" from .common import Extractor, Message, Dispatch -from .. import text, exception +from .. import text, util, exception from ..cache import memcache BASE_PATTERN = r"(?:https?://)?(?:[\w-]+\.)?facebook\.com" @@ -440,6 +440,34 @@ class FacebookVideoExtractor(FacebookExtractor): yield Message.Url, audio["url"], audio +class FacebookAlbumsExtractor(FacebookExtractor): + """Extractor for Facebook Profile albums""" + subcategory = "albums" + pattern = USER_PATTERN + r"/photos_albums" + example = "https://www.facebook.com/USERNAME/photos_albums" + + def items(self): + url = f"{self.root}/{self.groups[0]}/photos_albums" + page = self.request(url).text + + pos = page.find( + '"TimelineAppCollectionAlbumsRenderer","collection":{"id":"') + if pos < 0: + return + + items = text.extract(page, '},"pageItems":', '}}},', pos)[0] + edges = util.json_loads(items + "}}")["edges"] + + # TODO: use /graphql API endpoint + for edge in edges: + node = edge["node"] + album = node["node"] + album["_extractor"] = FacebookSetExtractor + album["title"] = node["title"]["text"] + album["thumbnail"] = (img := node["image"]) and img["uri"] + yield Message.Queue, album["url"], album + + class FacebookPhotosExtractor(FacebookExtractor): """Extractor for Facebook Profile Photos""" subcategory = "photos" @@ -489,4 +517,5 @@ class FacebookUserExtractor(Dispatch, FacebookExtractor): return self._dispatch_extractors(( (FacebookAvatarExtractor, base + "avatar"), (FacebookPhotosExtractor, base + "photos"), + (FacebookAlbumsExtractor, base + "photos_albums"), ), ("photos",)) diff --git a/test/results/facebook.py b/test/results/facebook.py index 0d99130b..e7632323 100644 --- a/test/results/facebook.py +++ b/test/results/facebook.py @@ -244,4 +244,25 @@ __tests__ = ( "username" : "Facebook", }, +{ + "#url" : "https://www.facebook.com/facebook/photos_albums", + "#class" : facebook.FacebookAlbumsExtractor, + "#pattern" : facebook.FacebookSetExtractor.pattern, + "#results" : [ + "https://www.facebook.com/media/set/?set=a.736550598516963&type=3", + "https://www.facebook.com/media/set/?set=a.736550611850295&type=3", + "https://www.facebook.com/media/set/?set=a.1198986285606723&type=3", + "https://www.facebook.com/media/set/?set=a.1188430493328969&type=3", + "https://www.facebook.com/media/set/?set=a.1182920610546624&type=3", + "https://www.facebook.com/media/set/?set=a.1152503723588313&type=3", + "https://www.facebook.com/media/set/?set=a.912647394240615&type=3", + "https://www.facebook.com/media/set/?set=a.862611645910857&type=3", + ], + + "id" : r"re:\d+", + "thumbnail": {str, None}, + "title" : str, + "url" : str, +}, + )