diff --git a/docs/configuration.rst b/docs/configuration.rst
index 0dce61f2..63b72ea1 100644
--- a/docs/configuration.rst
+++ b/docs/configuration.rst
@@ -2600,6 +2600,27 @@ description
Extract comments that include photo attachments made by the author of the post.
+extractor.facebook.include
+--------------------------
+Type
+ * ``string``
+ * ``list`` of ``strings``
+Default
+ ``"photos"``
+Example
+ * ``"avatar,photos"``
+ * ``["avatar", "photos"]``
+Description
+ A (comma-separated) list of subcategories to include
+ when processing a user profile.
+
+ Supported values are
+ * ``"avatar"``
+ * ``"photos"``
+
+ It is possible to use ``"all"`` instead of listing all values separately.
+
+
extractor.facebook.videos
-------------------------
Type
diff --git a/docs/gallery-dl.conf b/docs/gallery-dl.conf
index eb4a657c..45cade66 100644
--- a/docs/gallery-dl.conf
+++ b/docs/gallery-dl.conf
@@ -293,6 +293,14 @@
"limits-action" : "stop",
"fallback-retries": 2
},
+ "facebook":
+ {
+ "cookies": null,
+
+ "author-followups": false,
+ "include": "photos",
+ "videos" : true
+ },
"fanbox":
{
"cookies" : null,
diff --git a/docs/supportedsites.md b/docs/supportedsites.md
index 5a2f5425..90669ed9 100644
--- a/docs/supportedsites.md
+++ b/docs/supportedsites.md
@@ -274,7 +274,7 @@ Consider all listed sites to potentially be NSFW.
| Facebook |
https://www.facebook.com/ |
- Photos, Profiles, Sets, Videos |
+ Avatars, Photos, Profile Photos, Sets, User Profiles, Videos |
Cookies |
diff --git a/gallery_dl/extractor/facebook.py b/gallery_dl/extractor/facebook.py
index d8bb2f00..069ed994 100644
--- a/gallery_dl/extractor/facebook.py
+++ b/gallery_dl/extractor/facebook.py
@@ -6,10 +6,14 @@
"""Extractors for https://www.facebook.com/"""
-from .common import Extractor, Message
+from .common import Extractor, Message, Dispatch
from .. import text, exception
+from ..cache import memcache
BASE_PATTERN = r"(?:https?://)?(?:[\w-]+\.)?facebook\.com"
+USER_PATTERN = (BASE_PATTERN +
+ r"/(?!media/|photo/|photo.php|watch/)"
+ r"(?:profile\.php\?id=|people/[^/?#]+/)?([^/?]+)")
class FacebookExtractor(Extractor):
@@ -291,6 +295,36 @@ class FacebookExtractor(Extractor):
i += 1
+ @memcache(keyarg=1)
+ def _extract_profile_photos_page(self, profile):
+ profile_photos_url = f"{self.root}/{profile}/photos_by"
+
+ for _ in range(self.fallback_retries + 1):
+ profile_photos_page = self.request(profile_photos_url).text
+ if set_id := self._extract_profile_set_id(profile_photos_page):
+ break
+ self.log.debug("Got empty profile photos page, retrying...")
+ else:
+ raise exception.AbortExtraction("Failed to extract profile data")
+
+ avatar_page_url = text.extr(
+ profile_photos_page, ',"profilePhoto":{"url":"', '"')
+
+ return set_id, avatar_page_url.replace("\\/", "/")
+
+ def _extract_profile_set_id(self, profile_photos_page):
+ set_ids_raw = text.extr(
+ profile_photos_page, '"pageItems"', '"page_info"'
+ )
+
+ set_id = text.extr(
+ set_ids_raw, 'set=', '"'
+ ).rsplit("&", 1)[0] or text.extr(
+ set_ids_raw, '\\/photos\\/', '\\/'
+ )
+
+ return set_id
+
class FacebookSetExtractor(FacebookExtractor):
"""Base class for Facebook Set extractors"""
@@ -384,47 +418,50 @@ class FacebookVideoExtractor(FacebookExtractor):
yield Message.Url, audio["url"], audio
-class FacebookProfileExtractor(FacebookExtractor):
- """Base class for Facebook Profile Photos Set extractors"""
- subcategory = "profile"
- pattern = (
- BASE_PATTERN +
- r"/(?!media/|photo/|photo.php|watch/)"
- r"(?:profile\.php\?id=|people/[^/?#]+/)?"
- r"([^/?]+)(?:/photos(?:_by)?|/videos|/posts)?/?(?:$|\?|#)"
- )
- example = "https://www.facebook.com/USERNAME"
-
- def get_profile_photos_set_id(self, profile_photos_page):
- set_ids_raw = text.extr(
- profile_photos_page, '"pageItems"', '"page_info"'
- )
-
- set_id = text.extr(
- set_ids_raw, 'set=', '"'
- ).rsplit("&", 1)[0] or text.extr(
- set_ids_raw, '\\/photos\\/', '\\/'
- )
-
- return set_id
+class FacebookPhotosExtractor(FacebookExtractor):
+ """Extractor for Facebook Profile Photos"""
+ subcategory = "photos"
+ pattern = USER_PATTERN + r"/photos(?:_by)?"
+ example = "https://www.facebook.com/USERNAME/photos"
def items(self):
- profile_photos_url = (
- self.root + "/" + self.groups[0] + "/photos_by"
- )
+ set_id = self._extract_profile_photos_page(self.groups[0])[0]
+ set_url = f"{self.root}/media/set/?set={set_id}"
+ set_page = self.request(set_url).text
+ set_data = self.parse_set_page(set_page)
+ return self.extract_set(set_data)
- for _ in range(self.fallback_retries + 1):
- profile_photos_page = self.request(profile_photos_url).text
- set_id = self.get_profile_photos_set_id(profile_photos_page)
- if set_id:
- break
- self.log.debug("Failed to find profile photos set ID, retrying...")
- if set_id:
- set_url = f"{self.root}/media/set/?set={set_id}"
- set_page = self.request(set_url).text
- set_data = self.parse_set_page(set_page)
- return self.extract_set(set_data)
+class FacebookAvatarExtractor(FacebookExtractor):
+ """Extractor for Facebook Profile Avatars"""
+ subcategory = "avatar"
+ pattern = USER_PATTERN + r"/avatar"
+ example = "https://www.facebook.com/USERNAME/avatar"
- self.log.debug("Profile photos set ID not found.")
- return iter(())
+ def items(self):
+ avatar_page_url = self._extract_profile_photos_page(self.groups[0])[1]
+ avatar_page = self.photo_page_request_wrapper(avatar_page_url).text
+
+ avatar = self.parse_photo_page(avatar_page)
+ avatar["count"] = avatar["num"] = 1
+ avatar["type"] = "avatar"
+
+ set_url = f"{self.root}/media/set/?set={avatar['set_id']}"
+ set_page = self.request(set_url).text
+ directory = self.parse_set_page(set_page)
+
+ yield Message.Directory, directory
+ yield Message.Url, avatar["url"], avatar
+
+
+class FacebookUserExtractor(Dispatch, FacebookExtractor):
+ """Extractor for Facebook Profiles"""
+ pattern = USER_PATTERN + r"/?(?:$|\?|#)"
+ example = "https://www.facebook.com/USERNAME"
+
+ def items(self):
+ base = f"{self.root}/{self.groups[0]}/"
+ return self._dispatch_extractors((
+ (FacebookAvatarExtractor, base + "avatar"),
+ (FacebookPhotosExtractor, base + "photos"),
+ ), ("photos",))
diff --git a/scripts/supportedsites.py b/scripts/supportedsites.py
index 46399bfb..36708ff3 100755
--- a/scripts/supportedsites.py
+++ b/scripts/supportedsites.py
@@ -260,6 +260,9 @@ SUBCATEGORY_MAP = {
"discord": {
"direct-message" : "",
},
+ "facebook": {
+ "photos" : "Profile Photos",
+ },
"fanbox": {
"supporting": "Supported User Feed",
"redirect" : "Pixiv Redirects",
diff --git a/test/results/facebook.py b/test/results/facebook.py
index a445a6ae..a4a554ff 100644
--- a/test/results/facebook.py
+++ b/test/results/facebook.py
@@ -11,46 +11,71 @@ import datetime
__tests__ = (
{
"#url" : "https://www.facebook.com/facebook",
- "#category": ("", "facebook", "profile"),
- "#class" : facebook.FacebookProfileExtractor,
+ "#class" : facebook.FacebookUserExtractor,
+ "#results" : "https://www.facebook.com/facebook/photos"
+},
+
+{
+ "#url" : "https://www.facebook.com/people/facebook/100064860875397/?sk=photos",
+ "#class" : facebook.FacebookUserExtractor,
+ "#results" : "https://www.facebook.com/100064860875397/photos"
+},
+
+{
+ "#url" : "https://www.facebook.com/profile.php?id=100064860875397",
+ "#class" : facebook.FacebookUserExtractor,
+ "#results" : "https://www.facebook.com/100064860875397/photos"
+},
+
+{
+ "#url" : "https://www.facebook.com/facebook/photos",
+ "#class" : facebook.FacebookPhotosExtractor,
+
"#range" : "1-3",
"#count" : 3,
},
{
- "#url" : "https://www.facebook.com/facebook/photos",
- "#category": ("", "facebook", "profile"),
- "#class" : facebook.FacebookProfileExtractor,
+ "#url" : "https://www.facebook.com/100064860875397/photos",
+ "#class" : facebook.FacebookPhotosExtractor,
+
+ "#range" : "1-3",
+ "#count" : 3,
},
{
"#url" : "https://www.facebook.com/facebook/photos_by",
- "#category": ("", "facebook", "profile"),
- "#class" : facebook.FacebookProfileExtractor,
+ "#class" : facebook.FacebookPhotosExtractor,
},
{
- "#url" : "https://www.facebook.com/people/facebook/100064860875397/?sk=photos",
- "#category": ("", "facebook", "profile"),
- "#class" : facebook.FacebookProfileExtractor,
-},
+ "#url" : "https://www.facebook.com/facebook/avatar",
+ "#class" : facebook.FacebookAvatarExtractor,
+ "#pattern" : r"https://scontent-[^7?#]+\.fbcdn\.net/v/t39.30808-6/380700650_10162533193146729_2379134611963304810_n.jpg?.+",
+ "#count" : 1,
-{
- "#url" : "https://www.facebook.com/profile.php?id=100064860875397",
- "#category": ("", "facebook", "profile"),
- "#class" : facebook.FacebookProfileExtractor,
+ "caption" : "",
+ "count" : 1,
+ "date" : "dt:2023-10-06 21:13:59",
+ "extension": "jpg",
+ "filename" : str,
+ "id" : "736550615183628",
+ "num" : 1,
+ "set_id" : "a.736550601850296",
+ "type" : "avatar",
+ "url" : str,
+ "user_id" : "100064860875397",
+ "username" : "Facebook",
},
{
"#url" : "https://www.facebook.com/media/set/?set=a.10152716010956729&type=3",
- "#category": ("", "facebook", "set"),
"#class" : facebook.FacebookSetExtractor,
"#count" : 6,
},
{
"#url" : "https://www.facebook.com/joho.press.jp/posts/pfbid02mfFRpVkErLQxQ8cpD2f1hwXEVsFzK8kfNBKdK2Jndnx6AkmMQZuXhovwDgwvoDNil",
- "#category": ("", "facebook", "set"),
"#class" : facebook.FacebookSetExtractor,
"#range" : "1-3",
"#count" : 3,
@@ -62,20 +87,17 @@ __tests__ = (
{
"#url" : "https://www.facebook.com/photo/?fbid=10152716011076729&set=a.10152716010956729&setextract",
- "#category": ("", "facebook", "set"),
"#class" : facebook.FacebookSetExtractor,
"#count" : 4,
},
{
"#url" : "https://www.facebook.com/photo.php?fbid=10165113568399554&set=t.100064860875397&type=3",
- "#category": ("", "facebook", "photo"),
"#class" : facebook.FacebookPhotoExtractor,
},
{
"#url" : "https://www.facebook.com/photo/?fbid=10160743390456729",
- "#category": ("", "facebook", "photo"),
"#class" : facebook.FacebookPhotoExtractor,
"#count" : 1,
@@ -92,13 +114,11 @@ __tests__ = (
{
"#url" : "https://www.facebook.com/photo/?fbs=home&fbid=10160743390456729",
- "#category": ("", "facebook", "photo"),
"#class" : facebook.FacebookPhotoExtractor,
},
{
"#url" : "https://www.facebook.com/Facebook/photos/a.10152716010956729/10152716011076729",
- "#category": ("", "facebook", "photo"),
"#class" : facebook.FacebookPhotoExtractor,
"#count" : 1,
@@ -116,7 +136,6 @@ __tests__ = (
{
"#url" : "https://www.facebook.com/photo.php?fbid=1156625586261770",
"#comment" : "surrogate pair in 'caption' data (#6599)",
- "#category": ("", "facebook", "photo"),
"#class" : facebook.FacebookPhotoExtractor,
"caption" : "A century of innovation parked side by side.\n\n📸: Vocabutesla via X",
@@ -125,7 +144,6 @@ __tests__ = (
{
"#url" : "https://www.facebook.com/photo.php?fbid=989340003138066&set=pb.100061862277212.-2207520000&type=3",
"#comment" : "no 'publish_time' (#7151)",
- "#category": ("", "facebook", "photo"),
"#class" : facebook.FacebookPhotoExtractor,
"date" : "dt:2025-02-25 15:00:09",
@@ -133,7 +151,6 @@ __tests__ = (
{
"#url" : "https://www.facebook.com/watch/?v=1165557851291824",
- "#category": ("", "facebook", "video"),
"#class" : facebook.FacebookVideoExtractor,
"#count" : 1,
@@ -147,7 +164,6 @@ __tests__ = (
{
"#url" : "https://www.facebook.com/100064860875397/videos/644342003942740",
- "#category": ("", "facebook", "video"),
"#class" : facebook.FacebookVideoExtractor,
"#count" : 2,