diff --git a/docs/configuration.rst b/docs/configuration.rst
index 8bb2cdcc..d7051a5e 100644
--- a/docs/configuration.rst
+++ b/docs/configuration.rst
@@ -1006,6 +1006,7 @@ Default
``4chanarchives`` |
``archivedmoe`` |
``nsfwalbum`` |
+ ``pholder`` |
``tumblrgallery``
``true``
otherwise
diff --git a/docs/supportedsites.md b/docs/supportedsites.md
index bdf20c20..4ac53fb6 100644
--- a/docs/supportedsites.md
+++ b/docs/supportedsites.md
@@ -799,6 +799,12 @@ Consider all listed sites to potentially be NSFW.
Collections, individual Images, Search Results, User Profiles |
|
+
+ | pholder |
+ https://pholder.com/ |
+ Search Results, Subreddits, User Profiles |
+ |
+
| PhotoVogue |
https://www.vogue.com/photovogue/ |
diff --git a/gallery_dl/extractor/__init__.py b/gallery_dl/extractor/__init__.py
index 2989bc36..29b9b719 100644
--- a/gallery_dl/extractor/__init__.py
+++ b/gallery_dl/extractor/__init__.py
@@ -157,6 +157,7 @@ modules = [
"patreon",
"pexels",
"philomena",
+ "pholder",
"photovogue",
"picarto",
"picazor",
diff --git a/gallery_dl/extractor/pholder.py b/gallery_dl/extractor/pholder.py
new file mode 100644
index 00000000..12e150d4
--- /dev/null
+++ b/gallery_dl/extractor/pholder.py
@@ -0,0 +1,124 @@
+# -*- coding: utf-8 -*-
+
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+
+"""Extractors for https://pholder.com/"""
+
+from .common import Extractor, Message
+from .. import text, util, exception
+
+ # Common URL prefix for all pholder patterns below:
+ # the scheme and a leading "www." are both optional.
+ BASE_PATTERN = r"(?:https?://)?(?:www\.)?pholder\.com"
+
+
+ def _thumb_resolution(thumbnail):
+     """Return the pixel count (width * height) of 'thumbnail', or 0 on error"""
+     try:
+         width = int(thumbnail["width"])
+         height = int(thumbnail["height"])
+         return width * height
+     except Exception:
+         # missing keys, non-numeric values, or a non-mapping argument
+         return 0
+
+
+ class PholderExtractor(Extractor):
+     """Base class for pholder extractors"""
+     category = "pholder"
+     root = "https://pholder.com"
+     directory_fmt = ("{category}", "{subredditTitle}")
+     filename_fmt = "{id}{gallery_id:? / /}{title:? //[:225]}.{extension}"
+     archive_fmt = "{id}_{filename}_{gallery_id:? / /}"
+     request_interval = (2.0, 4.0)
+     referer = False
+     # a result page holding fewer entries than this is treated as the last
+     per_page = 150
+
+     def _parse_window_data(self, html):
+         """Return the decoded JSON value assigned to 'window.data' in 'html'
+
+         The assignment is not always contained in a single segment
+         returned by text.split_html(). When the segment starting with
+         the marker cannot be decoded on its own, all following segments
+         are appended to it until the accumulated text decodes.
+
+         Raises exception.AbortExtraction when nothing could be decoded.
+         """
+         marker = "window.data = "
+         skip = len(marker)
+         buffer = ""
+
+         for segment in text.split_html(html):
+             if segment.startswith(marker):
+                 try:
+                     return util.json_loads(segment[skip:])
+                 except ValueError:
+                     if not buffer:
+                         # first incomplete segment: start accumulating.
+                         # (re-parsing the very same text would only
+                         # fail again, so move on to the next segment)
+                         buffer = segment
+                         continue
+
+             if buffer:
+                 buffer += segment
+                 try:
+                     return util.json_loads(buffer[skip:])
+                 except ValueError:
+                     pass
+
+         raise exception.AbortExtraction("Could not locate window.data JSON.")
+
+     def _media_url(self, data):
+         """Return the download URL for the media entry 'data'
+
+         The thumbnail with the highest resolution is preferred;
+         data["origin"] is used when there are no thumbnails at all.
+         """
+         thumbnails = data.get("thumbnails")
+         if not thumbnails:
+             return data["origin"]
+
+         url = max(thumbnails, key=_thumb_resolution)["url"]
+
+         # sometimes, thumbnail image URLs end with ":large" or ":small",
+         # so any trailing ":word" bit gets stripped. The "/" check keeps
+         # the scheme separator and "host:port" parts untouched.
+         head, sep, tail = url.rpartition(":")
+         if sep and "/" not in tail:
+             return head
+         return url
+
+     def _posts(self, page_url):
+         """Yield Directory and Url messages for all media of 'page_url'
+
+         Result pages are requested with an increasing 'page' parameter
+         until one of them contains fewer than 'per_page' entries.
+         """
+         params = {"page": 1}
+         while True:
+             html = self.request(page_url, params=params).text
+             media = self._parse_window_data(html)["media"]
+
+             for item in media:
+                 data = item["_source"]
+                 data["id"] = item["_id"]
+                 data["date"] = self.parse_timestamp(data.get("submitted_utc"))
+
+                 # IDs containing ":" belong to a gallery
+                 # (can also see from item["is_gallery"]).
+                 # pholder does not preserve gallery order, but assigns
+                 # each image a sub-id. For IDs without ":", partition()
+                 # leaves 'id' unchanged and 'gallery_id' empty.
+                 data["id"], _, data["gallery_id"] = data["id"].partition(":")
+
+                 yield Message.Directory, "", data
+
+                 url = self._media_url(data)
+                 yield Message.Url, url, text.nameext_from_url(url, data)
+
+             if len(media) < self.per_page:
+                 break
+
+             params["page"] += 1
+
+     def items(self):
+         # the path captured by the subreddit and user patterns already
+         # starts with "/"; strip it to avoid "//" right after the host
+         path = self.groups[0].lstrip("/")
+         return self._posts(f"{self.root}/{path}")
+
+
+ class PholderSubredditExtractor(PholderExtractor):
+     """Extractor for pholder-stored media posts of a subreddit"""
+     subcategory = "subreddit"
+     example = "https://pholder.com/r/SUBREDDIT"
+     # groups: "/r/<name>" path, <name>, optional query string
+     pattern = BASE_PATTERN + r"(/r/([^/?#]+))(?:/?\?([^#]+))?"
+
+
+ class PholderUserExtractor(PholderExtractor):
+     """Extractor for pholder-stored media posts of a reddit user"""
+     subcategory = "user"
+     example = "https://www.pholder.com/u/USER"
+     # groups: "/u/<name>" path, optional query string
+     pattern = BASE_PATTERN + r"(/u/[^/?#]+)(?:/?\?([^#]+))?"
+     # files are grouped per author instead of per subreddit
+     directory_fmt = ("{category}", "u_{author}")
+
+
+ class PholderSearchExtractor(PholderExtractor):
+     """Extractor for pholder-stored media posts matching a search"""
+     subcategory = "search"
+     example = "https://www.pholder.com/SEARCH"
+     # catch-all: captures everything following the "/" after the host.
+     # NOTE(review): this also matches "/r/..." and "/u/..." URLs, so it
+     # presumably relies on being tried after the other two extractors
+     # - confirm against the extractor lookup order.
+     pattern = BASE_PATTERN + r"/(.*)"
diff --git a/gallery_dl/text.py b/gallery_dl/text.py
index 9fdf05bc..e9bd15e1 100644
--- a/gallery_dl/text.py
+++ b/gallery_dl/text.py
@@ -120,7 +120,7 @@ def nameext_from_url(url, data=None):
def nameext_from_name(filename, data=None):
- """Extract the last part of an URL and fill 'data' accordingly"""
+ """Extract the last part of a file name and fill 'data' accordingly"""
if data is None:
data = {}
diff --git a/scripts/supportedsites.py b/scripts/supportedsites.py
index f62aea8b..2d591ca6 100755
--- a/scripts/supportedsites.py
+++ b/scripts/supportedsites.py
@@ -153,6 +153,7 @@ CATEGORY_MAP = {
"nudostarforum" : "NudoStar Forums",
"okporn" : "OK.PORN",
"paheal" : "Rule 34",
+ "pholder" : "pholder",
"photovogue" : "PhotoVogue",
"picstate" : "PicState",
"pidgiwiki" : "PidgiWiki",
diff --git a/test/results/pholder.py b/test/results/pholder.py
new file mode 100644
index 00000000..46baf7fd
--- /dev/null
+++ b/test/results/pholder.py
@@ -0,0 +1,47 @@
+# -*- coding: utf-8 -*-
+
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+
+from gallery_dl.extractor import pholder
+
+
+ __tests__ = (
+ # subreddit listing
+ {
+ "#url" : "https://pholder.com/r/lavaporn",
+ "#category": ("", "pholder", "subreddit"),
+ "#class" : pholder.PholderSubredditExtractor,
+ "#range" : "1-20",
+ "#count" : ">= 20",
+ },
+
+ # subreddit URL with a trailing slash (only URL -> extractor matching)
+ {
+ "#url" : "https://pholder.com/r/lavaporn/",
+ "#category": ("", "pholder", "subreddit"),
+ "#class" : pholder.PholderSubredditExtractor,
+ },
+
+ # user listing
+ {
+ "#url" : "https://pholder.com/u/automoderator",
+ "#category": ("", "pholder", "user"),
+ "#class" : pholder.PholderUserExtractor,
+ "#range" : "1-20",
+ "#count" : ">= 20",
+ },
+
+ # user URL with a trailing slash (only URL -> extractor matching)
+ {
+ "#url" : "https://pholder.com/u/automoderator/",
+ "#category": ("", "pholder", "user"),
+ "#class" : pholder.PholderUserExtractor,
+ },
+
+ # any other path is handled as a search
+ {
+ "#url" : "https://pholder.com/search-text",
+ "#category": ("", "pholder", "search"),
+ "#class" : pholder.PholderSearchExtractor,
+ "#range" : "1-10",
+ "#count" : "== 10",
+ },
+
+ )