diff --git a/docs/supportedsites.md b/docs/supportedsites.md
index cbae24ac..b40ba5a1 100644
--- a/docs/supportedsites.md
+++ b/docs/supportedsites.md
@@ -694,7 +694,7 @@ Consider all sites to be NSFW unless otherwise known.
| PornPics.com |
https://www.pornpics.com/ |
- Galleries |
+ Galleries, Search Results, Tag Searches |
|
diff --git a/gallery_dl/extractor/pornpics.py b/gallery_dl/extractor/pornpics.py
index e14263d1..9627ebfd 100644
--- a/gallery_dl/extractor/pornpics.py
+++ b/gallery_dl/extractor/pornpics.py
@@ -8,25 +8,59 @@
"""Extractors for https://www.pornpics.com/"""
-from .common import GalleryExtractor, Extractor
+from .common import GalleryExtractor, Extractor, Message
from .. import text
-BASE_PATTERN = r"(?:https?://)?(?:www\.)?pornpics\.com"
+BASE_PATTERN = r"(?:https?://)?(?:www\.)?pornpics\.com(?:/\w\w)?"
class PornpicsExtractor(Extractor):
"""Base class for pornpics extractors"""
category = "pornpics"
- root = "https://www.pornpics.com/"
+ root = "https://www.pornpics.com"
+ request_interval = (0.5, 1.5)
def __init__(self, match):
super().__init__(match)
self.session.headers["Referer"] = self.root
+ def items(self):
+ for gallery in self.galleries():
+ gallery["_extractor"] = PornpicsGalleryExtractor
+ yield Message.Queue, gallery["g_url"], gallery
+
+ def _pagination(self, url, params):
+ offset = params["offset"]
+ limit = params["limit"] = 20
+
+ headers = {
+ "Accept": "application/json, text/javascript, */*; q=0.01",
+ "Referer": url if offset else self.root + "/",
+ "X-Requested-With": "XMLHttpRequest",
+ }
+
+ if offset:
+ # fetch first 20 galleries from HTML
+ # since '"offset": 0' does not return a JSON response
+ page = self.request(url).text
+ for path in text.extract_iter(
+ page, 'class="rel-link" href="', '"'):
+ yield {"g_url": self.root + path}
+ del page
+
+ while True:
+ galleries = self.request(
+ url, params=params, headers=headers).json()
+ yield from galleries
+
+ if len(galleries) < limit:
+ return
+ params["offset"] += limit
+
class PornpicsGalleryExtractor(PornpicsExtractor, GalleryExtractor):
"""Extractor for pornpics galleries"""
- pattern = BASE_PATTERN + r"(?:/\w\w)?(/galleries/(?:[^/?#]+-)?(\d+))"
+ pattern = BASE_PATTERN + r"(/galleries/(?:[^/?#]+-)?(\d+))"
test = (
(("https://www.pornpics.com/galleries/british-beauty-danielle-flashes-"
"hot-breasts-ass-and-snatch-in-the-forest-62610699/"), {
@@ -56,8 +90,10 @@ class PornpicsGalleryExtractor(PornpicsExtractor, GalleryExtractor):
)
def __init__(self, match):
- self.gallery_id = match.group(2)
PornpicsExtractor.__init__(self, match)
+ self.gallery_id = match.group(2)
+
+ items = GalleryExtractor.items
def metadata(self, page):
extr = text.extract_from(page)
@@ -81,3 +117,53 @@ class PornpicsGalleryExtractor(PornpicsExtractor, GalleryExtractor):
(url, None)
for url in text.extract_iter(page, "class='rel-link' href='", "'")
]
+
+
+class PornpicsTagExtractor(PornpicsExtractor):
+ """Extractor for galleries from pornpics tag searches"""
+ subcategory = "tag"
+ pattern = BASE_PATTERN + r"/tags/([^/?#]+)"
+ test = (
+ ("https://www.pornpics.com/tags/summer-dress/", {
+ "pattern": PornpicsGalleryExtractor.pattern,
+ "range": "1-50",
+ "count": 50,
+ }),
+ ("https://pornpics.com/fr/tags/summer-dress"),
+ )
+
+ def __init__(self, match):
+ PornpicsExtractor.__init__(self, match)
+ self.tag = match.group(1)
+
+ def galleries(self):
+ url = "{}/tags/{}/".format(self.root, self.tag)
+ params = {"offset": 20}
+ return self._pagination(url, params)
+
+
+class PornpicsSearchExtractor(PornpicsExtractor):
+ """Extractor for galleries from pornpics tag searches"""
+ subcategory = "search"
+ pattern = BASE_PATTERN + r"/\?q=([^]+)"
+ test = (
+ ("https://www.pornpics.com/?q=nature", {
+ "pattern": PornpicsGalleryExtractor.pattern,
+ "range": "1-50",
+ "count": 50,
+ }),
+ ("https://pornpics.com/jp/?q=nature"),
+ )
+
+ def __init__(self, match):
+ PornpicsExtractor.__init__(self, match)
+ self.search = match.group(1)
+
+ def galleries(self):
+ url = self.root + "/search/srch.php"
+ params = {
+ "q" : self.search,
+ "lang" : "en",
+ "offset": 0,
+ }
+ return self._pagination(url, params)