[pornhub] add 'gif' support (#4463)
This commit is contained in:
@@ -19,6 +19,35 @@ class PornhubExtractor(Extractor):
|
||||
category = "pornhub"
|
||||
root = "https://www.pornhub.com"
|
||||
|
||||
def _init(self):
    """Set the 'accessAgeDisclaimerPH' cookie on the '.pornhub.com' domain.

    NOTE(review): presumably this lets requests bypass the site's
    age-confirmation page -- confirm against the site's behavior.
    """
    cookie_name = "accessAgeDisclaimerPH"
    cookie_value = "1"
    self.cookies.set(cookie_name, cookie_value, domain=".pornhub.com")
|
||||
|
||||
def _pagination(self, user, path):
    """Yield the text of successive AJAX listing pages for a user.

    Parameters:
        user: user path segment placed after the site root
        path: section path; when it contains no "/", "/public" is
              appended before building the URL

    Yields:
        The response text of each non-redirect response, starting at
        page 1 and advancing by one page after every yield.

    The generator never finishes by itself; the consumer has to stop
    iterating once a page contains nothing of interest.
    """
    if "/" not in path:
        path = path + "/public"

    url = "{}/{}/{}/ajax".format(self.root, user, path)
    request_headers = {
        # the listing URL without its trailing "/ajax"
        "Referer": url[:-5],
        "X-Requested-With": "XMLHttpRequest",
    }
    query = {"page": 1}

    while True:
        resp = self.request(
            url, method="POST", headers=request_headers, params=query,
            allow_redirects=False)
        status = resp.status_code

        if status < 300 or status >= 400:
            yield resp.text
            query["page"] += 1
        else:
            # Redirect: rebuild the AJAX URL from the 'location' header
            # and request the same page number again.
            url = "{}{}/{}/ajax".format(
                self.root, resp.headers["location"], path)
|
||||
|
||||
|
||||
class PornhubGalleryExtractor(PornhubExtractor):
|
||||
"""Extractor for image galleries on pornhub.com"""
|
||||
@@ -58,9 +87,6 @@ class PornhubGalleryExtractor(PornhubExtractor):
|
||||
self._first = None
|
||||
|
||||
def items(self):
|
||||
self.cookies.set(
|
||||
"accessAgeDisclaimerPH", "1", domain=".pornhub.com")
|
||||
|
||||
data = self.metadata()
|
||||
yield Message.Directory, data
|
||||
for num, image in enumerate(self.images(), 1):
|
||||
@@ -116,17 +142,83 @@ class PornhubGalleryExtractor(PornhubExtractor):
|
||||
return
|
||||
|
||||
|
||||
class PornhubGifExtractor(PornhubExtractor):
    """Extractor for pornhub.com gifs

    Handles a single '/gif/<id>' page and emits one directory message
    followed by one URL message for the gif's media file.
    """
    subcategory = "gif"
    directory_fmt = ("{category}", "{user}", "gifs")
    filename_fmt = "{id} {title}.{extension}"
    archive_fmt = "{id}"
    pattern = BASE_PATTERN + r"/gif/(\d+)"
    test = (
        ("https://www.pornhub.com/gif/33643461", {
            "pattern": r"https://\w+\.phncdn\.com/pics/gifs"
                       r"/033/643/461/33643461a\.webm",
            "keyword": {
                "date": "dt:2020-10-31 00:00:00",
                "extension": "webm",
                "filename": "33643461a",
                "id": "33643461",
                "tags": ["big boobs", "lana rhoades"],
                "title": "Big boobs",
                "url": str,
                "user": "Lana Rhoades",
            },
        }),
    )

    def __init__(self, match):
        """Remember the numeric gif ID captured by ``pattern``."""
        super().__init__(match)
        self.gallery_id = match.group(1)

    def items(self):
        """Fetch the gif page and yield its directory and URL messages."""
        page_url = "{}/gif/{}".format(self.root, self.gallery_id)
        extr = text.extract_from(self.request(page_url).text)

        # NOTE(review): presumably extr() scans forward through the page,
        # so the calls below are kept in the original order -- confirm.
        gif = {"id": self.gallery_id}
        gif["tags"] = extr("data-context-tag='", "'").split(",")
        gif["title"] = extr('"name": "', '"')
        gif["url"] = extr('"contentUrl": "', '"')
        gif["date"] = text.parse_datetime(
            extr('"uploadDate": "', '"'), "%Y-%m-%d")
        gif["user"] = extr('data-mxptext="', '"')

        yield Message.Directory, gif

        media_url = gif["url"]
        yield Message.Url, media_url, text.nameext_from_url(media_url, gif)
|
||||
|
||||
|
||||
class PornhubUserExtractor(PornhubExtractor):
    """Extractor for a pornhub user

    Matches a bare user/model/pornstar profile URL and hands the work
    over to the photos and gifs extractors for that profile.
    """
    subcategory = "user"
    # Single capture group: "<users|model|pornstar>/<name>".
    pattern = BASE_PATTERN + r"/((?:users|model|pornstar)/[^/?#]+)/?$"
    test = ("https://www.pornhub.com/pornstar/danika-mori",)

    def __init__(self, match):
        """Store the '<type>/<name>' profile path captured by ``pattern``."""
        PornhubExtractor.__init__(self, match)
        self.user = match.group(1)

    def initialize(self):
        """Intentionally do nothing.

        NOTE(review): presumably this skips the base-class setup because
        this extractor only dispatches to other extractors -- confirm.
        """
        pass

    def items(self):
        """Dispatch to the per-section extractors of this profile."""
        base = "{}/{}/".format(self.root, self.user)
        # NOTE(review): the trailing ("photos",) looks like the default
        # selection -- confirm against _dispatch_extractors.
        return self._dispatch_extractors((
            (PornhubPhotosExtractor, base + "photos"),
            (PornhubGifsExtractor  , base + "gifs"),
        ), ("photos",))
|
||||
|
||||
|
||||
class PornhubPhotosExtractor(PornhubExtractor):
|
||||
"""Extractor for all galleries of a pornhub user"""
|
||||
subcategory = "photos"
|
||||
pattern = (BASE_PATTERN + r"/((?:users|model|pornstar)/[^/?#]+)"
|
||||
"/(photos(?:/[^/?#]+)?)")
|
||||
test = (
|
||||
("https://www.pornhub.com/pornstar/danika-mori/photos", {
|
||||
"pattern": PornhubGalleryExtractor.pattern,
|
||||
"count": ">= 6",
|
||||
}),
|
||||
("https://www.pornhub.com/users/flyings0l0/"),
|
||||
("https://www.pornhub.com/users/flyings0l0/photos/public"),
|
||||
("https://www.pornhub.com/users/flyings0l0/photos/private"),
|
||||
("https://www.pornhub.com/users/flyings0l0/photos/favorites"),
|
||||
@@ -135,33 +227,41 @@ class PornhubUserExtractor(PornhubExtractor):
|
||||
|
||||
def __init__(self, match):
    """Store the profile path and photo section captured by ``pattern``.

    The pattern captures two values, '<type>/<name>' and the
    'photos[/<category>]' section, so exactly two names are unpacked.
    """
    PornhubExtractor.__init__(self, match)
    self.user, self.path = match.groups()

def items(self):
    """Yield a queue message for every photo album of the user.

    Pages come from the shared ``_pagination`` helper, which performs
    the POST requests and redirect handling. Iteration stops at the
    first page that contains no album ID.
    """
    data = {"_extractor": PornhubGalleryExtractor}
    for page in self._pagination(self.user, self.path):
        gid = None
        for gid in text.extract_iter(page, 'id="albumphoto', '"'):
            yield Message.Queue, self.root + "/album/" + gid, data
        # gid is still None when the page held no albums: end of listing
        if gid is None:
            return
|
||||
|
||||
class PornhubGifsExtractor(PornhubExtractor):
    """Extractor for a pornhub user's gifs

    Walks the user's gif listing pages and queues every gif found for
    PornhubGifExtractor.
    """
    subcategory = "gifs"
    pattern = (BASE_PATTERN + r"/((?:users|model|pornstar)/[^/?#]+)"
               "/(gifs(?:/[^/?#]+)?)")
    test = (
        ("https://www.pornhub.com/pornstar/danika-mori/gifs", {
            "pattern": PornhubGifExtractor.pattern,
            "count": ">= 42",
        }),
        ("https://www.pornhub.com/users/flyings0l0/gifs"),
        ("https://www.pornhub.com/model/bossgirl/gifs/video"),
    )

    def __init__(self, match):
        """Store the profile path and gifs section captured by ``pattern``."""
        super().__init__(match)
        user, path = match.groups()
        self.user = user
        self.path = path

    def items(self):
        """Yield a queue message for each gif ID on every listing page."""
        queue_data = {"_extractor": PornhubGifExtractor}
        gif_root = self.root + "/gif/"

        for page in self._pagination(self.user, self.path):
            found_any = False
            for gif_id in text.extract_iter(page, 'id="gif', '"'):
                found_any = True
                yield Message.Queue, gif_root + gif_id, queue_data
            # a page without any gif IDs marks the end of the listing
            if not found_any:
                return
|
||||
|
||||
Reference in New Issue
Block a user