[xhamster] add gallery & user extractor (#281)
This commit is contained in:
@@ -104,6 +104,7 @@ Warosu https://warosu.org/ Threads
|
||||
Weibo https://www.weibo.com/ Images from Users, Images from Statuses
|
||||
WikiArt.org https://www.wikiart.org/ Artists, Artworks
|
||||
World Three http://www.slide.world-three.org/ Chapters, Manga
|
||||
xHamster https://xhamster.com/ Images from Users, Galleries
|
||||
XVideos https://www.xvideos.com/ Images from Users, Galleries
|
||||
Yandere https://yande.re/ Pools, Popular Images, Posts, Tag-Searches
|
||||
yaplog! https://yaplog.jp/ Blogs, Posts
|
||||
|
||||
@@ -93,6 +93,7 @@ modules = [
|
||||
"warosu",
|
||||
"weibo",
|
||||
"wikiart",
|
||||
"xhamster",
|
||||
"xvideos",
|
||||
"yandere",
|
||||
"yaplog",
|
||||
|
||||
171
gallery_dl/extractor/xhamster.py
Normal file
171
gallery_dl/extractor/xhamster.py
Normal file
@@ -0,0 +1,171 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
# Copyright 2019 Mike Fährmann
|
||||
#
|
||||
# This program is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License version 2 as
|
||||
# published by the Free Software Foundation.
|
||||
|
||||
"""Extractors for https://xhamster.com/"""
|
||||
|
||||
from .common import Extractor, Message
|
||||
from .. import text
|
||||
import json
|
||||
|
||||
|
||||
# Common URL prefix for all xhamster extractors: an optional "http(s)://"
# scheme, an optional dot-free prefix ending in "." (for example the "en."
# language subdomain), and the xhamster host on .com, .one or .desi.
BASE_PATTERN = r"(?:https?://)?(?:[^.]+\.)?xhamster\.(?:com|one|desi)"
|
||||
|
||||
|
||||
class XhamsterExtractor(Extractor):
    """Common base for all xhamster extractors

    Holds the category name and the root URL that the concrete
    extractors in this module build their request URLs from.
    """
    root = "https://xhamster.com"
    category = "xhamster"
|
||||
|
||||
|
||||
class XhamsterGalleryExtractor(XhamsterExtractor):
    """Extractor for image galleries on xhamster.com"""
    subcategory = "gallery"
    directory_fmt = ("{category}", "{user[name]}",
                     "{gallery[id]} {gallery[title]}")
    filename_fmt = "{num:>03}_{id}.{extension}"
    archive_fmt = "{id}"
    pattern = BASE_PATTERN + r"(/photos/gallery/[^/?&#]+)"
    test = (
        ("https://xhamster.com/photos/gallery/11748968", {
            "pattern": r"https://thumb-p\d+.xhcdn.com/./[\w/-]+_1000.jpg$",
            "count": 143,
            "keyword": {
                "comments": int,
                "count": 143,
                "favorite": bool,
                "id": int,
                "num": int,
                "height": int,
                "width": int,
                "imageURL": str,
                "pageURL": str,
                "thumbURL": str,
                "gallery": {
                    "date": "type:datetime",
                    "description": "",
                    "dislikes": int,
                    "id": 11748968,
                    "likes": int,
                    "tags": ["NON-Porn"],
                    "thumbnail": str,
                    "title": "Make the world better.",
                    "views": int,
                },
                "user": {
                    "id": 16874672,
                    "name": "Anonymousrants",
                    "retired": bool,
                    "subscribers": int,
                    "url": "https://xhamster.com/users/anonymousrants",
                    "verified": bool,
                },
            },
        }),
        ("https://xhamster.com/photos/gallery/make-the-world-better-11748968"),
        ("https://xhamster.com/photos/gallery/11748968"),
        ("https://xhamster.one/photos/gallery/11748968"),
        ("https://xhamster.desi/photos/gallery/11748968"),
        ("https://en.xhamster.com/photos/gallery/11748968"),
    )

    def __init__(self, match):
        """Store the gallery path captured by 'pattern'"""
        XhamsterExtractor.__init__(self, match)
        # path component starting with "/photos/gallery/"
        self.path = match.group(1)
        # parsed data of the first gallery page; set by metadata()
        # and consumed (reset to None) by images()
        self.data = None

    def items(self):
        """Yield version, directory and one URL message per image"""
        data = self.metadata()
        yield Message.Version, 1
        yield Message.Directory, data
        for num, image in enumerate(self.images(), 1):
            # remember the URL before merging, then let the gallery-level
            # metadata ("user", "gallery", "count") join the image fields
            url = image["imageURL"]
            image.update(data)
            image["num"] = num
            # presumably nameext_from_url() supplies the "extension"
            # field used by 'filename_fmt' -- confirm in the text module
            yield Message.Url, url, text.nameext_from_url(url, image)

    def metadata(self):
        """Load the first gallery page and return gallery-level metadata

        The parsed page data is kept in 'self.data' so that images()
        can reuse it instead of requesting the same page again.
        """
        self.data = self._data(self.root + self.path)
        user = self.data["authorModel"]
        imgs = self.data["photosGalleryModel"]

        return {
            "user": {
                "id"         : text.parse_int(user["id"]),
                "url"        : user["pageURL"],
                "name"       : user["name"],
                "retired"    : user["retired"],
                "verified"   : user["verified"],
                "subscribers": user["subscribers"],
            },
            "gallery": {
                "id"         : text.parse_int(imgs["id"]),
                "tags"       : [c["name"] for c in imgs["categories"]],
                "date"       : text.parse_timestamp(imgs["created"]),
                "views"      : text.parse_int(imgs["views"]),
                "likes"      : text.parse_int(imgs["rating"]["likes"]),
                "dislikes"   : text.parse_int(imgs["rating"]["dislikes"]),
                "title"      : imgs["title"],
                "description": imgs["description"],
                "thumbnail"  : imgs["thumbURL"],
            },
            "count": text.parse_int(imgs["quantity"]),
        }

    def images(self):
        """Yield the photo dicts of all gallery pages, following pagination"""
        data = self.data
        self.data = None
        if data is None:
            # metadata() has not been called (or its result was already
            # consumed): fetch the first page instead of failing on None
            data = self._data(self.root + self.path)

        while True:
            for image in data["photosGalleryModel"]["photos"]:
                # drop the internal "modelName" entry; tolerate entries
                # that do not carry it instead of raising KeyError
                image.pop("modelName", None)
                yield image

            pgntn = data["pagination"]
            # stop on the last page, and also when no usable "next" value
            # is present, so that no bogus page URL gets requested
            if pgntn["active"] == pgntn["maxPage"] or not pgntn.get("next"):
                return
            # the template's last three characters are replaced by the
            # page number (presumably a placeholder such as "{#}")
            url = pgntn["pageLinkTemplate"][:-3] + str(pgntn["next"])
            data = self._data(url)

    def _data(self, url):
        """Request 'url' and return its parsed 'window.initials' object

        Raises ValueError if the page does not contain the
        'window.initials' assignment or if its content is not valid JSON
        (json.JSONDecodeError is a ValueError subclass).
        """
        page = self.request(url).text
        raw = text.extract(page, "window.initials =", "</script>")[0]
        if not raw:
            # marker not found: fail with a clear message instead of an
            # opaque error from calling .rstrip() on a missing value
            raise ValueError(
                "Unable to find 'window.initials' data in " + url)
        # strip the trailing ";" and newlines of the JavaScript statement
        return json.loads(raw.rstrip("\n\r;"))
|
||||
|
||||
|
||||
class XhamsterUserExtractor(XhamsterExtractor):
    """Extractor that queues every photo gallery of an xhamster user"""
    subcategory = "user"
    pattern = BASE_PATTERN + r"/users/([^/?&#]+)(?:/photos)?/?(?:$|[?#])"
    test = (
        ("https://xhamster.com/users/nickname68/photos", {
            "pattern": XhamsterGalleryExtractor.pattern,
            "count": 50,
            "range": "1-50",
        }),
        ("https://xhamster.com/users/nickname68"),
    )

    def __init__(self, match):
        """Remember the user name captured by 'pattern'"""
        XhamsterExtractor.__init__(self, match)
        self.user = match.group(1)

    def items(self):
        """Yield a queue message for each gallery link on each listing page"""
        yield Message.Version, 1
        page_url = "{}/users/{}/photos".format(self.root, self.user)
        queue_data = {"_extractor": XhamsterGalleryExtractor}

        while page_url:
            extr = text.extract_from(self.request(page_url).text)

            # collect gallery links until the extractor comes back empty
            gallery_url = extr('thumb-image-container" href="', '"')
            while gallery_url:
                yield Message.Queue, gallery_url, queue_data
                gallery_url = extr('thumb-image-container" href="', '"')

            # an empty result here ends the outer loop
            page_url = extr('data-page="next" href="', '"')
|
||||
@@ -69,6 +69,7 @@ CATEGORY_MAP = {
|
||||
"thebarchive" : "The /b/ Archive",
|
||||
"wikiart" : "WikiArt.org",
|
||||
"worldthree" : "World Three",
|
||||
"xhamster" : "xHamster",
|
||||
"xvideos" : "XVideos",
|
||||
"yaplog" : "yaplog!",
|
||||
"yuki" : "yuki.la 4chan archive",
|
||||
|
||||
Reference in New Issue
Block a user