[xhamster] fix 'gallery' extractor (#6818)

This commit is contained in:
Mike Fährmann
2025-01-13 18:53:39 +01:00
parent bde99cc6ce
commit d17a423245
2 changed files with 36 additions and 40 deletions

View File

@@ -20,8 +20,8 @@ class XhamsterExtractor(Extractor):
category = "xhamster" category = "xhamster"
def __init__(self, match): def __init__(self, match):
Extractor.__init__(self, match)
self.root = "https://" + match.group(1) self.root = "https://" + match.group(1)
Extractor.__init__(self, match)
class XhamsterGalleryExtractor(XhamsterExtractor): class XhamsterGalleryExtractor(XhamsterExtractor):
@@ -34,48 +34,48 @@ class XhamsterGalleryExtractor(XhamsterExtractor):
pattern = BASE_PATTERN + r"(/photos/gallery/[^/?#]+)" pattern = BASE_PATTERN + r"(/photos/gallery/[^/?#]+)"
example = "https://xhamster.com/photos/gallery/12345" example = "https://xhamster.com/photos/gallery/12345"
def __init__(self, match):
XhamsterExtractor.__init__(self, match)
self.path = match.group(2)
self.data = None
def items(self): def items(self):
data = self.metadata() data = self.metadata()
yield Message.Directory, data yield Message.Directory, data
for num, image in enumerate(self.images(), 1): for num, image in enumerate(self.images(), 1):
url = image["imageURL"] url = image["imageURL"]
image.update(data) image.update(data)
text.nameext_from_url(url, image)
image["num"] = num image["num"] = num
yield Message.Url, url, text.nameext_from_url(url, image) image["extension"] = "webp"
del image["modelName"]
yield Message.Url, url, image
def metadata(self): def metadata(self):
self.data = self._data(self.root + self.path) data = self.data = self._extract_data(self.root + self.groups[1])
user = self.data["authorModel"]
imgs = self.data["photosGalleryModel"] gallery = data["galleryPage"]
info = gallery["infoProps"]
model = gallery["galleryModel"]
author = info["authorInfoProps"]
return { return {
"user": "user":
{ {
"id" : text.parse_int(user["id"]), "id" : text.parse_int(model["userId"]),
"url" : user["pageURL"], "url" : author["authorLink"],
"name" : user["name"], "name" : author["authorName"],
"retired" : user["retired"], "verified" : True if author.get("verified") else False,
"verified" : user["verified"], "subscribers": info["subscribeButtonProps"]["subscribers"],
"subscribers": user["subscribers"],
}, },
"gallery": "gallery":
{ {
"id" : text.parse_int(imgs["id"]), "id" : text.parse_int(gallery["id"]),
"tags" : [c["name"] for c in imgs["categories"]], "tags" : [t["label"] for t in info["categoriesTags"]],
"date" : text.parse_timestamp(imgs["created"]), "date" : text.parse_timestamp(model["created"]),
"views" : text.parse_int(imgs["views"]), "views" : text.parse_int(model["views"]),
"likes" : text.parse_int(imgs["rating"]["likes"]), "likes" : text.parse_int(model["rating"]["likes"]),
"dislikes" : text.parse_int(imgs["rating"]["dislikes"]), "dislikes" : text.parse_int(model["rating"]["dislikes"]),
"title" : text.unescape(imgs["title"]), "title" : model["title"],
"description": text.unescape(imgs["description"]), "description": model["description"],
"thumbnail" : imgs["thumbURL"], "thumbnail" : model["thumbURL"],
}, },
"count": text.parse_int(imgs["quantity"]), "count": text.parse_int(gallery["photosCount"]),
} }
def images(self): def images(self):
@@ -83,17 +83,17 @@ class XhamsterGalleryExtractor(XhamsterExtractor):
self.data = None self.data = None
while True: while True:
for image in data["photosGalleryModel"]["photos"]: yield from data["photosGalleryModel"]["photos"]
del image["modelName"]
yield image
pgntn = data["pagination"] pagination = data["galleryPage"]["paginationProps"]
if pgntn["active"] == pgntn["maxPage"]: if pagination["currentPageNumber"] >= pagination["lastPageNumber"]:
return return
url = pgntn["pageLinkTemplate"][:-3] + str(pgntn["next"]) url = (pagination["pageLinkTemplate"][:-3] +
data = self._data(url) str(pagination["currentPageNumber"] + 1))
def _data(self, url): data = self._extract_data(url)
def _extract_data(self, url):
page = self.request(url).text page = self.request(url).text
return util.json_loads(text.extr( return util.json_loads(text.extr(
page, "window.initials=", "</script>").rstrip("\n\r;")) page, "window.initials=", "</script>").rstrip("\n\r;"))
@@ -105,12 +105,8 @@ class XhamsterUserExtractor(XhamsterExtractor):
pattern = BASE_PATTERN + r"/users/([^/?#]+)(?:/photos)?/?(?:$|[?#])" pattern = BASE_PATTERN + r"/users/([^/?#]+)(?:/photos)?/?(?:$|[?#])"
example = "https://xhamster.com/users/USER/photos" example = "https://xhamster.com/users/USER/photos"
def __init__(self, match):
XhamsterExtractor.__init__(self, match)
self.user = match.group(2)
def items(self): def items(self):
url = "{}/users/{}/photos".format(self.root, self.user) url = "{}/users/{}/photos".format(self.root, self.groups[1])
data = {"_extractor": XhamsterGalleryExtractor} data = {"_extractor": XhamsterGalleryExtractor}
while url: while url:

View File

@@ -47,7 +47,7 @@ __tests__ = (
"user" : { "user" : {
"id" : 4741860, "id" : 4741860,
"name" : "DaringSex", "name" : "DaringSex",
"retired" : False, "?retired" : False,
"subscribers": range(25000, 50000), "subscribers": range(25000, 50000),
"url" : "https://xhamster.com/users/daringsex", "url" : "https://xhamster.com/users/daringsex",
"verified" : False, "verified" : False,