From 2d0cfb33e11a02b875e0e8a68d101c44d31bf987 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mike=20F=C3=A4hrmann?= Date: Thu, 2 Nov 2017 17:28:35 +0100 Subject: [PATCH] [xvideos] add user profile extractor (#45) --- docs/supportedsites.rst | 1 + gallery_dl/extractor/xvideos.py | 74 +++++++++++++++++++++++++++++---- scripts/build_supportedsites.py | 3 +- 3 files changed, 67 insertions(+), 11 deletions(-) diff --git a/docs/supportedsites.rst b/docs/supportedsites.rst index 43d7e929..546c9fc5 100644 --- a/docs/supportedsites.rst +++ b/docs/supportedsites.rst @@ -73,6 +73,7 @@ Tumblr https://www.tumblr.com/ Images from Users, Post Twitter https://twitter.com/ Tweets Warosu https://warosu.org/ Threads World Three http://www.slide.world-three.org/ Chapters, Manga +XVideos https://www.xvideos.com/ Images from Users, Galleries Yandere https://yande.re/ Pools, Popular Images, Posts, Tag-Searches YEET Archive https://archive.yeet.net/ Threads Acidimg https://acidimg.cc/ individual Images diff --git a/gallery_dl/extractor/xvideos.py b/gallery_dl/extractor/xvideos.py index 182e7d68..a11d5108 100644 --- a/gallery_dl/extractor/xvideos.py +++ b/gallery_dl/extractor/xvideos.py @@ -6,15 +6,26 @@ # it under the terms of the GNU General Public License version 2 as # published by the Free Software Foundation. -"""Extract images from https://www.xvideos.com""" +"""Extract images from https://www.xvideos.com/""" from .common import Extractor, Message from .. import text, util, exception +import json -class XvideosGalleryExtractor(Extractor): - """Extractor for user profile galleries from xvideos.com""" +class XvideosExtractor(Extractor): + """Base class for xvideos extractors""" category = "xvideos" + + def get_page(self): + response = self.request(self.url, fatal=False) + if response.status_code in (403, 404): + raise exception.NotFoundError(self.subcategory) + return response.text + + +class XvideosGalleryExtractor(XvideosExtractor): + """Extractor for user profile galleries from xvideos.com""" subcategory = "gallery" directory_fmt = ["{category}", "{user[name]}", "{title}"] filename_fmt = "{category}_{gallery_id}_{num:>03}.{extension}" @@ -32,15 +43,13 @@ class XvideosGalleryExtractor(Extractor): ] def __init__(self, match): - Extractor.__init__(self) + XvideosExtractor.__init__(self) self.user, self.gid = match.groups() - self.url = match.group(0) + self.url = "https://www.xvideos.com/profiles/{}/photos/{}".format( + self.user, self.gid) def items(self): - response = self.request(self.url, fatal=False) - if response.status_code in (403, 404): - raise exception.NotFoundError("gallery") - page = response.text + page = self.get_page() data = self.get_metadata(page) imgs = self.get_images(page) data["count"] = len(imgs) @@ -78,3 +87,50 @@ class XvideosGalleryExtractor(Extractor): """Return a list of all image urls for this gallery""" return list(text.extract_iter( page, '")[0])["data"] + + if not isinstance(data["galleries"], dict): + return + if "0" in data["galleries"]: + del data["galleries"]["0"] + + galleries = [ + {"gallery_id": util.safe_int(gid), + "title": text.unescape(gdata["title"]), + "count": gdata["nb_pics"]} + for gid, gdata in data["galleries"].items() + ] + galleries.sort(key=lambda x: x["gallery_id"]) + + yield Message.Version, 1 + for gallery in galleries: + url = "https://www.xvideos.com/profiles/{}/photos/{}".format( + self.user, gallery["gallery_id"]) + yield Message.Queue, url, gallery diff --git a/scripts/build_supportedsites.py b/scripts/build_supportedsites.py index dc873975..21eea89b 100755 --- a/scripts/build_supportedsites.py +++ b/scripts/build_supportedsites.py @@ -56,8 +56,7 @@ CATEGORY_MAP = { "thebarchive" : "The /b/ Archive", "worldthree" : "World Three", "yeet" : "YEET Archive", - "yomanga" : "YoManga", - "yonkouprod" : "Yonkou Productions", + "xvideos" : "XVideos", } SUBCATEGORY_MAP = {