diff --git a/gallery_dl/extractor/xvideos.py b/gallery_dl/extractor/xvideos.py
index e253b7f6..80a3614a 100644
--- a/gallery_dl/extractor/xvideos.py
+++ b/gallery_dl/extractor/xvideos.py
@@ -6,86 +6,91 @@
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
-"""Extract images from https://www.xvideos.com/"""
+"""Extractors for https://www.xvideos.com/"""
-from .common import Extractor, Message
-from .. import text, exception
+from .common import GalleryExtractor, Extractor, Message
+from .. import text
import json
-class XvideosExtractor(Extractor):
+# Plain mixin with no Extractor parent of its own: XvideosGalleryExtractor
+# pairs it with GalleryExtractor through multiple inheritance.
+class XvideosBase():
"""Base class for xvideos extractors"""
category = "xvideos"
root = "https://www.xvideos.com"
-class XvideosGalleryExtractor(XvideosExtractor):
- """Extractor for user profile galleries from xvideos.com"""
+class XvideosGalleryExtractor(XvideosBase, GalleryExtractor):
+ """Extractor for user profile galleries on xvideos.com"""
subcategory = "gallery"
- directory_fmt = ("{category}", "{user[name]}", "{title}")
- filename_fmt = "{category}_{gallery_id}_{num:>03}.{extension}"
- archive_fmt = "{gallery_id}_{num}"
+ # Gallery fields are addressed through the nested "gallery" dict that
+ # metadata() returns ({gallery[id]}, {gallery[title]}).
+ directory_fmt = ("{category}", "{user[name]}",
+ "{gallery[id]} {gallery[title]}")
+ filename_fmt = "{category}_{gallery[id]}_{num:>03}.{extension}"
+ archive_fmt = "{gallery[id]}_{num}"
+ # Besides /profiles/, the /amateur-channels/ and /model-channels/ URL
+ # prefixes are matched; group 1 is the user name, group 2 the gallery ID.
pattern = (r"(?:https?://)?(?:www\.)?xvideos\.com"
- r"/profiles/([^/?]+)/photos/(\d+)")
+ r"/(?:profiles|amateur-channels|model-channels)"
+ r"/([^/?]+)/photos/(\d+)")
test = (
- (("https://www.xvideos.com/profiles"
- "/pervertedcouple/photos/751031/random_stuff"), {
+ ("https://www.xvideos.com/profiles/pervertedcouple/photos/751031", {
"url": "4f0d992e5dc39def2c3ac8e099d17bf09e76e3c7",
- "keyword": "65979d63a69576cf692b41d5fbbd995cc40a51b9",
- }),
- ("https://www.xvideos.com/profiles/pervertedcouple/photos/751032/", {
- "exception": exception.NotFoundError,
+ "keyword": {
+ "gallery": {
+ "id" : 751031,
+ "title": "Random Stuff",
+ "tags" : list,
+ },
+ "user": {
+ "id" : 20245371,
+ "name" : "pervertedcouple",
+ "display" : "Pervertedcouple",
+ "sex" : "Woman",
+ "description": str,
+ },
+ },
}),
+ # NOTE(review): no expected results are given for the two channel URLs;
+ # presumably they only have to match 'pattern' - confirm with the test
+ # runner.
+ ("https://www.xvideos.com/amateur-channels/pervertedcouple/photos/12"),
+ ("https://www.xvideos.com/model-channels/pervertedcouple/photos/12"),
)
def __init__(self, match):
- XvideosExtractor.__init__(self, match)
- self.user, self.gid = match.groups()
+ # The two pattern groups are unpacked before the parent initializer runs
+ # because the URL passed to it is built from them.
+ self.user, self.gallery_id = match.groups()
+ # Whatever prefix the matched URL used, the URL handed to
+ # GalleryExtractor is always built with the /profiles/ path.
+ url = "{}/profiles/{}/photos/{}".format(
+ self.root, self.user, self.gallery_id)
+ GalleryExtractor.__init__(self, match, url)
- def items(self):
- url = "{}/profiles/{}/photos/{}".format(self.root, self.user, self.gid)
- page = self.request(url, notfound=self.subcategory).text
- data = self.get_metadata(page)
- imgs = self.get_images(page)
- data["count"] = len(imgs)
- yield Message.Version, 1
- yield Message.Directory, data
- for url in imgs:
- data["num"] = text.parse_int(url.rsplit("_", 2)[1])
- data["extension"] = url.rpartition(".")[2]
- yield Message.Url, url, data
-
- def get_metadata(self, page):
- """Collect metadata for extractor-job"""
- data = text.extract_all(page, (
- ("userid" , '"id_user":', ','),
- ("display", '"display":"', '"'),
- ("title" , '"title":"', '"'),
- ("descr" , '', ''),
- ("tags" , 'Tagged:', '<'),
- ))[0]
+ def metadata(self, page):
+ """Return the 'user' and 'gallery' metadata extracted from 'page'"""
+ # NOTE(review): extr() presumably scans 'page' front to back, so these
+ # calls have to follow the order of the markers in the page - confirm
+ # against text.extract_from().
+ extr = text.extract_from(page)
+ user = {
+ "id" : text.parse_int(extr('"id_user":', ',')),
+ "display": extr('"display":"', '"'),
+ "sex" : extr('"sex":"', '"'),
+ "name" : self.user,
+ }
+ title = extr('"title":"', '"')
+ # NOTE(review): both delimiters below are empty strings - the intended
+ # begin/end markers look lost; verify against the page markup.
+ user["description"] = extr(
+ '', '').strip()
+ tags = extr('Tagged:', '<').strip()
return {
- "user": {
- "id": text.parse_int(data["userid"]),
- "name": self.user,
- "display": data["display"],
- "description": data["descr"].strip(),
+ "user": user,
+ "gallery": {
+ "id" : text.parse_int(self.gallery_id),
+ "title": text.unescape(title),
+ # an empty tag string yields [] here; "".split(", ") would give [""]
+ "tags" : text.unescape(tags).split(", ") if tags else [],
},
- "tags": text.unescape(data["tags"] or "").strip().split(", "),
- "title": text.unescape(data["title"]),
- "gallery_id": text.parse_int(self.gid),
}
@staticmethod
- def get_images(page):
+ def images(page):
"""Return a list of all image urls for this gallery"""
- return list(text.extract_iter(
- page, '