diff --git a/docs/supportedsites.rst b/docs/supportedsites.rst index 43f20534..fa4f554c 100644 --- a/docs/supportedsites.rst +++ b/docs/supportedsites.rst @@ -78,6 +78,7 @@ Pinterest https://www.pinterest.com/ Boards, Pins, pin.it Li Pixiv https://www.pixiv.net/ |pixiv-C| Required Pixnet https://www.pixnet.net/ |pixnet-C| Plurk https://www.plurk.com/ Posts, Timelines +Pornhub https://www.pornhub.com/ Images from Users, Galleries Pornreactor http://pornreactor.cc/ |pornreactor-C| PowerManga https://read.powermanga.org/ Chapters, Manga Pururin https://pururin.io/ Galleries diff --git a/gallery_dl/extractor/__init__.py b/gallery_dl/extractor/__init__.py index 2ceb009d..ac64d513 100644 --- a/gallery_dl/extractor/__init__.py +++ b/gallery_dl/extractor/__init__.py @@ -72,6 +72,7 @@ modules = [ "pixiv", "pixnet", "plurk", + "pornhub", "pururin", "reactor", "readcomiconline", diff --git a/gallery_dl/extractor/pornhub.py b/gallery_dl/extractor/pornhub.py new file mode 100644 index 00000000..40816b30 --- /dev/null +++ b/gallery_dl/extractor/pornhub.py @@ -0,0 +1,157 @@ +# -*- coding: utf-8 -*- + +# Copyright 2019 Mike Fährmann +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License version 2 as +# published by the Free Software Foundation. + +"""Extractors for https://www.pornhub.com/""" + +from .common import Extractor, Message +from .. import text, exception + + +BASE_PATTERN = r"(?:https?://)?(?:[^.]+\.)?pornhub\.com" + + +class PornhubExtractor(Extractor): + """Base class for pornhub extractors""" + category = "pornhub" + root = "https://www.pornhub.com" + + +class PornhubGalleryExtractor(PornhubExtractor): + """Extractor for image galleries on pornhub.com""" + subcategory = "gallery" + directory_fmt = ("{category}", "{user}", "{gallery[id]} {gallery[title]}") + filename_fmt = "{num:>03}_{id}.{extension}" + archive_fmt = "{id}" + pattern = BASE_PATTERN + r"/album/(\d+)" + test = ( + ("https://www.pornhub.com/album/1708982", { + "pattern": r"https://\w+.phncdn.com/pics/albums/\d+/\d+/\d+/\d+/", + "count": 93, + "keyword": { + "id": int, + "num": int, + "score": int, + "views": int, + "caption": str, + "user": "Unknown", + "gallery": { + "id" : 1708982, + "score": int, + "views": int, + "tags" : list, + "title": "Random Hentai", + }, + }, + }), + ("https://www.pornhub.com/album/37180171", { + "exception": exception.AuthorizationError, + }), + ) + + def __init__(self, match): + PornhubExtractor.__init__(self, match) + self.gallery_id = match.group(1) + self._first = None + + def items(self): + data = self.metadata() + yield Message.Version, 1 + yield Message.Directory, data + for num, image in enumerate(self.images(), 1): + url = image["url"] + image.update(data) + image["num"] = num + yield Message.Url, url, text.nameext_from_url(url, image) + + def metadata(self): + url = "{}/album/{}".format( + self.root, self.gallery_id) + extr = text.extract_from(self.request(url).text) + + title = extr("