From 5c487300ee15a13756a2beb7b3c6ef9766e52318 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mike=20F=C3=A4hrmann?= Date: Fri, 13 Apr 2018 19:21:32 +0200 Subject: [PATCH 01/12] improve 'parse_query()' and add tests - another irrelevant micro-optimization ! - use urllib.parse.parse_qsl directly instead of parse_qs, which just packs the results of parse_qsl in a different data structure - reduced memory requirements since no additional dict and lists are created --- gallery_dl/extractor/hitomi.py | 2 +- gallery_dl/text.py | 6 +++++- test/test_text.py | 31 +++++++++++++++++++++++++++++++ 3 files changed, 37 insertions(+), 2 deletions(-) diff --git a/gallery_dl/extractor/hitomi.py b/gallery_dl/extractor/hitomi.py index 64ae1e12..a7014fd7 100644 --- a/gallery_dl/extractor/hitomi.py +++ b/gallery_dl/extractor/hitomi.py @@ -24,7 +24,7 @@ class HitomiGalleryExtractor(ChapterExtractor): test = [ ("https://hitomi.la/galleries/867789.html", { "url": "cb759868d090fe0e2655c3e29ebf146054322b6d", - "keyword": "b1e66ff971fc8cb80240a687f508f3b74053f799", + "keyword": "85e453d01ee7f137669e75a764ccdc65ca092ad2", }), ("https://hitomi.la/reader/867789.html", None), ] diff --git a/gallery_dl/text.py b/gallery_dl/text.py index 82c56a8b..7e5cb29f 100644 --- a/gallery_dl/text.py +++ b/gallery_dl/text.py @@ -130,7 +130,11 @@ def extract_iter(txt, begin, end, pos=0): def parse_query(qs): """Parse a query string into key-value pairs""" - return {key: vlist[0] for key, vlist in urllib.parse.parse_qs(qs).items()} + result = {} + for key, value in urllib.parse.parse_qsl(qs): + if key not in result: + result[key] = value + return result if os.name == "nt": diff --git a/test/test_text.py b/test/test_text.py index e26dde76..767952fd 100644 --- a/test/test_text.py +++ b/test/test_text.py @@ -123,6 +123,37 @@ class TestText(unittest.TestCase): result = ["c", "b", "a", "d"] self.assertEqual(list(text.extract_iter(txt, "[", "]")), result) + def test_parse_query(self): + # standard stuff + self.assertEqual( + text.parse_query(""), {}) + self.assertEqual( + text.parse_query("foo=1"), {"foo": "1"}) + self.assertEqual( + text.parse_query("foo=1&bar=2"), {"foo": "1", "bar": "2"}) + + # missing value + self.assertEqual( + text.parse_query("bar"), {}) + self.assertEqual( + text.parse_query("foo=1&bar"), {"foo": "1"}) + self.assertEqual( + text.parse_query("foo=1&bar&baz=3"), {"foo": "1", "baz": "3"}) + + # keys with identical names + self.assertEqual( + text.parse_query("foo=1&foo=2"), {"foo": "1"}) + self.assertEqual( + text.parse_query("foo=1&bar=2&foo=3&bar=4"), + {"foo": "1", "bar": "2"}, + ) + + # non-string arguments + self.assertEqual(text.parse_query(()), {}) + self.assertEqual(text.parse_query([]), {}) + self.assertEqual(text.parse_query({}), {}) + self.assertEqual(text.parse_query(None), {}) + if __name__ == '__main__': unittest.main() From dd491274085eff1303bdecfdb0d5f71424326b36 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mike=20F=C3=A4hrmann?= Date: Mon, 16 Apr 2018 09:41:34 +0200 Subject: [PATCH 02/12] [spectrumnexus] remove module Site stopped hosting manga scans (http://view.thespectrum.net/) --- docs/supportedsites.rst | 23 ++++----- gallery_dl/extractor/__init__.py | 1 - gallery_dl/extractor/spectrumnexus.py | 74 --------------------------- 3 files changed, 10 insertions(+), 88 deletions(-) delete mode 100644 gallery_dl/extractor/spectrumnexus.py diff --git a/docs/supportedsites.rst b/docs/supportedsites.rst index e1533c96..e55ba8b9 100644 --- a/docs/supportedsites.rst +++ b/docs/supportedsites.rst @@ -10,17 +10,17 @@ Site URL Capabilities arch.b4k.co https://arch.b4k.co/ Threads Archive of Sins https://archiveofsins.com/ Threads Archived.Moe https://archived.moe/ Threads -ArtStation https://www.artstation.com/ |Images from Use-1| +ArtStation https://www.artstation.com/ |Images from Use-0| Danbooru https://danbooru.donmai.us/ Pools, Popular Images, Posts, Tag-Searches Desuarchive https://desuarchive.org/ Threads -DeviantArt https://www.deviantart.com/ |Collections, De-2| Optional (OAuth) +DeviantArt https://www.deviantart.com/ |Collections, De-1| Optional (OAuth) Doki Reader https://kobato.hologfx.com/ Chapters, Manga Dynasty Reader https://dynasty-scans.com/ Chapters e621 https://e621.net/ Pools, Popular Images, Posts, Tag-Searches ExHentai https://exhentai.org/ Favorites, Galleries, Search Results Optional Fallen Angels Scans https://www.fascans.com/ Chapters, Manga Fireden https://boards.fireden.net/ Threads -Flickr https://www.flickr.com/ |Images from Use-3| Optional (OAuth) +Flickr https://www.flickr.com/ |Images from Use-2| Optional (OAuth) Futaba Channel https://www.2chan.net/ Threads Gelbooru https://gelbooru.com/ Pools, Posts, Tag-Searches Gfycat https://gfycat.com/ individual Images @@ -42,7 +42,6 @@ Kirei Cake https://reader.kireicake.com/ Chapters, Manga KissManga http://kissmanga.com/ Chapters, Manga Komikcast https://komikcast.com/ Chapters, Manga Konachan https://konachan.com/ Pools, Popular Images, Posts, Tag-Searches -Love is Over Archive https://archive.loveisover.me/ Threads Luscious https://luscious.net/ Albums Manga Fox http://fanfox.net/ Chapters Manga Here http://www.mangahere.co/ Chapters, Manga @@ -53,11 +52,11 @@ MangaPark https://mangapark.me/ Chapters, Manga Mangareader https://www.mangareader.net/ Chapters, Manga nhentai https://nhentai.net/ Galleries, Search Results Niconico Seiga http://seiga.nicovideo.jp Images from Users, individual Images Required -nijie https://nijie.info/ |Images from Use-4| Required +nijie https://nijie.info/ |Images from Use-3| Required Nyafuu Archive https://archive.nyafuu.org/ Threads Pawoo https://pawoo.net Images from Users, Images from Statuses Pinterest https://www.pinterest.com Boards, Pins, pin.it Links -Pixiv https://www.pixiv.net/ |Images from Use-5| Required +Pixiv https://www.pixiv.net/ |Images from Use-4| Required PowerManga https://powermanga.org/ Chapters, Manga Pure Mashiro http://reader.puremashiro.moe/ Chapters, Manga Read Comic Online http://readcomiconline.to/ Comic-Issues, Comics @@ -71,7 +70,6 @@ Sea Otter Scans https://reader.seaotterscans.com/ Chapters, Manga Sen Manga http://raw.senmanga.com/ Chapters Sense-Scans http://sensescans.com/ Chapters, Manga SlideShare https://www.slideshare.net/ Presentations -Spectrum Nexus |http://www.thes-0| Chapters, Manga Subapics https://subapics.com/ Chapters, Manga The /b/ Archive https://thebarchive.com/ Threads Tumblr https://www.tumblr.com/ Images from Users, Likes, Posts, Tag-Searches Optional (OAuth) @@ -90,9 +88,8 @@ Postimg https://postimg.org/ individual Images Turboimagehost https://turboimagehost.com/ individual Images ==================== =================================== ================================================== ================ -.. |http://www.thes-0| replace:: http://www.thespectrum.net/manga_scans/ -.. |Images from Use-1| replace:: Images from Users, Albums, Challenges, individual Images, Likes, Search Results -.. |Collections, De-2| replace:: Collections, Deviations, Favorites, Folders, Galleries, Journals -.. |Images from Use-3| replace:: Images from Users, Albums, Favorites, Galleries, Groups, individual Images, Search Results -.. |Images from Use-4| replace:: Images from Users, Doujin, Favorites, individual Images -.. |Images from Use-5| replace:: Images from Users, Bookmarks, Favorites, pixiv.me Links, Rankings, Individual Images +.. |Images from Use-0| replace:: Images from Users, Albums, Challenges, individual Images, Likes, Search Results +.. |Collections, De-1| replace:: Collections, Deviations, Favorites, Folders, Galleries, Journals +.. |Images from Use-2| replace:: Images from Users, Albums, Favorites, Galleries, Groups, individual Images, Search Results +.. |Images from Use-3| replace:: Images from Users, Doujin, Favorites, individual Images +.. |Images from Use-4| replace:: Images from Users, Bookmarks, Favorites, pixiv.me Links, Rankings, Individual Images diff --git a/gallery_dl/extractor/__init__.py b/gallery_dl/extractor/__init__.py index d460e482..1a9c28c3 100644 --- a/gallery_dl/extractor/__init__.py +++ b/gallery_dl/extractor/__init__.py @@ -77,7 +77,6 @@ modules = [ "senmanga", "sensescans", "slideshare", - "spectrumnexus", "subapics", "thebarchive", "tumblr", diff --git a/gallery_dl/extractor/spectrumnexus.py b/gallery_dl/extractor/spectrumnexus.py deleted file mode 100644 index 8bc2aa1c..00000000 --- a/gallery_dl/extractor/spectrumnexus.py +++ /dev/null @@ -1,74 +0,0 @@ -# -*- coding: utf-8 -*- - -# Copyright 2015-2018 Mike Fährmann -# -# This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License version 2 as -# published by the Free Software Foundation. - -"""Extract manga pages from http://www.thespectrum.net/manga_scans/""" - -from .common import ChapterExtractor, MangaExtractor -from .. import text, util - - -class SpectrumnexusMangaExtractor(MangaExtractor): - """Extractor for manga from thespectrum.net""" - category = "spectrumnexus" - pattern = [r"(?:https?://)?(view\.thespectrum\.net/series/[^.]+\.html)#?$"] - reverse = False - test = [("http://view.thespectrum.net/series/kare-kano-volume-01.html", { - "url": "b2b175aad5ef1701cc4aee7c24f1ca3a93aba9cb", - "keyword": "5ed9d5c7c69d2d03417c853c4e8eae30f1e5febf", - })] - - def chapters(self, page): - results = [] - manga = text.extract(page, '', ' · ')[0] - page = text.extract(page, 'class="selectchapter"', '</select>')[0] - for chapter in text.extract_iter(page, '<option value="', '"'): - results.append((self.url + "?ch=" + chapter.replace(" ", "+"), { - "manga": manga, "chapter_string": chapter, - })) - return results - - -class SpectrumnexusChapterExtractor(ChapterExtractor): - """Extractor for manga-chapters or -volumes from thespectrum.net""" - category = "spectrumnexus" - directory_fmt = ["{category}", "{manga}", "{chapter_string}"] - filename_fmt = "{manga}_{chapter_string}_{page:>03}.{extension}" - archive_fmt = "{manga}_{chapter_string}_{page}" - pattern = [r"(?:https?://)?view\.thespectrum\.net/series/" - r"([^\.]+\.html)\?ch=(Chapter\+(\d+)|Volume\+(\d+))"] - test = [(("http://view.thespectrum.net/series/" - "toriko.html?ch=Chapter+343&page=1"), { - "url": "c0fc7dc594841217cc622a67edd79f06e9900333", - "keyword": "a8abe126cbc5fc798148b0b155242a470c1ba9d1", - })] - - def __init__(self, match): - path, self.chapter_string, self.chapter, self.volume = match.groups() - url = "http://view.thespectrum.net/series/{}?ch={}".format( - path, self.chapter_string) - ChapterExtractor.__init__(self, url) - - def get_metadata(self, page): - data = { - "chapter": util.safe_int(self.chapter), - "chapter_string": self.chapter_string.replace("+", " "), - "volume": util.safe_int(self.volume), - } - data = text.extract_all(page, ( - ('manga', '<title>', ' · SPECTRUM NEXUS '), - ('count', '
of ', '<'), - ), values=data)[0] - data["count"] = util.safe_int(data["count"]) - return data - - def get_images(self, page): - params = {"page": 1} - while True: - yield text.extract(page, ' Date: Mon, 16 Apr 2018 09:46:45 +0200 Subject: [PATCH 03/12] [pinterest] update access_token (#83) --- gallery_dl/extractor/pinterest.py | 4 ++-- test/test_results.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/gallery_dl/extractor/pinterest.py b/gallery_dl/extractor/pinterest.py index e4484102..5c421ee2 100644 --- a/gallery_dl/extractor/pinterest.py +++ b/gallery_dl/extractor/pinterest.py @@ -135,8 +135,8 @@ class PinterestPinitExtractor(PinterestExtractor): class PinterestAPI(): """Minimal interface for the pinterest API""" - def __init__(self, extractor, access_token="AV2U9Oe6dyC2vfPugUnBvJ7Duxg9" - "FHCJPXPZIvRDXv9hvwBALwAAAAA"): + def __init__(self, extractor, access_token="AfyIXxi1MJ6et0NlIl_vBchHbex-" + "FSWylPyr2GJE2uu3W8A97QAAAAA"): access_token = extractor.config("access-token", access_token) self.session = extractor.session self.session.params["access_token"] = access_token diff --git a/test/test_results.py b/test/test_results.py index e8344a72..fae3fba0 100644 --- a/test/test_results.py +++ b/test/test_results.py @@ -21,7 +21,7 @@ TRAVIS_SKIP = { # temporary issues, etc. BROKEN = { - "pinterest", # access_token invalid ? + "luscious", # layout change "puremashiro", # online reader down } From 9651f3fce09df6b4c8c920195443476199595673 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mike=20F=C3=A4hrmann?= Date: Mon, 16 Apr 2018 15:43:31 +0200 Subject: [PATCH 04/12] [pinterest] improve error messages (#83) --- gallery_dl/extractor/pinterest.py | 23 +++++++++++++---------- 1 file changed, 13 insertions(+), 10 deletions(-) diff --git a/gallery_dl/extractor/pinterest.py b/gallery_dl/extractor/pinterest.py index 5c421ee2..5bc61b69 100644 --- a/gallery_dl/extractor/pinterest.py +++ b/gallery_dl/extractor/pinterest.py @@ -46,7 +46,7 @@ class PinterestPinExtractor(PinterestExtractor): "content": "d3e24bc9f7af585e8c23b9136956bd45a4d9b947", }), ("https://www.pinterest.com/pin/858146903966145188/", { - "exception": exception.NotFoundError, + "exception": exception.StopExtraction, }), ] @@ -140,6 +140,7 @@ class PinterestAPI(): access_token = extractor.config("access-token", access_token) self.session = extractor.session self.session.params["access_token"] = access_token + self.log = extractor.log def pin(self, pin_id, fields="id,image,note"): """Query information about a pin""" @@ -161,7 +162,7 @@ class PinterestAPI(): def board_pins(self, user, board, fields="id,image,note"): """Yield all pins of a specific board""" - params = {"fields": fields} + params = {"fields": fields, "limit": 100} url = ("https://api.pinterest.com/v1/boards/{user}/{board}/pins/" .format(user=user, board=board)) while True: @@ -173,14 +174,16 @@ class PinterestAPI(): return params["cursor"] = cursor - @staticmethod - def _parse(response): + def _parse(self, response): """Parse an API response""" data = response.json() - if "data" not in data or data["data"] is None: - try: - msg = data["message"].partition(" ")[0].lower() - except KeyError: - msg = "" + if 200 <= response.status_code < 400 and data.get("data"): + return data + + msg = data.get("message") + if response.status_code == 404: + msg = msg.partition(" ")[0].lower() raise exception.NotFoundError(msg) - return data + else: + self.log.error("API request failed: %s", msg or "") + raise exception.StopExtraction() From 4bd182c107f0ea38917f8a9bc502d1d94a2d9c0d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mike=20F=C3=A4hrmann?= Date: Mon, 16 Apr 2018 19:43:27 +0200 Subject: [PATCH 05/12] [pinterest] implement `oauth:pinterest` (#83) Pinterest access tokens are rate limited at 200 requests per hour (or maybe per 2 or 3 hours?) so having just one access token for all users isn't going to work in the long run. --- README.rst | 2 +- docs/configuration.rst | 37 +++++++------ docs/supportedsites.rst | 2 +- gallery_dl/extractor/oauth.py | 88 ++++++++++++++++++++----------- gallery_dl/extractor/pinterest.py | 3 ++ scripts/build_supportedsites.py | 1 + test/test_results.py | 2 + 7 files changed, 88 insertions(+), 47 deletions(-) diff --git a/README.rst b/README.rst index 5bb73eb1..bba884c8 100644 --- a/README.rst +++ b/README.rst @@ -194,7 +194,7 @@ OAuth ----- *gallery-dl* supports user authentication via OAuth_ for -``deviantart``, ``flickr``, ``reddit`` and ``tumblr``. +``deviantart``, ``flickr``, ``pinterest``, ``reddit`` and ``tumblr``. This is entirely optional, but grants *gallery-dl* the ability to issue requests on your account's behalf and enables it to access resources which would otherwise be unavailable to a public user. diff --git a/docs/configuration.rst b/docs/configuration.rst index b80938ae..3d7cb301 100644 --- a/docs/configuration.rst +++ b/docs/configuration.rst @@ -788,7 +788,9 @@ extractor.deviantart.client-id & .client-secret ----------------------------------------------- =========== ===== Type ``string`` -How To - login and visit DeviantArt's `Applications & Keys`_ section +How To - login and visit DeviantArt's + `Applications & Keys `__ + section - click "Register your Application" - click "Save" (top right; default settings are fine) - copy ``client_id`` and ``client_secret`` of your new "Untitled" @@ -800,7 +802,8 @@ extractor.flickr.api-key & .api-secret -------------------------------------- =========== ===== Type ``string`` -How To - login and `Create an App`_ in Flickr's `App Garden`_ +How To - login and `Create an App `__ + in Flickr's `App Garden `__ - click "APPLY FOR A NON-COMMERCIAL KEY" - fill out the form with a random name and description and click "SUBMIT" @@ -817,11 +820,19 @@ How To =========== ===== -extractor.pinterest.access-token --------------------------------- +extractor.pinterest.client-id & .secret +--------------------------------------- =========== ===== Type ``string`` -How To +How To - login and visit Pinterest's + `Apps `__ section + - click "Create app" + - choose a random name and description and click "Create" + - scroll down and set a Site URL (e.g. https://example.org/) + and allow https://mikf.github.io/gallery-dl/oauth-redirect.html + as Redirect URI + - scroll back up again, copy the "App ID" and "App secret" values + and put them in your configuration file =========== ===== @@ -829,7 +840,8 @@ extractor.reddit.client-id & .user-agent ---------------------------------------- =========== ===== Type ``string`` -How To - login and visit the apps_ section of your account's preferences +How To - login and visit the `apps `__ + section of your account's preferences - click the "are you a developer? create an app..." button - fill out the form, choose "installed app", preferably set "http://localhost:6414/" as "redirect uri" and finally click @@ -838,7 +850,8 @@ How To - login and visit the apps_ section of your account's preferences "installed app") and put it in your configuration file - use "``Python::v1.0 (by /u/)``" as user-agent and replace ```` and ```` - accordingly (see Reddit's `API access rules`_) + accordingly (see Reddit's + `API access rules `__) =========== ===== @@ -846,7 +859,8 @@ extractor.tumblr.api-key ------------------------ =========== ===== Type ``string`` -How To - login and visit Tumblr's Applications_ section +How To - login and visit Tumblr's + `Applications `__ section - click "Register application" - fill out the form: use a random name and description, set https://example.org/ as "Application Website" and "Default @@ -886,10 +900,3 @@ How To - login and visit Tumblr's Applications_ section .. _webbrowser.open(): https://docs.python.org/3/library/webbrowser.html .. _datetime.max: https://docs.python.org/3/library/datetime.html#datetime.datetime.max .. _Authentication: https://github.com/mikf/gallery-dl#5authentication - -.. _`Applications & Keys`: https://www.deviantart.com/developers/apps -.. _`Create an App`: https://www.flickr.com/services/apps/create/apply/ -.. _`App Garden`: https://www.flickr.com/services/ -.. _apps: https://www.reddit.com/prefs/apps/ -.. _`API access rules`: https://github.com/reddit/reddit/wiki/API -.. _Applications: https://www.tumblr.com/oauth/apps diff --git a/docs/supportedsites.rst b/docs/supportedsites.rst index e55ba8b9..602251dc 100644 --- a/docs/supportedsites.rst +++ b/docs/supportedsites.rst @@ -55,7 +55,7 @@ Niconico Seiga http://seiga.nicovideo.jp Images from Users, indi nijie https://nijie.info/ |Images from Use-3| Required Nyafuu Archive https://archive.nyafuu.org/ Threads Pawoo https://pawoo.net Images from Users, Images from Statuses -Pinterest https://www.pinterest.com Boards, Pins, pin.it Links +Pinterest https://www.pinterest.com Boards, Pins, pin.it Links Optional (OAuth) Pixiv https://www.pixiv.net/ |Images from Use-4| Required PowerManga https://powermanga.org/ Chapters, Manga Pure Mashiro http://reader.puremashiro.moe/ Chapters, Manga diff --git a/gallery_dl/extractor/oauth.py b/gallery_dl/extractor/oauth.py index 12d80cbe..c8269f03 100644 --- a/gallery_dl/extractor/oauth.py +++ b/gallery_dl/extractor/oauth.py @@ -9,7 +9,7 @@ """Utility classes to setup OAuth and link a users account to gallery-dl""" from .common import Extractor, Message -from . import deviantart, flickr, reddit, tumblr +from . import deviantart, flickr, pinterest, reddit, tumblr from .. import text, util, config import os import urllib.parse @@ -50,14 +50,7 @@ class OAuthBase(Extractor): data = self.client.recv(1024).decode() path = data.split(" ", 2)[1] - query = path.partition("?")[2] - return { - key: urllib.parse.unquote(value) - for key, _, value in [ - part.partition("=") - for part in query.split("&") - ] - } + return text.parse_query(path.partition("?")[2]) def send(self, msg): """Send 'msg' to the socket opened in 'recv()'""" @@ -69,8 +62,7 @@ class OAuthBase(Extractor): """Open 'url' in browser amd return response parameters""" import webbrowser url += "?" + urllib.parse.urlencode(params) - browser = self.config("browser", True) - if not browser or not webbrowser.open(url): + if not self.config("browser", True) or not webbrowser.open(url): print("Please open this URL in your browser:") print(url, end="\n\n", flush=True) return self.recv() @@ -80,7 +72,7 @@ class OAuthBase(Extractor): """Perform the OAuth 1.0a authorization flow""" del self.session.params["oauth_token"] - # Get a Request Token + # get a request token params = {"oauth_callback": self.redirect_uri} data = self.session.get(request_token_url, params=params).text @@ -88,25 +80,29 @@ class OAuthBase(Extractor): self.session.params["oauth_token"] = token = data["oauth_token"] self.session.token_secret = data["oauth_token_secret"] - # Get the User's Authorization + # get the user's authorization params = {"oauth_token": token, "perms": "read"} data = self.open(authorize_url, params) - # Exchange the Request Token for an Access Token + # exchange the request token for an access token data = self.session.get(access_token_url, params=data).text data = text.parse_query(data) self.send(OAUTH1_MSG_TEMPLATE.format( category=self.subcategory, token=data["oauth_token"], - token_secret=data["oauth_token_secret"])) + token_secret=data["oauth_token_secret"], + )) def _oauth2_authorization_code_grant( - self, client_id, client_secret, auth_url, token_url, scope): + self, client_id, client_secret, auth_url, token_url, + scope="read", key="refresh_token", auth=True): """Perform an OAuth2 authorization code grant""" - state = "gallery-dl:{}:{}".format( - self.subcategory, util.OAuthSession.nonce(8)) + state = "gallery-dl_{}_{}".format( + self.subcategory, + util.OAuthSession.nonce(8) + ) auth_params = { "client_id": client_id, @@ -117,25 +113,33 @@ class OAuthBase(Extractor): "scope": scope, } - # receive 'code' + # receive an authorization code params = self.open(auth_url, auth_params) - # check auth response + # check authorization response if state != params.get("state"): self.send("'state' mismatch: expected {}, got {}.".format( - state, params.get("state"))) + state, params.get("state") + )) return if "error" in params: self.send(params["error"]) return - # exchange 'code' for 'refresh_token' + # exchange the authorization code for a token data = { "grant_type": "authorization_code", "code": params["code"], "redirect_uri": self.redirect_uri, } - auth = (client_id, client_secret) + + if auth: + auth = (client_id, client_secret) + else: + auth = None + data["client_id"] = client_id + data["client_secret"] = client_secret + data = self.session.post(token_url, data=data, auth=auth).json() # check token response @@ -143,10 +147,13 @@ class OAuthBase(Extractor): self.send(data["error"]) return - # display refresh token + # display token + part = key.partition("_")[0] self.send(OAUTH2_MSG_TEMPLATE.format( category=self.subcategory, - token=data["refresh_token"] + key=part, + Key=part.capitalize(), + token=data[key], )) @@ -165,7 +172,7 @@ class OAuthDeviantart(OAuthBase): "client-secret", deviantart.DeviantartAPI.CLIENT_SECRET), "https://www.deviantart.com/oauth2/authorize", "https://www.deviantart.com/oauth2/token", - "browse", + scope="browse", ) @@ -191,6 +198,27 @@ class OAuthFlickr(OAuthBase): ) +class OAuthPinterest(OAuthBase): + subcategory = "pinterest" + pattern = ["oauth:pinterest$"] + redirect_uri = "https://mikf.github.io/gallery-dl/oauth-redirect.html" + + def items(self): + yield Message.Version, 1 + + self._oauth2_authorization_code_grant( + self.oauth_config( + "client-id", pinterest.PinterestAPI.CLIENT_ID), + self.oauth_config( + "client-secret", pinterest.PinterestAPI.CLIENT_SECRET), + "https://api.pinterest.com/oauth/", + "https://api.pinterest.com/v1/oauth/token", + scope="read_public", + key="access_token", + auth=False, + ) + + class OAuthReddit(OAuthBase): subcategory = "reddit" pattern = ["oauth:reddit$"] @@ -204,7 +232,7 @@ class OAuthReddit(OAuthBase): "", "https://www.reddit.com/api/v1/authorize", "https://www.reddit.com/api/v1/access_token", - "read", + scope="read", ) @@ -253,18 +281,18 @@ Example: OAUTH2_MSG_TEMPLATE = """ -Your Refresh Token is +Your {Key} Token is {token} Put this value into your configuration file as -'extractor.{category}.refesh-token'. +'extractor.{category}.{key}-token'. Example: {{ "extractor": {{ "{category}": {{ - "refresh-token": "{token}" + "{key}-token": "{token}" }} }} }} diff --git a/gallery_dl/extractor/pinterest.py b/gallery_dl/extractor/pinterest.py index 5bc61b69..60c4ac1c 100644 --- a/gallery_dl/extractor/pinterest.py +++ b/gallery_dl/extractor/pinterest.py @@ -134,6 +134,9 @@ class PinterestPinitExtractor(PinterestExtractor): class PinterestAPI(): """Minimal interface for the pinterest API""" + CLIENT_ID = "4959725425749142746" + CLIENT_SECRET = ("2ea77dc64ca02974a728e46c5a9d2adf" + "cdd42f4d4ffb40ad064072165ad4b10d") def __init__(self, extractor, access_token="AfyIXxi1MJ6et0NlIl_vBchHbex-" "FSWylPyr2GJE2uu3W8A97QAAAAA"): diff --git a/scripts/build_supportedsites.py b/scripts/build_supportedsites.py index 398d3623..cd7253ce 100755 --- a/scripts/build_supportedsites.py +++ b/scripts/build_supportedsites.py @@ -88,6 +88,7 @@ AUTH_MAP = { "flickr" : "Optional (OAuth)", "idolcomplex": "Optional", "nijie" : "Required", + "pinterest" : "Optional (OAuth)", "pixiv" : "Required", "reddit" : "Optional (OAuth)", "sankaku" : "Optional", diff --git a/test/test_results.py b/test/test_results.py index fae3fba0..7f3fb94d 100644 --- a/test/test_results.py +++ b/test/test_results.py @@ -40,6 +40,8 @@ class TestExtractorResults(unittest.TestCase): config.set(("extractor", "deviantart", "client-id"), "7777") config.set(("extractor", "deviantart", "client-secret"), "ff14994c744d9208e5caeec7aab4a026") + config.set(("extractor", "pinterest", "access-token"), + "Ab1gUJFF5TFoWXRbX0p7_ue7jOHeFSX8iOrCIOZE24bOp0A6TQAAAAA") config.set(("extractor", "tumblr", "api-key"), "0cXoHfIqVzMQcc3HESZSNsVlulGxEXGDTTZCDrRrjaa0jmuTc6") From d10579edb5f956a09e731e6ae1c00426710c2ae0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mike=20F=C3=A4hrmann?= Date: Tue, 17 Apr 2018 17:12:42 +0200 Subject: [PATCH 06/12] [pinterest] improve PinterestAPI code; remove OAuth mentions on another note: access_tokens have been set to only allow for 10 requests per hour (from 200 yesterday) --- README.rst | 2 +- docs/configuration.rst | 15 +++++- docs/supportedsites.rst | 2 +- gallery_dl/extractor/pinterest.py | 76 ++++++++++++++++--------------- scripts/build_supportedsites.py | 1 - test/test_results.py | 3 +- 6 files changed, 56 insertions(+), 43 deletions(-) diff --git a/README.rst b/README.rst index bba884c8..5bb73eb1 100644 --- a/README.rst +++ b/README.rst @@ -194,7 +194,7 @@ OAuth ----- *gallery-dl* supports user authentication via OAuth_ for -``deviantart``, ``flickr``, ``pinterest``, ``reddit`` and ``tumblr``. +``deviantart``, ``flickr``, ``reddit`` and ``tumblr``. This is entirely optional, but grants *gallery-dl* the ability to issue requests on your account's behalf and enables it to access resources which would otherwise be unavailable to a public user. diff --git a/docs/configuration.rst b/docs/configuration.rst index 3d7cb301..6c464d41 100644 --- a/docs/configuration.rst +++ b/docs/configuration.rst @@ -820,13 +820,26 @@ How To =========== ===== +extractor.pinterest.access-token +-------------------------------- +=========== ===== +Type ``string`` +How To - register a Pinterest application and use its client-id and + client-secret (see `extractor.pinterest.client-id & .secret`_) + - run `gallery-dl oauth:pinterest` and authenticate access with + (preferably) the same account that registered the application +Notes Access tokens currently only allow for 10 requests per hour. +=========== ===== + + extractor.pinterest.client-id & .secret --------------------------------------- =========== ===== Type ``string`` How To - login and visit Pinterest's `Apps `__ section - - click "Create app" + - agree to "Pinterest Developer Terms and the API Policy" + and click "Create app" - choose a random name and description and click "Create" - scroll down and set a Site URL (e.g. https://example.org/) and allow https://mikf.github.io/gallery-dl/oauth-redirect.html diff --git a/docs/supportedsites.rst b/docs/supportedsites.rst index 602251dc..e55ba8b9 100644 --- a/docs/supportedsites.rst +++ b/docs/supportedsites.rst @@ -55,7 +55,7 @@ Niconico Seiga http://seiga.nicovideo.jp Images from Users, indi nijie https://nijie.info/ |Images from Use-3| Required Nyafuu Archive https://archive.nyafuu.org/ Threads Pawoo https://pawoo.net Images from Users, Images from Statuses -Pinterest https://www.pinterest.com Boards, Pins, pin.it Links Optional (OAuth) +Pinterest https://www.pinterest.com Boards, Pins, pin.it Links Pixiv https://www.pixiv.net/ |Images from Use-4| Required PowerManga https://powermanga.org/ Chapters, Manga Pure Mashiro http://reader.puremashiro.moe/ Chapters, Manga diff --git a/gallery_dl/extractor/pinterest.py b/gallery_dl/extractor/pinterest.py index 60c4ac1c..99d97f52 100644 --- a/gallery_dl/extractor/pinterest.py +++ b/gallery_dl/extractor/pinterest.py @@ -48,6 +48,9 @@ class PinterestPinExtractor(PinterestExtractor): ("https://www.pinterest.com/pin/858146903966145188/", { "exception": exception.StopExtraction, }), + ("https://www.pinterest.com/pin/85814690396614518/", { + "exception": exception.NotFoundError, + }), ] def __init__(self, match): @@ -134,59 +137,58 @@ class PinterestPinitExtractor(PinterestExtractor): class PinterestAPI(): """Minimal interface for the pinterest API""" - CLIENT_ID = "4959725425749142746" - CLIENT_SECRET = ("2ea77dc64ca02974a728e46c5a9d2adf" - "cdd42f4d4ffb40ad064072165ad4b10d") - def __init__(self, extractor, access_token="AfyIXxi1MJ6et0NlIl_vBchHbex-" - "FSWylPyr2GJE2uu3W8A97QAAAAA"): - access_token = extractor.config("access-token", access_token) - self.session = extractor.session - self.session.params["access_token"] = access_token + def __init__(self, extractor, access_token=None): self.log = extractor.log + self.session = extractor.session + self.access_token = ( + access_token or + extractor.config("access-token") or + "AfyIXxi1MJ6et0NlIl_vBchHbex-FSWylPyr2GJE2uu3W8A97QAAAAA" + ) def pin(self, pin_id, fields="id,image,note"): """Query information about a pin""" + endpoint = "pins/{}/".format(pin_id) params = {"fields": fields} - response = self.session.get( - "https://api.pinterest.com/v1/pins/{pin}/".format(pin=pin_id), - params=params - ) - return self._parse(response)["data"] + return self._call(endpoint, params)["data"] def board(self, user, board, fields="id,name,counts"): """Query information about a board""" + endpoint = "boards/{}/{}/".format(user, board) params = {"fields": fields} - response = self.session.get( - "https://api.pinterest.com/v1/boards/{user}/{board}/" - .format(user=user, board=board), params=params - ) - return self._parse(response)["data"] + return self._call(endpoint, params)["data"] - def board_pins(self, user, board, fields="id,image,note"): + def board_pins(self, user, board, fields="id,image,note", limit=100): """Yield all pins of a specific board""" - params = {"fields": fields, "limit": 100} - url = ("https://api.pinterest.com/v1/boards/{user}/{board}/pins/" - .format(user=user, board=board)) + endpoint = "boards/{}/{}/pins/".format(user, board) + params = {"fields": fields, "limit": limit} + return self._pagination(endpoint, params) + + def _call(self, endpoint, params): + params["access_token"] = self.access_token + url = "https://api.pinterest.com/v1/" + endpoint + + response = self.session.get(url, params=params) + status = response.status_code + data = response.json() + + if 200 <= status < 400 and data.get("data"): + return data + + msg = data.get("message", "") + if status == 404: + msg = msg.partition(" ")[0].lower() + raise exception.NotFoundError(msg) + self.log.error("API request failed: %s", msg or "") + raise exception.StopExtraction() + + def _pagination(self, endpoint, params): while True: - response = self._parse(self.session.get(url, params=params)) + response = self._call(endpoint, params) yield from response["data"] cursor = response["page"]["cursor"] if not cursor: return params["cursor"] = cursor - - def _parse(self, response): - """Parse an API response""" - data = response.json() - if 200 <= response.status_code < 400 and data.get("data"): - return data - - msg = data.get("message") - if response.status_code == 404: - msg = msg.partition(" ")[0].lower() - raise exception.NotFoundError(msg) - else: - self.log.error("API request failed: %s", msg or "") - raise exception.StopExtraction() diff --git a/scripts/build_supportedsites.py b/scripts/build_supportedsites.py index cd7253ce..398d3623 100755 --- a/scripts/build_supportedsites.py +++ b/scripts/build_supportedsites.py @@ -88,7 +88,6 @@ AUTH_MAP = { "flickr" : "Optional (OAuth)", "idolcomplex": "Optional", "nijie" : "Required", - "pinterest" : "Optional (OAuth)", "pixiv" : "Required", "reddit" : "Optional (OAuth)", "sankaku" : "Optional", diff --git a/test/test_results.py b/test/test_results.py index 7f3fb94d..2a1933f8 100644 --- a/test/test_results.py +++ b/test/test_results.py @@ -22,6 +22,7 @@ TRAVIS_SKIP = { # temporary issues, etc. BROKEN = { "luscious", # layout change + "pinterest", # access tokens have been set to 10 requests per hour "puremashiro", # online reader down } @@ -40,8 +41,6 @@ class TestExtractorResults(unittest.TestCase): config.set(("extractor", "deviantart", "client-id"), "7777") config.set(("extractor", "deviantart", "client-secret"), "ff14994c744d9208e5caeec7aab4a026") - config.set(("extractor", "pinterest", "access-token"), - "Ab1gUJFF5TFoWXRbX0p7_ue7jOHeFSX8iOrCIOZE24bOp0A6TQAAAAA") config.set(("extractor", "tumblr", "api-key"), "0cXoHfIqVzMQcc3HESZSNsVlulGxEXGDTTZCDrRrjaa0jmuTc6") From df7e18399eeaefa06f428a57094b89eeb61b2f15 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mike=20F=C3=A4hrmann?= Date: Tue, 17 Apr 2018 17:32:21 +0200 Subject: [PATCH 07/12] [luscious] fix image order --- gallery_dl/extractor/luscious.py | 2 +- test/test_results.py | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/gallery_dl/extractor/luscious.py b/gallery_dl/extractor/luscious.py index e0e52d50..3c5ec2b7 100644 --- a/gallery_dl/extractor/luscious.py +++ b/gallery_dl/extractor/luscious.py @@ -74,7 +74,7 @@ class LusciousAlbumExtractor(AsynchronousExtractor): """Collect image-urls and -metadata""" num = 1 - if 'class="read-more-btn"' in page: + if 'class="search_filter' in page: url = "{}/pictures/album/x_{}/sorted/oldest/page/1/".format( self.root, self.gid) page = self.request(url).text diff --git a/test/test_results.py b/test/test_results.py index 2a1933f8..19bffa05 100644 --- a/test/test_results.py +++ b/test/test_results.py @@ -21,7 +21,6 @@ TRAVIS_SKIP = { # temporary issues, etc. BROKEN = { - "luscious", # layout change "pinterest", # access tokens have been set to 10 requests per hour "puremashiro", # online reader down } From b1325d4d2c2e5ba3d12ff2af1d6d35335dada953 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mike=20F=C3=A4hrmann?= Date: Wed, 18 Apr 2018 18:01:43 +0200 Subject: [PATCH 08/12] fix extractor docstrings --- gallery_dl/extractor/artstation.py | 2 +- gallery_dl/extractor/tumblr.py | 2 +- test/test_extractor.py | 11 +++++++++++ 3 files changed, 13 insertions(+), 2 deletions(-) diff --git a/gallery_dl/extractor/artstation.py b/gallery_dl/extractor/artstation.py index fbea9595..39e6fe6b 100644 --- a/gallery_dl/extractor/artstation.py +++ b/gallery_dl/extractor/artstation.py @@ -137,7 +137,7 @@ class ArtstationUserExtractor(ArtstationExtractor): class ArtstationAlbumExtractor(ArtstationExtractor): - """Extractor for all projects of an artstation user""" + """Extractor for all projects in an artstation album""" subcategory = "album" directory_fmt = ["{category}", "{userinfo[username]}", "Albums", "{album[id]} - {album[title]}"] diff --git a/gallery_dl/extractor/tumblr.py b/gallery_dl/extractor/tumblr.py index f853d964..c4e94dad 100644 --- a/gallery_dl/extractor/tumblr.py +++ b/gallery_dl/extractor/tumblr.py @@ -234,7 +234,7 @@ class TumblrTagExtractor(TumblrExtractor): class TumblrLikesExtractor(TumblrExtractor): - """Extractor for images from a tumblr-user by tag""" + """Extractor for images from a tumblr-user's liked posts""" subcategory = "likes" directory_fmt = ["{category}", "{name}", "likes"] archive_fmt = "f_{blog[name]}_{id}_{offset}" diff --git a/test/test_extractor.py b/test/test_extractor.py index 9c8b9a14..00d6f4ba 100644 --- a/test/test_extractor.py +++ b/test/test_extractor.py @@ -133,6 +133,17 @@ class TestExtractor(unittest.TestCase): msg = "'{}' isn't matched by any pattern".format(url) self.fail(msg) + def test_docstrings(self): + """ensure docstring uniqueness""" + for extr1 in extractor.extractors(): + for extr2 in extractor.extractors(): + if extr1 != extr2 and extr1.__doc__ and extr2.__doc__: + self.assertNotEqual( + extr1.__doc__, + extr2.__doc__, + "{} <-> {}".format(extr1, extr2), + ) + if __name__ == "__main__": unittest.main() From 10cc59f3b522e5d6080532709905b096281f76dc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mike=20F=C3=A4hrmann?= Date: Wed, 18 Apr 2018 18:06:30 +0200 Subject: [PATCH 09/12] fix extractor names --- gallery_dl/extractor/slideshare.py | 2 +- test/test_extractor.py | 23 +++++++++++++++++++++++ test/test_results.py | 1 + 3 files changed, 25 insertions(+), 1 deletion(-) diff --git a/gallery_dl/extractor/slideshare.py b/gallery_dl/extractor/slideshare.py index 1a680d34..468d4074 100644 --- a/gallery_dl/extractor/slideshare.py +++ b/gallery_dl/extractor/slideshare.py @@ -12,7 +12,7 @@ from .common import Extractor, Message from .. import text, util -class SlideshareExtractor(Extractor): +class SlidesharePresentationExtractor(Extractor): """Extractor for images from a presentation on slideshare.net""" category = "slideshare" subcategory = "presentation" diff --git a/test/test_extractor.py b/test/test_extractor.py index 00d6f4ba..024c3d79 100644 --- a/test/test_extractor.py +++ b/test/test_extractor.py @@ -144,6 +144,29 @@ class TestExtractor(unittest.TestCase): "{} <-> {}".format(extr1, extr2), ) + def test_names(self): + """Ensure extractor classes are named CategorySubcategoryExtractor""" + mapping = { + "2chan" : "futaba", + "3dbooru": "threedeebooru", + "4chan" : "fourchan", + "4plebs" : "fourplebs", + "8chan" : "infinitychan", + "b4k" : "bfourk", + "oauth" : None, + "rbt" : "rebeccablacktech", + "whatisthisimnotgoodwithcomputers": "witingwc", + } + + for extr in extractor.extractors(): + category = mapping.get(extr.category, extr.category) + if category: + expected = "{}{}Extractor".format( + category.capitalize(), + extr.subcategory.capitalize(), + ) + self.assertEqual(expected, extr.__name__) + if __name__ == "__main__": unittest.main() diff --git a/test/test_results.py b/test/test_results.py index 19bffa05..fd370dab 100644 --- a/test/test_results.py +++ b/test/test_results.py @@ -21,6 +21,7 @@ TRAVIS_SKIP = { # temporary issues, etc. BROKEN = { + "gomanga", # server down "pinterest", # access tokens have been set to 10 requests per hour "puremashiro", # online reader down } From 80bead739d182428a39b4b3b5e7ff91ebd10ae98 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mike=20F=C3=A4hrmann?= Date: Fri, 20 Apr 2018 15:31:05 +0200 Subject: [PATCH 10/12] [oauth] require custom client-* values for pinterest --- gallery_dl/extractor/oauth.py | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/gallery_dl/extractor/oauth.py b/gallery_dl/extractor/oauth.py index c8269f03..8dc6e26f 100644 --- a/gallery_dl/extractor/oauth.py +++ b/gallery_dl/extractor/oauth.py @@ -9,7 +9,7 @@ """Utility classes to setup OAuth and link a users account to gallery-dl""" from .common import Extractor, Message -from . import deviantart, flickr, pinterest, reddit, tumblr +from . import deviantart, flickr, reddit, tumblr from .. import text, util, config import os import urllib.parse @@ -206,16 +206,18 @@ class OAuthPinterest(OAuthBase): def items(self): yield Message.Version, 1 + client_id = self.oauth_config("client-id") + client_secret = self.oauth_config("client-secret") + + if not client_id or not client_secret: + self.log.error("'client-id' and 'client-secret' required") + return + self._oauth2_authorization_code_grant( - self.oauth_config( - "client-id", pinterest.PinterestAPI.CLIENT_ID), - self.oauth_config( - "client-secret", pinterest.PinterestAPI.CLIENT_SECRET), + client_id, client_secret, "https://api.pinterest.com/oauth/", "https://api.pinterest.com/v1/oauth/token", - scope="read_public", - key="access_token", - auth=False, + scope="read_public", key="access_token", auth=False, ) From eb37fbf0e84d1dbbe86784802dba1597a3a23823 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mike=20F=C3=A4hrmann?= Date: Fri, 20 Apr 2018 18:22:57 +0200 Subject: [PATCH 11/12] [hentaifoundry] improve extractor - use common base class - better pagination - respect '.../page/' - implement skip() / --range support - get YII_CSRF_TOKEN from cookies --- gallery_dl/extractor/hentaifoundry.py | 164 +++++++++++++------------- 1 file changed, 85 insertions(+), 79 deletions(-) diff --git a/gallery_dl/extractor/hentaifoundry.py b/gallery_dl/extractor/hentaifoundry.py index 2fe4daa7..63f37274 100644 --- a/gallery_dl/extractor/hentaifoundry.py +++ b/gallery_dl/extractor/hentaifoundry.py @@ -10,70 +10,44 @@ from .common import Extractor, Message from .. import text, util, exception +from urllib.parse import urljoin -class HentaifoundryUserExtractor(Extractor): - """Extractor for all images of a hentai-foundry-user""" +class HentaifoundryExtractor(Extractor): category = "hentaifoundry" - subcategory = "user" directory_fmt = ["{category}", "{artist}"] filename_fmt = "{category}_{index}_{title}.{extension}" archive_fmt = "{index}" - pattern = [r"(?:https?://)?(?:www\.)?hentai-foundry\.com/" - r"(?:pictures/user/([^/]+)/?$|user/([^/]+)/profile)"] - test = [ - ("https://www.hentai-foundry.com/pictures/user/Tenpura", { - "url": "ebbc981a85073745e3ca64a0f2ab31fab967fc28", - "keyword": "f8fecc8aa89978ecf402ec221243978fe791bd54", - }), - ("http://www.hentai-foundry.com/user/asdq/profile", { - "exception": exception.NotFoundError, - }), - ] - base_url = "https://www.hentai-foundry.com/pictures/user/" + root = "https://www.hentai-foundry.com" + per_page = 25 - def __init__(self, match): + def __init__(self, artist): Extractor.__init__(self) - self.artist = match.group(1) or match.group(2) + self.artist = artist + self.artist_url = "{}/pictures/user/{}".format(self.root, self.artist) + self.start_post = 0 def items(self): - data, token = self.get_job_metadata() - self.set_filters(token) + data = self.get_job_metadata() yield Message.Version, 1 yield Message.Directory, data - for url, image in self.get_images(data["count"]): + + for page_url in util.advance(self.get_image_pages(), self.start_post): + url, image = self.get_image_metadata(page_url) image.update(data) + if not image["extension"]: + image["extension"] = "jpg" yield Message.Url, url, image - def get_images(self, count): - """Yield url and keywords for all images of one artist""" - num = 1 - needle = 'thumbTitle">Pictures (', ')', pos) - return {"artist": self.artist, "count": util.safe_int(count)}, token def get_image_metadata(self, url): """Collect metadata for an image""" - page = self.request(url).text + page = self.request(urljoin(self.root, url)).text index = url.rsplit("/", 2)[1] title, pos = text.extract( page, 'Pictures » ', '<') @@ -83,8 +57,65 @@ class HentaifoundryUserExtractor(Extractor): text.nameext_from_url(part, data) return "https://pictures.hentai-foundry.com" + part, data - def set_filters(self, token): + +class HentaifoundryUserExtractor(HentaifoundryExtractor): + """Extractor for all images of a hentai-foundry-user""" + subcategory = "user" + pattern = [r"(?:https?://)?(?:www\.)?hentai-foundry\.com/" + r"(?:pictures/user/([^/]+)(?:/(?:page/(\d+))?)?$" + r"|user/([^/]+)/profile)"] + test = [ + ("https://www.hentai-foundry.com/pictures/user/Tenpura", { + "url": "ebbc981a85073745e3ca64a0f2ab31fab967fc28", + "keyword": "f8fecc8aa89978ecf402ec221243978fe791bd54", + }), + ("http://www.hentai-foundry.com/user/asdq/profile", { + "exception": exception.NotFoundError, + }), + ("https://www.hentai-foundry.com/pictures/user/Tenpura/page/3", None), + ] + + def __init__(self, match): + HentaifoundryExtractor.__init__(self, match.group(1) or match.group(3)) + self.start_page = util.safe_int(match.group(2), 1) + self._skipped = (self.start_page - 1) * self.per_page + + def skip(self, num): + pages, posts = divmod(num, self.per_page) + self.start_page += pages + self.start_post += posts + self._skipped += num + return num + + def get_image_pages(self): + num = self.start_page + + while True: + url = "{}/page/{}".format(self.artist_url, num) + page = self.request(url).text + yield from text.extract_iter(page, 'thumbTitle">Pictures (', ')')[0]) + if self._skipped >= count: + raise exception.StopExtraction() + + self.set_filters() + return {"artist": self.artist, "count": count} + + def set_filters(self): """Set site-internal filters to show all images""" + token = text.extract( + self.session.cookies["YII_CSRF_TOKEN"], "%22", "%22")[0] formdata = { "YII_CSRF_TOKEN": token, "rating_nudity": 3, @@ -114,55 +145,30 @@ class HentaifoundryUserExtractor(Extractor): method="post", data=formdata, allow_empty=True) -class HentaifoundryImageExtractor(Extractor): +class HentaifoundryImageExtractor(HentaifoundryExtractor): """Extractor for a single image from hentaifoundry.com""" - category = "hentaifoundry" subcategory = "image" - directory_fmt = ["{category}", "{artist}"] - filename_fmt = "{category}_{index}_{title}.{extension}" - archive_fmt = "{index}" pattern = [(r"(?:https?://)?(?:www\.|pictures\.)?hentai-foundry\.com/" r"(?:pictures/user/([^/]+)/(\d+)" r"|[^/]/([^/]+)/(\d+))")] test = [ - (("http://www.hentai-foundry.com/" - "pictures/user/Tenpura/407501/shimakaze"), { + (("http://www.hentai-foundry.com" + "/pictures/user/Tenpura/407501/shimakaze"), { "url": "fbf2fd74906738094e2575d2728e8dc3de18a8a3", - "keyword": "85b8e26fa93d00ae1333cb7b418078f1792dc4a8", + "keyword": "2956321893e9187edde4aeac6bed889449692e6a", "content": "91bf01497c39254b6dfb234a18e8f01629c77fd1", }), ("http://www.hentai-foundry.com/pictures/user/Tenpura/340853/", { - "exception": exception.NotFoundError, + "exception": exception.HttpError, }), ] def __init__(self, match): - Extractor.__init__(self) - self.artist = match.group(1) or match.group(3) + HentaifoundryExtractor.__init__(self, match.group(1) or match.group(3)) self.index = match.group(2) or match.group(4) - def items(self): - url, data = self.get_image_metadata() - yield Message.Version, 1 - yield Message.Directory, data - yield Message.Url, url, data + def get_image_pages(self): + return ("{}/{}?enterAgree=1".format(self.artist_url, self.index),) - def get_image_metadata(self): - """Collect metadata for an image""" - url = "https://www.hentai-foundry.com/pictures/user/{}/{}".format( - self.artist, self.index) - response = self.request(url + "?enterAgree=1", fatal=False) - if response.status_code == 404: - raise exception.NotFoundError("image") - extr = text.extract - page = response.text - artist, pos = extr(page, ' » ', '<', pos) - url , pos = extr(page, '//pictures.hentai-foundry.com', '"', pos) - data = { - "artist": artist, - "index": util.safe_int(self.index), - "title": text.unescape(title), - } - text.nameext_from_url(url, data) - return "https://pictures.hentai-foundry.com" + url, data + def get_job_metadata(self): + return {"artist": self.artist, "index": util.safe_int(self.index)} From a2020c736e8f947a7f44a5daf4cdfd598a29e17b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mike=20F=C3=A4hrmann?= Date: Fri, 20 Apr 2018 18:42:09 +0200 Subject: [PATCH 12/12] release version 1.3.4 --- CHANGELOG.md | 8 +++++++- README.rst | 4 ++-- gallery_dl/version.py | 2 +- 3 files changed, 10 insertions(+), 4 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 02430bd0..1edb2263 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,12 @@ # Changelog -## Unreleased +## 1.3.4 - 2018-04-20 +- Added support for custom OAuth2 credentials for `pinterest` +- Improved rate limit handling for `tumblr` extractors +- Improved `hentaifoundry` extractors +- Improved `imgur` URL patterns +- Fixed miscellaneous extraction issues for `luscious` and `komikcast` +- Removed `loveisover` and `spectrumnexus` ## 1.3.3 - 2018-04-06 - Added extractors for diff --git a/README.rst b/README.rst index 5bb73eb1..af008c44 100644 --- a/README.rst +++ b/README.rst @@ -215,12 +215,12 @@ access to *gallery-dl*. Authorize it and you will he shown one or more .. _gallery-dl-example.conf: https://github.com/mikf/gallery-dl/blob/master/docs/gallery-dl-example.conf .. _configuration.rst: https://github.com/mikf/gallery-dl/blob/master/docs/configuration.rst .. _Complete List: https://github.com/mikf/gallery-dl/blob/master/docs/supportedsites.rst -.. _standalone executable: https://github.com/mikf/gallery-dl/releases/download/v1.3.3/gallery-dl.exe +.. _standalone executable: https://github.com/mikf/gallery-dl/releases/download/v1.3.4/gallery-dl.exe .. _Python: https://www.python.org/downloads/ .. _Requests: https://pypi.python.org/pypi/requests/ .. _PyPI: https://pypi.python.org/pypi .. _pip: https://pip.pypa.io/en/stable/ -.. _stable: https://github.com/mikf/gallery-dl/archive/v1.3.3.zip +.. _stable: https://github.com/mikf/gallery-dl/archive/v1.3.4.zip .. _dev: https://github.com/mikf/gallery-dl/archive/master.zip .. _OAuth: https://en.wikipedia.org/wiki/OAuth diff --git a/gallery_dl/version.py b/gallery_dl/version.py index 6d2cdfd6..6cd64a90 100644 --- a/gallery_dl/version.py +++ b/gallery_dl/version.py @@ -6,4 +6,4 @@ # it under the terms of the GNU General Public License version 2 as # published by the Free Software Foundation. -__version__ = "1.3.4-dev" +__version__ = "1.3.4"