diff --git a/CHANGELOG.md b/CHANGELOG.md
index 844de878..0765ef0d 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,6 +1,12 @@
# Changelog
-## Unreleased
+## 1.3.5 - 2018-05-04
+- Added support for:
+ - `smugmug` - https://www.smugmug.com/
+- Added title information for `mangadex` chapters
+- Improved the `pinterest` API implementation ([#83](https://github.com/mikf/gallery-dl/issues/83))
+- Improved error handling for `deviantart` and `tumblr`
+- Removed `gomanga` and `puremashiro`
## 1.3.4 - 2018-04-20
- Added support for custom OAuth2 credentials for `pinterest`
diff --git a/README.rst b/README.rst
index af008c44..cbdfdfc6 100644
--- a/README.rst
+++ b/README.rst
@@ -215,12 +215,12 @@ access to *gallery-dl*. Authorize it and you will he shown one or more
.. _gallery-dl-example.conf: https://github.com/mikf/gallery-dl/blob/master/docs/gallery-dl-example.conf
.. _configuration.rst: https://github.com/mikf/gallery-dl/blob/master/docs/configuration.rst
.. _Complete List: https://github.com/mikf/gallery-dl/blob/master/docs/supportedsites.rst
-.. _standalone executable: https://github.com/mikf/gallery-dl/releases/download/v1.3.4/gallery-dl.exe
+.. _standalone executable: https://github.com/mikf/gallery-dl/releases/download/v1.3.5/gallery-dl.exe
.. _Python: https://www.python.org/downloads/
.. _Requests: https://pypi.python.org/pypi/requests/
.. _PyPI: https://pypi.python.org/pypi
.. _pip: https://pip.pypa.io/en/stable/
-.. _stable: https://github.com/mikf/gallery-dl/archive/v1.3.4.zip
+.. _stable: https://github.com/mikf/gallery-dl/archive/v1.3.5.zip
.. _dev: https://github.com/mikf/gallery-dl/archive/master.zip
.. _OAuth: https://en.wikipedia.org/wiki/OAuth
diff --git a/docs/configuration.rst b/docs/configuration.rst
index 6c464d41..f3fef6a0 100644
--- a/docs/configuration.rst
+++ b/docs/configuration.rst
@@ -820,35 +820,6 @@ How To
=========== =====
-extractor.pinterest.access-token
---------------------------------
-=========== =====
-Type ``string``
-How To - register a Pinterest application and use its client-id and
- client-secret (see `extractor.pinterest.client-id & .secret`_)
- - run `gallery-dl oauth:pinterest` and authenticate access with
- (preferably) the same account that registered the application
-Notes Access tokens currently only allow for 10 requests per hour.
-=========== =====
-
-
-extractor.pinterest.client-id & .secret
----------------------------------------
-=========== =====
-Type ``string``
-How To - login and visit Pinterest's
- `Apps `__ section
- - agree to "Pinterest Developer Terms and the API Policy"
- and click "Create app"
- - choose a random name and description and click "Create"
- - scroll down and set a Site URL (e.g. https://example.org/)
- and allow https://mikf.github.io/gallery-dl/oauth-redirect.html
- as Redirect URI
- - scroll back up again, copy the "App ID" and "App secret" values
- and put them in your configuration file
-=========== =====
-
-
extractor.reddit.client-id & .user-agent
----------------------------------------
=========== =====
diff --git a/docs/supportedsites.rst b/docs/supportedsites.rst
index e55ba8b9..3a7a8fcf 100644
--- a/docs/supportedsites.rst
+++ b/docs/supportedsites.rst
@@ -24,7 +24,6 @@ Flickr https://www.flickr.com/ |Images from Use-2|
Futaba Channel https://www.2chan.net/ Threads
Gelbooru https://gelbooru.com/ Pools, Posts, Tag-Searches
Gfycat https://gfycat.com/ individual Images
-GoManga https://gomanga.co/ Chapters, Manga
HBrowse http://www.hbrowse.com/ Chapters, Manga
Hentai Foundry https://www.hentai-foundry.com/ Images from Users, individual Images
Hentai2Read https://hentai2read.com/ Chapters, Manga
@@ -58,7 +57,6 @@ Pawoo https://pawoo.net Images from Users, Imag
Pinterest https://www.pinterest.com Boards, Pins, pin.it Links
Pixiv https://www.pixiv.net/ |Images from Use-4| Required
PowerManga https://powermanga.org/ Chapters, Manga
-Pure Mashiro http://reader.puremashiro.moe/ Chapters, Manga
Read Comic Online http://readcomiconline.to/ Comic-Issues, Comics
RebeccaBlackTech https://rbt.asia/ Threads
Reddit https://reddit.com/ individual Images, Submissions, Subreddits Optional (OAuth)
@@ -70,6 +68,7 @@ Sea Otter Scans https://reader.seaotterscans.com/ Chapters, Manga
Sen Manga http://raw.senmanga.com/ Chapters
Sense-Scans http://sensescans.com/ Chapters, Manga
SlideShare https://www.slideshare.net/ Presentations
+SmugMug https://www.smugmug.com/ |Albums, individ-5|
Subapics https://subapics.com/ Chapters, Manga
The /b/ Archive https://thebarchive.com/ Threads
Tumblr https://www.tumblr.com/ Images from Users, Likes, Posts, Tag-Searches Optional (OAuth)
@@ -93,3 +92,4 @@ Turboimagehost https://turboimagehost.com/ individual Images
.. |Images from Use-2| replace:: Images from Users, Albums, Favorites, Galleries, Groups, individual Images, Search Results
.. |Images from Use-3| replace:: Images from Users, Doujin, Favorites, individual Images
.. |Images from Use-4| replace:: Images from Users, Bookmarks, Favorites, pixiv.me Links, Rankings, Individual Images
+.. |Albums, individ-5| replace:: Albums, individual Images, Images from Users and Folders
diff --git a/gallery_dl/extractor/__init__.py b/gallery_dl/extractor/__init__.py
index 1a9c28c3..59213681 100644
--- a/gallery_dl/extractor/__init__.py
+++ b/gallery_dl/extractor/__init__.py
@@ -31,7 +31,6 @@ modules = [
"flickr",
"gelbooru",
"gfycat",
- "gomanga",
"hbrowse",
"hentai2read",
"hentaifoundry",
@@ -65,7 +64,6 @@ modules = [
"pinterest",
"pixiv",
"powermanga",
- "puremashiro",
"readcomiconline",
"rebeccablacktech",
"reddit",
@@ -77,6 +75,7 @@ modules = [
"senmanga",
"sensescans",
"slideshare",
+ "smugmug",
"subapics",
"thebarchive",
"tumblr",
diff --git a/gallery_dl/extractor/directlink.py b/gallery_dl/extractor/directlink.py
index b9e0c868..5cb76f34 100644
--- a/gallery_dl/extractor/directlink.py
+++ b/gallery_dl/extractor/directlink.py
@@ -21,10 +21,9 @@ class DirectlinkExtractor(Extractor):
r"(?:jpe?g|jpe|png|gif|web[mp]|mp4|mkv|og[gmv]|opus))"
r"(?:\?(?P[^/?#]*))?(?:#(?P.*))?$"]
test = [
- (("https://photos.smugmug.com/The-World/Hawaii/"
- "i-SWz2K6n/2/X3/IMG_0311-X3.jpg"), {
- "url": "32ee1045881e17ef3f13a9958595afa42421ec6c",
- "keyword": "2427b68c14006489df1776bb1bcd3bc24be25e10",
+ (("https://en.wikipedia.org/static/images/project-logos/enwiki.png"), {
+ "url": "18c5d00077332e98e53be9fed2ee4be66154b88d",
+ "keyword": "66bce3a0a6872d8497e1984eb49d54a3ed0d3d5e",
}),
# more complex example
("https://example.org/path/file.webm?que=1&ry=2#fragment", {
diff --git a/gallery_dl/extractor/gomanga.py b/gallery_dl/extractor/gomanga.py
deleted file mode 100644
index 0e547a74..00000000
--- a/gallery_dl/extractor/gomanga.py
+++ /dev/null
@@ -1,38 +0,0 @@
-# -*- coding: utf-8 -*-
-
-# Copyright 2017 Mike Fährmann
-#
-# This program is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License version 2 as
-# published by the Free Software Foundation.
-
-"""Extractors for https://gomanga.co/"""
-
-from . import foolslide
-
-
-class GomangaChapterExtractor(foolslide.FoolslideChapterExtractor):
- """Extractor for manga-chapters from gomanga.co"""
- category = "gomanga"
- pattern = foolslide.chapter_pattern(r"(?:www\.)?gomanga\.co/reader")
- test = [
- ("https://gomanga.co/reader/read/mata-kata-omou/en/0/1/page/11", {
- "url": "5088d75bb44327fc503c85b52b1d6a371b8057f2",
- "keyword": "10624e78924c37fd39543270a6965f2082bde08f",
- }),
- ("https://gomanga.co/reader/read/pastel/en/31/144/", {
- "url": "9cc2052fbf36344c573c754c5abe533a14b3e280",
- "keyword": "a355cd3197e70c24b84d3885e8a5ff0ac22537bf",
- }),
- ]
- method = "double"
-
-
-class GomangaMangaExtractor(foolslide.FoolslideMangaExtractor):
- """Extractor for manga from gomanga.co"""
- category = "gomanga"
- pattern = foolslide.manga_pattern(r"(?:www\.)?gomanga\.co/reader")
- test = [("https://gomanga.co/reader/series/pastel/", {
- "url": "bd1c82d70838d54140a8209296e789f27ceab7cd",
- "keyword": "fb1fd14548602dbe4f6e70a633429762972c1d5d",
- })]
diff --git a/gallery_dl/extractor/oauth.py b/gallery_dl/extractor/oauth.py
index 8dc6e26f..f161126e 100644
--- a/gallery_dl/extractor/oauth.py
+++ b/gallery_dl/extractor/oauth.py
@@ -198,29 +198,6 @@ class OAuthFlickr(OAuthBase):
)
-class OAuthPinterest(OAuthBase):
- subcategory = "pinterest"
- pattern = ["oauth:pinterest$"]
- redirect_uri = "https://mikf.github.io/gallery-dl/oauth-redirect.html"
-
- def items(self):
- yield Message.Version, 1
-
- client_id = self.oauth_config("client-id")
- client_secret = self.oauth_config("client-secret")
-
- if not client_id or not client_secret:
- self.log.error("'client-id' and 'client-secret' required")
- return
-
- self._oauth2_authorization_code_grant(
- client_id, client_secret,
- "https://api.pinterest.com/oauth/",
- "https://api.pinterest.com/v1/oauth/token",
- scope="read_public", key="access_token", auth=False,
- )
-
-
class OAuthReddit(OAuthBase):
subcategory = "reddit"
pattern = ["oauth:reddit$"]
diff --git a/gallery_dl/extractor/pinterest.py b/gallery_dl/extractor/pinterest.py
index 67f7b318..a244cf9e 100644
--- a/gallery_dl/extractor/pinterest.py
+++ b/gallery_dl/extractor/pinterest.py
@@ -10,13 +10,14 @@
from .common import Extractor, Message
from .. import text, exception
+import json
class PinterestExtractor(Extractor):
"""Base class for pinterest extractors"""
category = "pinterest"
- filename_fmt = "{category}_{pin_id}.{extension}"
- archive_fmt = "{pin_id}"
+ filename_fmt = "{category}_{id}.{extension}"
+ archive_fmt = "{id}"
def __init__(self):
Extractor.__init__(self)
@@ -24,15 +25,11 @@ class PinterestExtractor(Extractor):
def data_from_pin(self, pin):
"""Get image url and metadata from a pin-object"""
- img = pin["image"]["original"]
+ img = pin["images"]["orig"]
url = img["url"]
- data = {
- "pin_id": text.parse_int(pin["id"]),
- "note": pin["note"],
- "width": text.parse_int(img["width"]),
- "height": text.parse_int(img["height"]),
- }
- return url, text.nameext_from_url(url, data)
+ pin["width"] = img["width"]
+ pin["height"] = img["height"]
+ return url, text.nameext_from_url(url, pin)
class PinterestPinExtractor(PinterestExtractor):
@@ -42,13 +39,11 @@ class PinterestPinExtractor(PinterestExtractor):
test = [
("https://www.pinterest.com/pin/858146903966145189/", {
"url": "afb3c26719e3a530bb0e871c480882a801a4e8a5",
- "keyword": "f651cb271247f306d1d30385d49c7b82da44c2b1",
- "content": "d3e24bc9f7af585e8c23b9136956bd45a4d9b947",
+ # image version depends on CDN server used
+ # "content": "d3e24bc9f7af585e8c23b9136956bd45a4d9b947",
+ # "content": "4c435a66f6bb82bb681db2ecc888f76cf6c5f9ca",
}),
("https://www.pinterest.com/pin/858146903966145188/", {
- "exception": exception.StopExtraction,
- }),
- ("https://www.pinterest.com/pin/85814690396614518/", {
"exception": exception.NotFoundError,
}),
]
@@ -68,13 +63,13 @@ class PinterestPinExtractor(PinterestExtractor):
class PinterestBoardExtractor(PinterestExtractor):
"""Extractor for images from a board from pinterest.com"""
subcategory = "board"
- directory_fmt = ["{category}", "{user}", "{board}"]
+ directory_fmt = ["{category}", "{board[owner][username]}", "{board[name]}"]
+ archive_fmt = "{board[id]}_{id}"
pattern = [r"(?:https?://)?(?:[^./]+\.)?pinterest\.[^/]+/"
r"(?!pin/)([^/?#&]+)/([^/?#&]+)"]
test = [
("https://www.pinterest.com/g1952849/test-/", {
"url": "85911dfca313f3f7f48c2aa0bc684f539d1d80a6",
- "keyword": "c54cf5aa830994f2ed4871efa7154a5fdaa1c2ce",
}),
("https://www.pinterest.com/g1952848/test/", {
"exception": exception.NotFoundError,
@@ -83,30 +78,18 @@ class PinterestBoardExtractor(PinterestExtractor):
def __init__(self, match):
PinterestExtractor.__init__(self)
- self.user, self.board = match.groups()
+ self.user = text.unquote(match.group(1))
+ self.board = text.unquote(match.group(2))
def items(self):
board = self.api.board(self.user, self.board)
- data = self.data_from_board(board)
- num = data["count"]
+ data = {"board": board, "count": board["pin_count"]}
yield Message.Version, 1
yield Message.Directory, data
- for pin in self.api.board_pins(self.user, self.board):
- url, pdata = self.data_from_pin(pin)
- data.update(pdata)
- data["num"] = num
- num -= 1
- yield Message.Url, url, data
-
- def data_from_board(self, board):
- """Get metadata from a board-object"""
- data = {
- "user": self.user,
- "board_id": text.parse_int(board["id"]),
- "board": board["name"],
- "count": board["counts"]["pins"],
- }
- return data
+ for pin in self.api.board_pins(board["id"]):
+ url, pin_data = self.data_from_pin(pin)
+ pin_data.update(data)
+ yield Message.Url, url, pin_data
class PinterestPinitExtractor(PinterestExtractor):
@@ -136,59 +119,67 @@ class PinterestPinitExtractor(PinterestExtractor):
class PinterestAPI():
- """Minimal interface for the pinterest API"""
+ """Minimal interface for the Pinterest Web API
- def __init__(self, extractor, access_token=None):
- self.log = extractor.log
- self.session = extractor.session
- self.access_token = (
- access_token or
- extractor.config("access-token") or
- "AfyIXxi1MJ6et0NlIl_vBchHbex-FSWylPyr2GJE2uu3W8A97QAAAAA"
- )
+ For a better and more complete implementation in PHP, see
+ - https://github.com/seregazhuk/php-pinterest-bot
+ """
- def pin(self, pin_id, fields="id,image,note"):
+    BASE_URL = "https://uk.pinterest.com"  # host of all resource URLs
+    HEADERS = {  # sent with every API request
+        "Accept"              : "application/json, text/javascript, "
+                                "*/*; q=0.01",  # ';' attaches the q-value
+        "Accept-Language"     : "en-US,en;q=0.5",
+        "X-Pinterest-AppState": "active",
+        "X-APP-VERSION"       : "cb1c7f9",
+        "X-Requested-With"    : "XMLHttpRequest",
+        "Origin"              : BASE_URL + "/",
+    }
+
+ def __init__(self, extractor):
+ self.extractor = extractor
+
+ def pin(self, pin_id):
"""Query information about a pin"""
- endpoint = "pins/{}/".format(pin_id)
- params = {"fields": fields}
- return self._call(endpoint, params)["data"]
+ options = {"id": pin_id, "field_set_key": "detailed"}
+ return self._call("Pin", options)["resource_response"]["data"]
- def board(self, user, board, fields="id,name,counts"):
+ def board(self, user, board):
"""Query information about a board"""
- endpoint = "boards/{}/{}/".format(user, board)
- params = {"fields": fields}
- return self._call(endpoint, params)["data"]
+ options = {"slug": board, "username": user,
+ "field_set_key": "detailed"}
+ return self._call("Board", options)["resource_response"]["data"]
- def board_pins(self, user, board, fields="id,image,note", limit=100):
+ def board_pins(self, board_id):
"""Yield all pins of a specific board"""
- endpoint = "boards/{}/{}/pins/".format(user, board)
- params = {"fields": fields, "limit": limit}
- return self._pagination(endpoint, params)
+ options = {"board_id": board_id}
+ return self._pagination("BoardFeed", options)
- def _call(self, endpoint, params):
- params["access_token"] = self.access_token
- url = "https://api.pinterest.com/v1/" + endpoint
+ def _call(self, resource, options):
+ url = "{}/resource/{}Resource/get/".format(self.BASE_URL, resource)
+ params = {"data": json.dumps({"options": options}), "source_url": ""}
- response = self.session.get(url, params=params)
- status = response.status_code
+ response = self.extractor.request(
+ url, params=params, headers=self.HEADERS, fatal=False)
data = response.json()
- if 200 <= status < 400 and data.get("data"):
+ if 200 <= response.status_code < 400 and not response.history:
return data
- msg = data.get("message", "")
- if status == 404:
- msg = msg.partition(" ")[0].lower()
- raise exception.NotFoundError(msg)
- self.log.error("API request failed: %s", msg or "")
+ if response.status_code == 404 or response.history:
+ raise exception.NotFoundError(self.extractor.subcategory)
+ self.extractor.log.error("API request failed")
raise exception.StopExtraction()
- def _pagination(self, endpoint, params):
+ def _pagination(self, resource, options):
while True:
- response = self._call(endpoint, params)
- yield from response["data"]
+ data = self._call(resource, options)
+ yield from data["resource_response"]["data"]
- cursor = response["page"]["cursor"]
- if not cursor:
+ try:
+ bookmarks = data["resource"]["options"]["bookmarks"]
+ if not bookmarks or bookmarks[0] == "-end-":
+ return
+ options["bookmarks"] = bookmarks
+ except KeyError:
return
- params["cursor"] = cursor
diff --git a/gallery_dl/extractor/powermanga.py b/gallery_dl/extractor/powermanga.py
index 281e20db..3b3be1a1 100644
--- a/gallery_dl/extractor/powermanga.py
+++ b/gallery_dl/extractor/powermanga.py
@@ -18,7 +18,7 @@ class PowermangaChapterExtractor(foolslide.FoolslideChapterExtractor):
test = [(("https://read.powermanga.org"
"/read/one_piece_digital_colour_comics/en/0/75/"), {
"url": "854c5817f8f767e1bccd05fa9d58ffb5a4b09384",
- "keyword": "9bf211d435060d1e38d3d13e4aaaa5a87381bfad",
+ "keyword": "9985bcb78491dff9c725958b06bba606be51b6d3",
})]
diff --git a/gallery_dl/extractor/puremashiro.py b/gallery_dl/extractor/puremashiro.py
deleted file mode 100644
index 00699faa..00000000
--- a/gallery_dl/extractor/puremashiro.py
+++ /dev/null
@@ -1,34 +0,0 @@
-# -*- coding: utf-8 -*-
-
-# Copyright 2018 Mike Fährmann
-#
-# This program is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License version 2 as
-# published by the Free Software Foundation.
-
-"""Extractors for http://reader.puremashiro.moe/"""
-
-from . import foolslide
-
-
-class PuremashiroChapterExtractor(foolslide.FoolslideChapterExtractor):
- """Extractor for manga-chapters from reader.puremashiro.moe"""
- category = "puremashiro"
- pattern = foolslide.chapter_pattern(r"reader\.puremashiro\.moe")
- test = [(("http://reader.puremashiro.moe"
- "/read/parallel-paradise-eng/en-us/0/20/"), {
- "url": "00d5bc9cbb413ed584ddb091ae2418ca7801b136",
- "keyword": "73bba3222758927e5a7cdc9e1db9d8307fe944c0",
- })]
- scheme = "http"
-
-
-class PuremashiroMangaExtractor(foolslide.FoolslideMangaExtractor):
- """Extractor for manga from reader.puremashiro.moe"""
- category = "puremashiro"
- pattern = foolslide.manga_pattern(r"reader\.puremashiro\.moe")
- test = [("http://reader.puremashiro.moe/series/hayate-no-gotoku/", {
- "url": "0cf77a623bff35b43323427a8fd1e40ff0e13c09",
- "keyword": "1b57d724b259a1d81b6352d889a1aa5eb86a6ef9",
- })]
- scheme = "http"
diff --git a/gallery_dl/extractor/smugmug.py b/gallery_dl/extractor/smugmug.py
new file mode 100644
index 00000000..3306bc58
--- /dev/null
+++ b/gallery_dl/extractor/smugmug.py
@@ -0,0 +1,298 @@
+# -*- coding: utf-8 -*-
+
+# Copyright 2018 Mike Fährmann
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+
+"""Extract images from https://www.smugmug.com/"""
+
+from .common import Extractor, Message
+from .. import text, util, exception
+
+BASE_PATTERN = (  # host part of a URL; two alternative capture groups
+    r"(?:smugmug:(?!album:)(?:https?://)?([^/]+)|"  # 1: host after smugmug:
+    r"(?:https?://)?([^.]+)\.smugmug\.com)")  # 2: smugmug.com subdomain
+
+
+class SmugmugExtractor(Extractor):
+    """Common base class of the smugmug extractors"""
+    category = "smugmug"
+    filename_fmt = ("{category}_{User[NickName]}_"
+                    "{Image[UploadKey]}_{Image[ImageKey]}.{extension}")
+
+    def __init__(self):
+        Extractor.__init__(self)
+        self.api = SmugmugAPI(self)
+
+    @staticmethod
+    def _apply_largest(image, delete=True):
+        """Copy the 'LargestImage' fields into 'image'; return its URL"""
+        largest = image["Uris"]["LargestImage"]
+        if delete:
+            del image["Uris"]
+        fields = ("Url", "Width", "Height", "MD5", "Size", "Watermarked")
+        image.update((key, largest[key]) for key in fields if key in largest)
+        return image["Url"]
+
+
+class SmugmugAlbumExtractor(SmugmugExtractor):
+    """Extractor for all images of a smugmug album ('smugmug:album:KEY')"""
+    subcategory = "album"
+    directory_fmt = ["{category}", "{User[NickName]}", "{Album[Name]}"]
+    archive_fmt = "a_{Album[AlbumKey]}_{Image[ImageKey]}"
+    pattern = [r"smugmug:album:([^:]+)$"]
+    test = [
+        ("smugmug:album:MN6kHH", {
+            "count": 0,
+        }),
+        ("smugmug:album:6Ffcgk", {
+            "count": 1,
+            "pattern": ".*/i-L4CxBdg/0/33e0b290/X3/i-L4CxBdg-X3.jpg",
+        }),
+        ("smugmug:album:drn76C", {
+            "count": 2,
+            "content": "864f6953cb04121290407a579611bc5087d117ee",
+        }),
+    ]
+
+    def __init__(self, match):
+        SmugmugExtractor.__init__(self)
+        self.album_id = match.group(1)
+
+    def items(self):
+        """Emit the album directory, then a URL message for each image"""
+        album = self.api.album(self.album_id, "User")
+        user = album["Uris"]["User"]
+        del user["Uris"], album["Uris"]
+        metadata = {"Album": album, "User": user}
+
+        yield Message.Version, 1
+        yield Message.Directory, metadata
+
+        # the same dict is reused (mutated) for every image of the album
+        for image in self.api.album_images(self.album_id, "LargestImage"):
+            url = self._apply_largest(image)
+            metadata["Image"] = image
+            yield Message.Url, url, text.nameext_from_url(url, metadata)
+
+
+class SmugmugImageExtractor(SmugmugExtractor):
+    """Extractor for individual smugmug images"""
+    subcategory = "image"
+    directory_fmt = ["{category}", "{User[NickName]}"]
+    archive_fmt = "{Image[ImageKey]}"
+    pattern = [BASE_PATTERN + r"(?:/[^/?]+)+/i-([^/?]+)"]
+    test = [("https://mikf.smugmug.com/Test/n-xnNH3s/i-L4CxBdg", {
+        "url": "905bfdef52ce1a731a4eae17e9ac348511e17ae4",
+        "keyword": "df63d36cfaeb128fda86802942d9a4271d3feafc",
+        "content": "626fe50d25fe49beeda15e116938db36e163c01f",
+    })]
+
+    def __init__(self, match):
+        SmugmugExtractor.__init__(self)
+        self.image_id = match.group(3)  # groups 1-2 belong to BASE_PATTERN
+
+    def items(self):
+        """Emit the directory and URL messages for the requested image"""
+        image = self.api.image(self.image_id, "LargestImage,ImageOwner")
+        owner = image["Uris"]["ImageOwner"]
+        url = self._apply_largest(image)  # also removes image["Uris"]
+        del owner["Uris"]
+
+        metadata = {"Image": image, "User": owner}
+        text.nameext_from_url(url, metadata)
+        yield Message.Version, 1
+        yield Message.Directory, metadata
+        yield Message.Url, url, metadata
+
+
+class SmugmugPathExtractor(SmugmugExtractor):
+    """Extractor for smugmug albums from URL paths and users"""
+    subcategory = "path"
+    pattern = [BASE_PATTERN + r"((?:/[^/?a-mo-z][^/?]*)*)/?$"]
+    test = [
+        ("https://mikf.smugmug.com/Test/", {
+            "pattern": "smugmug:album:xgkb4C$",
+        }),
+        ("https://mikf.smugmug.com/Test/n-xnNH3s", {
+            "pattern": "smugmug:album:xgkb4C$",
+        }),
+        ("https://mikf.smugmug.com/", {
+            "count": 4,
+            "pattern": "smugmug:album:(xgkb4C|MN6kHH|6Ffcgk|drn76C)$",
+        }),
+        ("smugmug:www.creativedogportraits.com/PortfolioGallery/", {
+            "pattern": "smugmug:album:txWXzs$",
+        }),
+        ("smugmug:www.creativedogportraits.com/", {
+            "pattern": "smugmug:album:txWXzs$",
+        }),
+        ("smugmug:https://www.creativedogportraits.com/", None),
+    ]
+
+    def __init__(self, match):
+        SmugmugExtractor.__init__(self)
+        self.domain, self.user, self.path = match.groups()
+
+    def items(self):
+        """Queue a 'smugmug:album:' URL for each album the URL refers to"""
+        yield Message.Version, 1
+
+        if not self.user:  # URL was matched by its domain only
+            self.user = self.api.site_user(self.domain)["NickName"]
+        # without a path, queue all albums of the user
+        if not self.path:
+            for album in self.api.user_albums(self.user):
+                uri = "smugmug:album:" + album["AlbumKey"]
+                yield Message.Queue, uri, album
+            return
+        # otherwise resolve the path to an album or folder node
+        lookup = self.api.user_urlpathlookup(self.user, self.path)
+        root = lookup["Uris"]["Node"]
+        if root["Type"] == "Album":
+            nodes = (root,)
+        elif root["Type"] == "Folder":
+            nodes = self.album_nodes(root)
+        else:
+            nodes = ()
+        for node in nodes:
+            album_id = node["Uris"]["Album"].rpartition("/")[2]
+            yield Message.Queue, "smugmug:album:" + album_id, node
+
+    def album_nodes(self, root):
+        """Recursively yield every album node located below 'root'"""
+        for child in self.api.node_children(root["NodeID"]):
+            kind = child["Type"]
+            if kind == "Folder":
+                yield from self.album_nodes(child)
+            elif kind == "Album":
+                yield child
+
+
+class SmugmugAPI():
+    """Minimal interface for the smugmug API v2"""
+    API_DOMAIN = "api.smugmug.com"
+    API_KEY = "DFqxg4jf7GrtsQ5PnbNB8899zKfnDrdK"
+    API_SECRET = ("fknV35p9r9BwZC4XbTzvCXpcSJRdD83S"
+                  "9nMFQm25ndGBzNPnwRDbRnnVBvqt4xTq")
+    HEADERS = {"Accept": "application/json"}
+
+    def __init__(self, extractor):
+        """Use an OAuth session if all four credentials are available"""
+        api_key = extractor.config("api-key", self.API_KEY)
+        api_secret = extractor.config("api-secret", self.API_SECRET)
+        token = extractor.config("access-token")
+        token_secret = extractor.config("access-token-secret")
+
+        if api_key and api_secret and token and token_secret:
+            self.session = util.OAuthSession(
+                extractor.session, api_key, api_secret, token, token_secret)
+            self.api_key = None  # signed requests carry no 'APIKey' parameter
+        else:
+            self.session = extractor.session
+            self.api_key = api_key
+        self.log = extractor.log
+
+    # endpoints returning a single object
+    def album(self, album_id, expands=None):
+        return self._expansion("album/" + album_id, expands)
+
+    def image(self, image_id, expands=None):
+        return self._expansion("image/" + image_id, expands)
+
+    def node(self, node_id, expands=None):
+        return self._expansion("node/" + node_id, expands)
+
+    def user(self, username, expands=None):
+        return self._expansion("user/" + username, expands)
+
+    # endpoints yielding all objects of a paginated collection
+    def album_images(self, album_id, expands=None):
+        return self._pagination("album/" + album_id + "!images", expands)
+
+    def node_children(self, node_id, expands=None):
+        return self._pagination("node/" + node_id + "!children", expands)
+
+    def user_albums(self, username, expands=None):
+        return self._pagination("user/" + username + "!albums", expands)
+
+    def site_user(self, domain):
+        """Return the 'User' object reported by the site at 'domain'"""
+        return self._call("!siteuser", domain=domain)["Response"]["User"]
+
+    def user_urlpathlookup(self, username, path):
+        """Look up the object at URL path 'path' with its 'Node' expanded"""
+        endpoint = "user/" + username + "!urlpathlookup"
+        params = {"urlpath": path}
+        return self._expansion(endpoint, "Node", params)
+
+    def _call(self, endpoint, params=None, domain=API_DOMAIN):
+        """Send a GET request and return the decoded JSON response"""
+        url = "https://{}/api/v2/{}".format(domain, endpoint)
+        params = params or {}
+        if self.api_key:
+            params["APIKey"] = self.api_key
+        params["_verbosity"] = "1"
+
+        response = self.session.get(url, params=params, headers=self.HEADERS)
+        data = response.json()
+        # the status is taken from the "Code" field of the JSON body
+        if 200 <= data["Code"] < 400:
+            return data
+        if data["Code"] == 404:
+            raise exception.NotFoundError()
+        if data["Code"] == 429:
+            self.log.error("Rate limit reached")
+        else:
+            self.log.error("API request failed")
+            self.log.debug(data)
+        raise exception.StopExtraction()
+
+    def _expansion(self, endpoint, expands, params=None):
+        """Return the first object of a response; NotFoundError if none"""
+        endpoint = self._extend(endpoint, expands)
+        result = self._apply_expansions(self._call(endpoint, params), expands)
+        if not result:
+            raise exception.NotFoundError()
+        return result[0]
+
+    def _pagination(self, endpoint, expands=None):
+        """Yield the objects of all pages, fetched 100 per request"""
+        endpoint = self._extend(endpoint, expands)
+        params = {"start": 1, "count": 100}
+
+        while True:
+            data = self._call(endpoint, params)
+            yield from self._apply_expansions(data, expands)
+            if "NextPage" not in data["Response"]["Pages"]:
+                return
+            params["start"] += params["count"]
+
+    @staticmethod
+    def _extend(endpoint, expands):
+        """Append an '_expand' query for 'expands' (if any) to 'endpoint'"""
+        if expands:
+            endpoint += "?_expand=" + expands
+        return endpoint
+
+    @staticmethod
+    def _apply_expansions(data, expands):
+        """Return the objects of 'data' with their 'expands' URIs resolved"""
+        def unwrap(response):
+            # the "Locator" value names the key holding the actual payload
+            locator = response["Locator"]
+            return response[locator] if locator in response else []
+
+        objs = unwrap(data["Response"])
+        if not isinstance(objs, list):
+            objs = (objs,)
+        # without requested 'expands' (None) there is nothing to resolve
+        if expands and "Expansions" in data:
+            names = expands.split(",")
+            for obj in objs:
+                uris = obj["Uris"]
+                for name in names:
+                    uris[name] = unwrap(data["Expansions"][uris[name]])
+        return objs
diff --git a/gallery_dl/extractor/tumblr.py b/gallery_dl/extractor/tumblr.py
index 0138aee2..770ca03f 100644
--- a/gallery_dl/extractor/tumblr.py
+++ b/gallery_dl/extractor/tumblr.py
@@ -10,6 +10,7 @@
from .common import Extractor, Message
from .. import text, util, exception
+from datetime import datetime, timedelta
import re
import time
@@ -322,10 +323,11 @@ class TumblrAPI():
# daily rate limit
if response.headers.get("x-ratelimit-perday-remaining") == "0":
+ reset = response.headers.get("x-ratelimit-perday-reset")
self.log.error(
"Daily API rate limit exceeded: aborting; "
- "%s seconds until rate limit reset",
- response.headers.get("x-ratelimit-perday-reset"),
+ "rate limit will reset at %s",
+ self._to_time(reset),
)
raise exception.StopExtraction()
@@ -334,11 +336,19 @@ class TumblrAPI():
if reset:
self.log.info(
"Hourly API rate limit exceeded; "
- "waiting %s seconds for rate limit reset",
- reset,
+ "waiting until %s for rate limit reset",
+ self._to_time(reset),
)
time.sleep(int(reset) + 1)
return self._call(blog, endpoint, params)
self.log.error(data)
raise exception.StopExtraction()
+
+    @staticmethod
+    def _to_time(reset):  # -> "HH:MM:SS" local time in 'reset' seconds
+        try:
+            reset_time = datetime.now() + timedelta(seconds=int(reset))
+        except (ValueError, TypeError, OverflowError):
+            return "?"  # missing, non-numeric or out-of-range header value
+        return reset_time.strftime("%H:%M:%S")
diff --git a/gallery_dl/util.py b/gallery_dl/util.py
index af754f07..642162e4 100644
--- a/gallery_dl/util.py
+++ b/gallery_dl/util.py
@@ -510,11 +510,11 @@ class OAuthSession():
self.params["oauth_signature_method"] = "HMAC-SHA1"
self.params["oauth_version"] = "1.0"
- def get(self, url, params):
+ def get(self, url, params, **kwargs):
params.update(self.params)
params["oauth_nonce"] = self.nonce(16)
params["oauth_timestamp"] = int(time.time())
- return self.session.get(url + self.sign(url, params))
+ return self.session.get(url + self.sign(url, params), **kwargs)
def sign(self, url, params):
"""Generate 'oauth_signature' value and return query string"""
diff --git a/gallery_dl/version.py b/gallery_dl/version.py
index 191fe110..45c77642 100644
--- a/gallery_dl/version.py
+++ b/gallery_dl/version.py
@@ -6,4 +6,4 @@
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
-__version__ = "1.3.5-dev"
+__version__ = "1.4.0-dev"
diff --git a/scripts/build_supportedsites.py b/scripts/build_supportedsites.py
index 398d3623..db12075d 100755
--- a/scripts/build_supportedsites.py
+++ b/scripts/build_supportedsites.py
@@ -5,7 +5,7 @@ import os.path
ROOTDIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
sys.path.insert(0, os.path.realpath(ROOTDIR))
-import gallery_dl.extractor
+import gallery_dl.extractor # noqa
CATEGORY_MAP = {
@@ -20,7 +20,6 @@ CATEGORY_MAP = {
"e621" : "e621",
"exhentai" : "ExHentai",
"fallenangels" : "Fallen Angels Scans",
- "gomanga" : "GoManga",
"hbrowse" : "HBrowse",
"hentai2read" : "Hentai2Read",
"hentaifoundry" : "Hentai Foundry",
@@ -30,14 +29,11 @@ CATEGORY_MAP = {
"imagebam" : "ImageBam",
"imagefap" : "ImageFap",
"imgbox" : "imgbox",
- "imgchili" : "imgChili",
"imgth" : "imgth",
"imgur" : "imgur",
"jaiminisbox" : "Jaimini's Box",
"kireicake" : "Kirei Cake",
- "kisscomic" : "KissComic",
"kissmanga" : "KissManga",
- "loveisover" : "Love is Over Archive",
"mangadex" : "MangaDex",
"mangafox" : "Manga Fox",
"mangahere" : "Manga Here",
@@ -48,7 +44,6 @@ CATEGORY_MAP = {
"nyafuu" : "Nyafuu Archive",
"paheal" : "rule #34",
"powermanga" : "PowerManga",
- "puremashiro" : "Pure Mashiro",
"readcomiconline": "Read Comic Online",
"rbt" : "RebeccaBlackTech",
"rule34" : "Rule 34",
@@ -58,10 +53,9 @@ CATEGORY_MAP = {
"senmanga" : "Sen Manga",
"sensescans" : "Sense-Scans",
"slideshare" : "SlideShare",
- "spectrumnexus" : "Spectrum Nexus",
+ "smugmug" : "SmugMug",
"thebarchive" : "The /b/ Archive",
"worldthree" : "World Three",
- "yeet" : "YEET Archive",
"xvideos" : "XVideos",
}
@@ -72,6 +66,7 @@ SUBCATEGORY_MAP = {
"issue" : "Comic-Issues",
"manga" : "Manga",
"me" : "pixiv.me Links",
+ "path" : "Images from Users and Folders",
"pinit" : "pin.it Links",
"popular": "Popular Images",
"search" : "Search Results",
diff --git a/test/test_results.py b/test/test_results.py
index fd370dab..2e13ebea 100644
--- a/test/test_results.py
+++ b/test/test_results.py
@@ -16,14 +16,11 @@ from gallery_dl import extractor, job, config, exception
# these don't work on travis-ci
TRAVIS_SKIP = {
"exhentai", "kissmanga", "mangafox", "dynastyscans", "nijie",
- "archivedmoe", "archiveofsins", "thebarchive",
+ "archivedmoe", "archiveofsins", "thebarchive", "sankaku", "idolcomplex",
}
# temporary issues, etc.
BROKEN = {
- "gomanga", # server down
- "pinterest", # access tokens have been set to 10 requests per hour
- "puremashiro", # online reader down
}
@@ -146,9 +143,9 @@ def generate_tests():
fltr = lambda c, bc: c in argv or bc in argv # noqa: E731
del sys.argv[1:]
else:
- skip = BROKEN.copy()
+ skip = set(BROKEN)
if "CI" in os.environ and "TRAVIS" in os.environ:
- skip |= TRAVIS_SKIP
+ skip |= set(TRAVIS_SKIP)
print("skipping:", ", ".join(skip))
fltr = lambda c, bc: c not in skip # noqa: E731