From 217a0687efb06b60735edaec169952156d0a84a1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mike=20F=C3=A4hrmann?= Date: Sat, 19 Jan 2019 18:11:20 +0100 Subject: [PATCH] [behance] add 'collection' extractor (closes #157) --- gallery_dl/extractor/behance.py | 53 +++++++++++++++++++++++-------- gallery_dl/extractor/mangapark.py | 4 +-- test/test_results.py | 1 + 3 files changed, 42 insertions(+), 16 deletions(-) diff --git a/gallery_dl/extractor/behance.py b/gallery_dl/extractor/behance.py index 6da704d9..995a31c1 100644 --- a/gallery_dl/extractor/behance.py +++ b/gallery_dl/extractor/behance.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- -# Copyright 2018 Mike Fährmann +# Copyright 2018-2019 Mike Fährmann # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License version 2 as @@ -17,6 +17,26 @@ class BehanceExtractor(Extractor): category = "behance" root = "https://www.behance.net" + def items(self): + yield Message.Version, 1 + for gallery in self.galleries(): + yield Message.Queue, gallery["url"], gallery + + def galleries(self): + """Return all relevant gallery URLs""" + return () + + def _pagination(self, url, key): + headers = {"X-Requested-With": "XMLHttpRequest"} + params = {} + + while True: + data = self.request(url, headers=headers, params=params).json() + yield from data[key] + if not data.get("offset"): + return + params["offset"] = data["offset"] + class BehanceGalleryExtractor(BehanceExtractor): """Extractor for image galleries from www.behance.net""" @@ -122,8 +142,7 @@ class BehanceUserExtractor(BehanceExtractor): """Extractor for a user's galleries from www.behance.net""" subcategory = "user" categorytransfer = True - pattern = [r"(?:https?://)?(?:www\.)?behance\.net" - r"/(?!gallery/)([^/?&#]+)/?$"] + pattern = [r"(?:https?://)?(?:www\.)?behance\.net/([^/?&#]+)/?$"] test = [("https://www.behance.net/alexstrohl", { "count": ">= 8", "pattern": BehanceGalleryExtractor.pattern[0], @@ -133,18 +152,24 @@ class BehanceUserExtractor(BehanceExtractor): BehanceExtractor.__init__(self) self.user = match.group(1) - def items(self): + def galleries(self): url = "{}/{}".format(self.root, self.user) - headers = {"X-Requested-With": "XMLHttpRequest"} - params = {"offset": None} + return self._pagination(url, "section_content") - yield Message.Version, 1 - while True: - data = self.request(url, headers=headers, params=params).json() - for gallery in data["section_content"]: - yield Message.Queue, gallery["url"], gallery +class BehanceCollectionExtractor(BehanceExtractor): + """Extractor for a collection's galleries from www.behance.net""" + subcategory = "collection" + pattern = [r"(?:https?://)?(?:www\.)?behance\.net/collection/(\d+)"] + test = [("https://www.behance.net/collection/170615607/Sky", { + "count": ">= 13", + "pattern": BehanceGalleryExtractor.pattern[0], + })] - if "offset" not in data: - return - params["offset"] = data["offset"] + def __init__(self, match): + BehanceExtractor.__init__(self) + self.collection_id = match.group(1) + + def galleries(self): + url = "{}/collection/{}/a".format(self.root, self.collection_id) + return self._pagination(url, "output") diff --git a/gallery_dl/extractor/mangapark.py b/gallery_dl/extractor/mangapark.py index f1bbdbfb..bed037a4 100644 --- a/gallery_dl/extractor/mangapark.py +++ b/gallery_dl/extractor/mangapark.py @@ -44,8 +44,8 @@ class MangaparkMangaExtractor(MangaparkExtractor, MangaExtractor): r"(/manga/[^/?&#]+)/?$"] test = [ ("https://mangapark.me/manga/aria", { - "url": "aae6bf44e4360a1b0f5aa5fd74339cac6d616d20", - "keyword": "b7440cc4cd68d0262703da1ceadaecd34bdaacb0", + "url": "a58be23ef3874fe9705b0b41dd462b67eaaafd9a", + "keyword": "b3b5a30aa2a326bc0ca8b74c65b5ecd4bf676ebf", }), ("https://mangapark.net/manga/aria", None), ("https://mangapark.com/manga/aria", None), diff --git a/test/test_results.py b/test/test_results.py index d3331f35..01215799 100644 --- a/test/test_results.py +++ b/test/test_results.py @@ -25,6 +25,7 @@ TRAVIS_SKIP = { BROKEN = { "desuarchive", "mangahere", + "ngomik", "rbt", }