From 937a802b49122d470c2aa2999233062af5264a96 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mike=20F=C3=A4hrmann?= Date: Mon, 18 Feb 2019 12:24:13 +0100 Subject: [PATCH] [dynastyscans] add extractors for images and image searches (closes #163) --- docs/supportedsites.rst | 2 +- gallery_dl/extractor/dynastyscans.py | 96 ++++++++++++++++++++++++++-- 2 files changed, 92 insertions(+), 6 deletions(-) diff --git a/docs/supportedsites.rst b/docs/supportedsites.rst index 4fcbe120..ef24fe43 100644 --- a/docs/supportedsites.rst +++ b/docs/supportedsites.rst @@ -18,7 +18,7 @@ Danbooru https://danbooru.donmai.us/ Pools, Popular Images, Desuarchive https://desuarchive.org/ Threads DeviantArt https://www.deviantart.com/ |Capabilities-1| Optional (OAuth) Doki Reader https://kobato.hologfx.com/reader/ Chapters, Manga -Dynasty Reader https://dynasty-scans.com/ Chapters +Dynasty Reader https://dynasty-scans.com/ Chapters, individual Images, Search Results e621 https://e621.net/ Pools, Popular Images, Posts, Tag-Searches ExHentai https://exhentai.org/ Favorites, Galleries, Search Results Optional Fallen Angels Scans https://www.fascans.com/ Chapters, Manga diff --git a/gallery_dl/extractor/dynastyscans.py b/gallery_dl/extractor/dynastyscans.py index 30cf74c2..251b2205 100644 --- a/gallery_dl/extractor/dynastyscans.py +++ b/gallery_dl/extractor/dynastyscans.py @@ -8,16 +8,43 @@ """Extract manga-chapters from https://dynasty-scans.com/""" -from .common import ChapterExtractor +from .common import ChapterExtractor, Extractor, Message from .. import text import json import re -class DynastyscansChapterExtractor(ChapterExtractor): - """Extractor for manga-chapters from dynasty-scans.com""" +BASE_PATTERN = r"(?:https?://)?(?:www\.)?dynasty-scans\.com" + + +class DynastyscansBase(): + """Base class for dynastyscans extractors""" category = "dynastyscans" - pattern = r"(?:https?://)?(?:www\.)?dynasty-scans\.com(/chapters/[^/?&#]+)" + root = "https://dynasty-scans.com" + + def _parse_image_page(self, image_id): + url = "{}/images/{}".format(self.root, image_id) + page = self.request(url).text + + date, pos = text.extract(page, "class='create_at'>", "") + tags, pos = text.extract(page, "class='tags'>", "", pos) + src , pos = text.extract(page, "class='btn-group'>", "", pos) + url , pos = text.extract(page, ' src="', '"', pos) + + src = text.extract(src, 'href="', '"')[0] if "Source<" in src else "" + + return { + "url": self.root + url, + "image_id": text.parse_int(image_id), + "tags": text.split_html(text.unescape(tags)), + "date": text.remove_html(date), + "source": text.unescape(src), + } + + +class DynastyscansChapterExtractor(DynastyscansBase, ChapterExtractor): + """Extractor for manga-chapters from dynasty-scans.com""" + pattern = BASE_PATTERN + r"(/chapters/[^/?&#]+)" test = ( (("http://dynasty-scans.com/chapters/" "hitoribocchi_no_oo_seikatsu_ch33"), { @@ -30,7 +57,6 @@ class DynastyscansChapterExtractor(ChapterExtractor): "keyword": "22b35029bc65d6d95db2e2c147b0a37f2d290f29", }), ) - root = "https://dynasty-scans.com" def metadata(self, page): info , pos = text.extract(page, "

", "") @@ -64,3 +90,63 @@ class DynastyscansChapterExtractor(ChapterExtractor): (self.root + img["image"], None) for img in json.loads(data) ] + + +class DynastyscansSearchExtractor(DynastyscansBase, Extractor): + """Extrator for image search results on dynasty-scans.com""" + subcategory = "search" + directory_fmt = ("{category}", "Images") + filename_fmt = "{image_id}.{extension}" + archive_fmt = "i_{image_id}" + pattern = BASE_PATTERN + r"/images(?:\?([^#]+))?$" + test = ("https://dynasty-scans.com/images?with[]=4930&with[]=5211", { + "url": "6b570eedd8a741c2cd34fb98b22a49d772f84191", + "keyword": "2a8f3d30584c637a0dd64ce8a0a2e81edaa6bca4", + }) + + def __init__(self, match): + Extractor.__init__(self, match) + self.search_query = match.group(1) + + def items(self): + yield Message.Version, 1 + yield Message.Directory, {} + for image_id in self.images(): + data = self._parse_image_page(image_id) + url = data.pop("url") + yield Message.Url, url, text.nameext_from_url(url, data) + + def images(self): + url = self.root + "/images?" + self.search_query + params = {"page": 1} + + while True: + page = self.request(url, params=params).text + yield from text.extract_iter(page, '"/images/', '"') + if 'rel="next"' not in page: + return + params["page"] += 1 + + +class DynastyscansImageExtractor(DynastyscansBase, Extractor): + """Extractor for individual images on dynasty-scans.com""" + subcategory = "image" + directory_fmt = ("{category}", "Images") + filename_fmt = "{image_id}.{extension}" + pattern = BASE_PATTERN + r"/images/(\d+)" + test = ("https://dynasty-scans.com/images/1245", { + "url": "15e54bd94148a07ed037f387d046c27befa043b2", + "keyword": "384889567a19d2e907ff13f65b42f9560e15172d", + }) + + def __init__(self, match): + Extractor.__init__(self, match) + self.image_id = match.group(1) + + def items(self): + data = self._parse_image_page(self.image_id) + url = data.pop("url") + + yield Message.Version, 1 + yield Message.Directory, data + yield Message.Url, url, text.nameext_from_url(url, data)