From f7ba19a1c0bbac5c293aa91b47cda9a111740ed0 Mon Sep 17 00:00:00 2001 From: enduser420 <91022934+enduser420@users.noreply.github.com> Date: Tue, 4 Oct 2022 12:53:24 +0530 Subject: [PATCH] [nana] add 'nana' extractors (#2967) --- docs/configuration.rst | 11 +++ docs/gallery-dl.conf | 4 ++ docs/supportedsites.md | 6 ++ gallery_dl/extractor/__init__.py | 1 + gallery_dl/extractor/nana.py | 115 +++++++++++++++++++++++++++++++ scripts/supportedsites.py | 3 + test/test_results.py | 4 ++ 7 files changed, 144 insertions(+) create mode 100644 gallery_dl/extractor/nana.py diff --git a/docs/configuration.rst b/docs/configuration.rst index 84f7bd2c..a3e8b7fc 100644 --- a/docs/configuration.rst +++ b/docs/configuration.rst @@ -1725,6 +1725,17 @@ Description Also emit metadata for text-only posts without media content. +extractor.nana.favkey +--------------------- +Type + ``string`` +Default + ``null`` +Description + Your `Nana Favorite Key <https://nana.my.id/tutorial>`__, + used to access your favorite archives. + + extractor.newgrounds.flash -------------------------- Type diff --git a/docs/gallery-dl.conf b/docs/gallery-dl.conf index 1c565ece..8ebb32ab 100644 --- a/docs/gallery-dl.conf +++ b/docs/gallery-dl.conf @@ -189,6 +189,10 @@ "format": "original", "include": "art" }, + "nana": + { + "favkey": null + }, "nijie": { "username": null, diff --git a/docs/supportedsites.md b/docs/supportedsites.md index 79b35755..e9a1a518 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -523,6 +523,12 @@ Consider all sites to be NSFW unless otherwise known. 
Galleries + + Nana + https://nana.my.id/ + Galleries, Favorites, Search Results + + Naver https://blog.naver.com/ diff --git a/gallery_dl/extractor/__init__.py b/gallery_dl/extractor/__init__.py index fed6998c..91caa0cd 100644 --- a/gallery_dl/extractor/__init__.py +++ b/gallery_dl/extractor/__init__.py @@ -90,6 +90,7 @@ modules = [ "mememuseum", "myhentaigallery", "myportfolio", + "nana", "naver", "naverwebtoon", "newgrounds", diff --git a/gallery_dl/extractor/nana.py b/gallery_dl/extractor/nana.py new file mode 100644 index 00000000..6062418b --- /dev/null +++ b/gallery_dl/extractor/nana.py @@ -0,0 +1,115 @@ +# -*- coding: utf-8 -*- + +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License version 2 as +# published by the Free Software Foundation. + +"""Extractors for https://nana.my.id/""" + +from .common import GalleryExtractor, Extractor, Message +from .. import text, exception +import json + + +class NanaGalleryExtractor(GalleryExtractor): + """Extractor for image galleries from nana.my.id""" + category = "nana" + directory_fmt = ("{category}", "{title}") + pattern = r"(?:https?://)?nana\.my\.id/reader/([^/?#]+)" + test = ( + (("https://nana.my.id/reader/" + "059f7de55a4297413bfbd432ce7d6e724dd42bae"), { + "pattern": r"https://nana\.my\.id/reader/" + r"\w+/image/page\?path=.*\.\w+", + "title" : "Everybody Loves Shion", + "artist" : "fuzui", + "tags" : list, + "count" : 29, + }), + (("https://nana.my.id/reader/" + "77c8712b67013e427923573379f5bafcc0c72e46"), { + "pattern": r"https://nana\.my\.id/reader/" + r"\w+/image/page\?path=.*\.\w+", + "title" : "Lovey-Dovey With an Otaku-Friendly Gyaru", + "artist" : "Sueyuu", + "tags" : ["Sueyuu"], + "count" : 58, + }), + ) + + def __init__(self, match): + self.gallery_id = match.group(1) + url = "https://nana.my.id/reader/" + self.gallery_id + GalleryExtractor.__init__(self, match, url) + + def metadata(self, page): + title = text.unescape( + 
text.extract(page, '  ', '')[0]) + artist = text.unescape(text.extract( + page, '', '')[0])[len(title):-10] + tags = text.extract(page, 'Reader.tags = "', '"')[0] + + return { + "gallery_id": self.gallery_id, + "title" : title, + "artist" : artist[4:] if artist.startswith(" by ") else "", + "tags" : tags.split(", ") if tags else (), + "lang" : "en", + "language" : "English", + } + + def images(self, page): + data = json.loads(text.extract(page, "Reader.pages = ", ".pages")[0]) + return [ + ("https://nana.my.id" + image, None) + for image in data["pages"] + ] + + +class NanaSearchExtractor(Extractor): + """Extractor for nana search results""" + category = "nana" + subcategory = "search" + pattern = r"(?:https?://)?nana\.my\.id(?:/?\?([^#]+))" + test = ( + ('https://nana.my.id/?q=+"elf"&sort=desc', { + "pattern": NanaGalleryExtractor.pattern, + "range": "1-100", + "count": 100, + }), + ("https://nana.my.id/?q=favorites%3A", { + "pattern": NanaGalleryExtractor.pattern, + "count": ">= 2", + }), + ) + + def __init__(self, match): + Extractor.__init__(self, match) + self.params = text.parse_query(match.group(1)) + self.params["p"] = text.parse_int(self.params.get("p"), 1) + self.params["q"] = self.params.get("q") or "" + + def items(self): + if "favorites:" in self.params["q"]: + favkey = self.config("favkey") + if not favkey: + raise exception.AuthenticationError( + "'Favorite key' not provided. " + "Please see 'https://nana.my.id/tutorial'") + self.session.cookies.set("favkey", favkey, domain="nana.my.id") + + data = {"_extractor": NanaGalleryExtractor} + while True: + try: + page = self.request( + "https://nana.my.id", params=self.params).text + except exception.HttpError: + return + + for gallery in text.extract_iter( + page, '
', '
'): + url = "https://nana.my.id" + text.extract( + gallery, '