diff --git a/docs/configuration.rst b/docs/configuration.rst index eb4cfd91..329c67c0 100644 --- a/docs/configuration.rst +++ b/docs/configuration.rst @@ -2605,6 +2605,32 @@ Description the first in the list gets chosen (usually `mp3`). +extractor.koharu.cbz +-------------------- +Type + ``bool`` +Default + ``true`` +Description + Download each gallery as a single ``.cbz`` file. + + Disabling this option causes a gallery + to be downloaded as individual image files. + + +extractor.koharu.format +----------------------- +Type + ``string`` +Default + ``"original"`` +Description + Name of the image format to download. + + | Available formats are + | ``"780"``, ``"980"``, ``"1280"``, ``"1600"``, ``"0"``/``"original"`` + + extractor.lolisafe.domain ------------------------- Type diff --git a/docs/supportedsites.md b/docs/supportedsites.md index 36226654..a18beca9 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -475,6 +475,12 @@ Consider all listed sites to potentially be NSFW. Soundtracks + + Koharu + https://koharu.to/ + Galleries, Search Results + + Komikcast https://komikcast.lol/ diff --git a/gallery_dl/extractor/__init__.py b/gallery_dl/extractor/__init__.py index beb6fb47..0a5d3bc2 100644 --- a/gallery_dl/extractor/__init__.py +++ b/gallery_dl/extractor/__init__.py @@ -85,6 +85,7 @@ modules = [ "keenspot", "kemonoparty", "khinsider", + "koharu", "komikcast", "lensdump", "lexica", diff --git a/gallery_dl/extractor/koharu.py b/gallery_dl/extractor/koharu.py new file mode 100644 index 00000000..b209830b --- /dev/null +++ b/gallery_dl/extractor/koharu.py @@ -0,0 +1,146 @@ +# -*- coding: utf-8 -*- + +# Copyright 2024 Mike Fährmann +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License version 2 as +# published by the Free Software Foundation. + +"""Extractors for https://koharu.to/""" + +from .common import GalleryExtractor, Extractor, Message +from .. 
import text, exception  # completes the ``from .. `` ending the previous line

# Matches the site root of either accepted domain, case-insensitively and
# with an optional scheme.
BASE_PATTERN = r"(?i)(?:https?://)?(?:koharu|anchira)\.to"


class KoharuGalleryExtractor(GalleryExtractor):
    """Extractor for koharu galleries"""
    category = "koharu"
    root = "https://koharu.to"
    root_api = "https://api.koharu.to"
    filename_fmt = "{num:>03}.{extension}"
    directory_fmt = ("{category}", "{id} {title}")
    archive_fmt = "{id}_{num}"
    pattern = BASE_PATTERN + r"/(?:g|reader)/(\d+)/(\w+)"
    example = "https://koharu.to/g/12345/67890abcde/"

    def __init__(self, match):
        """Initialize the extractor from a URL 'match'"""
        GalleryExtractor.__init__(self, match)
        # metadata() and images() build their API URLs from the match
        # groups, so no gallery page URL is kept.
        # NOTE(review): presumably this keeps GalleryExtractor from
        # requesting a gallery page itself - confirm in common.py
        self.gallery_url = None

    def _init(self):
        """Prepare API request headers and read the extractor options"""
        self.headers = {
            "Accept" : "*/*",
            "Referer": self.root + "/",
            "Origin" : self.root,
        }

        # "original" is the documented default; _select_format() maps it
        # (and any other falsy value) to format ID "0"
        self.fmt = self.config("format", "original")
        self.cbz = self.config("cbz", True)

        if self.cbz:
            # a gallery becomes one archive file, so it is named after
            # the gallery and gets no directory of its own
            self.filename_fmt = "{id} {title}.{extension}"
            self.directory_fmt = ("{category}",)

    def metadata(self, _):
        """Fetch and return the gallery's detail object from the API

        The full response is kept in 'self.data' for images().
        Its "rels" and "thumbnails" entries are removed.
        """
        url = "{}/books/detail/{}/{}".format(
            self.root_api, self.groups[0], self.groups[1])
        self.data = data = self.request(url, headers=self.headers).json()
        data.pop("rels", None)
        data.pop("thumbnails", None)
        return data

    def images(self, _):
        """Return a sequence of (URL, metadata) pairs to download

        With the 'cbz' option enabled this is a single pair for the
        gallery's archive; otherwise there is one pair per API entry,
        carrying that entry's dimensions as "w" and "h".
        """
        data = self.data
        fmt = self._select_format(data["data"])

        url = "{}/books/data/{}/{}/{}/{}".format(
            self.root_api,
            data["id"], data["public_key"],
            fmt["id"], fmt["public_key"],
        )
        params = {
            "v": data["updated_at"],
            "w": fmt["w"],
        }

        if self.cbz:
            params["action"] = "dl"
            base = self.request(
                url, method="POST", params=params, headers=self.headers,
            ).json()["base"]
            url = "{}?v={}&w={}".format(base, data["updated_at"], fmt["w"])
            info = text.nameext_from_url(base)
            if not info["extension"]:
                # the download URL has no file extension of its own
                info["extension"] = "cbz"
            return ((url, info),)

        data = self.request(url, params=params, headers=self.headers).json()
        base = data["base"]

        results = []
        for entry in data["entries"]:
            dimensions = entry["dimensions"]
            info = {"w": dimensions[0], "h": dimensions[1]}
            results.append((base + entry["path"], info))
        return results

    def _select_format(self, formats):
        """Return the entry of 'formats' selected by the 'format' option

        An unset option and "original" both select format ID "0".
        Any other value is used as format ID after conversion to 'str'.
        The returned entry gets the chosen ID stored under "w".

        Raises exception.NotFoundError if 'formats' has no such ID.
        """
        if not self.fmt or self.fmt == "original":
            fmtid = "0"
        else:
            fmtid = str(self.fmt)

        try:
            fmt = formats[fmtid]
        except KeyError:
            # the failed lookup is an implementation detail;
            # do not chain it onto the user-facing error
            raise exception.NotFoundError("format") from None

        fmt["w"] = fmtid
        return fmt


class KoharuSearchExtractor(Extractor):
    """Extractor for koharu search results"""
    category = "koharu"
    subcategory = "search"
    root = "https://koharu.to"
    root_api = "https://api.koharu.to"
    request_interval = (1.0, 2.0)
    pattern = BASE_PATTERN + r"/\?([^#]*)"
    example = "https://koharu.to/?s=QUERY"

    def _init(self):
        """Prepare API request headers"""
        self.headers = {
            "Accept" : "*/*",
            "Referer": self.root + "/",
            "Origin" : self.root,
        }

    def items(self):
        """Yield a Message.Queue item for every gallery in the results

        The URL's query string is forwarded to the API as request
        parameters, starting at its "page" value or at page 1.
        """
        url_api = self.root_api + "/books"
        params = text.parse_query(self.groups[0])
        params["page"] = text.parse_int(params.get("page"), 1)

        while True:
            data = self.request(
                url_api, params=params, headers=self.headers).json()

            try:
                entries = data["entries"]
            except KeyError:
                return
            if not entries:
                # An empty page means there is nothing left to queue.
                # Without this check a response lacking usable paging
                # info would be requested over and over again.
                return

            for entry in entries:
                url = "{}/g/{}/{}/".format(
                    self.root, entry["id"], entry["public_key"])
                entry["_extractor"] = KoharuGalleryExtractor
                yield Message.Queue, url, entry

            try:
                if data["limit"] * data["page"] >= data["total"]:
                    return
            except (KeyError, TypeError):
                # paging info missing or malformed: keep going and
                # rely on the empty-page check to stop
                pass
            params["page"] += 1


# ---------------------------------------------------------------------------
# The patch continues at this point with a second new file:
#
#   diff --git a/test/results/koharu.py b/test/results/koharu.py
#   new file mode 100644
#   index 00000000..198a64bc
#   --- /dev/null
#   +++ b/test/results/koharu.py
#   @@ -0,0 +1,60 @@
#
# whose first lines are:
#
#   # -*- coding: utf-8 -*-
#
#   # This program is free software; you can redistribute it and/or modify
#   # it under the terms of the GNU General Public License version 2 as
#   # published by the Free Software Foundation.
# ---------------------------------------------------------------------------

from gallery_dl.extractor import koharu


# Test cases for the koharu extractors.
# NOTE(review): keys starting with "#" look like directives for the test
# harness (input URL, expected extractor class, URL pattern, result count,
# option overrides) and the remaining keys like expected metadata values -
# confirm against the test runner.
__tests__ = (
# gallery with default options: a single .cbz download
{
    "#url"     : "https://koharu.to/g/14216/6c67076fdd45",
    "#category": ("", "koharu", "gallery"),
    "#class"   : koharu.KoharuGalleryExtractor,
    "#pattern" : r"https://kisakisexo.xyz/download/59896/a4fbd1828229/f47639c6abaf1903dd69c36a3d961da84741a1831aa07a2906ce9c74156a5d75\?v=1721626410802&w=0",
    "#count"   : 1,

    "count"     : 1,
    "created_at": 1721626410802,
    "extension" : "cbz",
    "filename"  : "f47639c6abaf1903dd69c36a3d961da84741a1831aa07a2906ce9c74156a5d75",
    "id"        : 14216,
    "num"       : 1,
    "public_key": "6c67076fdd45",
    "tags"      : list,
    "title"     : "[Ouchi Kaeru] Summer Business (Comic Kairakuten 2024-08)",
    "updated_at": 1721626410802,
},

# same gallery with "cbz" disabled and the "780" format: 22 image URLs
{
    "#url"     : "https://koharu.to/g/14216/6c67076fdd45",
    "#category": ("", "koharu", "gallery"),
    "#class"   : koharu.KoharuGalleryExtractor,
    "#options" : {"cbz": False, "format": "780"},
    "#pattern" : r"https://koharusexo.xyz/data/59905/2df9110af7f1/a7cbeca3fb9c83aa87582a8a74cc8f8ce1b9e9b434dc1af293628871642f42df/[0-9a-f]+/.+",
    "#count"   : 22,

},

# the anchira.to domain is handled by the same extractor
{
    "#url"     : "https://anchira.to/g/14216/6c67076fdd45",
    "#category": ("", "koharu", "gallery"),
    "#class"   : koharu.KoharuGalleryExtractor,
},

# "/reader/" URLs are handled like "/g/" URLs
{
    "#url"     : "https://koharu.to/reader/14216/6c67076fdd45",
    "#category": ("", "koharu", "gallery"),
    "#class"   : koharu.KoharuGalleryExtractor,
},

# search results: every result URL must match the gallery pattern
{
    "#url"     : "https://koharu.to/?s=tag:^beach$",
    "#category": ("", "koharu", "search"),
    "#class"   : koharu.KoharuSearchExtractor,
    "#pattern" : koharu.KoharuGalleryExtractor.pattern,
    "#count"   : ">= 50",
},

)