[mangataro] fix 'manga' extractor (#8930)

2026-01-26 22:38:31 +01:00
parent 4fab8e0dd8
commit f6ce8c8579
2 changed files with 51 additions and 19 deletions
--- a/gallery_dl/extractor/mangataro.py
+++ b/gallery_dl/extractor/mangataro.py
@@ -1,6 +1,6 @@
 # -*- coding: utf-8 -*-

-# Copyright 2025 Mike Fährmann
+# Copyright 2025-2026 Mike Fährmann
 #
 # This program is free software; you can redistribute it and/or modify
 # it under the terms of the GNU General Public License version 2 as
@@ -11,6 +11,8 @@
 from .common import ChapterExtractor, MangaExtractor
 from .. import text
 from ..cache import memcache
+import hashlib
+import time

 BASE_PATTERN = r"(?:https?://)?mangataro\.org"

@@ -59,27 +61,56 @@ class MangataroChapterExtractor(MangataroBase, ChapterExtractor):
 class MangataroMangaExtractor(MangataroBase, MangaExtractor):
     """Extractor for mangataro manga"""
     chapterclass = MangataroChapterExtractor
-    pattern = BASE_PATTERN + r"(/manga/([^/?#]+))"
+    pattern = BASE_PATTERN + r"/manga/([^/?#]+)"  # sole group: manga slug
     example = "https://mangataro.org/manga/MANGA"

-    def chapters(self, page):
-        slug = self.groups[1]
-        manga = _manga_info(self, slug)
+    def chapters(self, _):  # the positional argument is ignored
+        manga = _manga_info(self, self.groups[0])  # groups[0]: manga slug
+
+        url = self.root + "/auth/manga-chapters"
+        params = {
+            "manga_id": manga["manga_id"],
+            "offset"  : 0,  # advanced after every page
+            "limit"   : 500,  # values higher than 500 have no effect
+            "order"   : "DESC",
+        }
+        headers = {
+            "Referer"       : manga["manga_url"],
+            "Sec-Fetch-Dest": "empty",
+            "Sec-Fetch-Mode": "cors",
+            "Sec-Fetch-Site": "same-origin",
+        }

         results = []
-        for url in text.extract_iter(text.extr(
-                page, '<div class="chapter-list', '<div id="tab-gallery"'),
-                '<a href="', '"'):
-            chapter, _, chapter_id = url[url.rfind("/")+3:].rpartition("-")
-            chapter, sep, minor = chapter.partition("-")
-            results.append((url, {
-                **manga,
-                "chapter"      : text.parse_int(chapter),
-                "chapter_minor": "." + minor if sep else "",
-                "chapter_id"   : text.parse_int(chapter_id),
-            }))
+        while True:  # one request per page of chapters
+            self._update_params(params)  # refresh "_t" / "_ts"
+            data = self.request_json(url, params=params, headers=headers)
+
+            for ch in data["chapters"]:
+                chapter, sep, minor = ch["chapter"].partition(".")
+                results.append((ch["url"], {
+                    **manga,
+                    "chapter_id"   : text.parse_int(ch.pop("id", None)),
+                    **ch,  # "id" already popped; "chapter" overridden below
+                    "chapter"      : text.parse_int(chapter),
+                    "chapter_minor": "." + minor if sep else "",
+                }))
+
+            if not data.get("has_more"):  # falsy or missing: last page
+                break
+            params["offset"] += (data.get("limit") or params["limit"])
         return results

+    def _update_params(self, params):  # mutates "params" in place
+        # adapted from dazedcat19/FMD2
+        # https://github.com/dazedcat19/FMD2/blob/master/lua/modules/MangaTaro.lua
+        if (ts := int(time.time())) == params.get("_ts"):
+            return  # "_t" was already computed for this second
+        Y, m, d, H, _, _, _, _, _ = time.gmtime(ts)  # UTC date and hour
+        secret = f"{ts}mng_ch_{Y:>04}{m:>02}{d:>02}{H:>02}"  # zero-padded
+        params["_t"] = hashlib.md5(secret.encode()).hexdigest()[:16]
+        params["_ts"] = ts  # lets a call within the same second return early
+

@memcache(keyarg=1)
 def _manga_info(self, slug):
@@ -89,6 +120,7 @@ def _manga_info(self, slug):

    return {
        "manga"      : manga["name"].rpartition(" | ")[0].rpartition(" ")[0],
+        "manga_id"   : text.extr(page, 'data-manga-id="', '"'),
        "manga_url"  : manga["url"],
        "cover"      : manga["image"],
        "author"     : manga["author"]["name"].split(", "),
--- a/test/results/mangataro.py
+++ b/test/results/mangataro.py
@@ -106,11 +106,11 @@ __tests__ = (
    "#url"     : "https://mangataro.org/manga/lookism",
    "#class"   : mangataro.MangataroMangaExtractor,
    "#pattern" : mangataro.MangataroChapterExtractor.pattern,
-    "#count"   : 573,
+    "#count"   : range(580, 800),

-    "chapter"      : range(1, 573),
+    "chapter"      : range(1, 800),
    "chapter_id"   : int,
-    "chapter_minor": "",
+    "chapter_minor": {"", ".1", ".5"},
    "cover"        : "https://mangataro.org/content/media/208866l.webp",
    "description"  : "<p>Park Hyung Suk has spent all 17 years of his life at the bottom of the food chain. Short, overweight, and unattractive, he is used to being bullied by his classmates and constantly discriminated against for his looks. In an attempt to escape his biggest bully, Lee Tae Sung, he decides to transfer to Seoul’s Jae Won High School, a vocational preparatory school notorious for its liberal education system and carefree students. Days before his transfer, Hyung Suk wakes to find that he is no longer in his usual chubby body, but is instead in a perfect body! Tall, handsome, and beautifully toned, Hyung Suk has become the ideal version of himself. The only problem is that his original body still lays beside him—and when one body falls asleep, he awakens in the other. Now possessing two extremely different bodies, Hyung Suk must learn to navigate his new and much more popular life at J High whilst also solving the mystery of where his second, almost superhuman, body came from. [Written by MAL Rewrite]</p></div><div class=\"mt-6 pt-6 border-t border-neutral-700/30\"><div class=\"flex items-center gap-2 mb-3\"> <svg class=\"w-4 h-4 text-neutral-400\" fill=\"none\" viewBox=\"0 0 24 24\" stroke=\"currentColor\"> <path stroke-linecap=\"round\" stroke-linejoin=\"round\" stroke-width=\"2\" d=\"M13 16h-1v-4h-1m1-4h.01M21 12a9 9 0 11-18 0 9 9 0 0118 0z\" /> </svg><h4 class=\"text-sm font-medium text-neutral-100\">Background</h4></div><div class=\"max-w-none text-neutral-400 text-justify text-xs\"><p>Lookism is originally a webtoon which first volume was officially published in paperbook format by &Book (대원앤북) on May 25, 2017. The series has been serialized in English by LINE Webtoon since June 4, 2017.</p>",
    "genre"        : "Manhwa",