[madokami] add 'manga' extractor (#7828)

2025-07-17 20:40:26 +02:00
parent 493fc483c6
commit 1561284815
7 changed files with 172 additions and 0 deletions
--- a/gallery_dl/extractor/madokami.py
+++ b/gallery_dl/extractor/madokami.py
@@ -0,0 +1,89 @@
+# -*- coding: utf-8 -*-
+
+# Copyright 2025 Mike Fährmann
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+
+"""Extractors for https://manga.madokami.al/"""
+
+from .common import Extractor, Message
+from .. import text, util, exception
+
+BASE_PATTERN = r"(?:https?://)?manga\.madokami\.al"  # scheme is optional
+
+
+class MadokamiExtractor(Extractor):
+    """Common base class shared by all madokami extractors"""
+    root = "https://manga.madokami.al"
+    category = "madokami"
+
+
+class MadokamiMangaExtractor(MadokamiExtractor):
+    """Extractor for the chapters of a madokami manga"""
+    subcategory = "manga"
+    directory_fmt = ("{category}", "{manga}")
+    archive_fmt = "{chapter_id}"
+    pattern = rf"{BASE_PATTERN}/Manga/(\w/\w{{2}}/\w{{4}}/.+)"
+    example = "https://manga.madokami.al/Manga/A/AB/ABCD/ABCDE_TITLE"
+
+    def items(self):
+        """Yield a Directory and a Url message for each listed chapter"""
+        user, passwd = self._get_auth_info()
+        if not user:
+            raise exception.LoginRequired("Missing 'username' & 'password'")
+        self.session.auth = util.HTTPBasicAuth(user, passwd)
+
+        listing = f"{self.root}/Manga/{self.groups[0]}"
+        extr = text.extract_from(self.request(listing).text)
+
+        # NOTE: keep the order of 'extr' calls; it presumably matters
+        chapters = []
+        while record := extr('<tr data-record="', '"'):
+            path = text.unescape(extr('href="', '"'))
+            label = text.unescape(extr(">", "<"))
+            size = text.parse_bytes(extr("<td>", "</td>"))
+            date = extr("<td>", "</td>").strip()
+            chapters.append({
+                "chapter_id": text.parse_int(record),
+                "path": path,
+                "chapter_string": label,
+                "size": size,
+                "date": text.parse_datetime(date, "%Y-%m-%d %H:%M"),
+            })
+        if self.config("chapter-reverse"):
+            chapters.reverse()
+
+        manga = text.unescape(extr('itemprop="name">', "<"))
+        year = text.parse_int(extr('itemprop="datePublished" content="', "-"))
+        staff = text.split_html(extr('<p class="staff', "</p>"))
+        genres = text.split_html(extr("<h3>Genres</h3>", "</div>"))
+        tags = text.split_html(extr("<h3>Tags</h3>", "</div>"))
+        status = extr('span class="scanstatus">', "<")
+        self.kwdict.update({
+            "manga": manga,
+            "year": year,
+            "author": staff[1::2],  # odd-indexed fragments of the staff markup
+            "genre": genres,
+            "tags": tags,
+            "complete": status.lower() == "yes",
+        })
+
+        # optional volume number, then a chapter number or 'N-M' range
+        chinfo = text.re(
+            r"(?i).+?\s+("
+            r"(?:v(?:ol)?\.?\s*(\d+)\s+)?"
+            r"(?:ch?\.?\s*(\d+)(?:-(\d+))?)"
+            r")"
+        ).match
+        for ch in chapters:
+            numbers = (0, 0, 0)  # defaults when the label is not parseable
+            if match := chinfo(ch["chapter_string"]):
+                ch["chapter_string"], *groups = match.groups()
+                numbers = map(text.parse_int, groups)
+            ch["volume"], ch["chapter"], ch["chapter_end"] = numbers
+            file_url = f"{self.root}{ch['path']}"
+            text.nameext_from_url(file_url, ch)
+            yield Message.Directory, ch
+            yield Message.Url, file_url, ch