[mangafreak] update & fix

- fix manga and title extraction - fix 'chapter_minor' - extend test results
2026-01-25 19:49:56 +01:00
parent 58662f900a
commit 4e71e2f7e7
2 changed files with 73 additions and 33 deletions
--- a/gallery_dl/extractor/mangafreak.py
+++ b/gallery_dl/extractor/mangafreak.py
@@ -1,7 +1,5 @@
 # -*- coding: utf-8 -*-

-# Copyright 2025 Mike Fährmann
-#
 # This program is free software; you can redistribute it and/or modify
 # it under the terms of the GNU General Public License version 2 as
 # published by the Free Software Foundation.
@@ -11,7 +9,7 @@
 from .common import ChapterExtractor, MangaExtractor
 from .. import text

-BASE_PATTERN = r"(?:https?://)?(?:ww\d\.)?mangafreak\.me"
+BASE_PATTERN = r"(?:https?://)?(?:ww[\dw]\.)?mangafreak\.me"


 class MangafreakBase():
@@ -22,33 +20,30 @@ class MangafreakBase():

 class MangafreakChapterExtractor(MangafreakBase, ChapterExtractor):
    """Extractor for mangafreak manga chapters"""
-    pattern = BASE_PATTERN + r"(/Read1_(.+)_(\d+[a-z]?))"
+    pattern = BASE_PATTERN + r"(/Read1_([^/?#]+)_((\d+)([a-z])?))"
    example = "https://ww2.mangafreak.me/Read1_Onepunch_Man_1"

    def metadata(self, page):
-        extr = text.extract_from(page)
-        manga = text.unescape(extr("<title>", " Chapter "))
-        title = text.unescape(extr("", " - MangaFreak"))
-
-        chapter_str = self.groups[2]
-        chapter, sep, minor = chapter_str.partition("e")
+        manga = text.extr(page, "<title>Read ", " Chapter ")
+        title = text.extr(page, 'selected="selected">', "<").partition(": ")[2]
+        _, manga_slug, chapter_string, chapter, minor = self.groups

        return {
-            "manga"        : manga,
-            "title"        : title,
+            "manga"        : text.unescape(manga),
+            "manga_slug"   : manga_slug,
+            "title"        : text.unescape(title) if title else "",
            "chapter"      : text.parse_int(chapter),
-            "chapter_minor": sep + minor,
-            "chapter_string": chapter_str,
-            "manga_slug"   : self.groups[1],
+            "chapter_minor": "" if minor is None else minor,
+            "chapter_string": chapter_string,
            "lang"         : "en",
            "language"     : "English",
        }

    def images(self, page):
+        base = "https://images.mangafreak.me/mangas/"
        return [
-            ("https://images.mangafreak.me/mangas/" + path, None)
-            for path in text.extract_iter(
-                page, 'src="https://images.mangafreak.me/mangas/', '"')
+            (base + path, None)
+            for path in text.extract_iter(page, 'src="' + base, '"')
        ]


@@ -59,34 +54,32 @@ class MangafreakMangaExtractor(MangafreakBase, MangaExtractor):
    example = "https://ww2.mangafreak.me/Manga/Onepunch_Man"

    def chapters(self, page):
-        extr = text.extract_from(page)
-        manga = text.unescape(extr("<title>", " Manga"))
-
-        chapter_table = text.extr(page, "<table>", "</table>")
-        if not chapter_table:
-            return []
+        table = text.extr(page, "<table>", "</table>")
+        if not table:
+            return ()

        data = {
-            "manga"     : manga,
+            "manga"     : text.unescape(text.extr(page, "<title>", " Manga")),
            "manga_slug": self.groups[1],
            "lang"      : "en",
            "language"  : "English",
        }

        results = []
-        for row in text.extract_iter(chapter_table, "<tr>", "</tr>"):
+        chapter_match = text.re(r"(\d+)(\w*)").match
+        for row in text.extract_iter(table, "<tr>", "</tr>"):
            href = text.extr(row, '<a href="', '"')
            if not href:
                continue
            url = self.root + href
-            chapter_str = url.rpartition("_")[2]
-            chapter, sep, minor = chapter_str.partition("e")
-
+            chapter_string = href.rpartition("_")[2]
+            chapter, minor = chapter_match(chapter_string).groups()
+            title = text.extr(row, '">', '<').partition(" - ")[2]
            results.append((url, {
-                "chapter"      : text.parse_int(chapter),
-                "chapter_minor": sep + minor,
-                "chapter_string": chapter_str,
+                "chapter"       : text.parse_int(chapter),
+                "chapter_minor" : minor,
+                "chapter_string": chapter_string,
+                "title"         : text.unescape(title) if title else "",
                **data,
            }))
-
        return results