[weebcentral] add support (#6778)

2025-01-10 22:08:01 +01:00
parent 4853406fe3
commit 1d75c8308c
7 changed files with 229 additions and 1 deletions
--- a/docs/configuration.rst
+++ b/docs/configuration.rst
@@ -400,6 +400,7 @@ Default
        ``soundgasm``,
        ``urlgalleries``,
        ``vk``,
+        ``weebcentral``,
        ``zerochan``
    * ``"1.0-2.0"``
        ``flickr``,
--- a/docs/gallery-dl.conf
+++ b/docs/gallery-dl.conf
@@ -435,7 +435,8 @@
        {
            "cookies": null,

-            "files"  :  ["images", "image_large", "attachments", "postfile", "content"]
+            "files"        : ["images", "image_large", "attachments", "postfile", "content"],
+            "format-images": "download_url"
        },
        "pillowfort":
        {
@@ -701,6 +702,10 @@
            "api-key" : null,
            "metadata": false
        },
+        "weebcentral":
+        {
+            "sleep-request": "0.5-1.5"
+        },
        "weibo":
        {
            "sleep-request": "1.0-2.0",
--- a/docs/supportedsites.md
+++ b/docs/supportedsites.md
@@ -1051,6 +1051,12 @@ Consider all listed sites to potentially be NSFW.
    <td>Comics, Episodes</td>
    <td></td>
 </tr>
+<tr>
+    <td>Weeb Central</td>
+    <td>https://weebcentral.com/</td>
+    <td>Chapters, Manga</td>
+    <td></td>
+</tr>
 <tr>
    <td>Weibo</td>
    <td>https://www.weibo.com/</td>
--- a/gallery_dl/extractor/init.py
+++ b/gallery_dl/extractor/init.py
@@ -190,6 +190,7 @@ modules = [
    "weasyl",
    "webmshare",
    "webtoons",
+    "weebcentral",
    "weibo",
    "wikiart",
    "wikifeet",
--- a/gallery_dl/extractor/weebcentral.py
+++ b/gallery_dl/extractor/weebcentral.py
@@ -0,0 +1,136 @@
+# -*- coding: utf-8 -*-
+
+# Copyright 2025 Mike Fährmann
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+
+"""Extractors for https://weebcentral.com/"""
+
+from .common import ChapterExtractor, MangaExtractor
+from .. import text
+from ..cache import memcache
+
+# Common URL prefix for all patterns; scheme and "www." are optional
+BASE_PATTERN = r"(?:https?://)?(?:www\.)?weebcentral\.com"
+
+
+class WeebcentralBase:
+    """Shared settings and helpers for weebcentral extractors"""
+    category = "weebcentral"
+    root = "https://weebcentral.com"
+    # same range as the documented "sleep-request" default ("0.5-1.5")
+    request_interval = (0.5, 1.5)
+
+    @memcache(keyarg=1)
+    def _extract_manga_data(self, manga_id):
+        """Request the series page of 'manga_id' and return its metadata
+
+        The result is a dict with the keys "manga_id", "lang", "language",
+        "manga", "author", "tags", "type", "status", "release", "official",
+        and "description".
+
+        NOTE(review): memcache(keyarg=1) presumably caches the result per
+        'manga_id', meaning callers may receive a shared dict - confirm.
+        """
+        url = "{root}/series/{id}".format(root=self.root, id=manga_id)
+        extr = text.extract_from(self.request(url).text)
+
+        # NOTE: 'extr' appears to continue scanning after its previous
+        # match, so the markers are looked up in the original order.
+        title = extr("<title>", " | Weeb Central")
+        authors = extr("<strong>Author", "</li>")
+        tags = extr("<strong>Tag", "</li>")
+        manga_type = extr("<strong>Type: ", "</li>")
+        status = extr("<strong>Status: ", "</li>")
+        released = extr("<strong>Released: ", "</li>")
+        official = extr("<strong>Official Translatio", "</li>")
+        description = extr("<strong>Description", "</li>")
+
+        return {
+            "manga_id": manga_id,
+            "lang"    : "en",
+            "language": "English",
+            "manga"   : text.unescape(title),
+            # every second element of the split markup is kept
+            "author"  : text.split_html(authors)[1::2],
+            "tags"    : text.split_html(tags)[1::2],
+            "type"    : text.remove_html(manga_type),
+            "status"  : text.remove_html(status),
+            "release" : text.remove_html(released),
+            "official": ">Yes" in official,
+            "description": text.unescape(text.remove_html(description)),
+        }
+
+
+class WeebcentralChapterExtractor(WeebcentralBase, ChapterExtractor):
+    """Extractor for manga chapters from weebcentral.com"""
+    pattern = BASE_PATTERN + r"(/chapters/(\w+))"
+    example = "https://weebcentral.com/chapters/01JHABCDEFGHIJKLMNOPQRSTUV"
+
+    def metadata(self, page):
+        """Return chapter metadata merged with the series metadata
+
+        'page' is the HTML of the chapter page. The series ID, chapter
+        type, and chapter number are read from it in that order.
+        """
+        extr = text.extract_from(page)
+        manga_id = extr("'series_id': '", "'")
+
+        # _extract_manga_data() is wrapped in memcache, so the dict it
+        # returns may be shared with later callers. Work on a copy so the
+        # chapter-specific keys added here (and anything the caller adds
+        # to the returned dict) never end up in the cached manga data,
+        # where WeebcentralMangaExtractor.chapters() would merge them
+        # over every chapter entry.
+        data = dict(self._extract_manga_data(manga_id))
+        data["chapter_id"] = self.groups[1]
+        data["chapter_type"] = extr("'chapter_type': '", "'")
+
+        # "67.5" -> chapter 67, chapter_minor ".5"; "67" -> 67, ""
+        chapter, sep, minor = extr("'number': '", "'").partition(".")
+        data["chapter"] = text.parse_int(chapter)
+        data["chapter_minor"] = sep + minor
+
+        return data
+
+    def images(self, page):
+        """Return a list of (image URL, {"width", "height"}) tuples
+
+        The image list is not taken from 'page'; it is requested from
+        the chapter's "/images" endpoint.
+        """
+        referer = self.gallery_url
+        url = referer + "/images"
+        params = {
+            "is_prev"      : "False",
+            "current_page" : "1",
+            "reading_style": "long_strip",
+        }
+        # htmx-style (HX-*) request headers
+        # NOTE(review): presumably required by the endpoint - confirm
+        headers = {
+            "Accept"        : "*/*",
+            "Referer"       : referer,
+            "HX-Request"    : "true",
+            "HX-Current-URL": referer,
+        }
+        page = self.request(url, params=params, headers=headers).text
+        extr = text.extract_from(page)
+
+        results = []
+        while True:
+            src = extr(' src="', '"')
+            if not src:
+                # no further image tags
+                break
+            results.append((src, {
+                "width" : text.parse_int(extr(' width="', '"')),
+                "height": text.parse_int(extr(' height="', '"')),
+            }))
+        return results
+
+
+class WeebcentralMangaExtractor(WeebcentralBase, MangaExtractor):
+    """Extractor for manga from weebcentral.com"""
+    chapterclass = WeebcentralChapterExtractor
+    pattern = BASE_PATTERN + r"/series/(\w+)"
+    example = "https://weebcentral.com/series/01J7ABCDEFGHIJKLMNOPQRSTUV/TITLE"
+
+    def __init__(self, match):
+        # NOTE(review): the meaning of the third argument (False) is
+        # defined by MangaExtractor - confirm against its signature
+        MangaExtractor.__init__(self, match, False)
+
+    def chapters(self, _):
+        """Return a list of (chapter URL, metadata) tuples for a series
+
+        The argument is ignored; chapters are read from the series'
+        "/full-chapter-list" endpoint and combined with the series
+        metadata from _extract_manga_data().
+        """
+        manga_id = self.groups[0]
+        series_url = "{}/series/{}".format(self.root, manga_id)
+        # htmx-style (HX-*) request headers
+        headers = {
+            "Accept"        : "*/*",
+            "Referer"       : series_url,
+            "HX-Request"    : "true",
+            "HX-Target"     : "chapter-list",
+            "HX-Current-URL": series_url,
+        }
+        response = self.request(
+            series_url + "/full-chapter-list", headers=headers)
+        extr = text.extract_from(response.text)
+        manga = self._extract_manga_data(manga_id)
+        chapter_root = self.root + "/chapters/"
+
+        results = []
+        chapter_id = extr("/chapters/", '"')
+        while chapter_id:
+            # label text is split as "<type> <number>[.<minor>]"
+            label = extr('<span class="">', "<")
+            chapter_type, _sep, number = label.partition(" ")
+            major, dot, minor = number.partition(".")
+            # the last 5 characters of the timestamp are dropped
+            timestamp = extr(' datetime="', '"')[:-5]
+
+            info = {
+                "chapter_id"   : chapter_id,
+                "chapter"      : text.parse_int(major),
+                "chapter_minor": dot + minor,
+                "chapter_type" : chapter_type,
+                "date"         : text.parse_datetime(
+                    timestamp, "%Y-%m-%dT%H:%M:%S"),
+            }
+            # series metadata is merged last and wins on key collisions
+            info.update(manga)
+            results.append((chapter_root + chapter_id, info))
+            chapter_id = extr("/chapters/", '"')
+        return results
--- a/scripts/supportedsites.py
+++ b/scripts/supportedsites.py
@@ -155,6 +155,7 @@ CATEGORY_MAP = {
    "wallpapercave"  : "Wallpaper Cave",
    "webmshare"      : "webmshare",
    "webtoons"       : "Webtoon",
+    "weebcentral"    : "Weeb Central",
    "wikiart"        : "WikiArt.org",
    "wikigg"         : "wiki.gg",
    "wikimediacommons": "Wikimedia Commons",
--- a/test/results/weebcentral.py
+++ b/test/results/weebcentral.py
@@ -0,0 +1,78 @@
+# -*- coding: utf-8 -*-
+
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+
+from gallery_dl.extractor import weebcentral
+
+
+# Expected results for the weebcentral extractors; keys starting with "#"
+# describe the test itself, all other keys are expected metadata values
+__tests__ = (
+# single chapter
+{
+    "#url"     : "https://weebcentral.com/chapters/01J76XZ4PC3VW91BYFBQJA44C3",
+    "#class"   : weebcentral.WeebcentralChapterExtractor,
+    "#pattern" : r"https://official\.lowee\.us/manga/Aria/0067\.5-0\d\d\.png",
+    "#count"   : 17,
+
+    "author"       : ["AMANO Kozue"],
+    "chapter"      : 67,
+    "chapter_id"   : "01J76XZ4PC3VW91BYFBQJA44C3",
+    "chapter_minor": ".5",
+    "chapter_type" : "Navigation",
+    "count"        : 17,
+    "description"  : "On the planet Aqua, a world once known as Mars, Mizunashi Akari has just made her home in the town of Neo-VENEZIA, a futuristic imitation of the ancient city of Venice. The technology of \"Man Home\" (formerly Earth) has not entirely reached this planet, and Akari is alone, having no contact with family or friends. Nonetheless, the town, with its charming labyrinths of rivers and canals, becomes Akari's new infatuation, along with the dream of becoming a full-fledged gondolier. Reverting to a more \"primitive\" lifestyle and pursuing a new trade, the character of Akari becomes both adventurous and heartwarming all at once.",
+    "extension"    : "png",
+    "filename"     : r"re:0067\.5-0\d\d",
+    "width"        : {1129, 2133},
+    "height"       : {1511, 1600},
+    "lang"         : "en",
+    "language"     : "English",
+    "manga"        : "Aria",
+    "manga_id"     : "01J76XY8G1GK8EJ9VQG92C3DKM",
+    "official"     : True,
+    "page"         : range(1, 17),
+    "release"      : "2002",
+    "status"       : "Complete",
+    "type"         : "Manga",
+    "tags"         : [
+        "Adventure",
+        "Comedy",
+        "Drama",
+        "Sci-fi",
+        "Shounen",
+        "Slice of Life",
+    ],
+},
+
+# whole series; results must match the chapter extractor's URL pattern
+{
+    "#url"     : "https://weebcentral.com/series/01J76XY8G1GK8EJ9VQG92C3DKM/Aria",
+    "#class"   : weebcentral.WeebcentralMangaExtractor,
+    "#pattern" : weebcentral.WeebcentralChapterExtractor.pattern,
+    "#count"   : 75,
+
+    "author"       : ["AMANO Kozue"],
+    "chapter"      : range(1, 70),
+    "chapter_id"   : r"re:01J\w{23}",
+    "chapter_minor": {"", ".5"},
+    "chapter_type" : "Navigation",
+    "date"         : "type:datetime",
+    "description"  : "On the planet Aqua, a world once known as Mars, Mizunashi Akari has just made her home in the town of Neo-VENEZIA, a futuristic imitation of the ancient city of Venice. The technology of \"Man Home\" (formerly Earth) has not entirely reached this planet, and Akari is alone, having no contact with family or friends. Nonetheless, the town, with its charming labyrinths of rivers and canals, becomes Akari's new infatuation, along with the dream of becoming a full-fledged gondolier. Reverting to a more \"primitive\" lifestyle and pursuing a new trade, the character of Akari becomes both adventurous and heartwarming all at once.",
+    "lang"         : "en",
+    "language"     : "English",
+    "manga"        : "Aria",
+    "manga_id"     : "01J76XY8G1GK8EJ9VQG92C3DKM",
+    "official"     : True,
+    "release"      : "2002",
+    "status"       : "Complete",
+    "type"         : "Manga",
+    "tags"         : [
+        "Adventure",
+        "Comedy",
+        "Drama",
+        "Sci-fi",
+        "Shounen",
+        "Slice of Life",
+    ],
+},
+
+)