[weebcentral] add support (#6778)

This commit is contained in:
Mike Fährmann
2025-01-10 22:08:01 +01:00
parent 4853406fe3
commit 1d75c8308c
7 changed files with 229 additions and 1 deletions

View File

@@ -400,6 +400,7 @@ Default
``soundgasm``,
``urlgalleries``,
``vk``,
``weebcentral``,
``zerochan``
* ``"1.0-2.0"``
``flickr``,

View File

@@ -435,7 +435,8 @@
{
"cookies": null,
"files" : ["images", "image_large", "attachments", "postfile", "content"]
"files" : ["images", "image_large", "attachments", "postfile", "content"],
"format-images": "download_url"
},
"pillowfort":
{
@@ -701,6 +702,10 @@
"api-key" : null,
"metadata": false
},
"weebcentral":
{
"sleep-request": "0.5-1.5"
},
"weibo":
{
"sleep-request": "1.0-2.0",

View File

@@ -1051,6 +1051,12 @@ Consider all listed sites to potentially be NSFW.
<td>Comics, Episodes</td>
<td></td>
</tr>
<tr>
<td>Weeb Central</td>
<td>https://weebcentral.com/</td>
<td>Chapters, Manga</td>
<td></td>
</tr>
<tr>
<td>Weibo</td>
<td>https://www.weibo.com/</td>

View File

@@ -190,6 +190,7 @@ modules = [
"weasyl",
"webmshare",
"webtoons",
"weebcentral",
"weibo",
"wikiart",
"wikifeet",

View File

@@ -0,0 +1,136 @@
# -*- coding: utf-8 -*-
# Copyright 2025 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
"""Extractors for https://weebcentral.com/"""
from .common import ChapterExtractor, MangaExtractor
from .. import text
from ..cache import memcache
# Regex prefix shared by the extractor patterns in this module:
# optional "http(s)://" scheme, optional "www." subdomain, then the domain.
BASE_PATTERN = r"(?:https?://)?(?:www\.)?weebcentral\.com"
class WeebcentralBase():
    # Settings and helpers shared by the weebcentral.com extractors.
    category = "weebcentral"
    root = "https://weebcentral.com"
    request_interval = (0.5, 1.5)

    @memcache(keyarg=1)
    def _extract_manga_data(self, manga_id):
        """Fetch the series page of 'manga_id' and return its metadata.

        The result is a dict with the keys 'manga_id', 'lang', 'language',
        'manga', 'author', 'tags', 'type', 'status', 'release', 'official'
        and 'description'.

        The method is wrapped in memcache(keyarg=1); the returned dict is
        presumably shared between calls with the same 'manga_id', so
        callers should not modify it in place.
        """
        series_url = "{}/series/{}".format(self.root, manga_id)
        extr = text.extract_from(self.request(series_url).text)

        # The extr() calls are issued in the order the fields appear on
        # the page; keep this order when changing anything below.
        title = extr("<title>", " | Weeb Central")
        authors = extr("<strong>Author", "</li>")
        tags = extr("<strong>Tag", "</li>")
        manga_type = extr("<strong>Type: ", "</li>")
        status = extr("<strong>Status: ", "</li>")
        released = extr("<strong>Released: ", "</li>")
        official = extr("<strong>Official Translatio", "</li>")
        description = extr("<strong>Description", "</li>")

        manga = {
            "manga_id": manga_id,
            "lang"    : "en",
            "language": "English",
        }
        manga["manga"] = text.unescape(title)
        # every second element, starting at index 1, is kept
        manga["author"] = text.split_html(authors)[1::2]
        manga["tags"] = text.split_html(tags)[1::2]
        manga["type"] = text.remove_html(manga_type)
        manga["status"] = text.remove_html(status)
        manga["release"] = text.remove_html(released)
        manga["official"] = ">Yes" in official
        manga["description"] = text.unescape(text.remove_html(description))
        return manga
class WeebcentralChapterExtractor(WeebcentralBase, ChapterExtractor):
    """Extractor for manga chapters from weebcentral.com"""
    pattern = BASE_PATTERN + r"(/chapters/(\w+))"
    example = "https://weebcentral.com/chapters/01JHABCDEFGHIJKLMNOPQRSTUV"

    def metadata(self, page):
        """Return the metadata dict for the chapter described by 'page'.

        Combines the manga-level metadata from _extract_manga_data() with
        the chapter-specific fields 'chapter_id', 'chapter_type',
        'chapter' and 'chapter_minor' parsed from the chapter page.
        """
        extr = text.extract_from(page)
        manga_id = extr("'series_id': '", "'")

        # _extract_manga_data() is memcache-decorated, so the dict it
        # returns may be the very object handed out to other callers.
        # Work on a shallow copy so chapter-specific keys (and whatever
        # the caller adds to the result later) never end up in the
        # cached manga data.
        data = dict(self._extract_manga_data(manga_id))

        data["chapter_id"] = self.groups[1]
        data["chapter_type"] = extr("'chapter_type': '", "'")
        # "67.5" -> chapter 67, chapter_minor ".5"; "67" -> 67 and ""
        chapter, sep, minor = extr("'number': '", "'").partition(".")
        data["chapter"] = text.parse_int(chapter)
        data["chapter_minor"] = sep + minor
        return data

    def images(self, page):
        """Return a list of (url, metadata) tuples for the chapter images.

        The 'page' argument is not used; the image list is requested
        separately from '<chapter url>/images'. Each metadata dict holds
        the 'width' and 'height' attribute values as integers.
        """
        referer = self.gallery_url
        url = referer + "/images"
        params = {
            "is_prev"      : "False",
            "current_page" : "1",
            "reading_style": "long_strip",
        }
        # HX-* headers: presumably needed so the site treats this as an
        # htmx fragment request -- TODO confirm
        headers = {
            "Accept"        : "*/*",
            "Referer"       : referer,
            "HX-Request"    : "true",
            "HX-Current-URL": referer,
        }
        page = self.request(url, params=params, headers=headers).text
        extr = text.extract_from(page)

        results = []
        while True:
            src = extr(' src="', '"')
            if not src:
                # no further 'src' attribute: all images collected
                break
            results.append((src, {
                "width" : text.parse_int(extr(' width="' , '"')),
                "height": text.parse_int(extr(' height="', '"')),
            }))
        return results
class WeebcentralMangaExtractor(WeebcentralBase, MangaExtractor):
    """Extractor for manga from weebcentral.com"""
    chapterclass = WeebcentralChapterExtractor
    pattern = BASE_PATTERN + r"/series/(\w+)"
    example = "https://weebcentral.com/series/01J7ABCDEFGHIJKLMNOPQRSTUV/TITLE"

    def __init__(self, match):
        MangaExtractor.__init__(self, match, False)

    def chapters(self, _):
        """Return a list of (chapter url, metadata) tuples for a series.

        The argument is ignored; the chapter list is requested from
        '<series url>/full-chapter-list' and every entry is combined with
        the manga-level metadata from _extract_manga_data().
        """
        manga_id = self.groups[0]
        series_url = "{}/series/{}".format(self.root, manga_id)
        response = self.request(series_url + "/full-chapter-list", headers={
            "Accept"        : "*/*",
            "Referer"       : series_url,
            "HX-Request"    : "true",
            "HX-Target"     : "chapter-list",
            "HX-Current-URL": series_url,
        })
        extr = text.extract_from(response.text)
        manga = self._extract_manga_data(manga_id)
        prefix = self.root + "/chapters/"

        results = []
        chapter_id = extr("/chapters/", '"')
        while chapter_id:
            # label text is split at its first space into type and number,
            # e.g. "Navigation 67.5" -> "Navigation", "67.5"
            label = extr('<span class="">', "<")
            kind, _sep, number = label.partition(" ")
            major, dot, minor = number.partition(".")
            # the last 5 characters of the timestamp are cut off before
            # parsing it with the format below
            timestamp = extr(' datetime="', '"')[:-5]

            entry = {
                "chapter_id"   : chapter_id,
                "chapter"      : text.parse_int(major),
                "chapter_minor": dot + minor,
                "chapter_type" : kind,
                "date"         : text.parse_datetime(
                    timestamp, "%Y-%m-%dT%H:%M:%S"),
            }
            entry.update(manga)
            results.append((prefix + chapter_id, entry))

            chapter_id = extr("/chapters/", '"')
        return results

View File

@@ -155,6 +155,7 @@ CATEGORY_MAP = {
"wallpapercave" : "Wallpaper Cave",
"webmshare" : "webmshare",
"webtoons" : "Webtoon",
"weebcentral" : "Weeb Central",
"wikiart" : "WikiArt.org",
"wikigg" : "wiki.gg",
"wikimediacommons": "Wikimedia Commons",

View File

@@ -0,0 +1,78 @@
# -*- coding: utf-8 -*-
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
from gallery_dl.extractor import weebcentral
# Expected extraction results for the weebcentral extractors.
# Keys starting with "#" presumably address the test harness itself
# (URL, extractor class, URL pattern, result count); all other keys are
# expected metadata values -- confirm against the test runner.
__tests__ = (
# single chapter ("Aria", chapter 67.5) handled by the chapter extractor
{
"#url" : "https://weebcentral.com/chapters/01J76XZ4PC3VW91BYFBQJA44C3",
"#class" : weebcentral.WeebcentralChapterExtractor,
"#pattern" : r"https://official\.lowee\.us/manga/Aria/0067\.5-0\d\d\.png",
"#count" : 17,
"author" : ["AMANO Kozue"],
"chapter" : 67,
"chapter_id" : "01J76XZ4PC3VW91BYFBQJA44C3",
"chapter_minor": ".5",
"chapter_type" : "Navigation",
"count" : 17,
"description" : "On the planet Aqua, a world once known as Mars, Mizunashi Akari has just made her home in the town of Neo-VENEZIA, a futuristic imitation of the ancient city of Venice. The technology of \"Man Home\" (formerly Earth) has not entirely reached this planet, and Akari is alone, having no contact with family or friends. Nonetheless, the town, with its charming labyrinths of rivers and canals, becomes Akari's new infatuation, along with the dream of becoming a full-fledged gondolier. Reverting to a more \"primitive\" lifestyle and pursuing a new trade, the character of Akari becomes both adventurous and heartwarming all at once.",
"extension" : "png",
"filename" : r"re:0067\.5-0\d\d",
"width" : {1129, 2133},
"height" : {1511, 1600},
"lang" : "en",
"language" : "English",
"manga" : "Aria",
"manga_id" : "01J76XY8G1GK8EJ9VQG92C3DKM",
"official" : True,
"page" : range(1, 17),
"release" : "2002",
"status" : "Complete",
"type" : "Manga",
"tags" : [
"Adventure",
"Comedy",
"Drama",
"Sci-fi",
"Shounen",
"Slice of Life",
],
},
# whole series ("Aria") handled by the manga extractor; the produced
# URLs are matched against the chapter extractor's pattern
{
"#url" : "https://weebcentral.com/series/01J76XY8G1GK8EJ9VQG92C3DKM/Aria",
"#class" : weebcentral.WeebcentralMangaExtractor,
"#pattern" : weebcentral.WeebcentralChapterExtractor.pattern,
"#count" : 75,
"author" : ["AMANO Kozue"],
"chapter" : range(1, 70),
"chapter_id" : r"re:01J\w{23}",
"chapter_minor": {"", ".5"},
"chapter_type" : "Navigation",
"date" : "type:datetime",
"description" : "On the planet Aqua, a world once known as Mars, Mizunashi Akari has just made her home in the town of Neo-VENEZIA, a futuristic imitation of the ancient city of Venice. The technology of \"Man Home\" (formerly Earth) has not entirely reached this planet, and Akari is alone, having no contact with family or friends. Nonetheless, the town, with its charming labyrinths of rivers and canals, becomes Akari's new infatuation, along with the dream of becoming a full-fledged gondolier. Reverting to a more \"primitive\" lifestyle and pursuing a new trade, the character of Akari becomes both adventurous and heartwarming all at once.",
"lang" : "en",
"language" : "English",
"manga" : "Aria",
"manga_id" : "01J76XY8G1GK8EJ9VQG92C3DKM",
"official" : True,
"release" : "2002",
"status" : "Complete",
"type" : "Manga",
"tags" : [
"Adventure",
"Comedy",
"Drama",
"Sci-fi",
"Shounen",
"Slice of Life",
],
},
)