[manganelo] fix 'manga' extractor (#9059)
This commit is contained in:
@@ -1,7 +1,7 @@
|
|||||||
# -*- coding: utf-8 -*-
|
# -*- coding: utf-8 -*-
|
||||||
|
|
||||||
# Copyright 2020 Jake Mannens
|
# Copyright 2020 Jake Mannens
|
||||||
# Copyright 2021-2025 Mike Fährmann
|
# Copyright 2021-2026 Mike Fährmann
|
||||||
#
|
#
|
||||||
# This program is free software; you can redistribute it and/or modify
|
# This program is free software; you can redistribute it and/or modify
|
||||||
# it under the terms of the GNU General Public License version 2 as
|
# it under the terms of the GNU General Public License version 2 as
|
||||||
@@ -96,34 +96,36 @@ class ManganeloMangaExtractor(ManganeloExtractor, MangaExtractor):
|
|||||||
def chapters(self, page):
|
def chapters(self, page):
|
||||||
extr = text.extract_from(page)
|
extr = text.extract_from(page)
|
||||||
|
|
||||||
manga = text.unescape(extr("<h1>", "<"))
|
url = extr('property="og:url" content="', '"')
|
||||||
author = text.remove_html(extr("<li>Author(s) :", "</a>"))
|
slug = url[url.rfind("/")+1:]
|
||||||
status = extr("<li>Status :", "<").strip()
|
|
||||||
update = self.parse_datetime(extr(
|
info = {
|
||||||
"<li>Last updated :", "<").strip(), "%b-%d-%Y %I:%M:%S %p")
|
"manga" : text.unescape(extr("<h1>", "<")),
|
||||||
tags = text.split_html(extr(">Genres :", "</li>"))[::2]
|
"manga_url" : url,
|
||||||
|
"manga_slug": slug,
|
||||||
|
"author": text.remove_html(extr("<li>Author(s) :", "</li>")),
|
||||||
|
"status": extr("<li>Status :", "<").strip(),
|
||||||
|
"date_updated": self.parse_datetime(extr(
|
||||||
|
"<li>Last updated :", "<").strip(), "%b-%d-%Y %I:%M:%S %p"),
|
||||||
|
"tags" : text.split_html(extr(">Genres :", "</li>"))[::2],
|
||||||
|
"lang" : "en",
|
||||||
|
}
|
||||||
|
info["tags"].sort()
|
||||||
|
|
||||||
|
base = url + "/"
|
||||||
|
url = f"{self.root}/api/manga/{slug}/chapters?limit=-1"
|
||||||
|
|
||||||
results = []
|
results = []
|
||||||
for chapter in text.extract_iter(page, '<div class="row">', '</div>'):
|
data = self.request_json(url)["data"]
|
||||||
url, pos = text.extract(chapter, '<a href="', '"')
|
for ch in data["chapters"]:
|
||||||
title, pos = text.extract(chapter, '>', '</a>', pos)
|
slug = ch["chapter_slug"]
|
||||||
date, pos = text.extract(chapter, '<span title="', '"', pos)
|
chapter, sep, minor = slug[8:].partition("-")
|
||||||
chapter, sep, minor = url.rpartition("/chapter-")[2].partition("-")
|
results.append((base + slug, {
|
||||||
|
**info,
|
||||||
if url[0] == "/":
|
"chapter": text.parse_int(chapter),
|
||||||
url = self.root + url
|
|
||||||
results.append((url, {
|
|
||||||
"manga" : manga,
|
|
||||||
"author" : author,
|
|
||||||
"status" : status,
|
|
||||||
"tags" : tags,
|
|
||||||
"date_updated": update,
|
|
||||||
"chapter" : text.parse_int(chapter),
|
|
||||||
"chapter_minor": (sep and ".") + minor,
|
"chapter_minor": (sep and ".") + minor,
|
||||||
"title" : title.partition(": ")[2],
|
"date" : self.parse_datetime_iso(ch["updated_at"]),
|
||||||
"date" : self.parse_datetime(date, "%b-%d-%Y %H:%M"),
|
"views" : ch["view"],
|
||||||
"lang" : "en",
|
|
||||||
"language": "English",
|
|
||||||
}))
|
}))
|
||||||
return results
|
return results
|
||||||
|
|
||||||
|
|||||||
@@ -50,10 +50,8 @@ __tests__ = (
|
|||||||
"date" : "type:datetime",
|
"date" : "type:datetime",
|
||||||
"date_updated": "dt:2024-10-30 10:20:58",
|
"date_updated": "dt:2024-10-30 10:20:58",
|
||||||
"lang" : "en",
|
"lang" : "en",
|
||||||
"language": "English",
|
|
||||||
"manga" : "Aria",
|
"manga" : "Aria",
|
||||||
"status" : "Completed",
|
"status" : "Completed",
|
||||||
"title" : "",
|
|
||||||
"tags": [
|
"tags": [
|
||||||
"Adventure",
|
"Adventure",
|
||||||
"Comedy",
|
"Comedy",
|
||||||
|
|||||||
Reference in New Issue
Block a user