[manganelo] fix 'manga' extractor (#9059)

This commit is contained in:
Mike Fährmann
2026-02-14 09:17:48 +01:00
parent f1da162d72
commit d99c8c1320
2 changed files with 28 additions and 28 deletions

View File

@@ -1,7 +1,7 @@
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
# Copyright 2020 Jake Mannens # Copyright 2020 Jake Mannens
# Copyright 2021-2025 Mike Fährmann # Copyright 2021-2026 Mike Fährmann
# #
# This program is free software; you can redistribute it and/or modify # This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as # it under the terms of the GNU General Public License version 2 as
@@ -96,34 +96,36 @@ class ManganeloMangaExtractor(ManganeloExtractor, MangaExtractor):
def chapters(self, page): def chapters(self, page):
extr = text.extract_from(page) extr = text.extract_from(page)
manga = text.unescape(extr("<h1>", "<")) url = extr('property="og:url" content="', '"')
author = text.remove_html(extr("<li>Author(s) :", "</a>")) slug = url[url.rfind("/")+1:]
status = extr("<li>Status :", "<").strip()
update = self.parse_datetime(extr( info = {
"<li>Last updated :", "<").strip(), "%b-%d-%Y %I:%M:%S %p") "manga" : text.unescape(extr("<h1>", "<")),
tags = text.split_html(extr(">Genres :", "</li>"))[::2] "manga_url" : url,
"manga_slug": slug,
"author": text.remove_html(extr("<li>Author(s) :", "</li>")),
"status": extr("<li>Status :", "<").strip(),
"date_updated": self.parse_datetime(extr(
"<li>Last updated :", "<").strip(), "%b-%d-%Y %I:%M:%S %p"),
"tags" : text.split_html(extr(">Genres :", "</li>"))[::2],
"lang" : "en",
}
info["tags"].sort()
base = url + "/"
url = f"{self.root}/api/manga/{slug}/chapters?limit=-1"
results = [] results = []
for chapter in text.extract_iter(page, '<div class="row">', '</div>'): data = self.request_json(url)["data"]
url, pos = text.extract(chapter, '<a href="', '"') for ch in data["chapters"]:
title, pos = text.extract(chapter, '>', '</a>', pos) slug = ch["chapter_slug"]
date, pos = text.extract(chapter, '<span title="', '"', pos) chapter, sep, minor = slug[8:].partition("-")
chapter, sep, minor = url.rpartition("/chapter-")[2].partition("-") results.append((base + slug, {
**info,
if url[0] == "/": "chapter": text.parse_int(chapter),
url = self.root + url
results.append((url, {
"manga" : manga,
"author" : author,
"status" : status,
"tags" : tags,
"date_updated": update,
"chapter" : text.parse_int(chapter),
"chapter_minor": (sep and ".") + minor, "chapter_minor": (sep and ".") + minor,
"title" : title.partition(": ")[2], "date" : self.parse_datetime_iso(ch["updated_at"]),
"date" : self.parse_datetime(date, "%b-%d-%Y %H:%M"), "views" : ch["view"],
"lang" : "en",
"language": "English",
})) }))
return results return results

View File

@@ -50,10 +50,8 @@ __tests__ = (
"date" : "type:datetime", "date" : "type:datetime",
"date_updated": "dt:2024-10-30 10:20:58", "date_updated": "dt:2024-10-30 10:20:58",
"lang" : "en", "lang" : "en",
"language": "English",
"manga" : "Aria", "manga" : "Aria",
"status" : "Completed", "status" : "Completed",
"title" : "",
"tags": [ "tags": [
"Adventure", "Adventure",
"Comedy", "Comedy",