[dynastyscans] add 'anthology' extractor (#7627)

This commit is contained in:
Mike Fährmann
2025-06-04 20:16:37 +02:00
parent b5334f5837
commit 685836f6fd
5 changed files with 109 additions and 1 deletions

View File

@@ -10,6 +10,7 @@
from .common import ChapterExtractor, MangaExtractor, Extractor, Message
from .. import text, util
from xml.etree import ElementTree
import re
# Regex fragment matching an optional "http://" or "https://" scheme, an
# optional "www." prefix, and the dynasty-scans.com host. Extractor `pattern`
# strings are built by appending a path expression to it.
BASE_PATTERN = r"(?:https?://)?(?:www\.)?dynasty-scans\.com"
@@ -133,3 +134,44 @@ class DynastyscansImageExtractor(DynastyscansSearchExtractor):
def images(self):
    """Return a one-element tuple holding this extractor's query value"""
    query = self.query
    return (query,)
class DynastyscansAnthologyExtractor(DynastyscansSearchExtractor):
    """Extractor for dynasty-scans anthologies"""
    subcategory = "anthology"
    pattern = BASE_PATTERN + r"/anthologies/([^/?#]+)"
    example = "https://dynasty-scans.com/anthologies/TITLE"

    def items(self):
        """Queue every entry listed in the anthology's Atom feed

        Requests '<root>/anthologies/<name>.atom', parses it, and yields a
        (Message.Queue, url, metadata) tuple for each <entry> element.
        When the 'metadata' option is enabled, the anthology's HTML page is
        requested as well to collect 'alert', 'status' and 'description'.

        Note: one metadata dict is updated in place and yielded again for
        every entry.
        """
        anthology_url = "{}/anthologies/{}".format(self.root, self.groups[0])
        feed_xml = self.request(anthology_url + ".atom").text
        # Turn the default namespace declaration into a plain attribute so
        # that element tags are unqualified names like "entry"
        feed = ElementTree.fromstring(feed_xml.replace(" xmlns=", " ns="))

        metadata = {
            "_extractor": DynastyscansChapterExtractor,
            # text of the feed's 4th child minus a fixed 28-character prefix
            # NOTE(review): presumably the feed title - confirm against feed
            "anthology" : feed[3].text[28:],
        }

        if self.config("metadata", False):
            html = self.request(anthology_url).text
            alert_html = text.extr(html, "<div class='alert", "</div>")
            if alert_html:
                metadata["alert"] = text.split_html(alert_html)[1:]
            else:
                metadata["alert"] = ()
            metadata["status"] = text.extr(
                html, "<small>&mdash; ", "</small>")
            metadata["description"] = text.extr(
                html, "<div class='description'>", "</div>")

        date_format = "%Y-%m-%dT%H:%M:%S%z"
        for entry in (child for child in feed if child.tag == "entry"):
            # Children are addressed by position; the slice offsets drop a
            # fixed-length leading label from each text field
            # NOTE(review): lengths 8/11/6 fit "Author: ", "Scanlator: "
            # and "Tags: " - confirm against an actual feed
            details = entry[6][0]
            metadata["author"] = details[0].text[8:]
            metadata["scanlator"] = details[1].text[11:]
            metadata["tags"] = details[2].text[6:].lower().split(", ")
            metadata["title"] = entry[5].text
            metadata["date"] = text.parse_datetime(
                entry[1].text, date_format)
            metadata["date_updated"] = text.parse_datetime(
                entry[2].text, date_format)
            yield Message.Queue, entry[4].text, metadata