[dynastyscans] add 'anthology' extractor (#7627)

This commit is contained in:
Mike Fährmann
2025-06-04 20:16:37 +02:00
parent b5334f5837
commit 685836f6fd
5 changed files with 109 additions and 1 deletions

View File

@@ -2389,6 +2389,17 @@ Description
You can follow `this guide <https://github.com/Tyrrrz/DiscordChatExporter/blob/master/.docs/Token-and-IDs.md#how-to-get-a-user-token>`__ to get a token. You can follow `this guide <https://github.com/Tyrrrz/DiscordChatExporter/blob/master/.docs/Token-and-IDs.md#how-to-get-a-user-token>`__ to get a token.
extractor.dynastyscans.anthology.metadata
-----------------------------------------
Type
``bool``
Default
``false``
Description
Extract ``alert``, ``description``, and ``status`` metadata
from an anthology's HTML page.
extractor.[E621].metadata extractor.[E621].metadata
------------------------- -------------------------
Type Type

View File

@@ -253,6 +253,12 @@
"subfolders": true "subfolders": true
} }
}, },
"dynastyscans":
{
"anthology": {
"metadata": false
}
},
"exhentai": "exhentai":
{ {
"username": "", "username": "",

View File

@@ -220,7 +220,7 @@ Consider all listed sites to potentially be NSFW.
<tr> <tr>
<td>Dynasty Reader</td> <td>Dynasty Reader</td>
<td>https://dynasty-scans.com/</td> <td>https://dynasty-scans.com/</td>
<td>Chapters, individual Images, Manga, Search Results</td> <td>Anthologies, Chapters, individual Images, Manga, Search Results</td>
<td></td> <td></td>
</tr> </tr>
<tr> <tr>

View File

@@ -10,6 +10,7 @@
from .common import ChapterExtractor, MangaExtractor, Extractor, Message from .common import ChapterExtractor, MangaExtractor, Extractor, Message
from .. import text, util from .. import text, util
from xml.etree import ElementTree
import re import re
BASE_PATTERN = r"(?:https?://)?(?:www\.)?dynasty-scans\.com" BASE_PATTERN = r"(?:https?://)?(?:www\.)?dynasty-scans\.com"
@@ -133,3 +134,44 @@ class DynastyscansImageExtractor(DynastyscansSearchExtractor):
def images(self): def images(self):
return (self.query,) return (self.query,)
class DynastyscansAnthologyExtractor(DynastyscansSearchExtractor):
    """Extractor for dynasty-scans anthologies"""
    subcategory = "anthology"
    pattern = BASE_PATTERN + r"/anthologies/([^/?#]+)"
    example = "https://dynasty-scans.com/anthologies/TITLE"

    def items(self):
        """Yield a Queue message for every <entry> in the anthology feed"""
        anthology_url = "{}/anthologies/{}".format(self.root, self.groups[0])

        # Turn the default-namespace declaration into a plain attribute,
        # so element tags stay unqualified ("entry" instead of "{ns}entry")
        feed_xml = self.request(anthology_url + ".atom").text
        feed = ElementTree.fromstring(feed_xml.replace(" xmlns=", " ns="))

        # A single dict is shared by all yielded messages;
        # its per-entry keys get overwritten on every iteration.
        metadata = {
            "_extractor": DynastyscansChapterExtractor,
            # text of the feed's 4th child without its first 28 characters
            # (presumably a fixed title prefix -- verify against the feed)
            "anthology" : feed[3].text[28:],
        }

        # Optional extra request: scrape fields only found on the HTML page
        if self.config("metadata", False):
            html = self.request(anthology_url).text
            alert_html = text.extr(html, "<div class='alert", "</div>")
            if alert_html:
                metadata["alert"] = text.split_html(alert_html)[1:]
            else:
                metadata["alert"] = ()
            metadata["status"] = text.extr(
                html, "<small>&mdash; ", "</small>")
            metadata["description"] = text.extr(
                html, "<div class='description'>", "</div>")

        date_format = "%Y-%m-%dT%H:%M:%S%z"
        for entry in feed:
            if entry.tag == "entry":
                # Children are addressed by fixed position; the slices drop
                # a leading label from each text (presumably "Author: ",
                # "Scanlator: " and "Tags: " -- verify against the feed)
                fields = entry[6][0]
                metadata["author"] = fields[0].text[8:]
                metadata["scanlator"] = fields[1].text[11:]
                metadata["tags"] = fields[2].text[6:].lower().split(", ")
                metadata["title"] = entry[5].text
                metadata["date"] = text.parse_datetime(
                    entry[1].text, date_format)
                metadata["date_updated"] = text.parse_datetime(
                    entry[2].text, date_format)
                yield Message.Queue, entry[4].text, metadata

View File

@@ -63,4 +63,53 @@ __tests__ = (
"#sha1_metadata": "9f6fd139c372203dcf7237e662a80963ab070cb0", "#sha1_metadata": "9f6fd139c372203dcf7237e662a80963ab070cb0",
}, },
{
"#url" : "https://dynasty-scans.com/anthologies/%C3%A9clair",
"#class" : dynastyscans.DynastyscansAnthologyExtractor,
"#pattern" : dynastyscans.DynastyscansChapterExtractor.pattern,
"#options" : {"metadata": True},
"#count" : 8,
"alert": [
"This manga has been licensed",
"Content licensed for English release has been removed from the reader. You can support the author by purchasing the title when it becomes available.",
],
"anthology" : "Éclair",
"author" : {"Canno", "Kawanami Izumi", "Kagero", "Mekimeki Oukoku", "Itou Hachi", "Isaki Uta", "Nakatani Nio", "Kitao Taki"},
"date" : "type:datetime",
"date_updated" : "type:datetime",
"description" : "<p>A compilation of one-shots from some of the best and most popular recent Yuri mangaka, including Canno (A Kiss and a White Lily for my Dearest Girl), Nakatani Nio (Bloom into you), Amano Shunita (Ayame 14), Itou Hachi (Isn't the Moon Beautiful?/Sayuri's Sister is an Angel) and many more.</p>\n\n<p>A must have for any collection, in my opinion, and a great chance to support all of the fabulous artists at once by buying yourself a copy! - Estherlea</p>",
"scanlator" : {"Estherlea", "/u/ Scanlations"},
"status" : "Licensed",
"title" : str,
"tags" : list,
},
{
"#url" : "https://dynasty-scans.com/anthologies/aashi_to_watashi_gyaru_yuri_anthology",
"#class" : dynastyscans.DynastyscansAnthologyExtractor,
"#urls" : "https://dynasty-scans.com/chapters/dont_call_me_senpai",
"!alert" : (),
"!description" : """<p><a href="https://dynasty-scans.com/anthologies/aashi_to_watashi_gyaru_yuri_anthology_volume_2">Volume 2</a></p>""",
"!status" : "",
"anthology" : "Aashi to Watashi - Gyaru Yuri Anthology",
"author" : "keyyan",
"date" : "dt:2024-03-30 04:07:10",
"date_updated" : "dt:2025-04-04 20:21:36",
"scanlator" : "Arka",
"title" : '''Don't Call Me "Senpai"''',
"tags" : [
"big breasts",
"childhood friends",
"ecchi",
"gyaru",
"height gap",
"prequel",
"romance",
"school girl",
"yuri",
],
},
) )