unescape items in text.split_html()

This commit is contained in:
Mike Fährmann
2021-03-29 02:12:29 +02:00
parent 36291176bc
commit 387fe415d5
4 changed files with 13 additions and 9 deletions

View File

@@ -126,8 +126,8 @@ class AryionExtractor(Extractor):
"user" : self.user or artist,
"title" : title,
"artist": artist,
-"path" : text.split_html(text.unescape(extr(
-"cookiecrumb'>", '</span')))[4:-1:2],
+"path" : text.split_html(extr(
+"cookiecrumb'>", '</span'))[4:-1:2],
"date" : extr("class='pretty-date' title='", "'"),
"size" : text.parse_int(clen),
"views" : text.parse_int(extr("Views</b>:", "<").replace(",", "")),

View File

@@ -1,19 +1,18 @@
# -*- coding: utf-8 -*-
-# Copyright 2015-2019 Mike Fährmann
+# Copyright 2015-2021 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
-"""Extract manga-chapters from https://dynasty-scans.com/"""
+"""Extractors for https://dynasty-scans.com/"""
from .common import ChapterExtractor, Extractor, Message
from .. import text
import json
import re
BASE_PATTERN = r"(?:https?://)?(?:www\.)?dynasty-scans\.com"
@@ -36,7 +35,7 @@ class DynastyscansBase():
return {
"url" : self.root + url,
"image_id": text.parse_int(image_id),
-"tags" : text.split_html(text.unescape(tags)),
+"tags" : text.split_html(tags),
"date" : text.remove_html(date),
"source" : text.unescape(src),
}