diff --git a/gallery_dl/extractor/agnph.py b/gallery_dl/extractor/agnph.py index 653b73f1..2c45bf3a 100644 --- a/gallery_dl/extractor/agnph.py +++ b/gallery_dl/extractor/agnph.py @@ -11,7 +11,6 @@ from . import booru from .. import text -from xml.etree import ElementTree import collections import re @@ -52,8 +51,7 @@ class AgnphExtractor(booru.BooruExtractor): params["page"] = self.page_start while True: - data = self.request(url, params=params).text - root = ElementTree.fromstring(data) + root = self.request_xml(url, params=params) yield from map(self._xml_to_dict, root) @@ -109,5 +107,5 @@ class AgnphPostExtractor(AgnphExtractor): def posts(self): url = "{}/gallery/post/show/{}/?api=xml".format( self.root, self.groups[0]) - post = ElementTree.fromstring(self.request(url).text) + post = self.request_xml(url) return (self._xml_to_dict(post),) diff --git a/gallery_dl/extractor/common.py b/gallery_dl/extractor/common.py index b5cce47c..60a0f2c8 100644 --- a/gallery_dl/extractor/common.py +++ b/gallery_dl/extractor/common.py @@ -20,6 +20,7 @@ import logging import datetime import requests import threading +from xml.etree import ElementTree from requests.adapters import HTTPAdapter from .message import Message from .. import config, output, text, util, cache, exception @@ -252,6 +253,23 @@ class Extractor(): kwargs.setdefault("allow_redirects", False) return self.request(url, **kwargs).headers.get("location", "") + def request_xml(self, url, xmlns=True, **kwargs): + text = self.request(url, **kwargs).text + + if not xmlns: + text = text.replace(" xmlns=", " ns=") + + parser = ElementTree.XMLParser() + try: + parser.feed(text) + return parser.close() + except Exception as exc: + fatal = kwargs.get("fatal", True) + if not fatal or fatal is ...: + self.log.warning("%s: %s", exc.__class__.__name__, exc) + return ElementTree.Element("") + raise + _handle_429 = util.false def wait(self, seconds=None, until=None, adjust=1.0, diff --git a/gallery_dl/extractor/dynastyscans.py b/gallery_dl/extractor/dynastyscans.py index c34c1520..e24b6435 100644 --- a/gallery_dl/extractor/dynastyscans.py +++ b/gallery_dl/extractor/dynastyscans.py @@ -10,7 +10,6 @@ from .common import ChapterExtractor, MangaExtractor, Extractor, Message from .. import text, util -from xml.etree import ElementTree import re BASE_PATTERN = r"(?:https?://)?(?:www\.)?dynasty-scans\.com" @@ -143,9 +142,8 @@ class DynastyscansAnthologyExtractor(DynastyscansSearchExtractor): example = "https://dynasty-scans.com/anthologies/TITLE" def items(self): - url = "{}/anthologies/{}".format(self.root, self.groups[0]) - xml = self.request(url + ".atom").text - root = ElementTree.fromstring(xml.replace(" xmlns=", " ns=")) + url = "{}/anthologies/{}.atom".format(self.root, self.groups[0]) + root = self.request_xml(url, xmlns=False) data = { "_extractor": DynastyscansChapterExtractor, @@ -153,7 +151,7 @@ class DynastyscansAnthologyExtractor(DynastyscansSearchExtractor): } if self.config("metadata", False): - page = self.request(url).text + page = self.request(url[:-5]).text alert = text.extr(page, "