[common] add 'request_xml()' convenience function

This commit is contained in:
Mike Fährmann
2025-06-04 22:12:00 +02:00
parent 38116ce04c
commit a7bbccbd7b
5 changed files with 27 additions and 16 deletions

View File

@@ -11,7 +11,6 @@
from . import booru
from .. import text
from xml.etree import ElementTree
import collections
import re
@@ -52,8 +51,7 @@ class AgnphExtractor(booru.BooruExtractor):
params["page"] = self.page_start
while True:
data = self.request(url, params=params).text
root = ElementTree.fromstring(data)
root = self.request_xml(url, params=params)
yield from map(self._xml_to_dict, root)
@@ -109,5 +107,5 @@ class AgnphPostExtractor(AgnphExtractor):
def posts(self):
url = "{}/gallery/post/show/{}/?api=xml".format(
self.root, self.groups[0])
post = ElementTree.fromstring(self.request(url).text)
post = self.request_xml(url)
return (self._xml_to_dict(post),)

View File

@@ -20,6 +20,7 @@ import logging
import datetime
import requests
import threading
from xml.etree import ElementTree
from requests.adapters import HTTPAdapter
from .message import Message
from .. import config, output, text, util, cache, exception
@@ -252,6 +253,23 @@ class Extractor():
kwargs.setdefault("allow_redirects", False)
return self.request(url, **kwargs).headers.get("location", "")
def request_xml(self, url, xmlns=True, **kwargs):
    """Request 'url' and parse the response body as XML.

    The response is fetched with self.request(url, **kwargs) and its
    '.text' is fed to an ElementTree.XMLParser.

    Parameters:
        url: Address passed through to self.request().
        xmlns: When false, every ' xmlns=' in the response text is
            rewritten to ' ns=' before parsing, turning default-namespace
            declarations into ordinary attributes so that element tags
            are not namespace-qualified.
        **kwargs: Forwarded unchanged to self.request(). The 'fatal'
            entry (default True) also controls parse-error handling.

    Returns:
        The root element produced by the parser. If parsing fails and
        'fatal' is falsy or Ellipsis, an empty Element with tag "".

    Raises:
        Whatever the parser raised, when 'fatal' is truthy and
        not Ellipsis.
    """
    # Named 'content' rather than 'text' to avoid shadowing the
    # module-level 'text' import inside this method.
    content = self.request(url, **kwargs).text
    if not xmlns:
        content = content.replace(" xmlns=", " ns=")

    parser = ElementTree.XMLParser()
    try:
        parser.feed(content)
        # close() can fail as well, e.g. on a truncated document
        return parser.close()
    except Exception as exc:
        fatal = kwargs.get("fatal", True)
        if not fatal or fatal is ...:
            # Non-fatal request: report the problem and hand back an
            # empty element so callers can still iterate over the result
            self.log.warning("%s: %s", exc.__class__.__name__, exc)
            return ElementTree.Element("")
        raise
_handle_429 = util.false
def wait(self, seconds=None, until=None, adjust=1.0,

View File

@@ -10,7 +10,6 @@
from .common import ChapterExtractor, MangaExtractor, Extractor, Message
from .. import text, util
from xml.etree import ElementTree
import re
BASE_PATTERN = r"(?:https?://)?(?:www\.)?dynasty-scans\.com"
@@ -143,9 +142,8 @@ class DynastyscansAnthologyExtractor(DynastyscansSearchExtractor):
example = "https://dynasty-scans.com/anthologies/TITLE"
def items(self):
url = "{}/anthologies/{}".format(self.root, self.groups[0])
xml = self.request(url + ".atom").text
root = ElementTree.fromstring(xml.replace(" xmlns=", " ns="))
url = "{}/anthologies/{}.atom".format(self.root, self.groups[0])
root = self.request_xml(url, xmlns=False)
data = {
"_extractor": DynastyscansChapterExtractor,
@@ -153,7 +151,7 @@ class DynastyscansAnthologyExtractor(DynastyscansSearchExtractor):
}
if self.config("metadata", False):
page = self.request(url).text
page = self.request(url[:-5]).text
alert = text.extr(page, "<div class='alert", "</div>")
data["alert"] = text.split_html(alert)[1:] if alert else ()

View File

@@ -11,7 +11,6 @@
from . import booru
from .. import text, util, exception
from xml.etree import ElementTree
import collections
import re
@@ -26,7 +25,7 @@ class GelbooruV02Extractor(booru.BooruExtractor):
def _api_request(self, params):
url = self.root_api + "/index.php?page=dapi&s=post&q=index"
return ElementTree.fromstring(self.request(url, params=params).text)
return self.request_xml(url, params=params)
def _pagination(self, params):
params["pid"] = self.page_start
@@ -38,7 +37,7 @@ class GelbooruV02Extractor(booru.BooruExtractor):
while True:
try:
root = self._api_request(params)
except ElementTree.ParseError:
except SyntaxError: # ElementTree.ParseError
if "tags" not in params or post is None:
raise
taglist = [tag for tag in params["tags"].split()

View File

@@ -12,8 +12,6 @@ from .common import Extractor, Message
from .. import text, util, exception
from ..cache import cache
from xml.etree import ElementTree
BASE_PATTERN = r"(?:https?://)?(?:www\.)?vipergirls\.to"
@@ -130,7 +128,7 @@ class VipergirlsThreadExtractor(VipergirlsExtractor):
def posts(self):
url = "{}/vr.php?t={}".format(self.root, self.thread_id)
return ElementTree.fromstring(self.request(url).text)
return self.request_xml(url)
class VipergirlsPostExtractor(VipergirlsExtractor):
@@ -147,4 +145,4 @@ class VipergirlsPostExtractor(VipergirlsExtractor):
def posts(self):
url = "{}/vr.php?p={}".format(self.root, self.post_id)
return ElementTree.fromstring(self.request(url).text)
return self.request_xml(url)