[common] add 'request_xml()' convenience function

This commit is contained in:
Mike Fährmann
2025-06-04 22:12:00 +02:00
parent 38116ce04c
commit a7bbccbd7b
5 changed files with 27 additions and 16 deletions

View File

@@ -11,7 +11,6 @@
from . import booru from . import booru
from .. import text from .. import text
from xml.etree import ElementTree
import collections import collections
import re import re
@@ -52,8 +51,7 @@ class AgnphExtractor(booru.BooruExtractor):
params["page"] = self.page_start params["page"] = self.page_start
while True: while True:
data = self.request(url, params=params).text root = self.request_xml(url, params=params)
root = ElementTree.fromstring(data)
yield from map(self._xml_to_dict, root) yield from map(self._xml_to_dict, root)
@@ -109,5 +107,5 @@ class AgnphPostExtractor(AgnphExtractor):
def posts(self): def posts(self):
url = "{}/gallery/post/show/{}/?api=xml".format( url = "{}/gallery/post/show/{}/?api=xml".format(
self.root, self.groups[0]) self.root, self.groups[0])
post = ElementTree.fromstring(self.request(url).text) post = self.request_xml(url)
return (self._xml_to_dict(post),) return (self._xml_to_dict(post),)

View File

@@ -20,6 +20,7 @@ import logging
import datetime import datetime
import requests import requests
import threading import threading
from xml.etree import ElementTree
from requests.adapters import HTTPAdapter from requests.adapters import HTTPAdapter
from .message import Message from .message import Message
from .. import config, output, text, util, cache, exception from .. import config, output, text, util, cache, exception
@@ -252,6 +253,23 @@ class Extractor():
kwargs.setdefault("allow_redirects", False) kwargs.setdefault("allow_redirects", False)
return self.request(url, **kwargs).headers.get("location", "") return self.request(url, **kwargs).headers.get("location", "")
def request_xml(self, url, xmlns=True, **kwargs):
    """Request 'url' and return its response body parsed as an XML tree

    All extra keyword arguments are forwarded to self.request().

    When 'xmlns' is false, every ' xmlns=' in the response text is
    rewritten to ' ns=' before parsing.

    If parsing raises and the 'fatal' keyword argument is falsy or
    Ellipsis, a warning is logged and an empty Element (tag '') is
    returned; otherwise the exception propagates to the caller.
    """
    content = self.request(url, **kwargs).text
    if not xmlns:
        content = content.replace(" xmlns=", " ns=")

    xml_parser = ElementTree.XMLParser()
    try:
        xml_parser.feed(content)
        return xml_parser.close()
    except Exception as exc:
        fatal = kwargs.get("fatal", True)
        # only a truthy, non-Ellipsis 'fatal' lets the error propagate
        if fatal and fatal is not ...:
            raise
        self.log.warning("%s: %s", exc.__class__.__name__, exc)
        return ElementTree.Element("")
_handle_429 = util.false _handle_429 = util.false
def wait(self, seconds=None, until=None, adjust=1.0, def wait(self, seconds=None, until=None, adjust=1.0,

View File

@@ -10,7 +10,6 @@
from .common import ChapterExtractor, MangaExtractor, Extractor, Message from .common import ChapterExtractor, MangaExtractor, Extractor, Message
from .. import text, util from .. import text, util
from xml.etree import ElementTree
import re import re
BASE_PATTERN = r"(?:https?://)?(?:www\.)?dynasty-scans\.com" BASE_PATTERN = r"(?:https?://)?(?:www\.)?dynasty-scans\.com"
@@ -143,9 +142,8 @@ class DynastyscansAnthologyExtractor(DynastyscansSearchExtractor):
example = "https://dynasty-scans.com/anthologies/TITLE" example = "https://dynasty-scans.com/anthologies/TITLE"
def items(self): def items(self):
url = "{}/anthologies/{}".format(self.root, self.groups[0]) url = "{}/anthologies/{}.atom".format(self.root, self.groups[0])
xml = self.request(url + ".atom").text root = self.request_xml(url, xmlns=False)
root = ElementTree.fromstring(xml.replace(" xmlns=", " ns="))
data = { data = {
"_extractor": DynastyscansChapterExtractor, "_extractor": DynastyscansChapterExtractor,
@@ -153,7 +151,7 @@ class DynastyscansAnthologyExtractor(DynastyscansSearchExtractor):
} }
if self.config("metadata", False): if self.config("metadata", False):
page = self.request(url).text page = self.request(url[:-5]).text
alert = text.extr(page, "<div class='alert", "</div>") alert = text.extr(page, "<div class='alert", "</div>")
data["alert"] = text.split_html(alert)[1:] if alert else () data["alert"] = text.split_html(alert)[1:] if alert else ()

View File

@@ -11,7 +11,6 @@
from . import booru from . import booru
from .. import text, util, exception from .. import text, util, exception
from xml.etree import ElementTree
import collections import collections
import re import re
@@ -26,7 +25,7 @@ class GelbooruV02Extractor(booru.BooruExtractor):
def _api_request(self, params): def _api_request(self, params):
url = self.root_api + "/index.php?page=dapi&s=post&q=index" url = self.root_api + "/index.php?page=dapi&s=post&q=index"
return ElementTree.fromstring(self.request(url, params=params).text) return self.request_xml(url, params=params)
def _pagination(self, params): def _pagination(self, params):
params["pid"] = self.page_start params["pid"] = self.page_start
@@ -38,7 +37,7 @@ class GelbooruV02Extractor(booru.BooruExtractor):
while True: while True:
try: try:
root = self._api_request(params) root = self._api_request(params)
except ElementTree.ParseError: except SyntaxError: # ElementTree.ParseError
if "tags" not in params or post is None: if "tags" not in params or post is None:
raise raise
taglist = [tag for tag in params["tags"].split() taglist = [tag for tag in params["tags"].split()

View File

@@ -12,8 +12,6 @@ from .common import Extractor, Message
from .. import text, util, exception from .. import text, util, exception
from ..cache import cache from ..cache import cache
from xml.etree import ElementTree
BASE_PATTERN = r"(?:https?://)?(?:www\.)?vipergirls\.to" BASE_PATTERN = r"(?:https?://)?(?:www\.)?vipergirls\.to"
@@ -130,7 +128,7 @@ class VipergirlsThreadExtractor(VipergirlsExtractor):
def posts(self): def posts(self):
url = "{}/vr.php?t={}".format(self.root, self.thread_id) url = "{}/vr.php?t={}".format(self.root, self.thread_id)
return ElementTree.fromstring(self.request(url).text) return self.request_xml(url)
class VipergirlsPostExtractor(VipergirlsExtractor): class VipergirlsPostExtractor(VipergirlsExtractor):
@@ -147,4 +145,4 @@ class VipergirlsPostExtractor(VipergirlsExtractor):
def posts(self): def posts(self):
url = "{}/vr.php?p={}".format(self.root, self.post_id) url = "{}/vr.php?p={}".format(self.root, self.post_id)
return ElementTree.fromstring(self.request(url).text) return self.request_xml(url)