[common] add 'request_xml()' convenience function
This commit is contained in:
@@ -11,7 +11,6 @@
|
||||
from . import booru
|
||||
from .. import text
|
||||
|
||||
from xml.etree import ElementTree
|
||||
import collections
|
||||
import re
|
||||
|
||||
@@ -52,8 +51,7 @@ class AgnphExtractor(booru.BooruExtractor):
|
||||
params["page"] = self.page_start
|
||||
|
||||
while True:
|
||||
data = self.request(url, params=params).text
|
||||
root = ElementTree.fromstring(data)
|
||||
root = self.request_xml(url, params=params)
|
||||
|
||||
yield from map(self._xml_to_dict, root)
|
||||
|
||||
@@ -109,5 +107,5 @@ class AgnphPostExtractor(AgnphExtractor):
|
||||
def posts(self):
|
||||
url = "{}/gallery/post/show/{}/?api=xml".format(
|
||||
self.root, self.groups[0])
|
||||
post = ElementTree.fromstring(self.request(url).text)
|
||||
post = self.request_xml(url)
|
||||
return (self._xml_to_dict(post),)
|
||||
|
||||
@@ -20,6 +20,7 @@ import logging
|
||||
import datetime
|
||||
import requests
|
||||
import threading
|
||||
from xml.etree import ElementTree
|
||||
from requests.adapters import HTTPAdapter
|
||||
from .message import Message
|
||||
from .. import config, output, text, util, cache, exception
|
||||
@@ -252,6 +253,23 @@ class Extractor():
|
||||
kwargs.setdefault("allow_redirects", False)
|
||||
return self.request(url, **kwargs).headers.get("location", "")
|
||||
|
||||
def request_xml(self, url, xmlns=True, **kwargs):
    """Fetch 'url' and parse the response body as an XML document.

    Args:
        url: Address handed to self.request().
        xmlns: When false, every occurrence of ' xmlns=' in the response
            text is rewritten to ' ns=' before parsing. The default
            namespace declaration then becomes an ordinary 'ns'
            attribute, so ElementTree does not prefix tag names
            with '{namespace-uri}'.
        **kwargs: Forwarded unchanged to self.request(). The 'fatal'
            entry (default True) is additionally consulted here to
            decide how a parse failure is handled.

    Returns:
        The root Element of the parsed document. If parsing fails and
        'fatal' is falsy or Ellipsis, an empty placeholder Element("")
        is returned instead, so callers can iterate over the result
        without special-casing.

    Raises:
        Whatever exception the XML parser raised, when 'fatal' is truthy
        and not Ellipsis.
    """
    # Named 'content' rather than 'text' so that the module-level
    # 'text' import is not shadowed inside this method.
    content = self.request(url, **kwargs).text

    if not xmlns:
        content = content.replace(" xmlns=", " ns=")

    parser = ElementTree.XMLParser()
    try:
        parser.feed(content)
        return parser.close()
    except Exception as exc:
        fatal = kwargs.get("fatal", True)
        if not fatal or fatal is ...:
            # Non-fatal mode: report the problem and hand back an empty
            # element instead of aborting the extraction.
            self.log.warning("%s: %s", exc.__class__.__name__, exc)
            return ElementTree.Element("")
        raise
|
||||
|
||||
_handle_429 = util.false
|
||||
|
||||
def wait(self, seconds=None, until=None, adjust=1.0,
|
||||
|
||||
@@ -10,7 +10,6 @@
|
||||
|
||||
from .common import ChapterExtractor, MangaExtractor, Extractor, Message
|
||||
from .. import text, util
|
||||
from xml.etree import ElementTree
|
||||
import re
|
||||
|
||||
BASE_PATTERN = r"(?:https?://)?(?:www\.)?dynasty-scans\.com"
|
||||
@@ -143,9 +142,8 @@ class DynastyscansAnthologyExtractor(DynastyscansSearchExtractor):
|
||||
example = "https://dynasty-scans.com/anthologies/TITLE"
|
||||
|
||||
def items(self):
|
||||
url = "{}/anthologies/{}".format(self.root, self.groups[0])
|
||||
xml = self.request(url + ".atom").text
|
||||
root = ElementTree.fromstring(xml.replace(" xmlns=", " ns="))
|
||||
url = "{}/anthologies/{}.atom".format(self.root, self.groups[0])
|
||||
root = self.request_xml(url, xmlns=False)
|
||||
|
||||
data = {
|
||||
"_extractor": DynastyscansChapterExtractor,
|
||||
@@ -153,7 +151,7 @@ class DynastyscansAnthologyExtractor(DynastyscansSearchExtractor):
|
||||
}
|
||||
|
||||
if self.config("metadata", False):
|
||||
page = self.request(url).text
|
||||
page = self.request(url[:-5]).text
|
||||
alert = text.extr(page, "<div class='alert", "</div>")
|
||||
|
||||
data["alert"] = text.split_html(alert)[1:] if alert else ()
|
||||
|
||||
@@ -11,7 +11,6 @@
|
||||
from . import booru
|
||||
from .. import text, util, exception
|
||||
|
||||
from xml.etree import ElementTree
|
||||
import collections
|
||||
import re
|
||||
|
||||
@@ -26,7 +25,7 @@ class GelbooruV02Extractor(booru.BooruExtractor):
|
||||
|
||||
def _api_request(self, params):
|
||||
url = self.root_api + "/index.php?page=dapi&s=post&q=index"
|
||||
return ElementTree.fromstring(self.request(url, params=params).text)
|
||||
return self.request_xml(url, params=params)
|
||||
|
||||
def _pagination(self, params):
|
||||
params["pid"] = self.page_start
|
||||
@@ -38,7 +37,7 @@ class GelbooruV02Extractor(booru.BooruExtractor):
|
||||
while True:
|
||||
try:
|
||||
root = self._api_request(params)
|
||||
except ElementTree.ParseError:
|
||||
except SyntaxError: # ElementTree.ParseError
|
||||
if "tags" not in params or post is None:
|
||||
raise
|
||||
taglist = [tag for tag in params["tags"].split()
|
||||
|
||||
@@ -12,8 +12,6 @@ from .common import Extractor, Message
|
||||
from .. import text, util, exception
|
||||
from ..cache import cache
|
||||
|
||||
from xml.etree import ElementTree
|
||||
|
||||
BASE_PATTERN = r"(?:https?://)?(?:www\.)?vipergirls\.to"
|
||||
|
||||
|
||||
@@ -130,7 +128,7 @@ class VipergirlsThreadExtractor(VipergirlsExtractor):
|
||||
|
||||
def posts(self):
|
||||
url = "{}/vr.php?t={}".format(self.root, self.thread_id)
|
||||
return ElementTree.fromstring(self.request(url).text)
|
||||
return self.request_xml(url)
|
||||
|
||||
|
||||
class VipergirlsPostExtractor(VipergirlsExtractor):
|
||||
@@ -147,4 +145,4 @@ class VipergirlsPostExtractor(VipergirlsExtractor):
|
||||
|
||||
def posts(self):
|
||||
url = "{}/vr.php?p={}".format(self.root, self.post_id)
|
||||
return ElementTree.fromstring(self.request(url).text)
|
||||
return self.request_xml(url)
|
||||
|
||||
Reference in New Issue
Block a user