[common] add 'request_xml()' convenience function
This commit is contained in:
@@ -11,7 +11,6 @@
|
|||||||
from . import booru
|
from . import booru
|
||||||
from .. import text
|
from .. import text
|
||||||
|
|
||||||
from xml.etree import ElementTree
|
|
||||||
import collections
|
import collections
|
||||||
import re
|
import re
|
||||||
|
|
||||||
@@ -52,8 +51,7 @@ class AgnphExtractor(booru.BooruExtractor):
|
|||||||
params["page"] = self.page_start
|
params["page"] = self.page_start
|
||||||
|
|
||||||
while True:
|
while True:
|
||||||
data = self.request(url, params=params).text
|
root = self.request_xml(url, params=params)
|
||||||
root = ElementTree.fromstring(data)
|
|
||||||
|
|
||||||
yield from map(self._xml_to_dict, root)
|
yield from map(self._xml_to_dict, root)
|
||||||
|
|
||||||
@@ -109,5 +107,5 @@ class AgnphPostExtractor(AgnphExtractor):
|
|||||||
def posts(self):
|
def posts(self):
|
||||||
url = "{}/gallery/post/show/{}/?api=xml".format(
|
url = "{}/gallery/post/show/{}/?api=xml".format(
|
||||||
self.root, self.groups[0])
|
self.root, self.groups[0])
|
||||||
post = ElementTree.fromstring(self.request(url).text)
|
post = self.request_xml(url)
|
||||||
return (self._xml_to_dict(post),)
|
return (self._xml_to_dict(post),)
|
||||||
|
|||||||
@@ -20,6 +20,7 @@ import logging
|
|||||||
import datetime
|
import datetime
|
||||||
import requests
|
import requests
|
||||||
import threading
|
import threading
|
||||||
|
from xml.etree import ElementTree
|
||||||
from requests.adapters import HTTPAdapter
|
from requests.adapters import HTTPAdapter
|
||||||
from .message import Message
|
from .message import Message
|
||||||
from .. import config, output, text, util, cache, exception
|
from .. import config, output, text, util, cache, exception
|
||||||
@@ -252,6 +253,23 @@ class Extractor():
|
|||||||
kwargs.setdefault("allow_redirects", False)
|
kwargs.setdefault("allow_redirects", False)
|
||||||
return self.request(url, **kwargs).headers.get("location", "")
|
return self.request(url, **kwargs).headers.get("location", "")
|
||||||
|
|
||||||
|
def request_xml(self, url, xmlns=True, **kwargs):
|
||||||
|
text = self.request(url, **kwargs).text
|
||||||
|
|
||||||
|
if not xmlns:
|
||||||
|
text = text.replace(" xmlns=", " ns=")
|
||||||
|
|
||||||
|
parser = ElementTree.XMLParser()
|
||||||
|
try:
|
||||||
|
parser.feed(text)
|
||||||
|
return parser.close()
|
||||||
|
except Exception as exc:
|
||||||
|
fatal = kwargs.get("fatal", True)
|
||||||
|
if not fatal or fatal is ...:
|
||||||
|
self.log.warning("%s: %s", exc.__class__.__name__, exc)
|
||||||
|
return ElementTree.Element("")
|
||||||
|
raise
|
||||||
|
|
||||||
_handle_429 = util.false
|
_handle_429 = util.false
|
||||||
|
|
||||||
def wait(self, seconds=None, until=None, adjust=1.0,
|
def wait(self, seconds=None, until=None, adjust=1.0,
|
||||||
|
|||||||
@@ -10,7 +10,6 @@
|
|||||||
|
|
||||||
from .common import ChapterExtractor, MangaExtractor, Extractor, Message
|
from .common import ChapterExtractor, MangaExtractor, Extractor, Message
|
||||||
from .. import text, util
|
from .. import text, util
|
||||||
from xml.etree import ElementTree
|
|
||||||
import re
|
import re
|
||||||
|
|
||||||
BASE_PATTERN = r"(?:https?://)?(?:www\.)?dynasty-scans\.com"
|
BASE_PATTERN = r"(?:https?://)?(?:www\.)?dynasty-scans\.com"
|
||||||
@@ -143,9 +142,8 @@ class DynastyscansAnthologyExtractor(DynastyscansSearchExtractor):
|
|||||||
example = "https://dynasty-scans.com/anthologies/TITLE"
|
example = "https://dynasty-scans.com/anthologies/TITLE"
|
||||||
|
|
||||||
def items(self):
|
def items(self):
|
||||||
url = "{}/anthologies/{}".format(self.root, self.groups[0])
|
url = "{}/anthologies/{}.atom".format(self.root, self.groups[0])
|
||||||
xml = self.request(url + ".atom").text
|
root = self.request_xml(url, xmlns=False)
|
||||||
root = ElementTree.fromstring(xml.replace(" xmlns=", " ns="))
|
|
||||||
|
|
||||||
data = {
|
data = {
|
||||||
"_extractor": DynastyscansChapterExtractor,
|
"_extractor": DynastyscansChapterExtractor,
|
||||||
@@ -153,7 +151,7 @@ class DynastyscansAnthologyExtractor(DynastyscansSearchExtractor):
|
|||||||
}
|
}
|
||||||
|
|
||||||
if self.config("metadata", False):
|
if self.config("metadata", False):
|
||||||
page = self.request(url).text
|
page = self.request(url[:-5]).text
|
||||||
alert = text.extr(page, "<div class='alert", "</div>")
|
alert = text.extr(page, "<div class='alert", "</div>")
|
||||||
|
|
||||||
data["alert"] = text.split_html(alert)[1:] if alert else ()
|
data["alert"] = text.split_html(alert)[1:] if alert else ()
|
||||||
|
|||||||
@@ -11,7 +11,6 @@
|
|||||||
from . import booru
|
from . import booru
|
||||||
from .. import text, util, exception
|
from .. import text, util, exception
|
||||||
|
|
||||||
from xml.etree import ElementTree
|
|
||||||
import collections
|
import collections
|
||||||
import re
|
import re
|
||||||
|
|
||||||
@@ -26,7 +25,7 @@ class GelbooruV02Extractor(booru.BooruExtractor):
|
|||||||
|
|
||||||
def _api_request(self, params):
|
def _api_request(self, params):
|
||||||
url = self.root_api + "/index.php?page=dapi&s=post&q=index"
|
url = self.root_api + "/index.php?page=dapi&s=post&q=index"
|
||||||
return ElementTree.fromstring(self.request(url, params=params).text)
|
return self.request_xml(url, params=params)
|
||||||
|
|
||||||
def _pagination(self, params):
|
def _pagination(self, params):
|
||||||
params["pid"] = self.page_start
|
params["pid"] = self.page_start
|
||||||
@@ -38,7 +37,7 @@ class GelbooruV02Extractor(booru.BooruExtractor):
|
|||||||
while True:
|
while True:
|
||||||
try:
|
try:
|
||||||
root = self._api_request(params)
|
root = self._api_request(params)
|
||||||
except ElementTree.ParseError:
|
except SyntaxError: # ElementTree.ParseError
|
||||||
if "tags" not in params or post is None:
|
if "tags" not in params or post is None:
|
||||||
raise
|
raise
|
||||||
taglist = [tag for tag in params["tags"].split()
|
taglist = [tag for tag in params["tags"].split()
|
||||||
|
|||||||
@@ -12,8 +12,6 @@ from .common import Extractor, Message
|
|||||||
from .. import text, util, exception
|
from .. import text, util, exception
|
||||||
from ..cache import cache
|
from ..cache import cache
|
||||||
|
|
||||||
from xml.etree import ElementTree
|
|
||||||
|
|
||||||
BASE_PATTERN = r"(?:https?://)?(?:www\.)?vipergirls\.to"
|
BASE_PATTERN = r"(?:https?://)?(?:www\.)?vipergirls\.to"
|
||||||
|
|
||||||
|
|
||||||
@@ -130,7 +128,7 @@ class VipergirlsThreadExtractor(VipergirlsExtractor):
|
|||||||
|
|
||||||
def posts(self):
|
def posts(self):
|
||||||
url = "{}/vr.php?t={}".format(self.root, self.thread_id)
|
url = "{}/vr.php?t={}".format(self.root, self.thread_id)
|
||||||
return ElementTree.fromstring(self.request(url).text)
|
return self.request_xml(url)
|
||||||
|
|
||||||
|
|
||||||
class VipergirlsPostExtractor(VipergirlsExtractor):
|
class VipergirlsPostExtractor(VipergirlsExtractor):
|
||||||
@@ -147,4 +145,4 @@ class VipergirlsPostExtractor(VipergirlsExtractor):
|
|||||||
|
|
||||||
def posts(self):
|
def posts(self):
|
||||||
url = "{}/vr.php?p={}".format(self.root, self.post_id)
|
url = "{}/vr.php?p={}".format(self.root, self.post_id)
|
||||||
return ElementTree.fromstring(self.request(url).text)
|
return self.request_xml(url)
|
||||||
|
|||||||
Reference in New Issue
Block a user