[common] add '_extract_jsonld' method (#5272)
This commit is contained in:
@@ -26,8 +26,7 @@ class BbcGalleryExtractor(GalleryExtractor):
|
||||
example = "https://www.bbc.co.uk/programmes/PATH"
|
||||
|
||||
def metadata(self, page):
|
||||
data = util.json_loads(text.extr(
|
||||
page, '<script type="application/ld+json">', '</script>'))
|
||||
data = self._extract_jsonld(page)
|
||||
return {
|
||||
"programme": self.gallery_url.split("/")[4],
|
||||
"path": list(util.unique_sequence(
|
||||
|
||||
@@ -9,7 +9,7 @@
|
||||
"""Extractors for https://ci-en.net/"""
|
||||
|
||||
from .common import Extractor, Message
|
||||
from .. import text, util
|
||||
from .. import text
|
||||
|
||||
BASE_PATTERN = r"(?:https?://)?ci-en\.(?:net|dlsite\.com)"
|
||||
|
||||
@@ -56,11 +56,8 @@ class CienArticleExtractor(CienExtractor):
|
||||
self.root, self.groups[0], self.groups[1])
|
||||
page = self.request(url, notfound="article").text
|
||||
|
||||
post = util.json_loads(text.extr(
|
||||
page, '<script type="application/ld+json">', '</script>'))[0]
|
||||
|
||||
files = self._extract_files(page)
|
||||
|
||||
post = self._extract_jsonld(page)[0]
|
||||
post["post_url"] = url
|
||||
post["post_id"] = text.parse_int(self.groups[1])
|
||||
post["count"] = len(files)
|
||||
|
||||
@@ -587,6 +587,10 @@ class Extractor():
|
||||
return True
|
||||
return False
|
||||
|
||||
def _extract_jsonld(self, page):
|
||||
return util.json_loads(text.extr(
|
||||
page, '<script type="application/ld+json">', "</script>"))
|
||||
|
||||
def _prepare_ddosguard_cookies(self):
|
||||
if not self.cookies.get("__ddg2", domain=self.cookies_domain):
|
||||
self.cookies.set(
|
||||
|
||||
@@ -9,7 +9,7 @@
|
||||
"""Extractors for https://www.imagefap.com/"""
|
||||
|
||||
from .common import Extractor, Message
|
||||
from .. import text, util, exception
|
||||
from .. import text, exception
|
||||
|
||||
BASE_PATTERN = r"(?:https?://)?(?:www\.|beta\.)?imagefap\.com"
|
||||
|
||||
@@ -129,13 +129,11 @@ class ImagefapImageExtractor(ImagefapExtractor):
|
||||
|
||||
url, pos = text.extract(
|
||||
page, 'original="', '"')
|
||||
info, pos = text.extract(
|
||||
page, '<script type="application/ld+json">', '</script>', pos)
|
||||
image_id, pos = text.extract(
|
||||
page, 'id="imageid_input" value="', '"', pos)
|
||||
gallery_id, pos = text.extract(
|
||||
page, 'id="galleryid_input" value="', '"', pos)
|
||||
info = util.json_loads(info)
|
||||
info = self._extract_jsonld(page)
|
||||
|
||||
return url, text.nameext_from_url(url, {
|
||||
"title": text.unescape(info["name"]),
|
||||
|
||||
Reference in New Issue
Block a user