[common] add '_extract_nextdata' method
This commit is contained in:
@@ -591,6 +591,10 @@ class Extractor():
|
||||
return util.json_loads(text.extr(
|
||||
page, '<script type="application/ld+json">', "</script>"))
|
||||
|
||||
def _extract_nextdata(self, page):
    """Return the decoded JSON content of the '__NEXT_DATA__' script tag.

    The text between the ' id="__NEXT_DATA__" type="application/json">'
    marker and the following "</script>" in 'page' is extracted with
    text.extr() and handed to util.json_loads().
    """
    payload = text.extr(
        page, ' id="__NEXT_DATA__" type="application/json">', "</script>")
    return util.json_loads(payload)
|
||||
|
||||
def _prepare_ddosguard_cookies(self):
|
||||
if not self.cookies.get("__ddg2", domain=self.cookies_domain):
|
||||
self.cookies.set(
|
||||
|
||||
@@ -43,8 +43,7 @@ class MangaparkChapterExtractor(MangaparkBase, ChapterExtractor):
|
||||
ChapterExtractor.__init__(self, match, url)
|
||||
|
||||
def metadata(self, page):
|
||||
data = util.json_loads(text.extr(
|
||||
page, 'id="__NEXT_DATA__" type="application/json">', '<'))
|
||||
data = self._extract_nextdata(page)
|
||||
chapter = (data["props"]["pageProps"]["dehydratedState"]
|
||||
["queries"][0]["state"]["data"]["data"])
|
||||
manga = chapter["comicNode"]["data"]
|
||||
|
||||
@@ -286,15 +286,12 @@ class PatreonExtractor(Extractor):
|
||||
return [genmap[ft] for ft in filetypes]
|
||||
|
||||
def _extract_bootstrap(self, page):
|
||||
data = text.extr(
|
||||
page, 'id="__NEXT_DATA__" type="application/json">', '</script')
|
||||
if data:
|
||||
try:
|
||||
data = util.json_loads(data)
|
||||
env = data["props"]["pageProps"]["bootstrapEnvelope"]
|
||||
return env.get("pageBootstrap") or env["bootstrap"]
|
||||
except Exception as exc:
|
||||
self.log.debug("%s: %s", exc.__class__.__name__, exc)
|
||||
try:
|
||||
data = self._extract_nextdata(page)
|
||||
env = data["props"]["pageProps"]["bootstrapEnvelope"]
|
||||
return env.get("pageBootstrap") or env["bootstrap"]
|
||||
except Exception as exc:
|
||||
self.log.debug("%s: %s", exc.__class__.__name__, exc)
|
||||
|
||||
bootstrap = text.extr(
|
||||
page, 'window.patreon = {"bootstrap":', '},"apiServer"')
|
||||
|
||||
@@ -10,7 +10,7 @@
|
||||
"""Extractors for https://www.slideshare.net/"""
|
||||
|
||||
from .common import GalleryExtractor
|
||||
from .. import text, util
|
||||
from .. import text
|
||||
|
||||
|
||||
class SlidesharePresentationExtractor(GalleryExtractor):
|
||||
@@ -31,8 +31,7 @@ class SlidesharePresentationExtractor(GalleryExtractor):
|
||||
GalleryExtractor.__init__(self, match, url)
|
||||
|
||||
def metadata(self, page):
|
||||
data = util.json_loads(text.extr(
|
||||
page, 'id="__NEXT_DATA__" type="application/json">', '</script>'))
|
||||
data = self._extract_nextdata(page)
|
||||
self.slideshow = slideshow = data["props"]["pageProps"]["slideshow"]
|
||||
|
||||
return {
|
||||
|
||||
Reference in New Issue
Block a user