[common] add '_extract_nextdata' method
This commit is contained in:
@@ -591,6 +591,10 @@ class Extractor():
|
|||||||
return util.json_loads(text.extr(
|
return util.json_loads(text.extr(
|
||||||
page, '<script type="application/ld+json">', "</script>"))
|
page, '<script type="application/ld+json">', "</script>"))
|
||||||
|
|
||||||
|
def _extract_nextdata(self, page):
|
||||||
|
return util.json_loads(text.extr(
|
||||||
|
page, ' id="__NEXT_DATA__" type="application/json">', "</script>"))
|
||||||
|
|
||||||
def _prepare_ddosguard_cookies(self):
|
def _prepare_ddosguard_cookies(self):
|
||||||
if not self.cookies.get("__ddg2", domain=self.cookies_domain):
|
if not self.cookies.get("__ddg2", domain=self.cookies_domain):
|
||||||
self.cookies.set(
|
self.cookies.set(
|
||||||
|
|||||||
@@ -43,8 +43,7 @@ class MangaparkChapterExtractor(MangaparkBase, ChapterExtractor):
|
|||||||
ChapterExtractor.__init__(self, match, url)
|
ChapterExtractor.__init__(self, match, url)
|
||||||
|
|
||||||
def metadata(self, page):
|
def metadata(self, page):
|
||||||
data = util.json_loads(text.extr(
|
data = self._extract_nextdata(page)
|
||||||
page, 'id="__NEXT_DATA__" type="application/json">', '<'))
|
|
||||||
chapter = (data["props"]["pageProps"]["dehydratedState"]
|
chapter = (data["props"]["pageProps"]["dehydratedState"]
|
||||||
["queries"][0]["state"]["data"]["data"])
|
["queries"][0]["state"]["data"]["data"])
|
||||||
manga = chapter["comicNode"]["data"]
|
manga = chapter["comicNode"]["data"]
|
||||||
|
|||||||
@@ -286,15 +286,12 @@ class PatreonExtractor(Extractor):
|
|||||||
return [genmap[ft] for ft in filetypes]
|
return [genmap[ft] for ft in filetypes]
|
||||||
|
|
||||||
def _extract_bootstrap(self, page):
|
def _extract_bootstrap(self, page):
|
||||||
data = text.extr(
|
try:
|
||||||
page, 'id="__NEXT_DATA__" type="application/json">', '</script')
|
data = self._extract_nextdata(page)
|
||||||
if data:
|
env = data["props"]["pageProps"]["bootstrapEnvelope"]
|
||||||
try:
|
return env.get("pageBootstrap") or env["bootstrap"]
|
||||||
data = util.json_loads(data)
|
except Exception as exc:
|
||||||
env = data["props"]["pageProps"]["bootstrapEnvelope"]
|
self.log.debug("%s: %s", exc.__class__.__name__, exc)
|
||||||
return env.get("pageBootstrap") or env["bootstrap"]
|
|
||||||
except Exception as exc:
|
|
||||||
self.log.debug("%s: %s", exc.__class__.__name__, exc)
|
|
||||||
|
|
||||||
bootstrap = text.extr(
|
bootstrap = text.extr(
|
||||||
page, 'window.patreon = {"bootstrap":', '},"apiServer"')
|
page, 'window.patreon = {"bootstrap":', '},"apiServer"')
|
||||||
|
|||||||
@@ -10,7 +10,7 @@
|
|||||||
"""Extractors for https://www.slideshare.net/"""
|
"""Extractors for https://www.slideshare.net/"""
|
||||||
|
|
||||||
from .common import GalleryExtractor
|
from .common import GalleryExtractor
|
||||||
from .. import text, util
|
from .. import text
|
||||||
|
|
||||||
|
|
||||||
class SlidesharePresentationExtractor(GalleryExtractor):
|
class SlidesharePresentationExtractor(GalleryExtractor):
|
||||||
@@ -31,8 +31,7 @@ class SlidesharePresentationExtractor(GalleryExtractor):
|
|||||||
GalleryExtractor.__init__(self, match, url)
|
GalleryExtractor.__init__(self, match, url)
|
||||||
|
|
||||||
def metadata(self, page):
|
def metadata(self, page):
|
||||||
data = util.json_loads(text.extr(
|
data = self._extract_nextdata(page)
|
||||||
page, 'id="__NEXT_DATA__" type="application/json">', '</script>'))
|
|
||||||
self.slideshow = slideshow = data["props"]["pageProps"]["slideshow"]
|
self.slideshow = slideshow = data["props"]["pageProps"]["slideshow"]
|
||||||
|
|
||||||
return {
|
return {
|
||||||
|
|||||||
Reference in New Issue
Block a user