[newgrounds] support 'imageData' files (#4642)
This commit is contained in:
@@ -54,27 +54,30 @@ class NewgroundsExtractor(Extractor):
|
||||
if metadata:
|
||||
post.update(metadata)
|
||||
yield Message.Directory, post
|
||||
post["num"] = 0
|
||||
yield Message.Url, url, text.nameext_from_url(url, post)
|
||||
|
||||
ext = post["extension"]
|
||||
for num, url in enumerate(text.extract_iter(
|
||||
post["_images"] + post["_comment"],
|
||||
'data-smartload-src="', '"'), 1):
|
||||
post["num"] = num
|
||||
post["_index"] = "{}_{:>02}".format(post["index"], num)
|
||||
if "_multi" in post:
|
||||
for data in post["_multi"]:
|
||||
post["num"] += 1
|
||||
post["_index"] = "{}_{:>02}".format(
|
||||
post["index"], post["num"])
|
||||
post.update(data)
|
||||
url = data["image"]
|
||||
|
||||
text.nameext_from_url(url, post)
|
||||
yield Message.Url, url, post
|
||||
|
||||
if "_fallback" in post:
|
||||
del post["_fallback"]
|
||||
|
||||
for url in text.extract_iter(
|
||||
post["_comment"], 'data-smartload-src="', '"'):
|
||||
post["num"] += 1
|
||||
post["_index"] = "{}_{:>02}".format(
|
||||
post["index"], post["num"])
|
||||
url = text.ensure_http_scheme(url)
|
||||
text.nameext_from_url(url, post)
|
||||
|
||||
if "_fallback" in post:
|
||||
del post["_fallback"]
|
||||
|
||||
if "/comments/" not in url:
|
||||
url = url.replace("/medium_views/", "/images/", 1)
|
||||
if post["extension"] == "webp":
|
||||
post["_fallback"] = (url,)
|
||||
post["extension"] = ext
|
||||
url = url.replace(".webp", "." + ext)
|
||||
|
||||
yield Message.Url, url, post
|
||||
else:
|
||||
self.log.warning(
|
||||
@@ -149,7 +152,6 @@ class NewgroundsExtractor(Extractor):
|
||||
extr = text.extract_from(page)
|
||||
data = extract_data(extr, post_url)
|
||||
|
||||
data["_images"] = extr('<div class="art-images', '\n</div>')
|
||||
data["_comment"] = extr(
|
||||
'id="author_comments"', '</div>').partition(">")[2]
|
||||
data["comment"] = text.unescape(text.remove_html(
|
||||
@@ -168,8 +170,7 @@ class NewgroundsExtractor(Extractor):
|
||||
data["post_url"] = post_url
|
||||
return data
|
||||
|
||||
@staticmethod
|
||||
def _extract_image_data(extr, url):
|
||||
def _extract_image_data(self, extr, url):
|
||||
full = text.extract_from(util.json_loads(extr(
|
||||
'"full_image_text":', '});')))
|
||||
data = {
|
||||
@@ -187,8 +188,34 @@ class NewgroundsExtractor(Extractor):
|
||||
index = data["url"].rpartition("/")[2].partition("_")[0]
|
||||
data["index"] = text.parse_int(index)
|
||||
data["_index"] = index
|
||||
|
||||
image_data = extr("let imageData =", "\n];")
|
||||
if image_data:
|
||||
data["_multi"] = self._extract_images_multi(image_data)
|
||||
else:
|
||||
art_images = extr('<div class="art-images', '\n</div>')
|
||||
if art_images:
|
||||
data["_multi"] = self._extract_images_art(art_images, data)
|
||||
|
||||
return data
|
||||
|
||||
def _extract_images_multi(self, html):
|
||||
data = util.json_loads(html + "]")
|
||||
yield from data[1:]
|
||||
|
||||
def _extract_images_art(self, html, data):
|
||||
ext = text.ext_from_url(data["url"])
|
||||
for url in text.extract_iter(html, 'data-smartload-src="', '"'):
|
||||
url = text.ensure_http_scheme(url)
|
||||
url = url.replace("/medium_views/", "/images/", 1)
|
||||
if text.ext_from_url(url) == "webp":
|
||||
yield {
|
||||
"image" : url.replace(".webp", "." + ext),
|
||||
"_fallback": (url,),
|
||||
}
|
||||
else:
|
||||
yield {"image": url}
|
||||
|
||||
@staticmethod
|
||||
def _extract_audio_data(extr, url):
|
||||
index = url.split("/")[5]
|
||||
|
||||
Reference in New Issue
Block a user