replace json.loads with direct calls to JSONDecoder.decode

This commit is contained in:
Mike Fährmann
2023-02-07 23:14:53 +01:00
parent b7337d810e
commit dd884b02ee
42 changed files with 117 additions and 154 deletions

View File

@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
# Copyright 2015-2021 Mike Fährmann # Copyright 2015-2023 Mike Fährmann
# #
# This program is free software; you can redistribute it and/or modify # This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as # it under the terms of the GNU General Public License version 2 as
@@ -9,7 +9,6 @@
"""Global configuration module""" """Global configuration module"""
import sys import sys
import json
import os.path import os.path
import logging import logging
from . import util from . import util
@@ -55,18 +54,18 @@ def load(files=None, strict=False, fmt="json"):
if fmt == "yaml": if fmt == "yaml":
try: try:
import yaml import yaml
parsefunc = yaml.safe_load load = yaml.safe_load
except ImportError: except ImportError:
log.error("Could not import 'yaml' module") log.error("Could not import 'yaml' module")
return return
else: else:
parsefunc = json.load load = util.json_loads
for pathfmt in files or _default_configs: for pathfmt in files or _default_configs:
path = util.expand_path(pathfmt) path = util.expand_path(pathfmt)
try: try:
with open(path, encoding="utf-8") as file: with open(path, encoding="utf-8") as file:
confdict = parsefunc(file) conf = load(file.read())
except OSError as exc: except OSError as exc:
if strict: if strict:
log.error(exc) log.error(exc)
@@ -77,9 +76,9 @@ def load(files=None, strict=False, fmt="json"):
sys.exit(2) sys.exit(2)
else: else:
if not _config: if not _config:
_config.update(confdict) _config.update(conf)
else: else:
util.combine_dict(_config, confdict) util.combine_dict(_config, conf)
_files.append(pathfmt) _files.append(pathfmt)

View File

@@ -12,7 +12,6 @@
import binascii import binascii
import contextlib import contextlib
import ctypes import ctypes
import json
import logging import logging
import os import os
import shutil import shutil
@@ -24,7 +23,7 @@ import tempfile
from datetime import datetime, timedelta, timezone from datetime import datetime, timedelta, timezone
from hashlib import pbkdf2_hmac from hashlib import pbkdf2_hmac
from http.cookiejar import Cookie from http.cookiejar import Cookie
from . import aes, text from . import aes, text, util
SUPPORTED_BROWSERS_CHROMIUM = { SUPPORTED_BROWSERS_CHROMIUM = {
@@ -169,8 +168,8 @@ def _firefox_cookies_database(profile=None, container=None):
os.path.dirname(path), "containers.json") os.path.dirname(path), "containers.json")
try: try:
with open(containers_path) as containers: with open(containers_path) as file:
identities = json.load(containers)["identities"] identities = util.json_loads(file.read())["identities"]
except OSError: except OSError:
logger.error("Unable to read Firefox container database at %s", logger.error("Unable to read Firefox container database at %s",
containers_path) containers_path)
@@ -716,8 +715,8 @@ def _get_windows_v10_key(browser_root):
logger.error("could not find local state file") logger.error("could not find local state file")
return None return None
logger.debug("Found local state file at '%s'", path) logger.debug("Found local state file at '%s'", path)
with open(path, encoding="utf8") as f: with open(path, encoding="utf-8") as file:
data = json.load(f) data = util.json_loads(file.read())
try: try:
base64_key = data["os_crypt"]["encrypted_key"] base64_key = data["os_crypt"]["encrypted_key"]
except KeyError: except KeyError:

View File

@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
# Copyright 2019-2022 Mike Fährmann # Copyright 2019-2023 Mike Fährmann
# #
# This program is free software; you can redistribute it and/or modify # This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as # it under the terms of the GNU General Public License version 2 as
@@ -9,8 +9,7 @@
"""Extractors for https://comics.8muses.com/""" """Extractors for https://comics.8muses.com/"""
from .common import Extractor, Message from .common import Extractor, Message
from .. import text from .. import text, util
import json
class _8musesAlbumExtractor(Extractor): class _8musesAlbumExtractor(Extractor):
@@ -131,7 +130,7 @@ class _8musesAlbumExtractor(Extractor):
@staticmethod @staticmethod
def _unobfuscate(data): def _unobfuscate(data):
return json.loads("".join([ return util.json_loads("".join([
chr(33 + (ord(c) + 14) % 94) if "!" <= c <= "~" else c chr(33 + (ord(c) + 14) % 94) if "!" <= c <= "~" else c
for c in text.unescape(data.strip("\t\n\r !")) for c in text.unescape(data.strip("\t\n\r !"))
])) ]))

View File

@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
# Copyright 2021 Mike Fährmann # Copyright 2021-2023 Mike Fährmann
# #
# This program is free software; you can redistribute it and/or modify # This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as # it under the terms of the GNU General Public License version 2 as
@@ -10,7 +10,6 @@
from .common import GalleryExtractor, Extractor, Message from .common import GalleryExtractor, Extractor, Message
from .. import text, util from .. import text, util
import json
BASE_PATTERN = r"(?:https?://)?(?:www\.)?bbc\.co\.uk(/programmes/" BASE_PATTERN = r"(?:https?://)?(?:www\.)?bbc\.co\.uk(/programmes/"
@@ -38,7 +37,7 @@ class BbcGalleryExtractor(GalleryExtractor):
) )
def metadata(self, page): def metadata(self, page):
data = json.loads(text.extr( data = util.json_loads(text.extr(
page, '<script type="application/ld+json">', '</script>')) page, '<script type="application/ld+json">', '</script>'))
return { return {
"programme": self.gallery_url.split("/")[4], "programme": self.gallery_url.split("/")[4],

View File

@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
# Copyright 2020-2022 Mike Fährmann # Copyright 2020-2023 Mike Fährmann
# #
# This program is free software; you can redistribute it and/or modify # This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as # it under the terms of the GNU General Public License version 2 as
@@ -9,8 +9,7 @@
"""Extractors for https://bcy.net/""" """Extractors for https://bcy.net/"""
from .common import Extractor, Message from .common import Extractor, Message
from .. import text, exception from .. import text, util, exception
import json
import re import re
@@ -100,9 +99,9 @@ class BcyExtractor(Extractor):
.replace('\\\\u002F', '/') .replace('\\\\u002F', '/')
.replace('\\"', '"')) .replace('\\"', '"'))
try: try:
return json.loads(data)["detail"] return util.json_loads(data)["detail"]
except ValueError: except ValueError:
return json.loads(data.replace('\\"', '"'))["detail"] return util.json_loads(data.replace('\\"', '"'))["detail"]
class BcyUserExtractor(BcyExtractor): class BcyUserExtractor(BcyExtractor):

View File

@@ -9,8 +9,7 @@
"""Extractors for https://www.behance.net/""" """Extractors for https://www.behance.net/"""
from .common import Extractor, Message from .common import Extractor, Message
from .. import text from .. import text, util
import json
class BehanceExtractor(Extractor): class BehanceExtractor(Extractor):
@@ -120,7 +119,7 @@ class BehanceGalleryExtractor(BehanceExtractor):
} }
page = self.request(url, cookies=cookies).text page = self.request(url, cookies=cookies).text
data = json.loads(text.extr( data = util.json_loads(text.extr(
page, 'id="beconfig-store_state">', '</script>')) page, 'id="beconfig-store_state">', '</script>'))
return self._update(data["project"]["project"]) return self._update(data["project"]["project"])

View File

@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
# Copyright 2019-2022 Mike Fährmann # Copyright 2019-2023 Mike Fährmann
# #
# This program is free software; you can redistribute it and/or modify # This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as # it under the terms of the GNU General Public License version 2 as
@@ -9,8 +9,7 @@
"""Extractors for Blogger blogs""" """Extractors for Blogger blogs"""
from .common import Extractor, Message from .common import Extractor, Message
from .. import text from .. import text, util
import json
import re import re
BASE_PATTERN = ( BASE_PATTERN = (
@@ -61,7 +60,7 @@ class BloggerExtractor(Extractor):
page = self.request(post["url"]).text page = self.request(post["url"]).text
for url in findall_video(page): for url in findall_video(page):
page = self.request(url).text page = self.request(url).text
video_config = json.loads(text.extr( video_config = util.json_loads(text.extr(
page, 'var VIDEO_CONFIG =', '\n')) page, 'var VIDEO_CONFIG =', '\n'))
files.append(max( files.append(max(
video_config["streams"], video_config["streams"],

View File

@@ -9,8 +9,7 @@
"""Extractors for https://bunkr.ru/""" """Extractors for https://bunkr.ru/"""
from .lolisafe import LolisafeAlbumExtractor from .lolisafe import LolisafeAlbumExtractor
from .. import text from .. import text, util
import json
class BunkrAlbumExtractor(LolisafeAlbumExtractor): class BunkrAlbumExtractor(LolisafeAlbumExtractor):
@@ -49,7 +48,7 @@ class BunkrAlbumExtractor(LolisafeAlbumExtractor):
root = self.root root = self.root
try: try:
data = json.loads(text.extr( data = util.json_loads(text.extr(
self.request(root + "/a/" + self.album_id).text, self.request(root + "/a/" + self.album_id).text,
'id="__NEXT_DATA__" type="application/json">', '<')) 'id="__NEXT_DATA__" type="application/json">', '<'))
album = data["props"]["pageProps"]["album"] album = data["props"]["pageProps"]["album"]

View File

@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
# Copyright 2015-2022 Mike Fährmann # Copyright 2015-2023 Mike Fährmann
# #
# This program is free software; you can redistribute it and/or modify # This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as # it under the terms of the GNU General Public License version 2 as
@@ -9,8 +9,7 @@
"""Extractors for https://dynasty-scans.com/""" """Extractors for https://dynasty-scans.com/"""
from .common import ChapterExtractor, MangaExtractor, Extractor, Message from .common import ChapterExtractor, MangaExtractor, Extractor, Message
from .. import text from .. import text, util
import json
import re import re
BASE_PATTERN = r"(?:https?://)?(?:www\.)?dynasty-scans\.com" BASE_PATTERN = r"(?:https?://)?(?:www\.)?dynasty-scans\.com"
@@ -86,7 +85,7 @@ class DynastyscansChapterExtractor(DynastyscansBase, ChapterExtractor):
data = text.extr(page, "var pages = ", ";\n") data = text.extr(page, "var pages = ", ";\n")
return [ return [
(self.root + img["image"], None) (self.root + img["image"], None)
for img in json.loads(data) for img in util.json_loads(data)
] ]

View File

@@ -6,11 +6,10 @@
# it under the terms of the GNU General Public License version 2 as # it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation. # published by the Free Software Foundation.
"""Extract manga-chapters from https://www.fascans.com/""" """Extractors for https://www.fascans.com/"""
from .common import ChapterExtractor, MangaExtractor from .common import ChapterExtractor, MangaExtractor
from .. import text, util from .. import text, util
import json
class FallenangelsChapterExtractor(ChapterExtractor): class FallenangelsChapterExtractor(ChapterExtractor):
@@ -56,7 +55,7 @@ class FallenangelsChapterExtractor(ChapterExtractor):
def images(page): def images(page):
return [ return [
(img["page_image"], None) (img["page_image"], None)
for img in json.loads( for img in util.json_loads(
text.extr(page, "var pages = ", ";") text.extr(page, "var pages = ", ";")
) )
] ]

View File

@@ -7,8 +7,7 @@
"""Extractors for https://fantia.jp/""" """Extractors for https://fantia.jp/"""
from .common import Extractor, Message from .common import Extractor, Message
from .. import text from .. import text, util
import json
class FantiaExtractor(Extractor): class FantiaExtractor(Extractor):
@@ -117,7 +116,7 @@ class FantiaExtractor(Extractor):
yield self.root+"/"+content["download_uri"], post yield self.root+"/"+content["download_uri"], post
if content["category"] == "blog" and "comment" in content: if content["category"] == "blog" and "comment" in content:
comment_json = json.loads(content["comment"]) comment_json = util.json_loads(content["comment"])
ops = comment_json.get("ops", ()) ops = comment_json.get("ops", ())
# collect blogpost text first # collect blogpost text first

View File

@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
# Copyright 2016-2022 Mike Fährmann # Copyright 2016-2023 Mike Fährmann
# #
# This program is free software; you can redistribute it and/or modify # This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as # it under the terms of the GNU General Public License version 2 as
@@ -10,7 +10,6 @@
from .common import BaseExtractor, Message from .common import BaseExtractor, Message
from .. import text, util from .. import text, util
import json
class FoolslideExtractor(BaseExtractor): class FoolslideExtractor(BaseExtractor):
@@ -106,7 +105,7 @@ class FoolslideChapterExtractor(FoolslideExtractor):
}) })
def images(self, page): def images(self, page):
return json.loads(text.extr(page, "var pages = ", ";")) return util.json_loads(text.extr(page, "var pages = ", ";"))
class FoolslideMangaExtractor(FoolslideExtractor): class FoolslideMangaExtractor(FoolslideExtractor):

View File

@@ -1,16 +1,15 @@
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
# Copyright 2015-2019 Mike Fährmann # Copyright 2015-2023 Mike Fährmann
# #
# This program is free software; you can redistribute it and/or modify # This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as # it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation. # published by the Free Software Foundation.
"""Extract images from https://www.hbrowse.com/""" """Extractors for https://www.hbrowse.com/"""
from .common import ChapterExtractor, MangaExtractor from .common import ChapterExtractor, MangaExtractor
from .. import text, exception from .. import text, util, exception
import json
class HbrowseBase(): class HbrowseBase():
@@ -68,7 +67,7 @@ class HbrowseChapterExtractor(HbrowseBase, ChapterExtractor):
def images(self, page): def images(self, page):
base = self.root + "/data" + self.path base = self.root + "/data" + self.path
json_data = text.extract(page, ';list = ', ',"zzz"')[0] + "]" json_data = text.extract(page, ';list = ', ',"zzz"')[0] + "]"
return [(base + name, None) for name in json.loads(json_data)] return [(base + name, None) for name in util.json_loads(json_data)]
class HbrowseMangaExtractor(HbrowseBase, MangaExtractor): class HbrowseMangaExtractor(HbrowseBase, MangaExtractor):

View File

@@ -9,8 +9,7 @@
"""Extractors for https://hentai2read.com/""" """Extractors for https://hentai2read.com/"""
from .common import ChapterExtractor, MangaExtractor from .common import ChapterExtractor, MangaExtractor
from .. import text from .. import text, util
import json
import re import re
@@ -78,7 +77,7 @@ class Hentai2readChapterExtractor(Hentai2readBase, ChapterExtractor):
images = text.extract(page, "'images' : ", ",\n")[0] images = text.extract(page, "'images' : ", ",\n")[0]
return [ return [
("https://hentaicdn.com/hentai" + part, None) ("https://hentaicdn.com/hentai" + part, None)
for part in json.loads(images) for part in util.json_loads(images)
] ]

View File

@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
# Copyright 2019-2021 Mike Fährmann # Copyright 2019-2023 Mike Fährmann
# #
# This program is free software; you can redistribute it and/or modify # This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as # it under the terms of the GNU General Public License version 2 as
@@ -9,8 +9,7 @@
"""Extractors for https://hentaifox.com/""" """Extractors for https://hentaifox.com/"""
from .common import GalleryExtractor, Extractor, Message from .common import GalleryExtractor, Extractor, Message
from .. import text from .. import text, util
import json
class HentaifoxBase(): class HentaifoxBase():
@@ -90,7 +89,7 @@ class HentaifoxGalleryExtractor(HentaifoxBase, GalleryExtractor):
server1 = "https://i.hentaifox.com" server1 = "https://i.hentaifox.com"
server2 = "https://i2.hentaifox.com" server2 = "https://i2.hentaifox.com"
for num, image in json.loads(data).items(): for num, image in util.json_loads(data).items():
ext, width, height = image.split(",") ext, width, height = image.split(",")
path = urlfmt(num, extmap[ext]) path = urlfmt(num, extmap[ext])
append((server1 + path, { append((server1 + path, {

View File

@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
# Copyright 2020-2022 Mike Fährmann # Copyright 2020-2023 Mike Fährmann
# #
# This program is free software; you can redistribute it and/or modify # This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as # it under the terms of the GNU General Public License version 2 as
@@ -10,7 +10,6 @@
from .common import GalleryExtractor, Extractor, Message from .common import GalleryExtractor, Extractor, Message
from .. import text, util from .. import text, util
import json
class HentaihandGalleryExtractor(GalleryExtractor): class HentaihandGalleryExtractor(GalleryExtractor):
@@ -46,7 +45,7 @@ class HentaihandGalleryExtractor(GalleryExtractor):
GalleryExtractor.__init__(self, match, url) GalleryExtractor.__init__(self, match, url)
def metadata(self, page): def metadata(self, page):
info = json.loads(page) info = util.json_loads(page)
data = { data = {
"gallery_id" : text.parse_int(info["id"]), "gallery_id" : text.parse_int(info["id"]),
"title" : info["title"], "title" : info["title"],

View File

@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
# Copyright 2016-2022 Mike Fährmann # Copyright 2016-2023 Mike Fährmann
# #
# This program is free software; you can redistribute it and/or modify # This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as # it under the terms of the GNU General Public License version 2 as
@@ -9,8 +9,7 @@
"""Extractors for https://hentaihere.com/""" """Extractors for https://hentaihere.com/"""
from .common import ChapterExtractor, MangaExtractor from .common import ChapterExtractor, MangaExtractor
from .. import text from .. import text, util
import json
import re import re
@@ -80,7 +79,7 @@ class HentaihereChapterExtractor(HentaihereBase, ChapterExtractor):
images = text.extr(page, "var rff_imageList = ", ";") images = text.extr(page, "var rff_imageList = ", ";")
return [ return [
("https://hentaicdn.com/hentai" + part, None) ("https://hentaicdn.com/hentai" + part, None)
for part in json.loads(images) for part in util.json_loads(images)
] ]

View File

@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
# Copyright 2015-2022 Mike Fährmann # Copyright 2015-2023 Mike Fährmann
# #
# This program is free software; you can redistribute it and/or modify # This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as # it under the terms of the GNU General Public License version 2 as
@@ -13,7 +13,6 @@ from .nozomi import decode_nozomi
from ..cache import memcache from ..cache import memcache
from .. import text, util from .. import text, util
import string import string
import json
import re import re
@@ -75,7 +74,7 @@ class HitomiGalleryExtractor(GalleryExtractor):
self.root, gid) self.root, gid)
def metadata(self, page): def metadata(self, page):
self.info = info = json.loads(page.partition("=")[2]) self.info = info = util.json_loads(page.partition("=")[2])
iget = info.get iget = info.get
language = iget("language") language = iget("language")

View File

@@ -9,8 +9,7 @@
"""Extractors for https://www.imagefap.com/""" """Extractors for https://www.imagefap.com/"""
from .common import Extractor, Message from .common import Extractor, Message
from .. import text, exception from .. import text, util, exception
import json
BASE_PATTERN = r"(?:https?://)?(?:www\.|beta\.)?imagefap\.com" BASE_PATTERN = r"(?:https?://)?(?:www\.|beta\.)?imagefap\.com"
@@ -173,7 +172,7 @@ class ImagefapImageExtractor(ImagefapExtractor):
page, 'id="imageid_input" value="', '"', pos) page, 'id="imageid_input" value="', '"', pos)
gallery_id, pos = text.extract( gallery_id, pos = text.extract(
page, 'id="galleryid_input" value="', '"', pos) page, 'id="galleryid_input" value="', '"', pos)
info = json.loads(info) info = util.json_loads(info)
url = info["contentUrl"] url = info["contentUrl"]
return url, text.nameext_from_url(url, { return url, text.nameext_from_url(url, {

View File

@@ -9,9 +9,8 @@
"""Extractors for https://imgbb.com/""" """Extractors for https://imgbb.com/"""
from .common import Extractor, Message from .common import Extractor, Message
from .. import text, exception from .. import text, util, exception
from ..cache import cache from ..cache import cache
import json
class ImgbbExtractor(Extractor): class ImgbbExtractor(Extractor):
@@ -98,7 +97,7 @@ class ImgbbExtractor(Extractor):
while True: while True:
for img in text.extract_iter(page, "data-object='", "'"): for img in text.extract_iter(page, "data-object='", "'"):
yield json.loads(text.unquote(img)) yield util.json_loads(text.unquote(img))
if data: if data:
if params["seek"] == data["seekEnd"]: if params["seek"] == data["seekEnd"]:
return return

View File

@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
# Copyright 2019-2022 Mike Fährmann # Copyright 2019-2023 Mike Fährmann
# #
# This program is free software; you can redistribute it and/or modify # This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as # it under the terms of the GNU General Public License version 2 as
@@ -9,8 +9,7 @@
"""Extractors for https://issuu.com/""" """Extractors for https://issuu.com/"""
from .common import GalleryExtractor, Extractor, Message from .common import GalleryExtractor, Extractor, Message
from .. import text from .. import text, util
import json
class IssuuBase(): class IssuuBase():
@@ -54,7 +53,7 @@ class IssuuPublicationExtractor(IssuuBase, GalleryExtractor):
}) })
def metadata(self, page): def metadata(self, page):
data = json.loads(text.extr( data = util.json_loads(text.extr(
page, '<script data-json="', '"').replace("&quot;", '"')) page, '<script data-json="', '"').replace("&quot;", '"'))
doc = data["initialDocumentData"]["document"] doc = data["initialDocumentData"]["document"]

View File

@@ -7,8 +7,7 @@
"""Extractors for https://lightroom.adobe.com/""" """Extractors for https://lightroom.adobe.com/"""
from .common import Extractor, Message from .common import Extractor, Message
from .. import text from .. import text, util
import json
class LightroomGalleryExtractor(Extractor): class LightroomGalleryExtractor(Extractor):
@@ -46,7 +45,7 @@ class LightroomGalleryExtractor(Extractor):
# Get config # Get config
url = "https://lightroom.adobe.com/shares/" + self.href url = "https://lightroom.adobe.com/shares/" + self.href
response = self.request(url) response = self.request(url)
album = json.loads( album = util.json_loads(
text.extr(response.text, "albumAttributes: ", "\n") text.extr(response.text, "albumAttributes: ", "\n")
) )
@@ -75,7 +74,7 @@ class LightroomGalleryExtractor(Extractor):
url = base_url + next_url url = base_url + next_url
page = self.request(url).text page = self.request(url).text
# skip 1st line as it's a JS loop # skip 1st line as it's a JS loop
data = json.loads(page[page.index("\n") + 1:]) data = util.json_loads(page[page.index("\n") + 1:])
base_url = data["base"] base_url = data["base"]
for res in data["resources"]: for res in data["resources"]:

View File

@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
# Copyright 2015-2022 Mike Fährmann # Copyright 2015-2023 Mike Fährmann
# #
# This program is free software; you can redistribute it and/or modify # This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as # it under the terms of the GNU General Public License version 2 as
@@ -9,8 +9,7 @@
"""Extractors for https://mangapark.net/""" """Extractors for https://mangapark.net/"""
from .common import ChapterExtractor, MangaExtractor from .common import ChapterExtractor, MangaExtractor
from .. import text, exception from .. import text, util, exception
import json
import re import re
@@ -104,7 +103,7 @@ class MangaparkChapterExtractor(MangaparkBase, ChapterExtractor):
return data return data
def images(self, page): def images(self, page):
data = json.loads(text.extr(page, "var _load_pages =", ";")) data = util.json_loads(text.extr(page, "var _load_pages =", ";"))
return [ return [
(text.urljoin(self.root, item["u"]), { (text.urljoin(self.root, item["u"]), {
"width": text.parse_int(item["w"]), "width": text.parse_int(item["w"]),

View File

@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
# Copyright 2021-2022 Mike Fährmann # Copyright 2021-2023 Mike Fährmann
# #
# This program is free software; you can redistribute it and/or modify # This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as # it under the terms of the GNU General Public License version 2 as
@@ -10,7 +10,6 @@
from .common import ChapterExtractor, MangaExtractor from .common import ChapterExtractor, MangaExtractor
from .. import text, util from .. import text, util
import json
class MangaseeBase(): class MangaseeBase():
@@ -94,7 +93,7 @@ class MangaseeChapterExtractor(MangaseeBase, ChapterExtractor):
def metadata(self, page): def metadata(self, page):
extr = text.extract_from(page) extr = text.extract_from(page)
self.chapter = data = json.loads(extr("vm.CurChapter =", ";\r\n")) self.chapter = data = util.json_loads(extr("vm.CurChapter =", ";\r\n"))
self.domain = extr('vm.CurPathName = "', '"') self.domain = extr('vm.CurPathName = "', '"')
self.slug = extr('vm.IndexName = "', '"') self.slug = extr('vm.IndexName = "', '"')
@@ -143,7 +142,7 @@ class MangaseeMangaExtractor(MangaseeBase, MangaExtractor):
def chapters(self, page): def chapters(self, page):
slug, pos = text.extract(page, 'vm.IndexName = "', '"') slug, pos = text.extract(page, 'vm.IndexName = "', '"')
chapters = json.loads(text.extract( chapters = util.json_loads(text.extract(
page, "vm.Chapters = ", ";\r\n", pos)[0]) page, "vm.Chapters = ", ";\r\n", pos)[0])
result = [] result = []

View File

@@ -7,8 +7,7 @@
"""Extractors for https://nana.my.id/""" """Extractors for https://nana.my.id/"""
from .common import GalleryExtractor, Extractor, Message from .common import GalleryExtractor, Extractor, Message
from .. import text, exception from .. import text, util, exception
import json
class NanaGalleryExtractor(GalleryExtractor): class NanaGalleryExtractor(GalleryExtractor):
@@ -59,7 +58,7 @@ class NanaGalleryExtractor(GalleryExtractor):
} }
def images(self, page): def images(self, page):
data = json.loads(text.extr(page, "Reader.pages = ", ".pages")) data = util.json_loads(text.extr(page, "Reader.pages = ", ".pages"))
return [ return [
("https://nana.my.id" + image, None) ("https://nana.my.id" + image, None)
for image in data["pages"] for image in data["pages"]

View File

@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
# Copyright 2018-2022 Mike Fährmann # Copyright 2018-2023 Mike Fährmann
# #
# This program is free software; you can redistribute it and/or modify # This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as # it under the terms of the GNU General Public License version 2 as
@@ -9,10 +9,9 @@
"""Extractors for https://www.newgrounds.com/""" """Extractors for https://www.newgrounds.com/"""
from .common import Extractor, Message from .common import Extractor, Message
from .. import text, exception from .. import text, util, exception
from ..cache import cache from ..cache import cache
import itertools import itertools
import json
class NewgroundsExtractor(Extractor): class NewgroundsExtractor(Extractor):
@@ -151,7 +150,8 @@ class NewgroundsExtractor(Extractor):
@staticmethod @staticmethod
def _extract_image_data(extr, url): def _extract_image_data(extr, url):
full = text.extract_from(json.loads(extr('"full_image_text":', '});'))) full = text.extract_from(util.json_loads(extr(
'"full_image_text":', '});')))
data = { data = {
"title" : text.unescape(extr('"og:title" content="', '"')), "title" : text.unescape(extr('"og:title" content="', '"')),
"description": text.unescape(extr(':description" content="', '"')), "description": text.unescape(extr(':description" content="', '"')),

View File

@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
# Copyright 2015-2021 Mike Fährmann # Copyright 2015-2023 Mike Fährmann
# #
# This program is free software; you can redistribute it and/or modify # This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as # it under the terms of the GNU General Public License version 2 as
@@ -11,7 +11,6 @@
from .common import GalleryExtractor, Extractor, Message from .common import GalleryExtractor, Extractor, Message
from .. import text, util from .. import text, util
import collections import collections
import json
class NhentaiGalleryExtractor(GalleryExtractor): class NhentaiGalleryExtractor(GalleryExtractor):
@@ -48,7 +47,7 @@ class NhentaiGalleryExtractor(GalleryExtractor):
GalleryExtractor.__init__(self, match, url) GalleryExtractor.__init__(self, match, url)
def metadata(self, page): def metadata(self, page):
self.data = data = json.loads(page) self.data = data = util.json_loads(page)
title_en = data["title"].get("english", "") title_en = data["title"].get("english", "")
title_ja = data["title"].get("japanese", "") title_ja = data["title"].get("japanese", "")

View File

@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
# Copyright 2019-2022 Mike Fährmann # Copyright 2019-2023 Mike Fährmann
# #
# This program is free software; you can redistribute it and/or modify # This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as # it under the terms of the GNU General Public License version 2 as
@@ -9,11 +9,10 @@
"""Extractors for https://www.patreon.com/""" """Extractors for https://www.patreon.com/"""
from .common import Extractor, Message from .common import Extractor, Message
from .. import text, exception from .. import text, util, exception
from ..cache import memcache from ..cache import memcache
import collections import collections
import itertools import itertools
import json
class PatreonExtractor(Extractor): class PatreonExtractor(Extractor):
@@ -251,7 +250,7 @@ class PatreonExtractor(Extractor):
return [genmap[ft] for ft in filetypes] return [genmap[ft] for ft in filetypes]
def _extract_bootstrap(self, page): def _extract_bootstrap(self, page):
return json.loads(text.extr( return util.json_loads(text.extr(
page, "window.patreon.bootstrap,", "\n});") + "}") page, "window.patreon.bootstrap,", "\n});") + "}")

View File

@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
# Copyright 2019-2022 Mike Fährmann # Copyright 2019-2023 Mike Fährmann
# #
# This program is free software; you can redistribute it and/or modify # This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as # it under the terms of the GNU General Public License version 2 as
@@ -9,10 +9,9 @@
"""Extractors for https://www.plurk.com/""" """Extractors for https://www.plurk.com/"""
from .common import Extractor, Message from .common import Extractor, Message
from .. import text, exception from .. import text, util, exception
import datetime import datetime
import time import time
import json
import re import re
@@ -66,7 +65,7 @@ class PlurkExtractor(Extractor):
def _load(data): def _load(data):
if not data: if not data:
raise exception.NotFoundError("user") raise exception.NotFoundError("user")
return json.loads(re.sub(r"new Date\(([^)]+)\)", r"\1", data)) return util.json_loads(re.sub(r"new Date\(([^)]+)\)", r"\1", data))
class PlurkTimelineExtractor(PlurkExtractor): class PlurkTimelineExtractor(PlurkExtractor):

View File

@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
# Copyright 2019-2021 Mike Fährmann # Copyright 2019-2023 Mike Fährmann
# #
# This program is free software; you can redistribute it and/or modify # This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as # it under the terms of the GNU General Public License version 2 as
@@ -11,7 +11,6 @@
from .common import GalleryExtractor from .common import GalleryExtractor
from .. import text, util from .. import text, util
import binascii import binascii
import json
class PururinGalleryExtractor(GalleryExtractor): class PururinGalleryExtractor(GalleryExtractor):
@@ -73,7 +72,7 @@ class PururinGalleryExtractor(GalleryExtractor):
url = "{}/read/{}/01/x".format(self.root, self.gallery_id) url = "{}/read/{}/01/x".format(self.root, self.gallery_id)
page = self.request(url).text page = self.request(url).text
info = json.loads(binascii.a2b_base64(text.extr( info = util.json_loads(binascii.a2b_base64(text.extr(
page, '<gallery-read encoded="', '"')).decode()) page, '<gallery-read encoded="', '"')).decode())
self._ext = info["image_extension"] self._ext = info["image_extension"]
self._cnt = info["total_pages"] self._cnt = info["total_pages"]

View File

@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
# Copyright 2019-2022 Mike Fährmann # Copyright 2019-2023 Mike Fährmann
# #
# This program is free software; you can redistribute it and/or modify # This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as # it under the terms of the GNU General Public License version 2 as
@@ -9,9 +9,8 @@
"""Generic extractors for *reactor sites""" """Generic extractors for *reactor sites"""
from .common import BaseExtractor, Message from .common import BaseExtractor, Message
from .. import text from .. import text, util
import urllib.parse import urllib.parse
import json
class ReactorExtractor(BaseExtractor): class ReactorExtractor(BaseExtractor):
@@ -84,13 +83,13 @@ class ReactorExtractor(BaseExtractor):
script = script[:script.index("</")].strip() script = script[:script.index("</")].strip()
try: try:
data = json.loads(script) data = util.json_loads(script)
except ValueError: except ValueError:
try: try:
# remove control characters and escape backslashes # remove control characters and escape backslashes
mapping = dict.fromkeys(range(32)) mapping = dict.fromkeys(range(32))
script = script.translate(mapping).replace("\\", "\\\\") script = script.translate(mapping).replace("\\", "\\\\")
data = json.loads(script) data = util.json_loads(script)
except ValueError as exc: except ValueError as exc:
self.log.warning("Unable to parse JSON data: %s", exc) self.log.warning("Unable to parse JSON data: %s", exc)
return return

View File

@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
# Copyright 2016-2022 Mike Fährmann, Leonardo Taccari # Copyright 2016-2023 Mike Fährmann, Leonardo Taccari
# #
# This program is free software; you can redistribute it and/or modify # This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as # it under the terms of the GNU General Public License version 2 as
@@ -9,8 +9,7 @@
"""Extractors for https://www.slideshare.net/""" """Extractors for https://www.slideshare.net/"""
from .common import GalleryExtractor from .common import GalleryExtractor
from .. import text from .. import text, util
import json
class SlidesharePresentationExtractor(GalleryExtractor): class SlidesharePresentationExtractor(GalleryExtractor):
@@ -97,7 +96,7 @@ class SlidesharePresentationExtractor(GalleryExtractor):
@staticmethod @staticmethod
def images(page): def images(page):
data = json.loads(text.extract( data = util.json_loads(text.extract(
page, "xtend(true, slideshare_object.slideshow_config, ", ");")[0]) page, "xtend(true, slideshare_object.slideshow_config, ", ");")[0])
# using 'stripped_title' here is technically wrong, but it works all # using 'stripped_title' here is technically wrong, but it works all

View File

@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
# Copyright 2020-2022 Mike Fährmann # Copyright 2020-2023 Mike Fährmann
# #
# This program is free software; you can redistribute it and/or modify # This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as # it under the terms of the GNU General Public License version 2 as
@@ -9,9 +9,8 @@
"""Extractors for https://www.subscribestar.com/""" """Extractors for https://www.subscribestar.com/"""
from .common import Extractor, Message from .common import Extractor, Message
from .. import text, exception from .. import text, util, exception
from ..cache import cache from ..cache import cache
import json
BASE_PATTERN = r"(?:https?://)?(?:www\.)?subscribestar\.(com|adult)" BASE_PATTERN = r"(?:https?://)?(?:www\.)?subscribestar\.(com|adult)"
@@ -92,7 +91,7 @@ class SubscribestarExtractor(Extractor):
gallery = text.extr(html, 'data-gallery="', '"') gallery = text.extr(html, 'data-gallery="', '"')
if gallery: if gallery:
media.extend( media.extend(
item for item in json.loads(text.unescape(gallery)) item for item in util.json_loads(text.unescape(gallery))
if "/previews/" not in item["url"] if "/previews/" not in item["url"]
) )

View File

@@ -216,7 +216,7 @@ class TwitterExtractor(Extractor):
files.append(value) files.append(value)
return return
elif name == "unified_card": elif name == "unified_card":
data = json.loads(bvals["unified_card"]["string_value"]) data = util.json_loads(bvals["unified_card"]["string_value"])
self._extract_media(tweet, data["media_entities"].values(), files) self._extract_media(tweet, data["media_entities"].values(), files)
return return

View File

@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
# Copyright 2019-2022 Mike Fährmann # Copyright 2019-2023 Mike Fährmann
# #
# This program is free software; you can redistribute it and/or modify # This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as # it under the terms of the GNU General Public License version 2 as
@@ -9,8 +9,7 @@
"""Extractors for https://vsco.co/""" """Extractors for https://vsco.co/"""
from .common import Extractor, Message from .common import Extractor, Message
from .. import text from .. import text, util
import json
BASE_PATTERN = r"(?:https?://)?(?:www\.)?vsco\.co/([^/]+)" BASE_PATTERN = r"(?:https?://)?(?:www\.)?vsco\.co/([^/]+)"
@@ -69,7 +68,7 @@ class VscoExtractor(Extractor):
def _extract_preload_state(self, url): def _extract_preload_state(self, url):
page = self.request(url, notfound=self.subcategory).text page = self.request(url, notfound=self.subcategory).text
return json.loads(text.extr(page, "__PRELOADED_STATE__ = ", "<")) return util.json_loads(text.extr(page, "__PRELOADED_STATE__ = ", "<"))
def _pagination(self, url, params, token, key, extra=None): def _pagination(self, url, params, token, key, extra=None):
headers = { headers = {

View File

@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
# Copyright 2019-2022 Mike Fährmann # Copyright 2019-2023 Mike Fährmann
# #
# This program is free software; you can redistribute it and/or modify # This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as # it under the terms of the GNU General Public License version 2 as
@@ -9,10 +9,9 @@
"""Extractors for https://www.weibo.com/""" """Extractors for https://www.weibo.com/"""
from .common import Extractor, Message from .common import Extractor, Message
from .. import text, exception from .. import text, util, exception
from ..cache import cache from ..cache import cache
import random import random
import json
BASE_PATTERN = r"(?:https?://)?(?:www\.|m\.)?weibo\.c(?:om|n)" BASE_PATTERN = r"(?:https?://)?(?:www\.|m\.)?weibo\.c(?:om|n)"
USER_PATTERN = BASE_PATTERN + r"/(?:(u|n|p(?:rofile)?)/)?([^/?#]+)(?:/home)?" USER_PATTERN = BASE_PATTERN + r"/(?:(u|n|p(?:rofile)?)/)?([^/?#]+)(?:/home)?"
@@ -179,7 +178,7 @@ class WeiboExtractor(Extractor):
page = Extractor.request( page = Extractor.request(
self, passport_url, method="POST", headers=headers, data=data).text self, passport_url, method="POST", headers=headers, data=data).text
data = json.loads(text.extr(page, "(", ");"))["data"] data = util.json_loads(text.extr(page, "(", ");"))["data"]
passport_url = "https://passport.weibo.com/visitor/visitor" passport_url = "https://passport.weibo.com/visitor/visitor"
params = { params = {

View File

@@ -7,8 +7,7 @@
"""Extractors for https://www.wikifeet.com/""" """Extractors for https://www.wikifeet.com/"""
from .common import GalleryExtractor from .common import GalleryExtractor
from .. import text from .. import text, util
import json
class WikifeetGalleryExtractor(GalleryExtractor): class WikifeetGalleryExtractor(GalleryExtractor):
@@ -114,5 +113,5 @@ class WikifeetGalleryExtractor(GalleryExtractor):
"height": data["ph"], "height": data["ph"],
"tags" : [tagmap[tag] for tag in data["tags"]], "tags" : [tagmap[tag] for tag in data["tags"]],
}) })
for data in json.loads(text.extr(page, "['gdata'] = ", ";")) for data in util.json_loads(text.extr(page, "['gdata'] = ", ";"))
] ]

View File

@@ -9,9 +9,7 @@
"""Extractors for https://xhamster.com/""" """Extractors for https://xhamster.com/"""
from .common import Extractor, Message from .common import Extractor, Message
from .. import text from .. import text, util
import json
BASE_PATTERN = (r"(?:https?://)?((?:[\w-]+\.)?xhamster" BASE_PATTERN = (r"(?:https?://)?((?:[\w-]+\.)?xhamster"
r"(?:\d?\.(?:com|one|desi)|\.porncache\.net))") r"(?:\d?\.(?:com|one|desi)|\.porncache\.net))")
@@ -144,7 +142,7 @@ class XhamsterGalleryExtractor(XhamsterExtractor):
def _data(self, url): def _data(self, url):
page = self.request(url).text page = self.request(url).text
return json.loads(text.extr( return util.json_loads(text.extr(
page, "window.initials=", "</script>").rstrip("\n\r;")) page, "window.initials=", "</script>").rstrip("\n\r;"))

View File

@@ -9,8 +9,7 @@
"""Extractors for https://www.xvideos.com/""" """Extractors for https://www.xvideos.com/"""
from .common import GalleryExtractor, Extractor, Message from .common import GalleryExtractor, Extractor, Message
from .. import text from .. import text, util
import json
class XvideosBase(): class XvideosBase():
@@ -113,7 +112,7 @@ class XvideosUserExtractor(XvideosBase, Extractor):
def items(self): def items(self):
url = "{}/profiles/{}".format(self.root, self.user) url = "{}/profiles/{}".format(self.root, self.user)
page = self.request(url, notfound=self.subcategory).text page = self.request(url, notfound=self.subcategory).text
data = json.loads(text.extr( data = util.json_loads(text.extr(
page, "xv.conf=", ";</script>"))["data"] page, "xv.conf=", ";</script>"))["data"]
if not isinstance(data["galleries"], dict): if not isinstance(data["galleries"], dict):

View File

@@ -10,9 +10,8 @@
import argparse import argparse
import logging import logging
import json
import sys import sys
from . import job, version from . import job, util, version
class ConfigAction(argparse.Action): class ConfigAction(argparse.Action):
@@ -79,7 +78,7 @@ class Formatter(argparse.HelpFormatter):
def _parse_option(opt): def _parse_option(opt):
key, _, value = opt.partition("=") key, _, value = opt.partition("=")
try: try:
value = json.loads(value) value = util.json_loads(value)
except ValueError: except ValueError:
pass pass
return key, value return key, value

View File

@@ -204,6 +204,9 @@ def datetime_to_timestamp_string(dt):
return "" return ""
json_loads = json._default_decoder.decode
def dump_json(obj, fp=sys.stdout, ensure_ascii=True, indent=4): def dump_json(obj, fp=sys.stdout, ensure_ascii=True, indent=4):
"""Serialize 'obj' as JSON and write it to 'fp'""" """Serialize 'obj' as JSON and write it to 'fp'"""
json.dump( json.dump(
@@ -513,7 +516,7 @@ def parse_inputfile(file, log):
continue continue
try: try:
value = json.loads(value.strip()) value = json_loads(value.strip())
except ValueError as exc: except ValueError as exc:
log.warning("input file: unable to parse '%s': %s", value, exc) log.warning("input file: unable to parse '%s': %s", value, exc)
continue continue

View File

@@ -1,7 +1,7 @@
#!/usr/bin/env python3 #!/usr/bin/env python3
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
# Copyright 2015-2020 Mike Fährmann # Copyright 2015-2023 Mike Fährmann
# #
# This program is free software; you can redistribute it and/or modify # This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as # it under the terms of the GNU General Public License version 2 as
@@ -11,12 +11,11 @@ import os
import sys import sys
import unittest import unittest
import json
import tempfile import tempfile
ROOTDIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) ROOTDIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
sys.path.insert(0, ROOTDIR) sys.path.insert(0, ROOTDIR)
from gallery_dl import config # noqa E402 from gallery_dl import config, util # noqa E402
class TestConfig(unittest.TestCase): class TestConfig(unittest.TestCase):
@@ -209,8 +208,8 @@ class TestConfigFiles(unittest.TestCase):
def _load(name): def _load(name):
path = os.path.join(ROOTDIR, "docs", name) path = os.path.join(ROOTDIR, "docs", name)
try: try:
with open(path) as fp: with open(path) as file:
return json.load(fp) return util.json_loads(file.read())
except FileNotFoundError: except FileNotFoundError:
raise unittest.SkipTest(path + " not available") raise unittest.SkipTest(path + " not available")