replace more '.format(…)' calls with f-strings (#7671)

Mike Fährmann
2025-07-10 21:05:10 +02:00
parent a619638178
commit 096bc6f784
14 changed files with 54 additions and 78 deletions

gallery_dl/extractor/2chan.py

@@ -19,7 +19,6 @@ class _2chanThreadExtractor(Extractor):
     directory_fmt = ("{category}", "{board_name}", "{thread}")
     filename_fmt = "{tim}.{extension}"
     archive_fmt = "{board}_{thread}_{tim}"
-    url_fmt = "https://{server}.2chan.net/{board}/src/{filename}"
     pattern = r"(?:https?://)?([\w-]+)\.2chan\.net/([^/?#]+)/res/(\d+)"
     example = "https://dec.2chan.net/12/res/12345.htm"

@@ -37,7 +36,8 @@ class _2chanThreadExtractor(Extractor):
             if "filename" not in post:
                 continue
             post.update(data)
-            url = self.url_fmt.format_map(post)
+            url = (f"https://{post['server']}.2chan.net"
+                   f"/{post['board']}/src/{post['filename']}")
             yield Message.Url, url, post

     def metadata(self, page):
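
The pattern throughout this commit is the same: a class-level template attribute plus `.format()`/`.format_map()` collapses into an inline f-string. A minimal sketch of the equivalence for this first hunk, using a hypothetical `post` dict (not part of the commit):

    post = {"server": "dec", "board": "12", "filename": "12345.jpg"}
    old = "https://{server}.2chan.net/{board}/src/{filename}".format_map(post)
    new = (f"https://{post['server']}.2chan.net"
           f"/{post['board']}/src/{post['filename']}")
    assert old == new  # both raise KeyError if a key is missing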

gallery_dl/extractor/common.py

@@ -417,7 +417,7 @@ class Extractor():
         for key, value in HEADERS[browser]:
             if value and "{}" in value:
-                headers[key] = value.format(platform)
+                headers[key] = value.replace("{}", platform)
             else:
                 headers[key] = value
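
This is the one spot where the replacement is `str.replace` rather than an f-string, since the template comes from the `HEADERS` data table instead of a literal. A plausible reason (my reading, not stated in the commit) is that `.replace` touches only the literal `{}` slot, whereas `.format` would also try to interpret any other braces appearing in a header value:

    platform = "Windows"
    value = 'sec-ch-ua-platform: "{}"'    # hypothetical header template
    assert value.replace("{}", platform) == value.format(platform)

    tricky = '{"platform": "{}"}'         # hypothetical value with extra braces
    tricky.replace("{}", platform)        # fine
    # tricky.format(platform)             # raises KeyError: '"platform"'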

gallery_dl/extractor/deviantart.py

@@ -322,7 +322,7 @@ class DeviantartExtractor(Extractor):
         header = HEADER_TEMPLATE.format(
             title=title,
             url=url,
-            userurl="{}/{}/".format(self.root, urlname),
+            userurl=f"{self.root}/{urlname}/",
             username=username,
             date=deviation["date"],
         )
@@ -747,13 +747,10 @@ x2="45.4107524%" y2="71.4898596%" id="app-root-3">\
             deviation["_fallback"] = (content["src"],)
             deviation["is_original"] = True

-        content["src"] = (
-            "{}?token=eyJ0eXAiOiJKV1QiLCJhbGciOiJub25lIn0.{}.".format(
-                url,
-                # base64 of 'header' is precomputed as 'eyJ0eX...'
-                # binascii.b2a_base64(header).rstrip(b"=\n").decode(),
-                binascii.b2a_base64(payload).rstrip(b"=\n").decode())
-        )
+        pl = binascii.b2a_base64(payload).rstrip(b'=\n').decode()
+        content["src"] = (
+            # base64 of 'header' is precomputed as 'eyJ0eX...'
+            f"{url}?token=eyJ0eXAiOiJKV1QiLCJhbGciOiJub25lIn0.{pl}.")

     def _extract_comments(self, target_id, target_type="deviation"):
         results = None
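
The retained comment refers to the token's first segment: it is the base64 of the JWT header, precomputed as a literal so it never has to be encoded at runtime. A quick check that the constant matches, assuming the header is the `{"typ":"JWT","alg":"none"}` the literal decodes to:

    import binascii

    header = b'{"typ":"JWT","alg":"none"}'
    token_head = binascii.b2a_base64(header).rstrip(b"=\n").decode()
    assert token_head == "eyJ0eXAiOiJKV1QiLCJhbGciOiJub25lIn0"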

gallery_dl/extractor/discord.py

@@ -22,8 +22,6 @@ class DiscordExtractor(Extractor):
     filename_fmt = "{message_id}_{num:>02}_{filename}.{extension}"
     archive_fmt = "{message_id}_{num}"
-    cdn_fmt = "https://cdn.discordapp.com/{}/{}/{}.png?size=4096"
-
     server_metadata = {}
     server_channels_metadata = {}

@@ -86,11 +84,10 @@ class DiscordExtractor(Extractor):
         ):
             if message["author"].get(icon_type):
                 message_metadata["author_files"].append({
-                    "url": self.cdn_fmt.format(
-                        icon_path,
-                        message_metadata["author_id"],
-                        message["author"][icon_type]
-                    ),
+                    "url": (f"https://cdn.discordapp.com/{icon_path}/"
+                            f"{message_metadata['author_id']}/"
+                            f"{message['author'][icon_type]}.png"
+                            f"?size=4096"),
                     "filename": icon_type,
                     "extension": "png",
                 })
@@ -222,11 +219,9 @@ class DiscordExtractor(Extractor):
         ):
             if server.get(icon_type):
                 self.server_metadata["server_files"].append({
-                    "url": self.cdn_fmt.format(
-                        icon_path,
-                        self.server_metadata["server_id"],
-                        server[icon_type]
-                    ),
+                    "url": (f"https://cdn.discordapp.com/{icon_path}/"
+                            f"{self.server_metadata['server_id']}/"
+                            f"{server[icon_type]}.png?size=4096"),
                     "filename": icon_type,
                     "extension": "png",
                 })

gallery_dl/extractor/facebook.py

@@ -20,9 +20,6 @@ class FacebookExtractor(Extractor):
     filename_fmt = "{id}.{extension}"
     archive_fmt = "{id}.{extension}"

-    set_url_fmt = root + "/media/set/?set={set_id}"
-    photo_url_fmt = root + "/photo/?fbid={photo_id}&set={set_id}"
-
     def _init(self):
         headers = self.session.headers
         headers["Accept"] = (
@@ -242,9 +239,7 @@ class FacebookExtractor(Extractor):
         while i < len(all_photo_ids):
             photo_id = all_photo_ids[i]
-            photo_url = self.photo_url_fmt.format(
-                photo_id=photo_id, set_id=set_id
-            )
+            photo_url = f"{self.root}/photo/?fbid={photo_id}&set={set_id}"

             photo_page = self.photo_page_request_wrapper(photo_url).text
             photo = self.parse_photo_page(photo_page)
@@ -317,7 +312,7 @@ class FacebookSetExtractor(FacebookExtractor):
         post_page = self.request(post_url).text
         set_id = self.parse_post_page(post_page)["set_id"]
-        set_url = self.set_url_fmt.format(set_id=set_id)
+        set_url = f"{self.root}/media/set/?set={set_id}"
         set_page = self.request(set_url).text
         set_data = self.parse_set_page(set_page)

         if self.groups[2]:
@@ -336,16 +331,15 @@ class FacebookPhotoExtractor(FacebookExtractor):
     def items(self):
         photo_id = self.groups[0]
-        photo_url = self.photo_url_fmt.format(photo_id=photo_id, set_id="")
+        photo_url = f"{self.root}/photo/?fbid={photo_id}&set="
         photo_page = self.photo_page_request_wrapper(photo_url).text

         i = 1
         photo = self.parse_photo_page(photo_page)
         photo["num"] = i

-        set_page = self.request(
-            self.set_url_fmt.format(set_id=photo["set_id"])
-        ).text
+        set_url = f"{self.root}/media/set/?set={photo['set_id']}"
+        set_page = self.request(set_url).text

         directory = self.parse_set_page(set_page)
@@ -356,9 +350,7 @@ class FacebookPhotoExtractor(FacebookExtractor):
         for comment_photo_id in photo["followups_ids"]:
             comment_photo = self.parse_photo_page(
                 self.photo_page_request_wrapper(
-                    self.photo_url_fmt.format(
-                        photo_id=comment_photo_id, set_id=""
-                    )
+                    f"{self.root}/photo/?fbid={comment_photo_id}&set="
                 ).text
             )
             i += 1
@@ -426,7 +418,7 @@ class FacebookProfileExtractor(FacebookExtractor):
         set_id = self.get_profile_photos_set_id(profile_photos_page)

         if set_id:
-            set_url = self.set_url_fmt.format(set_id=set_id)
+            set_url = f"{self.root}/media/set/?set={set_id}"
             set_page = self.request(set_url).text
             set_data = self.parse_set_page(set_page)
             return self.extract_set(set_data)

gallery_dl/extractor/fanbox.py

@@ -337,8 +337,8 @@ class FanboxExtractor(Extractor):
         elif provider == "twitter":
             url = "https://twitter.com/_/status/"+content_id
         elif provider == "google_forms":
-            templ = "https://docs.google.com/forms/d/e/{}/viewform?usp=sf_link"
-            url = templ.format(content_id)
+            url = (f"https://docs.google.com/forms/d/e/"
+                   f"{content_id}/viewform?usp=sf_link")
         else:
             self.log.warning(f"service not recognized: {provider}")

gallery_dl/extractor/furaffinity.py

@@ -324,11 +324,12 @@ class FuraffinityUserExtractor(Dispatch, FuraffinityExtractor):
     example = "https://www.furaffinity.net/user/USER/"

     def items(self):
-        base = f"{self.root}/{{}}/{self.user}/"
+        base = self.root
+        user = f"{self.user}/"
         return self._dispatch_extractors((
-            (FuraffinityGalleryExtractor , base.format("gallery")),
-            (FuraffinityScrapsExtractor  , base.format("scraps")),
-            (FuraffinityFavoriteExtractor, base.format("favorites")),
+            (FuraffinityGalleryExtractor , f"{base}/gallery/{user}"),
+            (FuraffinityScrapsExtractor  , f"{base}/scraps/{user}"),
+            (FuraffinityFavoriteExtractor, f"{base}/favorites/{user}"),
         ), ("gallery",))

gallery_dl/extractor/issuu.py

@@ -39,18 +39,15 @@ class IssuuPublicationExtractor(IssuuBase, GalleryExtractor):
         doc["date"] = text.parse_datetime(
             doc["originalPublishDateInISOString"], "%Y-%m-%dT%H:%M:%S.%fZ")

-        self._cnt = text.parse_int(doc["pageCount"])
-        self._tpl = "https://{}/{}-{}/jpg/page_{{}}.jpg".format(
-            "image.isu.pub",  # data["config"]["hosts"]["image"],
-            doc["revisionId"],
-            doc["publicationId"],
-        )
+        self.count = text.parse_int(doc["pageCount"])
+        self.base = (f"https://image.isu.pub/{doc['revisionId']}-"
+                     f"{doc['publicationId']}/jpg/page_")

         return {"document": doc}

     def images(self, page):
-        fmt = self._tpl.format
-        return [(fmt(i), None) for i in range(1, self._cnt + 1)]
+        return [(f"{self.base}{i}.jpg", None)
+                for i in range(1, self.count + 1)]


 class IssuuUserExtractor(IssuuBase, Extractor):
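
The shape of this rewrite: the old code built the template once and bound `self._tpl.format` before the comprehension; the new code precomputes the constant URL prefix in `self.base` and leaves a single interpolation per page. A sketch with hypothetical document IDs:

    doc = {"revisionId": "250710", "publicationId": "sample-doc"}  # hypothetical
    base = (f"https://image.isu.pub/{doc['revisionId']}-"
            f"{doc['publicationId']}/jpg/page_")
    pages = [(f"{base}{i}.jpg", None) for i in range(1, 4)]
    # [('https://image.isu.pub/250710-sample-doc/jpg/page_1.jpg', None), ...]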

gallery_dl/extractor/mangadex.py

@@ -354,7 +354,7 @@ class MangadexAPI():
                 self.extractor.wait(until=until)
                 continue

-            msg = ", ".join('{title}: "{detail}"'.format_map(error)
+            msg = ", ".join(f'{error["title"]}: "{error["detail"]}"'
                             for error in response.json()["errors"])
             raise exception.AbortExtraction(
                 f"{response.status_code} {response.reason} ({msg})")

gallery_dl/extractor/mangahere.py

@@ -17,7 +17,6 @@ class MangahereBase():
     category = "mangahere"
     root = "https://www.mangahere.cc"
     root_mobile = "https://m.mangahere.cc"
-    url_fmt = root_mobile + "/manga/{}/{}.html"


 class MangahereChapterExtractor(MangahereBase, ChapterExtractor):
@@ -28,8 +27,8 @@ class MangahereChapterExtractor(MangahereBase, ChapterExtractor):
     def __init__(self, match):
         self.part, self.volume, self.chapter = match.groups()
-        url = self.url_fmt.format(self.part, 1)
-        ChapterExtractor.__init__(self, match, url)
+        self.base = f"{self.root_mobile}/manga/{self.part}/"
+        ChapterExtractor.__init__(self, match, f"{self.base}1.html")

     def _init(self):
         self.session.headers["Referer"] = self.root_mobile + "/"
@@ -64,7 +63,7 @@ class MangahereChapterExtractor(MangahereBase, ChapterExtractor):
             url, pos = text.extract(page, ' src="', '"', pos)
             yield text.ensure_http_scheme(text.unescape(url)), None
             pnum += 2
-            page = self.request(self.url_fmt.format(self.part, pnum)).text
+            page = self.request(f"{self.base}{pnum}.html").text

     def _get_title(self):
         url = f"{self.root}/manga/{self.part}/"

gallery_dl/extractor/oauth.py

@@ -214,19 +214,17 @@ class OAuthBase(Extractor):
             ("These values have", "these values", "are", "them")
         )

-        msg = "\nYour {} {}\n\n{}\n\n".format(
-            " and ".join("'" + n + "'" for n in names),
-            _is,
-            "\n".join(values),
-        )
+        key = " and ".join(f"'{n}'" for n in names)
+        val = "\n".join(values)
+        msg = f"\nYour {key} {_is}\n\n{val}\n\n"

         opt = self.oauth_config(names[0])
         if self.cache and (opt is None or opt == "cache"):
             msg += _vh + " been cached and will automatically be used.\n"
         else:
-            msg += "Put " + _va + " into your configuration file as \n"
+            msg += f"Put {_va} into your configuration file as \n"
             msg += " and\n".join(
-                "'extractor." + self.subcategory + "." + n + "'"
+                f"'extractor.{self.subcategory}.{n}'"
                 for n in names
             )
             if self.cache:
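
Hoisting `val` into a local is not purely cosmetic: before Python 3.12, an f-string expression could not contain a backslash, so `"\n".join(values)` could not be written inline and needed a separate variable. Binding the joins first keeps the f-string valid on every supported version. A sketch with hypothetical values:

    names = ("client-id", "client-secret")
    values = ("ABCDEF", "123456")  # hypothetical credentials
    _is = "are"

    key = " and ".join(f"'{n}'" for n in names)
    val = "\n".join(values)        # backslash, so no inline f-string pre-3.12
    msg = f"\nYour {key} {_is}\n\n{val}\n\n"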

gallery_dl/extractor/pixnet.py

@@ -19,11 +19,10 @@ class PixnetExtractor(Extractor):
     category = "pixnet"
     filename_fmt = "{num:>03}_{id}.{extension}"
     archive_fmt = "{id}"
-    url_fmt = ""

     def __init__(self, match):
         Extractor.__init__(self, match)
-        self.blog, self.item_id = match.groups()
+        self.blog, self.item_id = self.groups
         self.root = f"https://{self.blog}.pixnet.net"

     def items(self):
@@ -91,14 +90,13 @@ class PixnetSetExtractor(PixnetExtractor):
 class PixnetSetExtractor(PixnetExtractor):
     """Extractor for images from a pixnet set"""
     subcategory = "set"
-    url_fmt = "{}/album/set/{}"
     directory_fmt = ("{category}", "{blog}",
                      "{folder_id} {folder_title}", "{set_id} {set_title}")
     pattern = BASE_PATTERN + r"/album/set/(\d+)"
     example = "https://USER.pixnet.net/album/set/12345"

     def items(self):
-        url = self.url_fmt.format(self.root, self.item_id)
+        url = f"{self.root}/album/set/{self.item_id}"
         page = self.request(url, encoding="utf-8").text
         data = self.metadata(page)

gallery_dl/extractor/wikifeet.py

@@ -50,9 +50,11 @@ class WikifeetGalleryExtractor(GalleryExtractor):
             "S": "Soles",
             "B": "Barefoot",
         }
-        ufmt = "https://pics.wikifeet.com/" + self.celeb + "-Feet-{}.jpg"
+        gallery = text.extr(page, '"gallery":[', '],')
+        base = f"https://pics.wikifeet.com/{self.celeb}-Feet-"
         return [
-            (ufmt.format(data["pid"]), {
+            (f"{base}{data['pid']}.jpg", {
                 "pid"   : data["pid"],
                 "width" : data["pw"],
                 "height": data["ph"],
@@ -61,6 +63,5 @@ class WikifeetGalleryExtractor(GalleryExtractor):
                     for tag in data["tags"] if tag in tagmap
                 ],
             })
-            for data in
-            util.json_loads("[" + text.extr(page, '"gallery":[', '],') + "]")
+            for data in util.json_loads(f"[{gallery}]")
         ]

gallery_dl/extractor/xfolio.py

@@ -47,9 +47,6 @@ class XfolioWorkExtractor(XfolioExtractor):
     subcategory = "work"
     pattern = BASE_PATTERN + r"/portfolio/([^/?#]+)/works/(\d+)"
     example = "https://xfolio.jp/portfolio/USER/works/12345"
-    ref_fmt = ("{}/fullscale_image?image_id={}&work_id={}")
-    url_fmt = ("{}/user_asset.php?id={}&work_id={}"
-               "&work_image_id={}&type=work_image")

     def items(self):
         creator, work_id = self.groups
@@ -98,10 +95,11 @@ class XfolioWorkExtractor(XfolioExtractor):
             files.append({
                 "image_id" : image_id,
                 "extension": "jpg",
-                "url": self.url_fmt.format(
-                    self.root, image_id, work_id, image_id),
-                "_http_headers": {"Referer": self.ref_fmt.format(
-                    self.root, image_id, work_id)},
+                "url": (f"{self.root}/user_asset.php?id={image_id}&work_id="
+                        f"{work_id}&work_image_id={image_id}&type=work_image"),
+                "_http_headers": {"Referer": (
+                    f"{self.root}/fullscale_image"
+                    f"?image_id={image_id}&work_id={work_id}")},
             })

         return files