[blogger] handle URLs with specified width/height (closes #1061)

Get the highest-quality version of images whose URLs contain
/wXXX-hXXX/ instead of the usual /sXXX/.
This commit is contained in:
Mike Fährmann
2020-10-13 21:16:48 +02:00
parent 783e0af26d
commit 6491db3eaf

View File

@@ -42,7 +42,7 @@ class BloggerExtractor(Extractor):
blog["date"] = text.parse_datetime(blog["published"])
del blog["selfLink"]
-sub = re.compile(r"/s\d+/").sub
+sub = re.compile(r"/(?:s\d+|w\d+-h\d+)/").sub
findall_image = re.compile(
r'src="(https?://\d+\.bp\.blogspot\.com/[^"]+)').findall
findall_video = re.compile(
@@ -134,6 +134,10 @@ class BloggerPostExtractor(BloggerExtractor):
"cfnm-scene-jenna-fischer-in-office.html"), {
"pattern": r"https://.+\.googlevideo\.com/videoplayback",
}),
# image URLs with width/height (#1061)
("https://aaaninja.blogspot.com/2020/08/altera-boob-press-2.html", {
"pattern": r"https://1.bp.blogspot.com/.+/s0/altera_.+png",
}),
)
def __init__(self, match):