[livedoor] fix adjustments for https:// URLs

This commit is contained in:
Mike Fährmann
2019-05-25 23:35:20 +02:00
parent 2316e0ed3d
commit e88824e1a7

View File

@@ -16,7 +16,6 @@ class LivedoorExtractor(Extractor):
 """Base class for livedoor extractors"""
 category = "livedoor"
 root = "http://blog.livedoor.jp"
-img_root = "http://livedoor.blogimg.jp"
 filename_fmt = "{post[id]}_{post[title]}_{num:>02}.{extension}"
 directory_fmt = ("{category}", "{post[user]}")
 archive_fmt = "{post[id]}_{hash}"
@@ -59,7 +58,7 @@ class LivedoorExtractor(Extractor):
 src = text.extract(img, 'src="', '"')[0]
 alt = text.extract(img, 'alt="', '"')[0]
-if src.startswith(self.img_root):
+if "://livedoor.blogimg.jp/" in src:
 url = src.replace("-s.", ".")
 else:
 url = text.urljoin(self.root, src)
@@ -84,7 +83,7 @@ class LivedoorBlogExtractor(LivedoorExtractor):
 test = ("http://blog.livedoor.jp/zatsu_ke/", {
 "range": "1-50",
 "count": 50,
-"pattern": r"http://livedoor.blogimg.jp/zatsu_ke/imgs/\w/\w/\w+\.\w+",
+"pattern": r"https?://livedoor.blogimg.jp/\w+/imgs/\w/\w/\w+\.\w+",
 "keyword": {
 "post": {
 "categories": list,