set 'extension' keyword whenever possible

This commit is contained in:
Mike Fährmann
2016-10-07 15:11:58 +02:00
parent be73af551e
commit f7c993d952
4 changed files with 12 additions and 6 deletions

View File

@@ -16,7 +16,7 @@ class FourchanThreadExtractor(chan.ChanThreadExtractor):
pattern = [r"(?:https?://)?boards\.4chan\.org/([^/]+)/thread/(\d+)"] pattern = [r"(?:https?://)?boards\.4chan\.org/([^/]+)/thread/(\d+)"]
test = [("https://boards.4chan.org/tg/thread/15396072/", { test = [("https://boards.4chan.org/tg/thread/15396072/", {
"url": "39082ad166161966d7ba8e37f2173a824eb540f0", "url": "39082ad166161966d7ba8e37f2173a824eb540f0",
"keyword": "38679a7c8054f535cba67cae13eef1ea7dbc8085", "keyword": "7ae2f4049adf0d2f835eb91b6b26b7f4ec882e0a",
"content": "3081ed85a5afaeb3f430f42540e7bb5eec1908cc", "content": "3081ed85a5afaeb3f430f42540e7bb5eec1908cc",
})] })]
api_url = "https://a.4cdn.org/{board}/thread/{thread}.json" api_url = "https://a.4cdn.org/{board}/thread/{thread}.json"

View File

@@ -16,8 +16,8 @@ class InfinitychanThreadExtractor(chan.ChanThreadExtractor):
pattern = [r"(?:https?://)?(?:www\.)?8ch\.net/([^/]+)/res/(\d+)"] pattern = [r"(?:https?://)?(?:www\.)?8ch\.net/([^/]+)/res/(\d+)"]
test = [("https://8ch.net/tg/res/175887.html", { test = [("https://8ch.net/tg/res/175887.html", {
"url": "cb03fdc650ad8e796fdab553fbd5489f468d3f45", "url": "cb03fdc650ad8e796fdab553fbd5489f468d3f45",
"keyword": "c2a7f57422558dddaf3467b9a30018e847eb4fad", "keyword": "d313fa77f689ddcd45736f1e86bf63cf58614786",
"content": "9f51cdfee6942a18011996ca049baeb0a22f931b", "content": "81e21a3cc87f64f224a966f207e8e1731216c345",
})] })]
api_url = "https://8ch.net/{board}/res/{thread}.json" api_url = "https://8ch.net/{board}/res/{thread}.json"
file_url = "https://8ch.net/{board}/src/{tim}{ext}" file_url = "https://8ch.net/{board}/src/{tim}{ext}"

View File

@@ -36,13 +36,18 @@ class ChanThreadExtractor(Extractor):
for post in posts: for post in posts:
if "filename" not in post: if "filename" not in post:
continue continue
post.update(self.metadata) self.update(post)
yield Message.Url, self.file_url.format_map(post), post yield Message.Url, self.file_url.format_map(post), post
if "extra_files" in post: if "extra_files" in post:
for file in post["extra_files"]: for file in post["extra_files"]:
post.update(file) self.update(post, file)
yield Message.Url, self.file_url.format_map(post), post yield Message.Url, self.file_url.format_map(post), post
def update(self, post, data=None):
    """Merge keyword data into ``post`` and set its 'extension' entry.

    ``post`` is modified in place: it is updated with ``data`` when
    ``data`` is truthy, otherwise with ``self.metadata``. Afterwards
    ``post["extension"]`` is set to ``post["ext"]`` without its first
    character (presumably the leading dot of a value like ".jpg").

    Raises KeyError if ``post`` has no "ext" key after the merge.
    """
    # An empty or missing 'data' falls back to the extractor metadata.
    source = data if data else self.metadata
    post.update(source)
    # Derive 'extension' only after merging, so an "ext" supplied by
    # the merged data is the one that gets used.
    ext = post["ext"]
    post["extension"] = ext[1:]
@staticmethod @staticmethod
def get_thread_title(post): def get_thread_title(post):
"""Return thread title from first post""" """Return thread title from first post"""

View File

@@ -20,7 +20,7 @@ class ImgurAlbumExtractor(Extractor):
pattern = [r"(?:https?://)?(?:www\.)?imgur\.com/(?:a|gallery)/([^/?&#]+)"] pattern = [r"(?:https?://)?(?:www\.)?imgur\.com/(?:a|gallery)/([^/?&#]+)"]
test = [("https://imgur.com/a/TcBmP", { test = [("https://imgur.com/a/TcBmP", {
"url": "ce3552f550a5b5316bd9c7ae02e21e39f30c0563", "url": "ce3552f550a5b5316bd9c7ae02e21e39f30c0563",
"keyword": "8301572a22c139b5e0704ccaf2bcf49a111e2384", "keyword": "21723f47bf4a42599d39fbf29c5f79323d420898",
})] })]
def __init__(self, match): def __init__(self, match):
@@ -34,6 +34,7 @@ class ImgurAlbumExtractor(Extractor):
yield Message.Directory, data yield Message.Directory, data
for num, image in enumerate(imgs, 1): for num, image in enumerate(imgs, 1):
image["num"] = num image["num"] = num
image["extension"] = image["ext"][1:]
image.update(data) image.update(data)
url = "https://i.imgur.com/" + image["hash"] + image["ext"] url = "https://i.imgur.com/" + image["hash"] + image["ext"]
yield Message.Url, url, image yield Message.Url, url, image