[2chan] skip external links

This commit is contained in:
Mike Fährmann
2020-11-24 16:41:47 +01:00
parent 547107307e
commit 93ce7466e2

View File

@@ -21,9 +21,9 @@ class _2chanThreadExtractor(Extractor):
archive_fmt = "{board}_{thread}_{tim}"
url_fmt = "https://{server}.2chan.net/{board}/src/{filename}"
pattern = r"(?:https?://)?([^.]+)\.2chan\.net/([^/]+)/res/(\d+)"
test = ("http://dec.2chan.net/70/res/11048.htm", {
"url": "2ecf919139bd5d915930530b3576d67c388a2a49",
"keyword": "8def4ec98a89fd4fff8bbcbae603604dcb4a3bb9",
test = ("http://dec.2chan.net/70/res/4752.htm", {
"url": "1c2d2ce8aea0fc71d94735cfc30009d628f33548",
"keyword": "f508d6841ea2cb19ed799aac9dc580263ca50651",
})
def __init__(self, match):
@@ -70,7 +70,8 @@ class _2chanThreadExtractor(Extractor):
data = self._extract_post(post)
if data["name"]:
data["name"] = data["name"].strip()
if '<a href="/' in post:
path = text.extract(post, '<a href="/', '"')[0]
if path and not path.startswith("bin/jump"):
self._extract_image(post, data)
data["tim"], _, data["extension"] = data["filename"].partition(".")
data["time"] = data["tim"][:-3]