[2chan] skip external links
This commit is contained in:
@@ -21,9 +21,9 @@ class _2chanThreadExtractor(Extractor):
|
||||
archive_fmt = "{board}_{thread}_{tim}"
|
||||
url_fmt = "https://{server}.2chan.net/{board}/src/{filename}"
|
||||
pattern = r"(?:https?://)?([^.]+)\.2chan\.net/([^/]+)/res/(\d+)"
|
||||
test = ("http://dec.2chan.net/70/res/11048.htm", {
|
||||
"url": "2ecf919139bd5d915930530b3576d67c388a2a49",
|
||||
"keyword": "8def4ec98a89fd4fff8bbcbae603604dcb4a3bb9",
|
||||
test = ("http://dec.2chan.net/70/res/4752.htm", {
|
||||
"url": "1c2d2ce8aea0fc71d94735cfc30009d628f33548",
|
||||
"keyword": "f508d6841ea2cb19ed799aac9dc580263ca50651",
|
||||
})
|
||||
|
||||
def __init__(self, match):
|
||||
@@ -70,7 +70,8 @@ class _2chanThreadExtractor(Extractor):
|
||||
data = self._extract_post(post)
|
||||
if data["name"]:
|
||||
data["name"] = data["name"].strip()
|
||||
if '<a href="/' in post:
|
||||
path = text.extract(post, '<a href="/', '"')[0]
|
||||
if path and not path.startswith("bin/jump"):
|
||||
self._extract_image(post, data)
|
||||
data["tim"], _, data["extension"] = data["filename"].partition(".")
|
||||
data["time"] = data["tim"][:-3]
|
||||
|
||||
Reference in New Issue
Block a user