From 4bc756dfe09360ec88453140d271f48bfbdcc35f Mon Sep 17 00:00:00 2001
From: enduser420 <91022934+enduser420@users.noreply.github.com>
Date: Sun, 4 Dec 2022 20:49:36 +0530
Subject: [PATCH] [2chen] fix extraction (#3356)
update 'archive_fmt' to include '{time}'
update tests: use thread /tv/496715; /jp/303786 now returns 404
update 'board' regex
---
gallery_dl/extractor/2chen.py | 16 +++++++++-------
1 file changed, 9 insertions(+), 7 deletions(-)
diff --git a/gallery_dl/extractor/2chen.py b/gallery_dl/extractor/2chen.py
index 8fffeb0b..76a085a6 100644
--- a/gallery_dl/extractor/2chen.py
+++ b/gallery_dl/extractor/2chen.py
@@ -16,13 +16,15 @@ class _2chenThreadExtractor(Extractor):
subcategory = "thread"
directory_fmt = ("{category}", "{board}", "{thread} {title}")
filename_fmt = "{time} {filename}.{extension}"
- archive_fmt = "{board}_{thread}_{hash}"
+ archive_fmt = "{board}_{thread}_{hash}_{time}"
root = "https://2chen.moe"
pattern = r"(?:https?://)?2chen\.moe/([^/?#]+)/(\d+)"
test = (
- ("https://2chen.moe/jp/303786", {
- "count": ">= 10",
+ ("https://2chen.moe/tv/496715", {
+ "count": ">= 179",
}),
+ # 404
+ ("https://2chen.moe/jp/303786"),
)
def __init__(self, match):
@@ -31,7 +33,7 @@ class _2chenThreadExtractor(Extractor):
def items(self):
url = "{}/{}/{}".format(self.root, self.board, self.thread)
- page = self.request(url, encoding="utf-8").text
+ page = self.request(url, encoding="utf-8", notfound="thread").text
data = self.metadata(page)
yield Message.Directory, data
for post in self.posts(page):
@@ -66,7 +68,7 @@ class _2chenThreadExtractor(Extractor):
"%d %b %Y (%a) %H:%M:%S"
),
"no" : extr('href="#p', '"'),
- "url" : extr('