diff --git a/gallery_dl/extractor/4chan.py b/gallery_dl/extractor/4chan.py
index 7d6c826b..74d1c867 100644
--- a/gallery_dl/extractor/4chan.py
+++ b/gallery_dl/extractor/4chan.py
@@ -8,65 +8,25 @@
"""Extract image- and video-urls from threads on https://www.4chan.org/"""
-from .common import SequentialExtractor, Message
-from urllib.parse import unquote
-import re
+from .chan import ChanExtractor
info = {
"category": "4chan",
"extractor": "FourChanExtractor",
"directory": ["{category}", "{board}-{thread-id}"],
- "filename": "{timestamp}-{name}",
+ "filename": "{time}-{filename}{ext}",
"pattern": [
r"(?:https?://)?boards\.4chan\.org/([^/]+)/thread/(\d+).*",
],
}
-class FourChanExtractor(SequentialExtractor):
+class FourChanExtractor(ChanExtractor):
- url_fmt = "https://boards.4chan.org/{0}/res/{1}.html"
- regex = (
- r'<a (?:title="(?P<orig_name>[^"]+)" )?href="'
- r'(?P<url>//i.4cdn.org/[^/]+/(?P<timestamp>\d+)\.(?P<extension>[^"]+))'
- r'" target="_blank">(?P<name>[^<]+)</a> '
- r'\((?P<size>[^,]+), (?P<width>\d+)x(?P<height>\d+)\)'
- )
+ api_url = "https://a.4cdn.org/{board}/thread/{thread}.json"
+ file_url = "https://i.4cdn.org/{board}/{tim}{ext}"
def __init__(self, match, config):
- SequentialExtractor.__init__(self, config)
- self.match = match
- self.metadata = None
-
- def items(self):
- yield Message.Version, 1
-
- url = self.url_fmt.format(*self.match.groups())
- text = self.request(url).text
- self.metadata = self.get_job_metadata(text)
-
- yield Message.Directory, self.metadata
- for match in re.finditer(self.regex, text):
- yield Message.Url, self.get_file_url(match), self.get_file_metadata(match)
-
- def get_job_metadata(self, text):
- """Collect metadata for extractor-job"""
- board, thread_id = self.match.groups()
- title, _ = self.extract(text, '"description" content="', ' - "/')
- return {
- "category": info["category"],
- "board": board,
- "thread-id": thread_id,
- "title": unquote(title),
- }
-
- def get_file_metadata(self, match):
- """Collect metadata for a downloadable file"""
- data = self.metadata
- data.update(match.groupdict(default=""))
- data["name"] = unquote(data["orig_name"] or data["name"])
- return data
-
- @staticmethod
- def get_file_url(match):
- """Extract download-url from 'match'"""
- return "https:" + match.group("url")
+ ChanExtractor.__init__(
+ self, config, info["category"],
+ match.group(1), match.group(2)
+ )