From 8f621b32bd28922b506bce2bd7952c1ccdd2f46e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mike=20F=C3=A4hrmann?= Date: Sat, 13 Dec 2025 18:08:50 +0100 Subject: [PATCH] [2chen] implement generic 2chen board extractors support - https://sturdychan.help/ - https://schan.help/ (#8680) --- docs/supportedsites.md | 22 +++++--- gallery_dl/extractor/2chen.py | 67 ++++++++++++------------ test/results/schan.py | 37 +++++++++++++ test/results/{2chen.py => sturdychan.py} | 14 ++--- 4 files changed, 93 insertions(+), 47 deletions(-) create mode 100644 test/results/schan.py rename test/results/{2chen.py => sturdychan.py} (79%) diff --git a/docs/supportedsites.md b/docs/supportedsites.md index a49e9be5..ef5f8ccb 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -19,12 +19,6 @@ Consider all listed sites to potentially be NSFW. Boards, Threads - - 2chen - https://sturdychan.help/ - Boards, Threads - - 35PHOTO https://35photo.pro/ @@ -1274,6 +1268,22 @@ Consider all listed sites to potentially be NSFW. + + 2chen Instances + + + Sturdychan + https://sturdychan.help/ + Boards, Threads + + + + Schan + https://schan.help/ + Boards, Threads + + + Blogger Instances diff --git a/gallery_dl/extractor/2chen.py b/gallery_dl/extractor/2chen.py index 8357bc46..4456fd69 100644 --- a/gallery_dl/extractor/2chen.py +++ b/gallery_dl/extractor/2chen.py @@ -1,40 +1,55 @@ # -*- coding: utf-8 -*- +# Copyright 2022-2025 Mike Fährmann +# # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License version 2 as # published by the Free Software Foundation. -"""Extractors for https://sturdychan.help/""" +"""Extractors for 2chen boards""" -from .common import Extractor, Message +from .common import BaseExtractor, Message from .. import text -BASE_PATTERN = r"(?:https?://)?(?:sturdychan.help|2chen\.(?:moe|club))" + +class _2chenExtractor(BaseExtractor): + basecategory = "2chen" -class _2chenThreadExtractor(Extractor): +BASE_PATTERN = _2chenExtractor.update({ + "sturdychan": { + "root": "https://sturdychan.help", + "pattern": r"(?:sturdychan\.help|2chen\.(?:moe|club))", + }, + "schan": { + "root": "https://schan.help/", + "pattern": r"schan\.help", + }, +}) + + +class _2chenThreadExtractor(_2chenExtractor): """Extractor for 2chen threads""" - category = "2chen" subcategory = "thread" - root = "https://sturdychan.help" directory_fmt = ("{category}", "{board}", "{thread} {title}") filename_fmt = "{time} {filename}.{extension}" - archive_fmt = "{board}_{thread}_{hash}_{time}" + archive_fmt = "{board}_{thread}_{no}_{time}" pattern = rf"{BASE_PATTERN}/([^/?#]+)/(\d+)" example = "https://sturdychan.help/a/12345/" - def __init__(self, match): - Extractor.__init__(self, match) - self.board, self.thread = match.groups() - def items(self): - url = f"{self.root}/{self.board}/{self.thread}" + board = self.groups[-2] + thread = self.kwdict["thread"] = self.groups[-1] + url = f"{self.root}/{board}/{thread}" page = self.request(url, encoding="utf-8", notfound="thread").text - data = self.metadata(page) - yield Message.Directory, "", data + self.kwdict["board"], pos = text.extract( + page, 'class="board">/', '/<') + self.kwdict["title"] = text.unescape(text.extract( + page, "

", "

", pos)[0]) + + yield Message.Directory, "", {} for post in self.posts(page): - url = post["url"] if not url: continue @@ -42,20 +57,10 @@ class _2chenThreadExtractor(Extractor): url = self.root + url post["url"] = url = url.partition("?")[0] - post.update(data) post["time"] = text.parse_int(post["date"].timestamp()) yield Message.Url, url, text.nameext_from_url( post["filename"], post) - def metadata(self, page): - board, pos = text.extract(page, 'class="board">/', '/<') - title = text.extract(page, "

", "

", pos)[0] - return { - "board" : board, - "thread": self.thread, - "title" : text.unescape(title), - } - def posts(self, page): """Return iterable with relevant posts""" return map(self.parse, text.extract_iter( @@ -70,26 +75,20 @@ class _2chenThreadExtractor(Extractor): "%d %b %Y (%a) %H:%M:%S" ), "no" : extr('href="#p', '"'), - "url" : extr('", "= 179", + + "board" : "tv", + "date" : "type:datetime", + "hash" : "", + "name" : "Anonymous", + "no" : r"re:\d+", + "thread": "757", + "time" : int, + "title" : "「/ttg/ #1: The Future of Schan」", + "url" : str, +}, + +{ + "#url" : "https://schan.help/tv/", + "#category": ("2chen", "schan", "board"), + "#class" : _2chen._2chenBoardExtractor, + "#pattern" : _2chen._2chenThreadExtractor.pattern, +}, + +) diff --git a/test/results/2chen.py b/test/results/sturdychan.py similarity index 79% rename from test/results/2chen.py rename to test/results/sturdychan.py index 132f75d7..37d55200 100644 --- a/test/results/2chen.py +++ b/test/results/sturdychan.py @@ -11,7 +11,7 @@ _2chen = getattr(gallery_dl.extractor, "2chen") __tests__ = ( { "#url" : "https://sturdychan.help/tv/268929", - "#category": ("", "2chen", "thread"), + "#category": ("2chen", "sturdychan", "thread"), "#class" : _2chen._2chenThreadExtractor, "#pattern" : r"https://sturdychan\.help/assets/images/src/\w{40}\.\w+$", "#count" : ">= 179", @@ -29,38 +29,38 @@ __tests__ = ( { "#url" : "https://2chen.club/tv/1", - "#category": ("", "2chen", "thread"), + "#category": ("2chen", "sturdychan", "thread"), "#class" : _2chen._2chenThreadExtractor, }, { "#url" : "https://2chen.moe/jp/303786", - "#category": ("", "2chen", "thread"), + "#category": ("2chen", "sturdychan", "thread"), "#class" : _2chen._2chenThreadExtractor, }, { "#url" : "https://sturdychan.help/co/", - "#category": ("", "2chen", "board"), + "#category": ("2chen", "sturdychan", "board"), "#class" : _2chen._2chenBoardExtractor, "#pattern" : _2chen._2chenThreadExtractor.pattern, }, { "#url" : "https://2chen.moe/co", - "#category": ("", "2chen", "board"), + "#category": ("2chen", "sturdychan", "board"), "#class" : _2chen._2chenBoardExtractor, }, { "#url" : "https://2chen.club/tv", - "#category": ("", "2chen", "board"), + "#category": ("2chen", "sturdychan", "board"), "#class" : _2chen._2chenBoardExtractor, }, { "#url" : "https://2chen.moe/co/catalog", - "#category": ("", "2chen", "board"), + "#category": ("2chen", "sturdychan", "board"), "#class" : _2chen._2chenBoardExtractor, },