diff --git a/gallery_dl/extractor/warosu.py b/gallery_dl/extractor/warosu.py index 677680fa..bdedfcbc 100644 --- a/gallery_dl/extractor/warosu.py +++ b/gallery_dl/extractor/warosu.py @@ -1,21 +1,22 @@ # -*- coding: utf-8 -*- -# Copyright 2017-2019 Mike Fährmann +# Copyright 2017-2022 Mike Fährmann # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License version 2 as # published by the Free Software Foundation. -"""Extract images from https://warosu.org/""" +"""Extractors for https://warosu.org/""" from .common import Extractor, Message from .. import text class WarosuThreadExtractor(Extractor): - """Extractor for images from threads on warosu.org""" + """Extractor for threads on warosu.org""" category = "warosu" subcategory = "thread" + root = "https://warosu.org" directory_fmt = ("{category}", "{board}", "{thread} - {title}") filename_fmt = "{tim}-{filename}.{extension}" archive_fmt = "{board}_{thread}_{tim}" @@ -31,7 +32,6 @@ class WarosuThreadExtractor(Extractor): "content": "d48df0a701e6599312bfff8674f4aa5d4fb8db1c", }), ) - root = "https://warosu.org" def __init__(self, match): Extractor.__init__(self, match) @@ -40,12 +40,12 @@ class WarosuThreadExtractor(Extractor): def items(self): url = "{}/{}/thread/{}".format(self.root, self.board, self.thread) page = self.request(url).text - data = self.get_metadata(page) + data = self.metadata(page) posts = self.posts(page) if not data["title"]: - title = text.remove_html(posts[0]["com"]) - data["title"] = text.unescape(title)[:50] + data["title"] = text.unescape(text.remove_html( + posts[0]["com"]))[:50] yield Message.Directory, data for post in posts: @@ -55,25 +55,24 @@ class WarosuThreadExtractor(Extractor): post.update(data) yield Message.Url, post["image"], post - def get_metadata(self, page): - """Collect metadata for extractor-job""" + def metadata(self, page): boardname = text.extr(page, "