diff --git a/gallery_dl/extractor/warosu.py b/gallery_dl/extractor/warosu.py index 61a36d5a..9db41e0c 100644 --- a/gallery_dl/extractor/warosu.py +++ b/gallery_dl/extractor/warosu.py @@ -18,7 +18,7 @@ class WarosuThreadExtractor(Extractor): subcategory = "thread" root = "https://warosu.org" directory_fmt = ("{category}", "{board}", "{thread} - {title}") - filename_fmt = "{tim}-{filename}.{extension}" + filename_fmt = "{tim} {filename}.{extension}" archive_fmt = "{board}_{thread}_{tim}" pattern = r"(?:https?://)?(?:www\.)?warosu\.org/([^/]+)/thread/(\d+)" example = "https://warosu.org/a/thread/12345" @@ -47,7 +47,7 @@ class WarosuThreadExtractor(Extractor): def metadata(self, page): boardname = text.extr(page, "", "") - title = text.unescape(text.extr(page, "class=filetitle>", "<")) + title = text.unescape(text.extr(page, "class=\"filetitle\">", "<")) return { "board" : self.board, "board_name": boardname.split(" - ")[1], @@ -57,14 +57,15 @@ class WarosuThreadExtractor(Extractor): def posts(self, page): """Build a list of all post objects""" - page = text.extr(page, "
") + page = text.extr(page, "
") needle = "" return [self.parse(post) for post in page.split(needle)] def parse(self, post): """Build post object by extracting data from an HTML post""" data = self._extract_post(post) - if "" in post and self._extract_image(post, data): + if "" in post and \ + self._extract_image(post, data): part = data["image"].rpartition("/")[2] data["tim"], _, data["extension"] = part.partition(".") data["ext"] = "." + data["extension"] @@ -73,9 +74,9 @@ class WarosuThreadExtractor(Extractor): def _extract_post(self, post): extr = text.extract_from(post) return { - "no" : extr("id=p", ">"), - "name": extr("class=postername>", "<").strip(), - "time": extr("class=posttime title=", "000>"), + "no" : extr("id=\"p", "\""), + "name": extr("class=\"postername \">", "<").strip(), + "time": extr("class=\"posttime\" title=\"", "000\">"), "now" : extr("", "<").strip(), "com" : text.unescape(text.remove_html(extr( "
", "
").strip())), @@ -83,14 +84,15 @@ class WarosuThreadExtractor(Extractor): def _extract_image(self, post, data): extr = text.extract_from(post) - data["fsize"] = extr(" File: ", ", ") + extr("", "") + data["fsize"] = extr("File: ", ", ") data["w"] = extr("", "x") data["h"] = extr("", ", ") data["filename"] = text.unquote(extr( "", "<").rstrip().rpartition(".")[0]) - extr("
", "") + extr("
", "") - url = extr("") + url = extr("") if url: if url[0] == "/": data["image"] = self.root + url