From ff436692bf25ce20fd52fd682665ca9574c9c11f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mike=20F=C3=A4hrmann?= Date: Mon, 16 Jul 2018 18:14:41 +0200 Subject: [PATCH] [deviantart] add 'journals' option --- docs/configuration.rst | 15 +++++++- docs/gallery-dl.conf | 4 +- gallery_dl/extractor/deviantart.py | 59 ++++++++++++++++++++++++------ gallery_dl/extractor/imgur.py | 4 +- 4 files changed, 66 insertions(+), 16 deletions(-) diff --git a/docs/configuration.rst b/docs/configuration.rst index 85aca3d0..d9140bea 100644 --- a/docs/configuration.rst +++ b/docs/configuration.rst @@ -294,6 +294,19 @@ Description Select the directory structure created by the Gallery- and =========== ===== +extractor.deviantart.journals +----------------------------- +=========== ===== +Type ``string`` +Default ``"html"`` +Description Selects the output format of journal entries. + + - ``"html"``: HTML with (roughly) the same layout as on DeviantArt. + - ``"text"``: Plain text with image references and HTML tags removed. + - ``"none"``: Don't download journals. +=========== ===== + + extractor.deviantart.mature --------------------------- =========== ===== @@ -338,7 +351,7 @@ extractor.deviantart.wait-min =========== ===== Type ``int`` Default ``0`` -Description Minimum wait time in seconds before any API request. +Description Minimum wait time in seconds before API requests. Note: This value will internally be rounded up to the next power of 2. 
diff --git a/docs/gallery-dl.conf b/docs/gallery-dl.conf index b49d0e1b..2cf2868c 100644 --- a/docs/gallery-dl.conf +++ b/docs/gallery-dl.conf @@ -18,8 +18,10 @@ { "refresh-token": null, "flat": true, + "journals": "html", "mature": true, - "original": true + "original": true, + "wait-min": 0 }, "exhentai": { diff --git a/gallery_dl/extractor/deviantart.py b/gallery_dl/extractor/deviantart.py index c097e5da..91b9f577 100644 --- a/gallery_dl/extractor/deviantart.py +++ b/gallery_dl/extractor/deviantart.py @@ -21,7 +21,7 @@ import re BASE_PATTERN = ( r"(?:https?://)?(?:" r"(?:www\.)?deviantart\.com/([\w-]+)|" - r"(?!www\.)([\w-]+)\.deviantart\.com)" + r"([\w-]+)\.deviantart\.com)" ) @@ -41,6 +41,11 @@ class DeviantartExtractor(Extractor): self.user = match.group(1) or match.group(2) if match else None self.group = False + self.commit_journal = { + "html": self._commit_journal_html, + "text": self._commit_journal_text, + }.get(self.config("journals", "html")) + def skip(self, num): self.offset += num return num @@ -77,7 +82,7 @@ class DeviantartExtractor(Extractor): if "flash" in deviation: yield self.commit(deviation, deviation["flash"]) - if "excerpt" in deviation: + if "excerpt" in deviation and self.commit_journal: journal = self.api.deviation_content(deviation["deviationid"]) yield self.commit_journal(deviation, journal) @@ -94,7 +99,6 @@ class DeviantartExtractor(Extractor): deviation["index"] = deviation["url"].rpartition("-")[2] except KeyError: deviation["index"] = 0 - if self.user: deviation["username"] = self.user deviation["da_category"] = deviation["category"] @@ -108,7 +112,7 @@ class DeviantartExtractor(Extractor): url = "https:" + url[5:] return Message.Url, url, deviation - def commit_journal(self, deviation, journal): + def _commit_journal_html(self, deviation, journal): title = text.escape(deviation["title"]) url = deviation["url"] thumbs = deviation["thumbs"] @@ -142,11 +146,11 @@ class DeviantartExtractor(Extractor): url=url, 
userurl="{}/{}/".format(self.root, deviation["username"]), username=deviation["author"]["username"], - date=str(date), + date=date, categories=categories, ) - html = JOURNAL_TEMPLATE.format( + html = JOURNAL_TEMPLATE_HTML.format( title=title, html=html.replace(needle, header, 1), shadow=shadow, @@ -157,6 +161,23 @@ class DeviantartExtractor(Extractor): deviation["extension"] = "htm" return Message.Url, html, deviation + @staticmethod + def _commit_journal_text(deviation, journal): + date = datetime.datetime.utcfromtimestamp(deviation["published_time"]) + content = "\n".join( + text.unescape(text.remove_html(txt)) + for txt in journal["html"].rpartition("") + ) + txt = JOURNAL_TEMPLATE_TEXT.format( + title=deviation["title"], + username=deviation["author"]["username"], + date=date, + content=content, + ) + + deviation["extension"] = "txt" + return Message.Url, txt, deviation + @staticmethod def _find_folder(folders, name): pattern = r"[^\w]*" + name.replace("-", r"[^\w]+") + r"[^\w]*$" @@ -246,12 +267,12 @@ class DeviantartDeviationExtractor(DeviantartExtractor): subcategory = "deviation" archive_fmt = "{index}.{extension}" pattern = [BASE_PATTERN + r"/(?:art|journal)/[^/?&#]+-\d+", - r"(?:https?://)?(sta\.sh/[a-z0-9]+)"] + r"(?:https?://)?sta\.sh/()()[a-z0-9]+"] test = [ (("https://www.deviantart.com/shimoda7/art/" "For-the-sake-of-a-memory-10073852"), { "url": "eef0c01b3808c535ea673e7b3654ab5209b910b7", - "keyword": "b7ed053c3fb54b93c90e5ff8ed9f7a11d47a9c74", + "keyword": "925217229da46aeb8ce282675dc8639fa20a892c", "content": "6a7c74dc823ebbd457bdd9b3c2838a6ee728091e", }), ("https://www.deviantart.com/zzz/art/zzz-1234567890", { @@ -277,7 +298,7 @@ class DeviantartDeviationExtractor(DeviantartExtractor): ] def __init__(self, match): - DeviantartExtractor.__init__(self) + DeviantartExtractor.__init__(self, match) self.url = match.group(0) if not self.url.startswith("http"): self.url = "https://" + self.url @@ -310,10 +331,10 @@ class 
DeviantartFavoriteExtractor(DeviantartExtractor): def deviations(self): folders = self.api.collections_folders(self.user) if self.flat: - return itertools.chain.from_iterable([ + return itertools.chain.from_iterable( self.api.collections(self.user, folder["folderid"]) for folder in folders - ]) + ) else: return self._folder_urls(folders, "favourites") @@ -363,6 +384,14 @@ class DeviantartJournalExtractor(DeviantartExtractor): "url": "38db2a0d3a587a7e0f9dba7ff7d274610ebefe44", "keyword": "8d11b458f389188cc1f00d09694ce4e00c43efcc", }), + ("https://www.deviantart.com/angrywhitewanker/journal/", { + "url": "b2a8e74d275664b1a4acee0fca0a6fd33298571e", + "options": (("journals", "text"),), + }), + ("https://www.deviantart.com/angrywhitewanker/journal/", { + "count": 0, + "options": (("journals", "none"),), + }), ("https://www.deviantart.com/shimoda7/journal/?catpath=/", None), ("https://angrywhitewanker.deviantart.com/journal/", None), ("https://shimoda7.deviantart.com/journal/?catpath=/", None), @@ -629,7 +658,7 @@ HEADER_CUSTOM_TEMPLATE = """
Journal Entry: {date} """ -JOURNAL_TEMPLATE = """text: +JOURNAL_TEMPLATE_HTML = """text: @@ -676,3 +705,9 @@ roses/cssmin/desktop.css?1491362542749" > """ + +JOURNAL_TEMPLATE_TEXT = """text:{title} +by {username}, {date} + +{content} +""" diff --git a/gallery_dl/extractor/imgur.py b/gallery_dl/extractor/imgur.py index c723de49..8e77713b 100644 --- a/gallery_dl/extractor/imgur.py +++ b/gallery_dl/extractor/imgur.py @@ -155,8 +155,8 @@ class ImgurAlbumExtractor(ImgurExtractor): ("https://imgur.com/a/RhJXhVT/all", { # 7 character album hash "url": "695ef0c950023362a0163ee5041796300db76674", }), - ("https://imgur.com/t/unmuted/FVyxO32", { # unmuted URL - "url": "1df12d96438ad9018ace7665dc893419ce9ec867", + ("https://imgur.com/t/unmuted/YMqBcua", { # unmuted URL + "url": "86b4747f8147cec7602f0214e267309af73a8655", }), ("https://imgur.com/a/TcBmQ", { "exception": exception.NotFoundError,