From cfc70a97ab960b2cd17f6e4ef3e76fb5b438ab0f Mon Sep 17 00:00:00 2001 From: Gio Date: Mon, 9 Dec 2019 00:56:27 -0600 Subject: [PATCH 1/5] Added an additional channel for downloading the metadata of an entire post or gallery. --- gallery_dl/extractor/message.py | 1 + gallery_dl/extractor/patreon.py | 4 ++++ gallery_dl/job.py | 10 ++++++++ gallery_dl/postprocessor/__init__.py | 1 + gallery_dl/postprocessor/metadata_bypost.py | 26 +++++++++++++++++++++ 5 files changed, 42 insertions(+) create mode 100644 gallery_dl/postprocessor/metadata_bypost.py diff --git a/gallery_dl/extractor/message.py b/gallery_dl/extractor/message.py index 1831620a..088fdd67 100644 --- a/gallery_dl/extractor/message.py +++ b/gallery_dl/extractor/message.py @@ -52,3 +52,4 @@ class Message(): # Cookies = 5 Queue = 6 Urllist = 7 + Metadata = 8 diff --git a/gallery_dl/extractor/patreon.py b/gallery_dl/extractor/patreon.py index 9b13391f..60549983 100644 --- a/gallery_dl/extractor/patreon.py +++ b/gallery_dl/extractor/patreon.py @@ -69,6 +69,10 @@ class PatreonExtractor(Extractor): post["type"] = "content" yield Message.Url, url, text.nameext_from_url(url, post) + post.update({"metadata_only": True}) + url = post.get("creator").get("image_url") + yield Message.Metadata, url, text.nameext_from_url(url, post) + def posts(self): """Return all relevant post objects""" diff --git a/gallery_dl/job.py b/gallery_dl/job.py index 7a1f1955..3241c7c4 100644 --- a/gallery_dl/job.py +++ b/gallery_dl/job.py @@ -97,6 +97,12 @@ class Job(): self.update_kwdict(kwds) self.handle_urllist(urls, kwds) + elif msg[0] == Message.Metadata: + _, url, kwds = msg + if self.pred_url(url, kwds): + self.update_kwdict(kwds) + self.handle_url(url, kwds) + elif msg[0] == Message.Version: if msg[1] != 1: raise "unsupported message-version ({}, {})".format( @@ -188,6 +194,10 @@ class DownloadJob(Job): for pp in postprocessors: pp.prepare(pathfmt) + if kwdict.get("metadata_only"): + self.handle_skip() + return + if pathfmt.exists(archive): self.handle_skip() return diff --git a/gallery_dl/postprocessor/__init__.py b/gallery_dl/postprocessor/__init__.py index e63d4427..dbd87709 100644 --- a/gallery_dl/postprocessor/__init__.py +++ b/gallery_dl/postprocessor/__init__.py @@ -18,6 +18,7 @@ modules = [ "mtime", "ugoira", "zip", + "metadata_bypost", ] log = logging.getLogger("postprocessor") diff --git a/gallery_dl/postprocessor/metadata_bypost.py b/gallery_dl/postprocessor/metadata_bypost.py new file mode 100644 index 00000000..35d4ef34 --- /dev/null +++ b/gallery_dl/postprocessor/metadata_bypost.py @@ -0,0 +1,26 @@ +# -*- coding: utf-8 -*- + +# Copyright 2019 Mike Fährmann +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License version 2 as +# published by the Free Software Foundation. + +"""Write metadata to JSON files""" + +from .metadata import __postprocessor__ as MetadataPP + + +class Metadata_BypostPP(MetadataPP): + + def __init__(self, pathfmt, options): + MetadataPP.__init__(self, pathfmt, options) + + def prepare(self, pathfmt): + if pathfmt.kwdict.get("metadata_only"): + MetadataPP.run(self, pathfmt) + + def run(self, pathfmt): + return + +__postprocessor__ = Metadata_BypostPP From 6ed4fc07ff248e352ba6ef01a76f7abbae789e0b Mon Sep 17 00:00:00 2001 From: Gio Date: Mon, 9 Dec 2019 01:02:17 -0600 Subject: [PATCH 2/5] Don't print intentional metadata skips to the console. --- gallery_dl/job.py | 1 - 1 file changed, 1 deletion(-) diff --git a/gallery_dl/job.py b/gallery_dl/job.py index 3241c7c4..19878db1 100644 --- a/gallery_dl/job.py +++ b/gallery_dl/job.py @@ -195,7 +195,6 @@ class DownloadJob(Job): pp.prepare(pathfmt) if kwdict.get("metadata_only"): - self.handle_skip() return if pathfmt.exists(archive): From c20bb5c3387fdf40132b930bb1cd77fed51ed28e Mon Sep 17 00:00:00 2001 From: Gio Date: Mon, 9 Dec 2019 01:05:01 -0600 Subject: [PATCH 3/5] Naming convention, as per travis. --- gallery_dl/postprocessor/metadata_bypost.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/gallery_dl/postprocessor/metadata_bypost.py b/gallery_dl/postprocessor/metadata_bypost.py index 35d4ef34..05781742 100644 --- a/gallery_dl/postprocessor/metadata_bypost.py +++ b/gallery_dl/postprocessor/metadata_bypost.py @@ -11,7 +11,7 @@ from .metadata import __postprocessor__ as MetadataPP -class Metadata_BypostPP(MetadataPP): +class Metadata_bypostPP(MetadataPP): def __init__(self, pathfmt, options): MetadataPP.__init__(self, pathfmt, options) @@ -23,4 +23,4 @@ class Metadata_BypostPP(MetadataPP): def run(self, pathfmt): return -__postprocessor__ = Metadata_BypostPP +__postprocessor__ = Metadata_bypostPP From c0b9ad678d36b84069fdd9806e61eff6c37c7b1b Mon Sep 17 00:00:00 2001 From: Gio Date: Mon, 9 Dec 2019 16:02:15 -0600 Subject: [PATCH 4/5] Separate metadata from handle_url into handle_metadata, commenting --- gallery_dl/extractor/patreon.py | 1 + gallery_dl/job.py | 24 +++++++++++++++------ gallery_dl/postprocessor/metadata_bypost.py | 1 + 3 files changed, 20 insertions(+), 6 deletions(-) diff --git a/gallery_dl/extractor/patreon.py b/gallery_dl/extractor/patreon.py index 60549983..fefef48c 100644 --- a/gallery_dl/extractor/patreon.py +++ b/gallery_dl/extractor/patreon.py @@ -69,6 +69,7 @@ class PatreonExtractor(Extractor): post["type"] = "content" yield Message.Url, url, text.nameext_from_url(url, post) + # Metadata for post using dummy url for formatting post.update({"metadata_only": True}) url = post.get("creator").get("image_url") yield Message.Metadata, url, text.nameext_from_url(url, post) diff --git a/gallery_dl/job.py b/gallery_dl/job.py index 19878db1..972174a4 100644 --- a/gallery_dl/job.py +++ b/gallery_dl/job.py @@ -99,9 +99,8 @@ class Job(): elif msg[0] == Message.Metadata: _, url, kwds = msg - if self.pred_url(url, kwds): - self.update_kwdict(kwds) - self.handle_url(url, kwds) + self.update_kwdict(kwds) + self.handle_metadata(url, kwds) elif msg[0] == Message.Version: if msg[1] != 1: @@ -194,9 +193,6 @@ class DownloadJob(Job): for pp in postprocessors: pp.prepare(pathfmt) - if kwdict.get("metadata_only"): - return - if pathfmt.exists(archive): self.handle_skip() return @@ -238,6 +234,19 @@ class DownloadJob(Job): pp.run_after(pathfmt) self._skipcnt = 0 + def handle_metadata(self, url, kwdict, fallback=None): + """Download the resource specified in 'url'""" + postprocessors = self.postprocessors + pathfmt = self.pathfmt + + # prepare download + pathfmt.set_filename(kwdict) + + if postprocessors: + for pp in postprocessors: + pp.prepare(pathfmt) + return + def handle_urllist(self, urls, kwdict): """Download the resource specified in 'url'""" fallback = iter(urls) @@ -510,6 +519,9 @@ class DataJob(Job): def handle_url(self, url, kwdict): self.data.append((Message.Url, url, self.filter(kwdict))) + def handle_metadata(self, url, kwdict): + self.data.append((Message.Url, url, self.filter(kwdict))) + def handle_urllist(self, urls, kwdict): self.data.append((Message.Urllist, list(urls), self.filter(kwdict))) diff --git a/gallery_dl/postprocessor/metadata_bypost.py b/gallery_dl/postprocessor/metadata_bypost.py index 05781742..cc5dc165 100644 --- a/gallery_dl/postprocessor/metadata_bypost.py +++ b/gallery_dl/postprocessor/metadata_bypost.py @@ -17,6 +17,7 @@ class Metadata_bypostPP(MetadataPP): MetadataPP.__init__(self, pathfmt, options) def prepare(self, pathfmt): + # Only run this processor on metadata messages, not individual images. if pathfmt.kwdict.get("metadata_only"): MetadataPP.run(self, pathfmt) From 63e6993716db8d8bedfb7b0d445c7161493046b6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mike=20F=C3=A4hrmann?= Date: Mon, 16 Dec 2019 17:19:23 +0100 Subject: [PATCH 5/5] merge 'bypost' functionality into metadata postprocessor --- gallery_dl/extractor/patreon.py | 11 +++--- gallery_dl/job.py | 37 ++++++++++----------- gallery_dl/postprocessor/__init__.py | 1 - gallery_dl/postprocessor/common.py | 4 +++ gallery_dl/postprocessor/metadata.py | 3 ++ gallery_dl/postprocessor/metadata_bypost.py | 27 --------------- 6 files changed, 29 insertions(+), 54 deletions(-) delete mode 100644 gallery_dl/postprocessor/metadata_bypost.py diff --git a/gallery_dl/extractor/patreon.py b/gallery_dl/extractor/patreon.py index fefef48c..4eda3bab 100644 --- a/gallery_dl/extractor/patreon.py +++ b/gallery_dl/extractor/patreon.py @@ -33,13 +33,15 @@ class PatreonExtractor(Extractor): PatreonExtractor._warning = False for post in self.posts(): - yield Message.Directory, post - ids = set() post["num"] = 0 content = post.get("content") postfile = post.get("post_file") + yield Message.Directory, post + yield Message.Metadata, text.nameext_from_url( + post["creator"].get("image_url", ""), post) + for image in post["images"]: url = image.get("download_url") if not url: @@ -69,11 +71,6 @@ class PatreonExtractor(Extractor): post["type"] = "content" yield Message.Url, url, text.nameext_from_url(url, post) - # Metadata for post using dummy url for formatting - post.update({"metadata_only": True}) - url = post.get("creator").get("image_url") - yield Message.Metadata, url, text.nameext_from_url(url, post) - def posts(self): """Return all relevant post objects""" diff --git a/gallery_dl/job.py b/gallery_dl/job.py index 972174a4..7932cd0a 100644 --- a/gallery_dl/job.py +++ b/gallery_dl/job.py @@ -98,9 +98,8 @@ class Job(): self.handle_urllist(urls, kwds) elif msg[0] == Message.Metadata: - _, url, kwds = msg - self.update_kwdict(kwds) - self.handle_metadata(url, kwds) + self.update_kwdict(msg[1]) + self.handle_metadata(msg[1]) elif msg[0] == Message.Version: if msg[1] != 1: @@ -119,6 +118,9 @@ class Job(): def handle_directory(self, kwdict): """Handle Message.Directory""" + def handle_metadata(self, kwdict): + """Handle Message.Metadata""" + def handle_queue(self, url, kwdict): """Handle Message.Queue""" @@ -234,19 +236,6 @@ class DownloadJob(Job): pp.run_after(pathfmt) self._skipcnt = 0 - def handle_metadata(self, url, kwdict, fallback=None): - """Download the resource specified in 'url'""" - postprocessors = self.postprocessors - pathfmt = self.pathfmt - - # prepare download - pathfmt.set_filename(kwdict) - - if postprocessors: - for pp in postprocessors: - pp.prepare(pathfmt) - return - def handle_urllist(self, urls, kwdict): """Download the resource specified in 'url'""" fallback = iter(urls) @@ -260,6 +249,16 @@ class DownloadJob(Job): else: self.pathfmt.set_directory(kwdict) + def handle_metadata(self, kwdict): + """Run postprocessors with metadata from 'kwdict'""" + postprocessors = self.postprocessors + + if postprocessors: + pathfmt = self.pathfmt + pathfmt.set_filename(kwdict) + for pp in postprocessors: + pp.run_metadata(pathfmt) + def handle_queue(self, url, kwdict): if "_extractor" in kwdict: extr = kwdict["_extractor"].from_url(url) @@ -519,15 +518,15 @@ class DataJob(Job): def handle_url(self, url, kwdict): self.data.append((Message.Url, url, self.filter(kwdict))) - def handle_metadata(self, url, kwdict): - self.data.append((Message.Url, url, self.filter(kwdict))) - def handle_urllist(self, urls, kwdict): self.data.append((Message.Urllist, list(urls), self.filter(kwdict))) def handle_directory(self, kwdict): self.data.append((Message.Directory, self.filter(kwdict))) + def handle_metadata(self, kwdict): + self.data.append((Message.Metadata, self.filter(kwdict))) + def handle_queue(self, url, kwdict): self.data.append((Message.Queue, url, self.filter(kwdict))) diff --git a/gallery_dl/postprocessor/__init__.py b/gallery_dl/postprocessor/__init__.py index dbd87709..e63d4427 100644 --- a/gallery_dl/postprocessor/__init__.py +++ b/gallery_dl/postprocessor/__init__.py @@ -18,7 +18,6 @@ modules = [ "mtime", "ugoira", "zip", - "metadata_bypost", ] log = logging.getLogger("postprocessor") diff --git a/gallery_dl/postprocessor/common.py b/gallery_dl/postprocessor/common.py index 83b42eb6..70b0dfb7 100644 --- a/gallery_dl/postprocessor/common.py +++ b/gallery_dl/postprocessor/common.py @@ -26,6 +26,10 @@ class PostProcessor(): def run(pathfmt): """Execute the postprocessor for a file""" + @staticmethod + def run_metadata(pathfmt): + """Execute the postprocessor for a file""" + @staticmethod def run_after(pathfmt): """Execute postprocessor after moving a file to its target location""" diff --git a/gallery_dl/postprocessor/metadata.py b/gallery_dl/postprocessor/metadata.py index 75c5fffa..bc264845 100644 --- a/gallery_dl/postprocessor/metadata.py +++ b/gallery_dl/postprocessor/metadata.py @@ -40,6 +40,9 @@ class MetadataPP(PostProcessor): self.path = self._path_append self.extension = options.get("extension", ext) + if options.get("bypost"): + self.run_metadata, self.run = self.run, self.run_metadata + def run(self, pathfmt): with open(self.path(pathfmt), "w", encoding="utf-8") as file: self.write(file, pathfmt.kwdict) diff --git a/gallery_dl/postprocessor/metadata_bypost.py b/gallery_dl/postprocessor/metadata_bypost.py deleted file mode 100644 index cc5dc165..00000000 --- a/gallery_dl/postprocessor/metadata_bypost.py +++ /dev/null @@ -1,27 +0,0 @@ -# -*- coding: utf-8 -*- - -# Copyright 2019 Mike Fährmann -# -# This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License version 2 as -# published by the Free Software Foundation. - -"""Write metadata to JSON files""" - -from .metadata import __postprocessor__ as MetadataPP - - -class Metadata_bypostPP(MetadataPP): - - def __init__(self, pathfmt, options): - MetadataPP.__init__(self, pathfmt, options) - - def prepare(self, pathfmt): - # Only run this processor on metadata messages, not individual images. - if pathfmt.kwdict.get("metadata_only"): - MetadataPP.run(self, pathfmt) - - def run(self, pathfmt): - return - -__postprocessor__ = Metadata_bypostPP