diff --git a/docs/configuration.rst b/docs/configuration.rst index 3cef140e..fc3b678c 100644 --- a/docs/configuration.rst +++ b/docs/configuration.rst @@ -3032,10 +3032,13 @@ Example Description Controls from which position to start the extraction process from. - * ``true``: Start from the beginning. - Log the most recent ``cursor`` value when interrupted before reaching the end. - * ``false``: Start from the beginning. - * any ``string``: Start from the position defined by this value. + ``true`` + | Start from the beginning. + | Log the most recent ``cursor`` value when interrupted before reaching the end. + ``false`` + Start from the beginning. + any ``string`` + Start from the position defined by this value. extractor.instagram.include @@ -3829,6 +3832,27 @@ Description Note: This requires 1 additional HTTP request per post. +extractor.patreon.cursor +------------------------ +Type + * ``bool`` + * ``string`` +Default + ``true`` +Example + ``"03:eyJ2IjoxLCJjIjoiMzU0NDQ1MjAiLCJ0IjoiIn0=:DTcmjBoVj01o_492YBYqHhqx"`` +Description + Controls from which position to start the extraction process from. + + ``true`` + | Start from the beginning. + | Log the most recent ``cursor`` value when interrupted before reaching the end. + ``false`` + Start from the beginning. + any ``string`` + Start from the position defined by this value. + + extractor.patreon.files ----------------------- Type @@ -5183,12 +5207,15 @@ Example Description Controls from which position to start the extraction process from. - * ``true``: Start from the beginning. - Log the most recent ``cursor`` value when interrupted before reaching the end. - * ``false``: Start from the beginning. - * any ``string``: Start from the position defined by this value. - - Note: A ``cursor`` value from one timeline cannot be used with another. + ``true`` + | Start from the beginning. + | Log the most recent ``cursor`` value when interrupted before reaching the end. + ``false`` + Start from the beginning. + any ``string`` + Start from the position defined by this value. +Note + A ``cursor`` value from one timeline cannot be used with another. extractor.twitter.expand diff --git a/docs/gallery-dl.conf b/docs/gallery-dl.conf index 3fe66b3c..2c57ce46 100644 --- a/docs/gallery-dl.conf +++ b/docs/gallery-dl.conf @@ -506,6 +506,7 @@ { "cookies": null, + "cursor" : true, "files" : ["images", "image_large", "attachments", "postfile", "content"], "format-images": "download_url" }, diff --git a/gallery_dl/extractor/patreon.py b/gallery_dl/extractor/patreon.py index 11e2418b..d9f8cf55 100644 --- a/gallery_dl/extractor/patreon.py +++ b/gallery_dl/extractor/patreon.py @@ -39,6 +39,8 @@ class PatreonExtractor(Extractor): self._images_fmt = format_images self._images_url = self._images_url_fmt + self._cursor = None + def items(self): generators = self._build_file_generators(self.config("files")) @@ -71,6 +73,11 @@ class PatreonExtractor(Extractor): else: self.log.debug("skipping %s (%s %s)", url, fhash, kind) + def finalize(self): + if self._cursor: + self.log.info("Use '-o cursor=%s' to continue downloading " + "from the current position", self._cursor) + def _postfile(self, post): if postfile := post.get("post_file"): url = postfile["url"] @@ -130,6 +137,7 @@ class PatreonExtractor(Extractor): } while url: + self._update_cursor(url) url = text.ensure_http_scheme(url) posts = self.request_json(url, headers=headers) @@ -139,9 +147,24 @@ class PatreonExtractor(Extractor): yield self._process(post, included) if "links" not in posts: - return + break url = posts["links"].get("next") + self._update_cursor("") + + def _init_cursor(self): + if cursor := self.config("cursor", True): + return "" if cursor is True else cursor + self._update_cursor = util.identity + return "" + + def _update_cursor(self, url): + params = text.parse_query(url.partition("?")[2]) + self._cursor = cursor = params.get("page[cursor]") + if cursor: + self.log.debug("Cursor: %s", cursor) + return cursor + def _process(self, post, included): """Process and extend a 'post' object""" attr = post["attributes"] @@ -255,7 +278,10 @@ class PatreonExtractor(Extractor): "&fields[media]=id,image_urls,download_url,metadata,file_name" "&fields[native_video_insights]=average_view_duration," "average_view_pct,has_preview,id,last_updated_at,num_views," - f"preview_views,video_duration{query}" + "preview_views,video_duration" + + f"&page[cursor]={self._init_cursor()}" + f"{query}" "&json-api-version=1.0" ) @@ -320,28 +346,27 @@ class PatreonCreatorExtractor(PatreonExtractor): def posts(self): creator, query = self.groups - query = text.parse_query(query) - campaign_id = self._get_campaign_id(creator, query) - filters = self._get_filters(query) - + params = text.parse_query(query) + campaign_id = self._get_campaign_id(creator, params) self.log.debug("campaign_id: %s", campaign_id) url = self._build_url("posts", ( f"&filter[campaign_id]={campaign_id}" "&filter[contains_exclusive_posts]=true" "&filter[is_draft]=false" - f"{filters}&sort={query.get('sort', '-published_at')}" + f"{self._get_filters(params)}" + f"&sort={params.get('sort', '-published_at')}" )) return self._pagination(url) - def _get_campaign_id(self, creator, query): + def _get_campaign_id(self, creator, params): if creator and creator.startswith("id:"): return creator[3:] - if campaign_id := query.get("c") or query.get("campaign_id"): + if campaign_id := params.get("c") or params.get("campaign_id"): return campaign_id - if user_id := query.get("u"): + if user_id := params.get("u"): url = f"{self.root}/user?u={user_id}" else: url = f"{self.root}/{creator}" @@ -367,10 +392,10 @@ class PatreonCreatorExtractor(PatreonExtractor): raise exception.AbortExtraction("Failed to extract campaign ID") - def _get_filters(self, query): + def _get_filters(self, params): return "".join( f"&filter[{key[8:]}={text.escape(value)}" - for key, value in query.items() + for key, value in params.items() if key.startswith("filters[") ) @@ -383,7 +408,6 @@ class PatreonUserExtractor(PatreonExtractor): def posts(self): url = self._build_url("stream", ( - "&page[cursor]=null" "&filter[is_following]=true" "&json-api-use-default-includes=false" ))