[patreon] improve 'campaign_id' handling (#4699, #4715)

- add ways to directly specify a 'campaign_id'
  - 'campaign-id' config option
  - 'c' or 'campaign_id' URL query parameter
- more descriptive error messages
- show 'campaign_id' value in debug log
This commit is contained in:
Mike Fährmann
2023-10-25 17:18:06 +02:00
parent 31dbbffc0b
commit 12a800ce21
2 changed files with 50 additions and 18 deletions

View File

@@ -2440,6 +2440,20 @@ Description
Note: This requires 1 additional HTTP request per post.
extractor.patreon.campaign-id
-----------------------------
Type
``string``
Default
``"auto"``
Description
Alternative way of specifying the ``campaign_id`` value of a creator
in case the automatic extraction method no longer functions.
Another way of specifying this value is using a ``c`` or ``campaign_id``
URL query parameter, e.g. ``https://www.patreon.com/NAME?c=12345``.
extractor.patreon.files
-----------------------
Type

View File

@@ -267,25 +267,10 @@ class PatreonCreatorExtractor(PatreonExtractor):
def posts(self):
query = text.parse_query(self.query)
campaign_id = self._get_campaign_id(query)
filters = self._get_filters(query)
creator_id = query.get("u")
if creator_id:
url = "{}/user/posts?u={}".format(self.root, creator_id)
else:
url = "{}/{}/posts".format(self.root, self.creator)
page = self.request(url, notfound="creator").text
try:
data = self._extract_bootstrap(page)
campaign_id = data["campaign"]["data"]["id"]
except (KeyError, ValueError):
raise exception.NotFoundError("creator")
filters = "".join(
"&filter[{}={}".format(key[8:], text.escape(value))
for key, value in query.items()
if key.startswith("filters[")
)
self.log.debug("campaign_id: %s", campaign_id)
url = self._build_url("posts", (
"&filter[campaign_id]=" + campaign_id +
@@ -295,6 +280,39 @@ class PatreonCreatorExtractor(PatreonExtractor):
))
return self._pagination(url)
def _get_campaign_id(self, query):
campaign_id = self.config("campaign-id")
if campaign_id and campaign_id != "auto":
return str(campaign_id)
campaign_id = query.get("c") or query.get("campaign_id")
if campaign_id:
return campaign_id
user_id = query.get("u")
if user_id:
url = "{}/user/posts?u={}".format(self.root, user_id)
else:
url = "{}/{}/posts".format(self.root, self.creator)
page = self.request(url, notfound="creator").text
try:
data = None
data = self._extract_bootstrap(page)
return data["campaign"]["data"]["id"]
except (KeyError, ValueError) as exc:
self.log.debug(data)
raise exception.StopExtraction(
"Unable to extract campaign ID (%s: %s)",
exc.__class__.__name__, exc)
def _get_filters(self, query):
return "".join(
"&filter[{}={}".format(key[8:], text.escape(value))
for key, value in query.items()
if key.startswith("filters[")
)
class PatreonUserExtractor(PatreonExtractor):
"""Extractor for media from creators supported by you"""