[patreon] fix regex pattern for posts

The previous pattern captured the first run of digits in the URL slug
as the post ID, which failed for posts with numbers in their title
(e.g. "er1-28201153"). The new pattern captures the entire slug instead.
This commit is contained in:
Mike Fährmann
2019-12-14 22:06:08 +01:00
parent fe19e233f3
commit 0cd157300e

View File

@@ -234,12 +234,14 @@ class PatreonUserExtractor(PatreonExtractor):
class PatreonPostExtractor(PatreonExtractor):
"""Extractor for media from a single post"""
subcategory = "post"
pattern = (r"(?:https?://)?(?:www\.)?patreon\.com"
r"/posts/[^/?&#]*?(\d+)")
pattern = r"(?:https?://)?(?:www\.)?patreon\.com/posts/([^/?&#]+)"
test = (
("https://www.patreon.com/posts/precious-metal-23563293", {
"count": 4,
}),
("https://www.patreon.com/posts/er1-28201153", {
"count": 1,
}),
("https://www.patreon.com/posts/not-found-123", {
"exception": exception.NotFoundError,
}),
@@ -247,10 +249,10 @@ class PatreonPostExtractor(PatreonExtractor):
def __init__(self, match):
PatreonExtractor.__init__(self, match)
self.post_id = match.group(1)
self.slug = match.group(1)
def posts(self):
url = "{}/posts/{}".format(self.root, self.post_id)
url = "{}/posts/{}".format(self.root, self.slug)
page = self.request(url, notfound="post").text
data = text.extract(page, "window.patreon.bootstrap,", "\n});")[0]
post = json.loads(data + "}")["post"]