[patreon] fix regex pattern for posts
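The previous pattern captured the first number in the URL slug as the post ID, which failed for posts with numbers in their title (e.g. the slug "er1-28201153" yielded "1" instead of "28201153"). The new pattern captures the whole slug instead.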
This commit is contained in:
@@ -234,12 +234,14 @@ class PatreonUserExtractor(PatreonExtractor):
|
||||
class PatreonPostExtractor(PatreonExtractor):
|
||||
"""Extractor for media from a single post"""
|
||||
subcategory = "post"
|
||||
pattern = (r"(?:https?://)?(?:www\.)?patreon\.com"
|
||||
r"/posts/[^/?&#]*?(\d+)")
|
||||
pattern = r"(?:https?://)?(?:www\.)?patreon\.com/posts/([^/?&#]+)"
|
||||
test = (
|
||||
("https://www.patreon.com/posts/precious-metal-23563293", {
|
||||
"count": 4,
|
||||
}),
|
||||
("https://www.patreon.com/posts/er1-28201153", {
|
||||
"count": 1,
|
||||
}),
|
||||
("https://www.patreon.com/posts/not-found-123", {
|
||||
"exception": exception.NotFoundError,
|
||||
}),
|
||||
@@ -247,10 +249,10 @@ class PatreonPostExtractor(PatreonExtractor):
|
||||
|
||||
def __init__(self, match):
|
||||
PatreonExtractor.__init__(self, match)
|
||||
self.post_id = match.group(1)
|
||||
self.slug = match.group(1)
|
||||
|
||||
def posts(self):
|
||||
url = "{}/posts/{}".format(self.root, self.post_id)
|
||||
url = "{}/posts/{}".format(self.root, self.slug)
|
||||
page = self.request(url, notfound="post").text
|
||||
data = text.extract(page, "window.patreon.bootstrap,", "\n});")[0]
|
||||
post = json.loads(data + "}")["post"]
|
||||
|
||||
Reference in New Issue
Block a user