[bellazon] match thread URLs with escaped characters

This commit is contained in:
Mike Fährmann
2026-01-07 15:40:43 +01:00
parent 775e2d6821
commit 88732321ad
2 changed files with 8 additions and 3 deletions

View File

@@ -196,7 +196,7 @@ class BellazonExtractor(Extractor):
class BellazonPostExtractor(BellazonExtractor):
subcategory = "post"
- pattern = (BASE_PATTERN + r"(/topic/\d+-[\w-]+(?:/page/\d+)?)"
+ pattern = (BASE_PATTERN + r"(/topic/\d+-[^/?#]+(?:/page/\d+)?)"
r"/?#(?:findC|c)omment-(\d+)")
example = "https://www.bellazon.com/main/topic/123-SLUG/#findComment-12345"
@@ -215,7 +215,7 @@ class BellazonPostExtractor(BellazonExtractor):
class BellazonThreadExtractor(BellazonExtractor):
subcategory = "thread"
- pattern = BASE_PATTERN + r"(/topic/\d+-[\w-]+)(?:/page/(\d+))?"
+ pattern = BASE_PATTERN + r"(/topic/\d+-[^/?#]+)(?:/page/(\d+))?"
example = "https://www.bellazon.com/main/topic/123-SLUG/"
def posts(self):
@@ -240,7 +240,7 @@ class BellazonThreadExtractor(BellazonExtractor):
class BellazonForumExtractor(BellazonExtractor):
subcategory = "forum"
- pattern = BASE_PATTERN + r"(/forum/\d+-[\w-]+)(?:/page/(\d+))?"
+ pattern = BASE_PATTERN + r"(/forum/\d+-[^/?#]+)(?:/page/(\d+))?"
example = "https://www.bellazon.com/main/forum/123-SLUG/"
def items(self):

View File

@@ -399,6 +399,11 @@ __tests__ = (
},
},
{
"#url" : "https://www.bellazon.com/main/topic/123434-%D0%BD%D0%B0-%D1%84%D0%BE%D1%82%D0%BE-%D0%B2%D0%B8%D0%BA%D1%82%D0%BE%D1%80%D0%B8%D1%8F-%D0%BA%D0%BE%D0%BB%D0%B5%D1%81%D0%BD%D0%B8%D0%BA%D0%BE%D0%B2%D0%B0/",
"#class" : bellazon.BellazonThreadExtractor,
},
{
"#url" : "https://www.bellazon.com/main/forum/3-actresses/",
"#class" : bellazon.BellazonForumExtractor,