[bellazon] match thread URLs with escaped characters
This commit is contained in:
@@ -196,7 +196,7 @@ class BellazonExtractor(Extractor):
|
||||
|
||||
class BellazonPostExtractor(BellazonExtractor):
|
||||
subcategory = "post"
|
||||
- pattern = (BASE_PATTERN + r"(/topic/\d+-[\w-]+(?:/page/\d+)?)"
|
||||
+ pattern = (BASE_PATTERN + r"(/topic/\d+-[^/?#]+(?:/page/\d+)?)"
|
||||
r"/?#(?:findC|c)omment-(\d+)")
|
||||
example = "https://www.bellazon.com/main/topic/123-SLUG/#findComment-12345"
|
||||
|
||||
@@ -215,7 +215,7 @@ class BellazonPostExtractor(BellazonExtractor):
|
||||
|
||||
class BellazonThreadExtractor(BellazonExtractor):
|
||||
subcategory = "thread"
|
||||
- pattern = BASE_PATTERN + r"(/topic/\d+-[\w-]+)(?:/page/(\d+))?"
|
||||
+ pattern = BASE_PATTERN + r"(/topic/\d+-[^/?#]+)(?:/page/(\d+))?"
|
||||
example = "https://www.bellazon.com/main/topic/123-SLUG/"
|
||||
|
||||
def posts(self):
|
||||
@@ -240,7 +240,7 @@ class BellazonThreadExtractor(BellazonExtractor):
|
||||
|
||||
class BellazonForumExtractor(BellazonExtractor):
|
||||
subcategory = "forum"
|
||||
- pattern = BASE_PATTERN + r"(/forum/\d+-[\w-]+)(?:/page/(\d+))?"
|
||||
+ pattern = BASE_PATTERN + r"(/forum/\d+-[^/?#]+)(?:/page/(\d+))?"
|
||||
example = "https://www.bellazon.com/main/forum/123-SLUG/"
|
||||
|
||||
def items(self):
|
||||
|
||||
@@ -399,6 +399,11 @@ __tests__ = (
|
||||
},
|
||||
},
|
||||
|
||||
+ {
|
||||
+ "#url" : "https://www.bellazon.com/main/topic/123434-%D0%BD%D0%B0-%D1%84%D0%BE%D1%82%D0%BE-%D0%B2%D0%B8%D0%BA%D1%82%D0%BE%D1%80%D0%B8%D1%8F-%D0%BA%D0%BE%D0%BB%D0%B5%D1%81%D0%BD%D0%B8%D0%BA%D0%BE%D0%B2%D0%B0/",
|
||||
+ "#class" : bellazon.BellazonThreadExtractor,
|
||||
+ },
|
||||
+
||||
{
|
||||
"#url" : "https://www.bellazon.com/main/forum/3-actresses/",
|
||||
"#class" : bellazon.BellazonForumExtractor,
|
||||
|
||||
Reference in New Issue
Block a user