From 493fc483c603bcf9362ccf68ac90c6fe50f11353 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mike=20F=C3=A4hrmann?= Date: Thu, 17 Jul 2025 18:38:54 +0200 Subject: [PATCH] [scripts/init] handle subdomains when building BASE_PATTERN --- scripts/init.py | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/scripts/init.py b/scripts/init.py index 308c0c51..d5549eaa 100755 --- a/scripts/init.py +++ b/scripts/init.py @@ -117,7 +117,7 @@ class {ccat}Base(): class {ccat}ChapterExtractor({ccat}Base, ChapterExtractor): """Extractor for {cat} manga chapters""" - pattern = BASE_PATTERN + r"/PATH" + pattern = rf"{{BASE_PATTERN}}/PATH" example = "" def __init__(self, match): @@ -149,7 +149,7 @@ class {ccat}ChapterExtractor({ccat}Base, ChapterExtractor): class {ccat}MangaExtractor({ccat}Base, MangaExtractor): """Extractor for {cat} manga""" chapterclass = {ccat}ChapterExtractor - pattern = BASE_PATTERN + r"/PATH" + pattern = rf"{{BASE_PATTERN}}/PATH" example = "" def __init__(self, match): @@ -198,8 +198,19 @@ class {ccat}UserExtractor(Dispatch, {ccat}Extractor) def build_base_pattern(opts): + domain = opts["domain"] + if domain.count(".") > 1: + subdomain, domain, tld = domain.rsplit(".", 2) + domain = f"{domain}.{tld}" + if subdomain == "www": + subdomain = "(?:www\\.)?" + else: + subdomain = re.escape(subdomain + ".") + else: + subdomain = "(?:www\\.)?" + return f"""\ -BASE_PATTERN = r"(?:https?://)?(?:www\\.)?{re.escape(opts["domain"])}" +BASE_PATTERN = r"(?:https?://)?{subdomain}{re.escape(domain)}" """