[common] allow overriding more default 'User-Agent' headers (#6496)

Ignore the global 'extractor.user-agent' setting when it is equal to the
built-in default User-Agent value and an extractor defines its own custom value.
This commit is contained in:
Mike Fährmann
2024-11-25 20:44:25 +01:00
parent 94c3a4dca5
commit 5412b22dae
4 changed files with 10 additions and 7 deletions

View File

@@ -42,6 +42,8 @@ class Extractor():
ciphers = None
tls12 = True
browser = None
useragent = ("Mozilla/5.0 (Windows NT 10.0; Win64; x64; "
"rv:128.0) Gecko/20100101 Firefox/128.0")
request_interval = 0.0
request_interval_min = 0.0
request_interval_429 = 60.0
@@ -381,11 +383,13 @@ class Extractor():
ssl_ciphers = SSL_CIPHERS[browser]
else:
useragent = self.config("user-agent")
if useragent is None:
useragent = ("Mozilla/5.0 (Windows NT 10.0; Win64; x64; "
"rv:128.0) Gecko/20100101 Firefox/128.0")
if useragent is None or useragent == "auto":
useragent = self.useragent
elif useragent == "browser":
useragent = _browser_useragent()
elif useragent is config.get(("extractor",), "user-agent") and \
useragent == Extractor.useragent:
useragent = self.useragent
headers["User-Agent"] = useragent
headers["Accept"] = "*/*"
headers["Accept-Language"] = "en-US,en;q=0.5"

View File

@@ -20,10 +20,10 @@ class DanbooruExtractor(BaseExtractor):
page_limit = 1000
page_start = None
per_page = 200
useragent = util.USERAGENT
request_interval = (0.5, 1.5)
def _init(self):
self.session.headers["User-Agent"] = util.USERAGENT
self.ugoira = self.config("ugoira", False)
self.external = self.config("external", False)

View File

@@ -20,11 +20,10 @@ class E621Extractor(danbooru.DanbooruExtractor):
page_limit = 750
page_start = None
per_page = 320
useragent = util.USERAGENT + " (by mikf)"
request_interval_min = 1.0
def items(self):
self.session.headers["User-Agent"] = util.USERAGENT + " (by mikf)"
includes = self.config("metadata") or ()
if includes:
if isinstance(includes, str):

View File

@@ -26,6 +26,7 @@ class MangadexExtractor(Extractor):
"{manga}_c{chapter:>03}{chapter_minor}_{page:>03}.{extension}")
archive_fmt = "{chapter_id}_{page}"
root = "https://mangadex.org"
useragent = util.USERAGENT
_cache = {}
def __init__(self, match):
@@ -33,7 +34,6 @@ class MangadexExtractor(Extractor):
self.uuid = match.group(1)
def _init(self):
self.session.headers["User-Agent"] = util.USERAGENT
self.api = MangadexAPI(self)
def items(self):