From 2818973981ae28a0f2d29f638fbc971e6245e53d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mike=20F=C3=A4hrmann?= Date: Thu, 10 Oct 2024 17:30:55 +0200 Subject: [PATCH] [gelbooru_v02] unescape categorized tags --- gallery_dl/extractor/gelbooru_v02.py | 5 +++-- test/results/realbooru.py | 2 +- test/results/rule34.py | 1 + test/results/safebooru.py | 2 +- test/results/xbooru.py | 2 +- 5 files changed, 7 insertions(+), 5 deletions(-) diff --git a/gallery_dl/extractor/gelbooru_v02.py b/gallery_dl/extractor/gelbooru_v02.py index fbbd26c8..0baad2fa 100644 --- a/gallery_dl/extractor/gelbooru_v02.py +++ b/gallery_dl/extractor/gelbooru_v02.py @@ -97,6 +97,7 @@ class GelbooruV02Extractor(booru.BooruExtractor): @staticmethod def _prepare(post): + post["tags"] = post["tags"].strip() post["date"] = text.parse_datetime( post["created_at"], "%a %b %d %H:%M:%S %z %Y") @@ -114,7 +115,7 @@ class GelbooruV02Extractor(booru.BooruExtractor): pattern = re.compile( r"tag-type-([^\"' ]+).*?[?;]tags=([^\"'&]+)", re.S) for tag_type, tag_name in pattern.findall(tag_container): - tags[tag_type].append(text.unquote(tag_name)) + tags[tag_type].append(text.unescape(text.unquote(tag_name))) for key, value in tags.items(): post["tags_" + key] = " ".join(value) @@ -178,7 +179,7 @@ class GelbooruV02Extractor(booru.BooruExtractor): pattern = re.compile( r'