[realbooru] fix 'tags' for video posts (#8455)

* fix realbooru tags for video posts
* fix lines too long
* combine 'tags' and 'tags_<category>' extraction
    - extract tag categories independent of 'tags' option
* add 'video' & 'animated GIF' tests

---------

Co-authored-by: Mike Fährmann <mike_faehrmann@web.de>
This commit is contained in:
PrivateAccount85642650
2025-10-24 11:33:29 +02:00
committed by GitHub
parent 68909724e7
commit 481bf1e393
2 changed files with 68 additions and 15 deletions

View File

@@ -28,18 +28,30 @@ class RealbooruExtractor(booru.BooruExtractor):
extr('class="container"', '>')
post = {
"_html" : page,
"id" : post_id,
"rating" : "e" if rating == "adult" else (rating or "?")[0],
"tags" : text.unescape(extr(' alt="', '"')),
"file_url" : extr('src="', '"'),
"created_at": extr(">Posted at ", " by "),
"uploader" : extr(">", "<"),
"score" : extr('">', "<"),
"tags" : extr('<br />', "</div>"),
"title" : extr('id="title" style="width: 100%;" value="', '"'),
"source" : extr('d="source" style="width: 100%;" value="', '"'),
}
tags_container = post["tags"]
tags = []
tags_categories = collections.defaultdict(list)
pattern = text.re(r'<a class="(?:tag-type-)?([^"]+).*?;tags=([^"&]+)')
for tag_type, tag_name in pattern.findall(tags_container):
tag = text.unescape(text.unquote(tag_name))
tags.append(tag)
tags_categories[tag_type].append(tag)
for key, value in tags_categories.items():
post[f"tags_{key}"] = ", ".join(value)
tags.sort()
post["tags"] = ", ".join(tags)
post["md5"] = post["file_url"].rpartition("/")[2].partition(".")[0]
return post
@@ -66,16 +78,6 @@ class RealbooruExtractor(booru.BooruExtractor):
return
params["pid"] += self.per_page
def _tags(self, post, _):
    """Add per-category ``tags_<category>`` entries to *post*.

    Reads the raw page markup stored under ``post["_html"]``, takes the
    part that follows ``id="tagLink"`` up to the next ``</div>``
    (assuming ``text.extr`` returns the text between the two markers --
    TODO confirm), and scans it for tag links.

    For each link, the anchor's class name (with an optional
    ``tag-type-`` prefix dropped) is used as the category, and the value
    of the ``tags=`` query parameter is used as the tag name after
    URL-unquoting and HTML-unescaping.

    Each category's tags are stored in *post* as a single
    space-separated string under the key ``"tags_" + category``.
    The second positional argument is not used.
    """
    container = text.extr(post["_html"], 'id="tagLink"', '</div>')

    # group 1: category (class name), group 2: still-encoded tag name
    tag_link = text.re(r'<a class="(?:tag-type-)?([^"]+).*?;tags=([^"&]+)')

    by_category = collections.defaultdict(list)
    for category, raw_name in tag_link.findall(container):
        name = text.unescape(text.unquote(raw_name))
        by_category[category].append(name)

    for category, names in by_category.items():
        post["tags_" + category] = " ".join(names)
class RealbooruTagExtractor(RealbooruExtractor):
subcategory = "tag"