[yandere] add option to split tags by type (#92)
This commit is contained in:
@@ -617,6 +617,25 @@ Description A (comma-separated) list of post types to extract images, etc. from.
|
||||
=========== =====
|
||||
|
||||
|
||||
extractor.yandere.tags
|
||||
----------------------
|
||||
=========== =====
|
||||
Type ``bool``
|
||||
Default ``false``
|
||||
Description Split tags into different categories
|
||||
and provide the following additional metadata-entries:
|
||||
|
||||
- ``tags_artist``
|
||||
- ``tags_character``
|
||||
- ``tags_circle``
|
||||
- ``tags_copyright``
|
||||
- ``tags_faults``
|
||||
- ``tags_general``
|
||||
|
||||
Note: This requires 1 additional HTTP request for each post.
|
||||
=========== =====
|
||||
|
||||
|
||||
|
||||
Downloader Options
|
||||
==================
|
||||
|
||||
@@ -104,6 +104,10 @@
|
||||
"inline": false,
|
||||
"posts": "photo",
|
||||
"reblogs": true
|
||||
},
|
||||
"yandere":
|
||||
{
|
||||
"tags": false
|
||||
}
|
||||
},
|
||||
|
||||
|
||||
@@ -53,6 +53,7 @@ class BooruExtractor(SharedConfigExtractor):
|
||||
if url.startswith("/"):
|
||||
url = text.urljoin(self.api_url, url)
|
||||
image.update(data)
|
||||
self.prepare(image)
|
||||
yield Message.Url, url, text.nameext_from_url(url, image)
|
||||
except KeyError:
|
||||
continue
|
||||
@@ -80,6 +81,9 @@ class BooruExtractor(SharedConfigExtractor):
|
||||
"""Collect metadata for extractor-job"""
|
||||
return {}
|
||||
|
||||
def prepare(self, image):
    """Hook for adjusting an 'image' object before it is emitted.

    Called once per image, after the job metadata has been merged into
    it and before its URL message is yielded.  The base implementation
    intentionally does nothing; extractors replace it when they need to
    add or rewrite metadata fields in place.
    """
|
||||
|
||||
|
||||
class XmlParserMixin():
|
||||
"""Mixin for XML based API responses"""
|
||||
|
||||
@@ -9,6 +9,7 @@
|
||||
"""Extract images from https://yande.re/"""
|
||||
|
||||
from . import booru
|
||||
from .. import text
|
||||
|
||||
|
||||
class YandereExtractor(booru.MoebooruPageMixin, booru.BooruExtractor):
|
||||
@@ -16,6 +17,30 @@ class YandereExtractor(booru.MoebooruPageMixin, booru.BooruExtractor):
|
||||
category = "yandere"
|
||||
api_url = "https://yande.re/post.json"
|
||||
|
||||
def __init__(self, match):
    """Initialize the extractor and honor the 'tags' option.

    When the 'tags' config value is truthy (default: False), the
    per-image 'prepare' hook is replaced on this instance with
    '_categorize_tags', so every image gets its tags split by type.
    """
    super().__init__(match)
    split_tags = self.config("tags", False)
    if split_tags:
        # Instance attribute shadows the no-op hook of the base class
        self.prepare = self._categorize_tags
|
||||
|
||||
def _categorize_tags(self, image):
    """Split the tags of 'image' by type and store them as metadata.

    Fetches the HTML page of the post identified by image["id"] (one
    extra HTTP request), scans its tag sidebar, and writes one
    space-separated string per tag type into 'image' under the keys
    'tags_artist', 'tags_copyright', 'tags_character', 'tags_circle',
    'tags_faults' and 'tags_general'.  Types without any tags yield an
    empty string.  A tag type that is not in this list gets its own
    'tags_<type>' entry instead of raising.

    Raises KeyError only if 'image' has no "id" entry.
    """
    url = "https://yande.re/post/show/{}".format(image["id"])
    page = self.request(url).text
    taghtml = text.extract(page, '<ul id="tag-sidebar">', '</ul>')[0]

    tags = {"artist": [], "copyright": [], "character": [],
            "circle": [], "faults": [], "general": []}

    pos = 0
    # A falsy 'taghtml' means the sidebar was not found; skip scanning
    # and still emit the (empty) default entries below.
    while taghtml:
        tagtype, pos = text.extract(taghtml, "tag-type-", '"', pos)
        if not tagtype:
            break
        tagname, pos = text.extract(taghtml, "?tags=", '"', pos)
        if tagname is None:
            # Type marker without a tag link: nothing more to collect.
            # NOTE(review): assumes text.extract() signals "not found"
            # with None here as it does for 'tagtype' -- confirm.
            break
        # setdefault() instead of plain indexing: the caller wraps
        # prepare() in 'except KeyError: continue', so an unexpected
        # tag type would otherwise silently drop the whole post.
        tags.setdefault(tagtype, []).append(text.unquote(tagname))

    for key, value in tags.items():
        image["tags_" + key] = " ".join(value)
|
||||
|
||||
|
||||
class YandereTagExtractor(booru.TagMixin, YandereExtractor):
|
||||
"""Extractor for images from yande.re based on search-tags"""
|
||||
@@ -39,6 +64,15 @@ class YanderePostExtractor(booru.PostMixin, YandereExtractor):
|
||||
pattern = [r"(?:https?://)?(?:www\.)?yande\.re/post/show/(?P<post>\d+)"]
|
||||
test = [("https://yande.re/post/show/51824", {
|
||||
"content": "59201811c728096b2d95ce6896fd0009235fe683",
|
||||
"options": (("tags", True),),
|
||||
"keyword": {
|
||||
"tags_artist": "sasaki_tamaru",
|
||||
"tags_circle": "softhouse_chara",
|
||||
"tags_copyright": "ouzoku",
|
||||
"tags_character": str,
|
||||
"tags_faults": str,
|
||||
"tags_general": str,
|
||||
},
|
||||
})]
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user