[yandere] add option to split tags by type (#92)

2018-06-29 19:38:53 +02:00
parent a699787d01
commit 87853538b4
4 changed files with 61 additions and 0 deletions
--- a/docs/configuration.rst
+++ b/docs/configuration.rst
@@ -617,6 +617,25 @@ Description A (comma-separated) list of post types to extract images, etc. from.
 =========== =====


+extractor.yandere.tags
+----------------------
+=========== =====
+Type        ``bool``
+Default     ``false``
+Description Split each post's tags by tag type
+            and provide the following additional metadata entries:
+
+            - ``tags_artist``
+            - ``tags_character``
+            - ``tags_circle``
+            - ``tags_copyright``
+            - ``tags_faults``
+            - ``tags_general``
+
+            Note: This requires one additional HTTP request per post.
+=========== =====
+
+

 Downloader Options
 ==================
--- a/docs/gallery-dl.conf
+++ b/docs/gallery-dl.conf
@@ -104,6 +104,10 @@
            "inline": false,
            "posts": "photo",
            "reblogs": true
+        },
+        "yandere":
+        {
+            "tags": false
        }
    },

--- a/gallery_dl/extractor/booru.py
+++ b/gallery_dl/extractor/booru.py
@@ -53,6 +53,7 @@ class BooruExtractor(SharedConfigExtractor):
                    if url.startswith("/"):
                        url = text.urljoin(self.api_url, url)
                    image.update(data)
+                    self.prepare(image)
                    yield Message.Url, url, text.nameext_from_url(url, image)
                except KeyError:
                    continue
@@ -80,6 +81,9 @@ class BooruExtractor(SharedConfigExtractor):
        """Collect metadata for extractor-job"""
        return {}

+    def prepare(self, image):
+        """Prepare and modify an 'image' object"""
+

 class XmlParserMixin():
    """Mixin for XML based API responses"""
--- a/gallery_dl/extractor/yandere.py
+++ b/gallery_dl/extractor/yandere.py
@@ -9,6 +9,7 @@
 """Extract images from https://yande.re/"""

 from . import booru
+from .. import text


 class YandereExtractor(booru.MoebooruPageMixin, booru.BooruExtractor):
@@ -16,6 +17,30 @@ class YandereExtractor(booru.MoebooruPageMixin, booru.BooruExtractor):
    category = "yandere"
    api_url = "https://yande.re/post.json"

+    def __init__(self, match):
+        super().__init__(match)
+        if self.config("tags", False):
+            self.prepare = self._categorize_tags
+
+    def _categorize_tags(self, image):
+        url = "https://yande.re/post/show/{}".format(image["id"])
+        page = self.request(url).text
+        taghtml = text.extract(page, '<ul id="tag-sidebar">', '</ul>')[0]
+
+        pos = 0
+        tags = {"artist": [], "copyright": [], "character": [],
+                "circle": [], "faults": [], "general": []}
+
+        while True:
+            tagtype, pos = text.extract(taghtml, "tag-type-", '"', pos)
+            if not tagtype:
+                break
+            tagname, pos = text.extract(taghtml, "?tags=", '"', pos)
+            tags[tagtype].append(text.unquote(tagname))
+
+        for key, value in tags.items():
+            image["tags_" + key] = " ".join(value)
+

 class YandereTagExtractor(booru.TagMixin, YandereExtractor):
    """Extractor for images from yande.re based on search-tags"""
@@ -39,6 +64,15 @@ class YanderePostExtractor(booru.PostMixin, YandereExtractor):
    pattern = [r"(?:https?://)?(?:www\.)?yande\.re/post/show/(?P<post>\d+)"]
    test = [("https://yande.re/post/show/51824", {
        "content": "59201811c728096b2d95ce6896fd0009235fe683",
+        "options": (("tags", True),),
+        "keyword": {
+            "tags_artist": "sasaki_tamaru",
+            "tags_circle": "softhouse_chara",
+            "tags_copyright": "ouzoku",
+            "tags_character": str,
+            "tags_faults": str,
+            "tags_general": str,
+        },
    })]