generalize tag-splitting option (#92)

- extend functionality to other booru sites:
  - http://behoimi.org/
  - https://konachan.com/
  - https://e621.net/
  - https://rule34.xxx/
  - https://safebooru.org/
  - https://yande.re/
This commit is contained in:
Mike Fährmann
2018-07-01 22:28:52 +02:00
parent 188e956c4e
commit 4a57509392
8 changed files with 92 additions and 56 deletions

View File

@@ -9,37 +9,13 @@
"""Extract images from https://yande.re/"""
from . import booru
from .. import text
class YandereExtractor(booru.MoebooruPageMixin, booru.BooruExtractor):
    """Base class for yandere extractors

    If the "tags" config option is enabled, _categorize_tags() is installed
    as the 'prepare' hook and adds one "tags_<type>" entry per tag category
    to each image's metadata.
    """
    category = "yandere"
    api_url = "https://yande.re/post.json"
    post_url = "https://yande.re/post/show/{}"

    def __init__(self, match):
        super().__init__(match)
        # categorizing tags costs one extra page request per post,
        # so it is only enabled on request
        if self.config("tags", False):
            self.prepare = self._categorize_tags

    def _categorize_tags(self, image):
        """Split the tags of 'image' by category

        Requests the post's HTML page, reads the tag sidebar, and stores
        the space-joined tag names of each category in
        image["tags_<category>"]. The six known categories are always set,
        as an empty string when no tag of that type was found.
        """
        url = self.post_url.format(image["id"])
        page = self.request(url).text

        # text.extract() appears to yield a falsy value when its markers
        # are not found (the loop below relies on that); normalize to ""
        # so the following calls always operate on a string
        taghtml = text.extract(
            page, '<ul id="tag-sidebar">', '</ul>')[0] or ""

        tags = {"artist": [], "copyright": [], "character": [],
                "circle": [], "faults": [], "general": []}
        pos = 0
        while True:
            tagtype, pos = text.extract(taghtml, "tag-type-", '"', pos)
            if not tagtype:
                break
            tagname, pos = text.extract(taghtml, "?tags=", '"', pos)
            if not tagname:
                # tag type without a tag link: nothing left to collect
                break
            # setdefault: an unlisted tag type must not raise KeyError
            tags.setdefault(tagtype, []).append(text.unquote(tagname))

        for key, value in tags.items():
            image["tags_" + key] = " ".join(value)
class YandereTagExtractor(booru.TagMixin, YandereExtractor):
@@ -69,8 +45,6 @@ class YanderePostExtractor(booru.PostMixin, YandereExtractor):
"tags_artist": "sasaki_tamaru",
"tags_circle": "softhouse_chara",
"tags_copyright": "ouzoku",
"tags_character": str,
"tags_faults": str,
"tags_general": str,
},
})]