[moebooru] add generalized extractors for moebooru sites
- add support for sakugabooru.com (closes #1136)
- add support for lolibooru.moe (closes #1050)

This allows users to dynamically add support for moebooru/myimouto based sites by adding an entry to their config file (as is already possible for foolslide, foolfuuka, etc.).

For example:

{
    "extractor": {
        "moebooru": {
            "new-site-1": {"root": "https://site1.net"},
            "new-site-2": {"root": "https://www.site2.moe"}
        }
    }
}
This commit is contained in:
@@ -69,13 +69,14 @@ Komikcast https://komikcast.com/ Chapters, Manga
|
|||||||
Konachan https://konachan.com/ Pools, Popular Images, Posts, Tag Searches
|
Konachan https://konachan.com/ Pools, Popular Images, Posts, Tag Searches
|
||||||
LINE BLOG https://www.lineblog.me/ Blogs, Posts
|
LINE BLOG https://www.lineblog.me/ Blogs, Posts
|
||||||
livedoor Blog http://blog.livedoor.jp/ Blogs, Posts
|
livedoor Blog http://blog.livedoor.jp/ Blogs, Posts
|
||||||
|
Lolibooru https://lolibooru.moe/ Pools, Popular Images, Posts, Tag Searches
|
||||||
Luscious https://members.luscious.net/ Albums, Search Results
|
Luscious https://members.luscious.net/ Albums, Search Results
|
||||||
Manga Fox https://fanfox.net/ Chapters
|
Manga Fox https://fanfox.net/ Chapters
|
||||||
Manga Here https://www.mangahere.cc/ Chapters, Manga
|
Manga Here https://www.mangahere.cc/ Chapters, Manga
|
||||||
Manga Stream https://readms.net/ Chapters
|
Manga Stream https://readms.net/ Chapters
|
||||||
MangaDex https://mangadex.org/ Chapters, Manga
|
MangaDex https://mangadex.org/ Chapters, Manga
|
||||||
MangaKakalot https://mangakakalot.com/ Chapters, Manga
|
MangaKakalot https://mangakakalot.com/ Chapters, Manga
|
||||||
Mangapanda https://www.mangapanda.com/ Chapters, Manga
|
Mangapanda http://www.mangapanda.com/ Chapters, Manga
|
||||||
MangaPark https://mangapark.net/ Chapters, Manga
|
MangaPark https://mangapark.net/ Chapters, Manga
|
||||||
Mangareader https://www.mangareader.net/ Chapters, Manga
|
Mangareader https://www.mangareader.net/ Chapters, Manga
|
||||||
Mangoxo https://www.mangoxo.com/ Albums, Channels Supported
|
Mangoxo https://www.mangoxo.com/ Albums, Channels Supported
|
||||||
@@ -110,6 +111,7 @@ RedGIFs https://redgifs.com/ individual Images, Sear
|
|||||||
rule #34 https://rule34.paheal.net/ Posts, Tag Searches
|
rule #34 https://rule34.paheal.net/ Posts, Tag Searches
|
||||||
Rule 34 https://rule34.xxx/ Pools, Posts, Tag Searches
|
Rule 34 https://rule34.xxx/ Pools, Posts, Tag Searches
|
||||||
Safebooru https://safebooru.org/ Pools, Posts, Tag Searches
|
Safebooru https://safebooru.org/ Pools, Posts, Tag Searches
|
||||||
|
Sakugabooru https://www.sakugabooru.com/ Pools, Popular Images, Posts, Tag Searches
|
||||||
Sankaku Channel https://chan.sankakucomplex.com/ Pools, Posts, Tag Searches Supported
|
Sankaku Channel https://chan.sankakucomplex.com/ Pools, Posts, Tag Searches Supported
|
||||||
Sankaku Complex https://www.sankakucomplex.com/ Articles, Tag Searches
|
Sankaku Complex https://www.sankakucomplex.com/ Articles, Tag Searches
|
||||||
Sen Manga https://raw.senmanga.com/ Chapters
|
Sen Manga https://raw.senmanga.com/ Chapters
|
||||||
|
|||||||
@@ -1,22 +1,21 @@
|
|||||||
# -*- coding: utf-8 -*-
|
# -*- coding: utf-8 -*-
|
||||||
|
|
||||||
# Copyright 2015-2019 Mike Fährmann
|
# Copyright 2015-2020 Mike Fährmann
|
||||||
#
|
#
|
||||||
# This program is free software; you can redistribute it and/or modify
|
# This program is free software; you can redistribute it and/or modify
|
||||||
# it under the terms of the GNU General Public License version 2 as
|
# it under the terms of the GNU General Public License version 2 as
|
||||||
# published by the Free Software Foundation.
|
# published by the Free Software Foundation.
|
||||||
|
|
||||||
"""Extract images from http://behoimi.org/"""
|
"""Extractors for http://behoimi.org/"""
|
||||||
|
|
||||||
from . import booru
|
from . import moebooru
|
||||||
|
|
||||||
|
|
||||||
class _3dbooruExtractor(booru.MoebooruPageMixin, booru.BooruExtractor):
|
class _3dbooruBase():
|
||||||
"""Base class for 3dbooru extractors"""
|
"""Base class for 3dbooru extractors"""
|
||||||
category = "3dbooru"
|
category = "3dbooru"
|
||||||
api_url = "http://behoimi.org/post/index.json"
|
basecategory = "booru"
|
||||||
post_url = "http://behoimi.org/post/show/{}"
|
root = "http://behoimi.org"
|
||||||
page_limit = 1000
|
|
||||||
|
|
||||||
def __init__(self, match):
|
def __init__(self, match):
|
||||||
super().__init__(match)
|
super().__init__(match)
|
||||||
@@ -26,7 +25,7 @@ class _3dbooruExtractor(booru.MoebooruPageMixin, booru.BooruExtractor):
|
|||||||
})
|
})
|
||||||
|
|
||||||
|
|
||||||
class _3dbooruTagExtractor(booru.TagMixin, _3dbooruExtractor):
|
class _3dbooruTagExtractor(_3dbooruBase, moebooru.MoebooruTagExtractor):
|
||||||
"""Extractor for images from behoimi.org based on search-tags"""
|
"""Extractor for images from behoimi.org based on search-tags"""
|
||||||
pattern = (r"(?:https?://)?(?:www\.)?behoimi\.org/post"
|
pattern = (r"(?:https?://)?(?:www\.)?behoimi\.org/post"
|
||||||
r"(?:/(?:index)?)?\?tags=(?P<tags>[^&#]+)")
|
r"(?:/(?:index)?)?\?tags=(?P<tags>[^&#]+)")
|
||||||
@@ -35,8 +34,12 @@ class _3dbooruTagExtractor(booru.TagMixin, _3dbooruExtractor):
|
|||||||
"content": "11cbda40c287e026c1ce4ca430810f761f2d0b2a",
|
"content": "11cbda40c287e026c1ce4ca430810f761f2d0b2a",
|
||||||
})
|
})
|
||||||
|
|
||||||
|
def posts(self):
|
||||||
|
params = {"tags": self.tags}
|
||||||
|
return self._pagination(self.root + "/post/index.json", params)
|
||||||
|
|
||||||
class _3dbooruPoolExtractor(booru.PoolMixin, _3dbooruExtractor):
|
|
||||||
|
class _3dbooruPoolExtractor(_3dbooruBase, moebooru.MoebooruPoolExtractor):
|
||||||
"""Extractor for image-pools from behoimi.org"""
|
"""Extractor for image-pools from behoimi.org"""
|
||||||
pattern = r"(?:https?://)?(?:www\.)?behoimi\.org/pool/show/(?P<pool>\d+)"
|
pattern = r"(?:https?://)?(?:www\.)?behoimi\.org/pool/show/(?P<pool>\d+)"
|
||||||
test = ("http://behoimi.org/pool/show/27", {
|
test = ("http://behoimi.org/pool/show/27", {
|
||||||
@@ -44,8 +47,12 @@ class _3dbooruPoolExtractor(booru.PoolMixin, _3dbooruExtractor):
|
|||||||
"content": "fd5b37c5c6c2de4b4d6f1facffdefa1e28176554",
|
"content": "fd5b37c5c6c2de4b4d6f1facffdefa1e28176554",
|
||||||
})
|
})
|
||||||
|
|
||||||
|
def posts(self):
|
||||||
|
params = {"tags": "pool:" + self.pool_id}
|
||||||
|
return self._pagination(self.root + "/post/index.json", params)
|
||||||
|
|
||||||
class _3dbooruPostExtractor(booru.PostMixin, _3dbooruExtractor):
|
|
||||||
|
class _3dbooruPostExtractor(_3dbooruBase, moebooru.MoebooruPostExtractor):
|
||||||
"""Extractor for single images from behoimi.org"""
|
"""Extractor for single images from behoimi.org"""
|
||||||
pattern = r"(?:https?://)?(?:www\.)?behoimi\.org/post/show/(?P<post>\d+)"
|
pattern = r"(?:https?://)?(?:www\.)?behoimi\.org/post/show/(?P<post>\d+)"
|
||||||
test = ("http://behoimi.org/post/show/140852", {
|
test = ("http://behoimi.org/post/show/140852", {
|
||||||
@@ -60,8 +67,13 @@ class _3dbooruPostExtractor(booru.PostMixin, _3dbooruExtractor):
|
|||||||
},
|
},
|
||||||
})
|
})
|
||||||
|
|
||||||
|
def posts(self):
|
||||||
|
params = {"tags": "id:" + self.post_id}
|
||||||
|
return self._pagination(self.root + "/post/index.json", params)
|
||||||
|
|
||||||
class _3dbooruPopularExtractor(booru.MoebooruPopularMixin, _3dbooruExtractor):
|
|
||||||
|
class _3dbooruPopularExtractor(
|
||||||
|
_3dbooruBase, moebooru.MoebooruPopularExtractor):
|
||||||
"""Extractor for popular images from behoimi.org"""
|
"""Extractor for popular images from behoimi.org"""
|
||||||
pattern = (r"(?:https?://)?(?:www\.)?behoimi\.org"
|
pattern = (r"(?:https?://)?(?:www\.)?behoimi\.org"
|
||||||
r"/post/popular_(?P<scale>by_(?:day|week|month)|recent)"
|
r"/post/popular_(?P<scale>by_(?:day|week|month)|recent)"
|
||||||
@@ -70,8 +82,3 @@ class _3dbooruPopularExtractor(booru.MoebooruPopularMixin, _3dbooruExtractor):
|
|||||||
"pattern": r"http://behoimi\.org/data/../../[0-9a-f]{32}\.jpg",
|
"pattern": r"http://behoimi\.org/data/../../[0-9a-f]{32}\.jpg",
|
||||||
"count": 20,
|
"count": 20,
|
||||||
})
|
})
|
||||||
|
|
||||||
def __init__(self, match):
|
|
||||||
super().__init__(match)
|
|
||||||
self.api_url = "http://behoimi.org/post/popular_{scale}.json".format(
|
|
||||||
scale=self.scale)
|
|
||||||
|
|||||||
@@ -44,7 +44,6 @@ modules = [
|
|||||||
"hentainexus",
|
"hentainexus",
|
||||||
"hiperdex",
|
"hiperdex",
|
||||||
"hitomi",
|
"hitomi",
|
||||||
"hypnohub",
|
|
||||||
"idolcomplex",
|
"idolcomplex",
|
||||||
"imagebam",
|
"imagebam",
|
||||||
"imagechest",
|
"imagechest",
|
||||||
@@ -60,7 +59,6 @@ modules = [
|
|||||||
"keenspot",
|
"keenspot",
|
||||||
"khinsider",
|
"khinsider",
|
||||||
"komikcast",
|
"komikcast",
|
||||||
"konachan",
|
|
||||||
"lineblog",
|
"lineblog",
|
||||||
"livedoor",
|
"livedoor",
|
||||||
"luscious",
|
"luscious",
|
||||||
@@ -123,8 +121,8 @@ modules = [
|
|||||||
"wikiart",
|
"wikiart",
|
||||||
"xhamster",
|
"xhamster",
|
||||||
"xvideos",
|
"xvideos",
|
||||||
"yandere",
|
|
||||||
"yuki",
|
"yuki",
|
||||||
|
"moebooru",
|
||||||
"foolfuuka",
|
"foolfuuka",
|
||||||
"foolslide",
|
"foolslide",
|
||||||
"mastodon",
|
"mastodon",
|
||||||
|
|||||||
@@ -1,68 +0,0 @@
|
|||||||
# -*- coding: utf-8 -*-
|
|
||||||
|
|
||||||
# Copyright 2019 Mike Fährmann
|
|
||||||
#
|
|
||||||
# This program is free software; you can redistribute it and/or modify
|
|
||||||
# it under the terms of the GNU General Public License version 2 as
|
|
||||||
# published by the Free Software Foundation.
|
|
||||||
|
|
||||||
"""Extractors for https://hypnohub.net/"""
|
|
||||||
|
|
||||||
from . import booru
|
|
||||||
|
|
||||||
|
|
||||||
class HypnohubExtractor(booru.MoebooruPageMixin, booru.BooruExtractor):
|
|
||||||
"""Base class for hypnohub extractors"""
|
|
||||||
category = "hypnohub"
|
|
||||||
api_url = "https://hypnohub.net/post.json"
|
|
||||||
post_url = "https://hypnohub.net/post/show/{}"
|
|
||||||
|
|
||||||
|
|
||||||
class HypnohubTagExtractor(booru.TagMixin, HypnohubExtractor):
|
|
||||||
"""Extractor for images from hypnohub.net based on search-tags"""
|
|
||||||
pattern = (r"(?:https?://)?(?:www\.)?hypnohub\.net"
|
|
||||||
r"/post\?(?:[^&#]*&)*tags=(?P<tags>[^&#]+)")
|
|
||||||
test = ("https://hypnohub.net/post?tags=gonoike_biwa", {
|
|
||||||
"url": "2848abe3e433ad39bfdf5be5874682faaccea5be",
|
|
||||||
})
|
|
||||||
|
|
||||||
|
|
||||||
class HypnohubPoolExtractor(booru.PoolMixin, HypnohubExtractor):
|
|
||||||
"""Extractor for image-pools from hypnohub.net"""
|
|
||||||
pattern = r"(?:https?://)?(?:www\.)?hypnohub\.net/pool/show/(?P<pool>\d+)"
|
|
||||||
test = ("https://hypnohub.net/pool/show/61", {
|
|
||||||
"url": "fd74991c8729e77acd3c35eb6ddc4128ff445adf",
|
|
||||||
})
|
|
||||||
|
|
||||||
|
|
||||||
class HypnohubPostExtractor(booru.PostMixin, HypnohubExtractor):
|
|
||||||
"""Extractor for single images from hypnohub.net"""
|
|
||||||
pattern = r"(?:https?://)?(?:www\.)?hypnohub\.net/post/show/(?P<post>\d+)"
|
|
||||||
test = ("https://hypnohub.net/post/show/73964", {
|
|
||||||
"content": "02d5f5a8396b621a6efc04c5f8ef1b7225dfc6ee",
|
|
||||||
"options": (("tags", True),),
|
|
||||||
"keyword": {
|
|
||||||
"tags_artist": "gonoike_biwa icontrol_(manipper)",
|
|
||||||
"tags_character": "komaru_naegi",
|
|
||||||
"tags_copyright": "dangan_ronpa dangan_ronpa_another_episode",
|
|
||||||
"tags_general": str,
|
|
||||||
},
|
|
||||||
})
|
|
||||||
|
|
||||||
|
|
||||||
class HypnohubPopularExtractor(booru.MoebooruPopularMixin, HypnohubExtractor):
|
|
||||||
"""Extractor for popular images from hypnohub.net"""
|
|
||||||
pattern = (r"(?:https?://)?(?:www\.)?hypnohub\.net"
|
|
||||||
r"/post/popular_(?P<scale>by_(?:day|week|month)|recent)"
|
|
||||||
r"(?:\?(?P<query>[^#]*))?")
|
|
||||||
test = (
|
|
||||||
("https://hypnohub.net/post/popular_by_month?month=6&year=2014", {
|
|
||||||
"count": 20,
|
|
||||||
}),
|
|
||||||
("https://hypnohub.net/post/popular_recent"),
|
|
||||||
)
|
|
||||||
|
|
||||||
def __init__(self, match):
|
|
||||||
super().__init__(match)
|
|
||||||
self.api_url = "https://hypnohub.net/post/popular_{scale}.json".format(
|
|
||||||
scale=self.scale)
|
|
||||||
@@ -1,85 +0,0 @@
|
|||||||
# -*- coding: utf-8 -*-
|
|
||||||
|
|
||||||
# Copyright 2015-2019 Mike Fährmann
|
|
||||||
#
|
|
||||||
# This program is free software; you can redistribute it and/or modify
|
|
||||||
# it under the terms of the GNU General Public License version 2 as
|
|
||||||
# published by the Free Software Foundation.
|
|
||||||
|
|
||||||
"""Extract images from https://konachan.com/"""
|
|
||||||
|
|
||||||
from . import booru
|
|
||||||
|
|
||||||
|
|
||||||
class KonachanExtractor(booru.MoebooruPageMixin, booru.BooruExtractor):
|
|
||||||
"""Base class for konachan extractors"""
|
|
||||||
category = "konachan"
|
|
||||||
|
|
||||||
def __init__(self, match):
|
|
||||||
root = "https://konachan." + match.group("tld")
|
|
||||||
self.api_url = root + "/post.json"
|
|
||||||
self.post_url = root + "/post/show/{}"
|
|
||||||
super().__init__(match)
|
|
||||||
|
|
||||||
|
|
||||||
class KonachanTagExtractor(booru.TagMixin, KonachanExtractor):
|
|
||||||
"""Extractor for images from konachan.com based on search-tags"""
|
|
||||||
pattern = (r"(?:https?://)?(?:www\.)?konachan\.(?P<tld>com|net)"
|
|
||||||
r"/post\?(?:[^&#]*&)*tags=(?P<tags>[^&#]+)")
|
|
||||||
test = (
|
|
||||||
("https://konachan.com/post?tags=patata", {
|
|
||||||
"content": "838cfb815e31f48160855435655ddf7bfc4ecb8d",
|
|
||||||
}),
|
|
||||||
("https://konachan.net/post?tags=patata"),
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
class KonachanPoolExtractor(booru.PoolMixin, KonachanExtractor):
|
|
||||||
"""Extractor for image-pools from konachan.com"""
|
|
||||||
pattern = (r"(?:https?://)?(?:www\.)?konachan\.(?P<tld>com|net)"
|
|
||||||
r"/pool/show/(?P<pool>\d+)")
|
|
||||||
test = (
|
|
||||||
("https://konachan.com/pool/show/95", {
|
|
||||||
"content": "cf0546e38a93c2c510a478f8744e60687b7a8426",
|
|
||||||
}),
|
|
||||||
("https://konachan.net/pool/show/95"),
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
class KonachanPostExtractor(booru.PostMixin, KonachanExtractor):
|
|
||||||
"""Extractor for single images from konachan.com"""
|
|
||||||
pattern = (r"(?:https?://)?(?:www\.)?konachan\.(?P<tld>com|net)"
|
|
||||||
r"/post/show/(?P<post>\d+)")
|
|
||||||
test = (
|
|
||||||
("https://konachan.com/post/show/205189", {
|
|
||||||
"content": "674e75a753df82f5ad80803f575818b8e46e4b65",
|
|
||||||
"options": (("tags", True),),
|
|
||||||
"keyword": {
|
|
||||||
"tags_artist": "patata",
|
|
||||||
"tags_character": "clownpiece",
|
|
||||||
"tags_copyright": "touhou",
|
|
||||||
"tags_general": str,
|
|
||||||
},
|
|
||||||
}),
|
|
||||||
("https://konachan.net/post/show/205189"),
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
class KonachanPopularExtractor(booru.MoebooruPopularMixin, KonachanExtractor):
|
|
||||||
"""Extractor for popular images from konachan.com"""
|
|
||||||
pattern = (r"(?:https?://)?(?:www\.)?konachan\.(?P<tld>com|net)"
|
|
||||||
r"/post/popular_(?P<scale>by_(?:day|week|month)|recent)"
|
|
||||||
r"(?:\?(?P<query>[^#]*))?")
|
|
||||||
test = (
|
|
||||||
("https://konachan.com/post/popular_by_month?month=11&year=2010", {
|
|
||||||
"count": 20,
|
|
||||||
}),
|
|
||||||
("https://konachan.com/post/popular_recent"),
|
|
||||||
("https://konachan.net/post/popular_recent"),
|
|
||||||
)
|
|
||||||
|
|
||||||
def __init__(self, match):
|
|
||||||
super().__init__(match)
|
|
||||||
self.api_url = (
|
|
||||||
"https://konachan.{tld}/post/popular_{scale}.json".format(
|
|
||||||
tld=match.group("tld"), scale=self.scale))
|
|
||||||
281
gallery_dl/extractor/moebooru.py
Normal file
281
gallery_dl/extractor/moebooru.py
Normal file
@@ -0,0 +1,281 @@
|
|||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
|
||||||
|
# Copyright 2020 Mike Fährmann
|
||||||
|
#
|
||||||
|
# This program is free software; you can redistribute it and/or modify
|
||||||
|
# it under the terms of the GNU General Public License version 2 as
|
||||||
|
# published by the Free Software Foundation.
|
||||||
|
|
||||||
|
"""Extractors for Moebooru based sites"""
|
||||||
|
|
||||||
|
from .common import Extractor, Message, generate_extractors
|
||||||
|
from .. import text
|
||||||
|
|
||||||
|
import re
|
||||||
|
import datetime
|
||||||
|
import collections
|
||||||
|
|
||||||
|
|
||||||
|
class MoebooruExtractor(Extractor):
    """Base class for Moebooru extractors

    Subclasses supply posts() and, optionally, metadata(); items() turns
    every usable post into a Directory message and a Url message.
    """
    basecategory = "moebooru"
    filename_fmt = "{category}_{id}_{md5}.{extension}"
    page_start = 1  # first value of the 'page' parameter in _pagination()
    per_page = 50   # value of the 'limit' parameter in _pagination()

    def items(self):
        """Yield a Directory and a Url message for every usable post"""
        extended_tags = self.config("tags", False)
        data = self.metadata()
        for post in self.posts():
            try:
                url = self._prepare_post(post, extended_tags)
            except KeyError:
                # no usable 'file_url' or no 'created_at': skip this post
                continue
            post.update(data)
            text.nameext_from_url(url, post)
            yield Message.Directory, post
            yield Message.Url, url, post

    def metadata(self):
        """Return key-value pairs added to every post (none by default)"""
        return ()

    def posts(self):
        """Return an iterable of post objects (none by default)"""
        return ()

    def skip(self, num):
        """Skip whole result pages covering at most 'num' posts

        Advances 'page_start' and returns the number of posts skipped,
        which is always a multiple of 'per_page'.
        """
        pages = num // self.per_page
        self.page_start += pages
        return pages * self.per_page

    def _prepare_post(self, post, extended_tags=False):
        """Finalize 'post' in place and return its download URL

        Raises KeyError when the post has no usable 'file_url' (missing,
        None, or empty) or no 'created_at' value, so that items() can
        skip it.
        """
        url = post["file_url"]
        if not url:
            # a null or empty 'file_url' would otherwise fail on url[0]
            # with TypeError/IndexError and abort the whole extraction
            raise KeyError("file_url")
        if url[0] == "/":
            # site-relative path: prepend the site root
            url = self.root + url
        if extended_tags:
            self._fetch_extended_tags(post)
        post["date"] = text.parse_timestamp(post["created_at"])
        return url

    def _fetch_extended_tags(self, post):
        """Add 'tags_<type>' entries to 'post' from its HTML post page

        Costs one extra HTTP request per post. Leaves 'post' untouched
        when the tag list markup is not found.
        """
        url = "{}/post/show/{}".format(self.root, post["id"])
        page = self.request(url).text
        # presumably the text between the two markers - see text.extract()
        html = text.extract(page, '<ul id="tag-', '</ul>')[0]
        if html:
            tags = collections.defaultdict(list)
            # group 1: tag type taken from the 'tag-type-...' name,
            # group 2: tag name taken from the 'tags=' query value
            pattern = re.compile(r"tag-type-([^\"' ]+).*?[?;]tags=([^\"']+)")
            for tag_type, tag_name in pattern.findall(html):
                tags[tag_type].append(text.unquote(tag_name))
            for key, value in tags.items():
                post["tags_" + key] = " ".join(value)

    def _pagination(self, url, params):
        """Yield posts from 'url' page by page

        'params' is modified in place ('page' and 'limit' are set).
        Stops after the first page that holds fewer than 'per_page' posts.
        """
        params["page"] = self.page_start
        params["limit"] = self.per_page

        while True:
            posts = self.request(url, params=params).json()
            yield from posts

            if len(posts) < self.per_page:
                return
            params["page"] += 1
|
||||||
|
|
||||||
|
|
||||||
|
class MoebooruTagExtractor(MoebooruExtractor):
    """Extractor for posts matching a tag search"""
    subcategory = "tag"
    directory_fmt = ("{category}", "{search_tags}")
    archive_fmt = "t_{search_tags}_{id}"
    pattern_fmt = r"/post\?(?:[^&#]*&)*tags=([^&#]+)"

    def __init__(self, match):
        MoebooruExtractor.__init__(self, match)
        # '+' separates tags in the URL; turn it into a space, then
        # undo the percent-encoding
        raw_tags = match.group(1)
        self.tags = text.unquote(raw_tags.replace("+", " "))

    def metadata(self):
        """Return the decoded search tags as 'search_tags'"""
        return dict(search_tags=self.tags)

    def posts(self):
        """Return a paginated iterator over all posts matching the tags"""
        endpoint = "{}/post.json".format(self.root)
        return self._pagination(endpoint, {"tags": self.tags})
|
||||||
|
|
||||||
|
|
||||||
|
class MoebooruPoolExtractor(MoebooruExtractor):
    """Extractor for the posts of a single pool"""
    subcategory = "pool"
    directory_fmt = ("{category}", "pool", "{pool}")
    archive_fmt = "p_{pool}_{id}"
    pattern_fmt = r"/pool/show/(\d+)"

    def __init__(self, match):
        MoebooruExtractor.__init__(self, match)
        # kept as the string matched by the pattern; posts() needs it
        # that way to build the 'pool:<id>' search term
        self.pool_id = match.group(1)

    def metadata(self):
        """Return the pool ID, converted with text.parse_int(), as 'pool'"""
        pool_number = text.parse_int(self.pool_id)
        return {"pool": pool_number}

    def posts(self):
        """Return a paginated iterator over a 'pool:<id>' tag search"""
        endpoint = "{}/post.json".format(self.root)
        query = {"tags": "pool:{}".format(self.pool_id)}
        return self._pagination(endpoint, query)
|
||||||
|
|
||||||
|
|
||||||
|
class MoebooruPostExtractor(MoebooruExtractor):
    """Extractor for a single post"""
    subcategory = "post"
    archive_fmt = "{id}"
    pattern_fmt = r"/post/show/(\d+)"

    def __init__(self, match):
        MoebooruExtractor.__init__(self, match)
        # the digits captured by the first pattern group, as a string
        self.post_id = match.group(1)

    def posts(self):
        """Return the decoded JSON result of an 'id:<post_id>' search

        A single request is made; no pagination is involved.
        """
        endpoint = "{}/post.json".format(self.root)
        query = {"tags": "id:{}".format(self.post_id)}
        response = self.request(endpoint, params=query)
        return response.json()
|
||||||
|
|
||||||
|
|
||||||
|
class MoebooruPopularExtractor(MoebooruExtractor):
    """Extractor for the 'popular' listings (day, week, month, recent)"""
    subcategory = "popular"
    directory_fmt = ("{category}", "popular", "{scale}", "{date}")
    archive_fmt = "P_{scale[0]}_{date}_{id}"
    pattern_fmt = r"/post/popular_(by_(?:day|week|month)|recent)(?:\?([^#]*))?"

    def __init__(self, match):
        MoebooruExtractor.__init__(self, match)
        # group 1: 'by_day', 'by_week', 'by_month' or 'recent'
        # group 2: query string after '?' (None when absent)
        self.scale, self.query = match.groups()

    def metadata(self):
        """Return the 'date' and 'scale' values describing this listing

        Also stores the parsed query parameters in 'self.params';
        posts() reads them, so metadata() has to run first.
        """
        query = text.parse_query(self.query)
        self.params = query

        if "year" in query:
            # zero-pad to YYYY-MM-DD; month and day default to '01'
            stamp = "{:>04}-{:>02}-{:>02}".format(
                query["year"],
                query.get("month", "01"),
                query.get("day", "01"),
            )
        else:
            stamp = datetime.date.today().isoformat()

        if self.scale.startswith("by_"):
            scale = self.scale[3:]
        else:
            scale = self.scale

        if scale == "month":
            # drop the '-DD' suffix, leaving YYYY-MM
            stamp = stamp[:-3]
        elif scale == "week":
            # move back to the Monday of that week (weekday() == 0)
            day = datetime.date.fromisoformat(stamp)
            monday = day - datetime.timedelta(days=day.weekday())
            stamp = monday.isoformat()

        return {"date": stamp, "scale": scale}

    def posts(self):
        """Return the decoded JSON result of the popular_<scale> endpoint"""
        endpoint = "{}/post/popular_{}.json".format(self.root, self.scale)
        response = self.request(endpoint, params=self.params)
        return response.json()
|
||||||
|
|
||||||
|
|
||||||
|
# Built-in Moebooru sites, keyed by category name.
#   "root"       - base URL of the site
#   "pattern"    - regex for the site's host name(s); only given for some
#                  sites (NOTE(review): presumably derived from "root"
#                  when absent - confirm in generate_extractors())
#   "test-<sub>" - test URL(s) and expected results for the extractor
#                  with subcategory <sub>
EXTRACTORS = {
    "yandere": {
        "root": "https://yande.re",
        "test-tag": ("https://yande.re/post?tags=ouzoku+armor", {
            "content": "59201811c728096b2d95ce6896fd0009235fe683",
        }),
        "test-pool": ("https://yande.re/pool/show/318", {
            "content": "2a35b9d6edecce11cc2918c6dce4de2198342b68",
        }),
        "test-post": ("https://yande.re/post/show/51824", {
            "content": "59201811c728096b2d95ce6896fd0009235fe683",
            "options": (("tags", True),),
            "keyword": {
                "tags_artist": "sasaki_tamaru",
                "tags_circle": "softhouse_chara",
                "tags_copyright": "ouzoku",
                "tags_general": str,
            },
        }),
        "test-popular": (
            ("https://yande.re/post/popular_by_month?month=6&year=2014", {
                "count": 40,
            }),
            ("https://yande.re/post/popular_recent"),
        ),
    },
    "konachan": {
        "root": "https://konachan.com",
        "pattern": r"konachan\.(?:com|net)",
        "test-tag": (
            ("https://konachan.com/post?tags=patata", {
                "content": "838cfb815e31f48160855435655ddf7bfc4ecb8d",
            }),
            ("https://konachan.net/post?tags=patata"),
        ),
        "test-pool": (
            ("https://konachan.com/pool/show/95", {
                "content": "cf0546e38a93c2c510a478f8744e60687b7a8426",
            }),
            ("https://konachan.net/pool/show/95"),
        ),
        "test-post": (
            ("https://konachan.com/post/show/205189", {
                "content": "674e75a753df82f5ad80803f575818b8e46e4b65",
                "options": (("tags", True),),
                "keyword": {
                    "tags_artist": "patata",
                    "tags_character": "clownpiece",
                    "tags_copyright": "touhou",
                    "tags_general": str,
                },
            }),
            ("https://konachan.net/post/show/205189"),
        ),
        "test-popular": (
            ("https://konachan.com/post/popular_by_month?month=11&year=2010", {
                "count": 20,
            }),
            ("https://konachan.com/post/popular_recent"),
            ("https://konachan.net/post/popular_recent"),
        ),
    },
    "hypnohub": {
        "root": "https://hypnohub.net",
        "test-tag": ("https://hypnohub.net/post?tags=gonoike_biwa", {
            "url": "2848abe3e433ad39bfdf5be5874682faaccea5be",
        }),
        "test-pool": ("https://hypnohub.net/pool/show/61", {
            "url": "fd74991c8729e77acd3c35eb6ddc4128ff445adf",
        }),
        "test-post": ("https://hypnohub.net/post/show/73964", {
            "content": "02d5f5a8396b621a6efc04c5f8ef1b7225dfc6ee",
            "options": (("tags", True),),
            "keyword": {
                "tags_artist": "gonoike_biwa icontrol_(manipper)",
                "tags_character": "komaru_naegi",
                "tags_copyright": "dangan_ronpa dangan_ronpa_another_episode",
                "tags_general": str,
            },
        }),
        "test-popular": (
            ("https://hypnohub.net/post/popular_by_month?month=6&year=2014", {
                "count": 20,
            }),
            ("https://hypnohub.net/post/popular_recent"),
        ),
    },
    "lolibooru": {
        "root": "https://lolibooru.moe",
        "test-tag": ("https://lolibooru.moe/post?tags=ruu_%28tksymkw%29",),
        "test-pool": ("https://lolibooru.moe/pool/show/239",),
        "test-post": ("https://lolibooru.moe/post/show/287835",),
        "test-popular": ("https://lolibooru.moe/post/popular_recent",),
    },
    "sakugabooru": {
        "root": "https://www.sakugabooru.com",
        "pattern": r"(?:www\.)?sakugabooru\.com",
        "test-tag": ("https://www.sakugabooru.com/post?tags=nichijou",),
        "test-pool": ("https://www.sakugabooru.com/pool/show/54",),
        "test-post": ("https://www.sakugabooru.com/post/show/125570",),
        "test-popular": ("https://www.sakugabooru.com/post/popular_recent",),
    },
}

# NOTE(review): presumably creates one concrete extractor class per site
# and base class and stores it in this module's globals - confirm against
# generate_extractors() in .common
generate_extractors(EXTRACTORS, globals(), (
    MoebooruTagExtractor,
    MoebooruPoolExtractor,
    MoebooruPostExtractor,
    MoebooruPopularExtractor,
))
|
||||||
@@ -1,68 +0,0 @@
|
|||||||
# -*- coding: utf-8 -*-
|
|
||||||
|
|
||||||
# Copyright 2015-2019 Mike Fährmann
|
|
||||||
#
|
|
||||||
# This program is free software; you can redistribute it and/or modify
|
|
||||||
# it under the terms of the GNU General Public License version 2 as
|
|
||||||
# published by the Free Software Foundation.
|
|
||||||
|
|
||||||
"""Extract images from https://yande.re/"""
|
|
||||||
|
|
||||||
from . import booru
|
|
||||||
|
|
||||||
|
|
||||||
class YandereExtractor(booru.MoebooruPageMixin, booru.BooruExtractor):
|
|
||||||
"""Base class for yandere extractors"""
|
|
||||||
category = "yandere"
|
|
||||||
api_url = "https://yande.re/post.json"
|
|
||||||
post_url = "https://yande.re/post/show/{}"
|
|
||||||
|
|
||||||
|
|
||||||
class YandereTagExtractor(booru.TagMixin, YandereExtractor):
|
|
||||||
"""Extractor for images from yande.re based on search-tags"""
|
|
||||||
pattern = (r"(?:https?://)?(?:www\.)?yande\.re"
|
|
||||||
r"/post\?(?:[^&#]*&)*tags=(?P<tags>[^&#]+)")
|
|
||||||
test = ("https://yande.re/post?tags=ouzoku+armor", {
|
|
||||||
"content": "59201811c728096b2d95ce6896fd0009235fe683",
|
|
||||||
})
|
|
||||||
|
|
||||||
|
|
||||||
class YanderePoolExtractor(booru.PoolMixin, YandereExtractor):
|
|
||||||
"""Extractor for image-pools from yande.re"""
|
|
||||||
pattern = r"(?:https?://)?(?:www\.)?yande\.re/pool/show/(?P<pool>\d+)"
|
|
||||||
test = ("https://yande.re/pool/show/318", {
|
|
||||||
"content": "2a35b9d6edecce11cc2918c6dce4de2198342b68",
|
|
||||||
})
|
|
||||||
|
|
||||||
|
|
||||||
class YanderePostExtractor(booru.PostMixin, YandereExtractor):
|
|
||||||
"""Extractor for single images from yande.re"""
|
|
||||||
pattern = r"(?:https?://)?(?:www\.)?yande\.re/post/show/(?P<post>\d+)"
|
|
||||||
test = ("https://yande.re/post/show/51824", {
|
|
||||||
"content": "59201811c728096b2d95ce6896fd0009235fe683",
|
|
||||||
"options": (("tags", True),),
|
|
||||||
"keyword": {
|
|
||||||
"tags_artist": "sasaki_tamaru",
|
|
||||||
"tags_circle": "softhouse_chara",
|
|
||||||
"tags_copyright": "ouzoku",
|
|
||||||
"tags_general": str,
|
|
||||||
},
|
|
||||||
})
|
|
||||||
|
|
||||||
|
|
||||||
class YanderePopularExtractor(booru.MoebooruPopularMixin, YandereExtractor):
|
|
||||||
"""Extractor for popular images from yande.re"""
|
|
||||||
pattern = (r"(?:https?://)?(?:www\.)?yande\.re"
|
|
||||||
r"/post/popular_(?P<scale>by_(?:day|week|month)|recent)"
|
|
||||||
r"(?:\?(?P<query>[^#]*))?")
|
|
||||||
test = (
|
|
||||||
("https://yande.re/post/popular_by_month?month=6&year=2014", {
|
|
||||||
"count": 40,
|
|
||||||
}),
|
|
||||||
("https://yande.re/post/popular_recent"),
|
|
||||||
)
|
|
||||||
|
|
||||||
def __init__(self, match):
|
|
||||||
super().__init__(match)
|
|
||||||
self.api_url = "https://yande.re/post/popular_{scale}.json".format(
|
|
||||||
scale=self.scale)
|
|
||||||
Reference in New Issue
Block a user