[booru] rewrite using Mixin classes (#59)
- improved code structure
- improved URL patterns
- better pagination to work around page limits on:
  - Danbooru
  - e621
  - 3dbooru
This commit is contained in:
@@ -1,6 +1,6 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
# Copyright 2015-2017 Mike Fährmann
|
||||
# Copyright 2015-2018 Mike Fährmann
|
||||
#
|
||||
# This program is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License version 2 as
|
||||
@@ -11,57 +11,65 @@
|
||||
from . import booru
|
||||
|
||||
|
||||
class ThreedeebooruExtractor(booru.JSONBooruExtractor):
|
||||
class ThreedeebooruExtractor(booru.JsonParserMixin,
|
||||
booru.MoebooruPageMixin,
|
||||
booru.BooruExtractor):
|
||||
"""Base class for 3dbooru extractors"""
|
||||
category = "3dbooru"
|
||||
api_url = "http://behoimi.org/post/index.json"
|
||||
headers = {
|
||||
"Referer": "http://behoimi.org/post/show/",
|
||||
"Accept-Encoding": "identity",
|
||||
}
|
||||
page_limit = 1000
|
||||
|
||||
def __init__(self, match):
|
||||
super().__init__(match)
|
||||
self.session.headers.update({
|
||||
"Referer": "http://behoimi.org/post/show/",
|
||||
"Accept-Encoding": "identity",
|
||||
})
|
||||
|
||||
|
||||
class ThreedeebooruTagExtractor(ThreedeebooruExtractor,
|
||||
booru.BooruTagExtractor):
|
||||
class ThreedeebooruTagExtractor(booru.TagMixin,
|
||||
ThreedeebooruExtractor):
|
||||
"""Extractor for images from behoimi.org based on search-tags"""
|
||||
pattern = [r"(?:https?://)?(?:www\.)?behoimi\.org/post"
|
||||
r"(?:/(?:index)?)?\?tags=([^&]+)"]
|
||||
r"(?:/(?:index)?)?\?tags=(?P<tags>[^&#]+)"]
|
||||
test = [("http://behoimi.org/post?tags=himekawa_azuru+dress", {
|
||||
"url": "ecb30c6aaaf8a6ff8f55255737a9840832a483c1",
|
||||
"content": "11cbda40c287e026c1ce4ca430810f761f2d0b2a",
|
||||
})]
|
||||
|
||||
|
||||
class ThreedeebooruPoolExtractor(ThreedeebooruExtractor,
|
||||
booru.BooruPoolExtractor):
|
||||
class ThreedeebooruPoolExtractor(booru.PoolMixin,
|
||||
ThreedeebooruExtractor):
|
||||
"""Extractor for image-pools from behoimi.org"""
|
||||
pattern = [r"(?:https?://)?(?:www\.)?behoimi\.org/pool/show/(\d+)"]
|
||||
pattern = [r"(?:https?://)?(?:www\.)?behoimi\.org/pool/show/(?P<pool>\d+)"]
|
||||
test = [("http://behoimi.org/pool/show/27", {
|
||||
"url": "da75d2d1475449d5ef0c266cb612683b110a30f2",
|
||||
"content": "fd5b37c5c6c2de4b4d6f1facffdefa1e28176554",
|
||||
})]
|
||||
|
||||
|
||||
class ThreedeebooruPostExtractor(ThreedeebooruExtractor,
|
||||
booru.BooruPostExtractor):
|
||||
class ThreedeebooruPostExtractor(booru.PostMixin,
|
||||
ThreedeebooruExtractor):
|
||||
"""Extractor for single images from behoimi.org"""
|
||||
pattern = [r"(?:https?://)?(?:www\.)?behoimi\.org/post/show/(\d+)"]
|
||||
pattern = [r"(?:https?://)?(?:www\.)?behoimi\.org/post/show/(?P<post>\d+)"]
|
||||
test = [("http://behoimi.org/post/show/140852", {
|
||||
"url": "ce874ea26f01d6c94795f3cc3aaaaa9bc325f2f6",
|
||||
"content": "26549d55b82aa9a6c1686b96af8bfcfa50805cd4",
|
||||
})]
|
||||
|
||||
|
||||
class ThreedeebooruPopularExtractor(ThreedeebooruExtractor,
|
||||
booru.BooruPopularExtractor):
|
||||
class ThreedeebooruPopularExtractor(booru.MoebooruPopularMixin,
|
||||
ThreedeebooruExtractor):
|
||||
"""Extractor for popular images from behoimi.org"""
|
||||
pattern = [r"(?:https?://)?(?:www\.)?behoimi\.org/post/popular_"
|
||||
r"(by_(?:day|week|month)|recent)(?:\?([^#]*))?"]
|
||||
pattern = [r"(?:https?://)?(?:www\.)?behoimi\.org"
|
||||
r"/post/popular_(?P<scale>by_(?:day|week|month)|recent)"
|
||||
r"(?:\?(?P<query>[^#]*))?"]
|
||||
test = [("http://behoimi.org/post/popular_by_month?month=2&year=2013", {
|
||||
"url": "a447e115fdab60c25ab71c4fdb1b9f509bc23f99",
|
||||
"count": 20,
|
||||
})]
|
||||
|
||||
@property
|
||||
def api_url(self):
|
||||
return "http://behoimi.org/post/popular_" + self.scale + ".json"
|
||||
def __init__(self, match):
|
||||
super().__init__(match)
|
||||
self.api_url = "http://behoimi.org/post/popular_{scale}.json".format(
|
||||
scale=self.scale)
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
# Copyright 2015-2017 Mike Fährmann
|
||||
# Copyright 2015-2018 Mike Fährmann
|
||||
#
|
||||
# This program is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License version 2 as
|
||||
@@ -10,8 +10,8 @@
|
||||
|
||||
from .common import SharedConfigExtractor, Message
|
||||
from .. import text
|
||||
import xml.etree.ElementTree as ET
|
||||
import urllib.parse
|
||||
from urllib.parse import urljoin
|
||||
from xml.etree import ElementTree
|
||||
import datetime
|
||||
import operator
|
||||
|
||||
@@ -20,166 +20,158 @@ class BooruExtractor(SharedConfigExtractor):
|
||||
"""Base class for all booru extractors"""
|
||||
basecategory = "booru"
|
||||
filename_fmt = "{category}_{id}_{md5}.{extension}"
|
||||
headers = {}
|
||||
pagestart = 1
|
||||
pagekey = "page"
|
||||
api_url = ""
|
||||
per_page = 50
|
||||
page_start = 1
|
||||
page_limit = None
|
||||
sort = False
|
||||
|
||||
def __init__(self):
|
||||
SharedConfigExtractor.__init__(self)
|
||||
self.session.headers.update(self.headers)
|
||||
self.params = {"limit": 50}
|
||||
self.setup()
|
||||
def __init__(self, match):
|
||||
super().__init__()
|
||||
self.params = {}
|
||||
|
||||
def skip(self, num):
    """Skip whole result pages so that up to 'num' posts are passed over.

    Only complete pages are skipped: 'page_start' is advanced by the
    number of full pages contained in 'num', limited so that it never
    moves past 'page_limit' when such a limit is set.

    Returns the number of posts that were actually skipped, which is
    always a multiple of 'per_page' and may be less than 'num'.
    """
    # Extractors without numeric pagination set 'page_start' to None;
    # there is no page counter to advance, so nothing can be skipped.
    if self.page_start is None:
        return 0

    pages = num // self.per_page
    if self.page_limit and pages + self.page_start > self.page_limit:
        # never report a negative amount if page_start is already
        # at or beyond the limit
        pages = max(self.page_limit - self.page_start, 0)
    self.page_start += pages
    return pages * self.per_page
|
||||
|
||||
def items(self):
|
||||
yield Message.Version, 1
|
||||
yield Message.Directory, self.get_job_metadata()
|
||||
for data in self.items_impl():
|
||||
try:
|
||||
url = self.get_file_url(data)
|
||||
data = self.get_file_metadata(data)
|
||||
yield Message.Url, url, data
|
||||
except KeyError:
|
||||
continue
|
||||
yield Message.Directory, self.get_metadata()
|
||||
|
||||
def skip(self, num):
|
||||
limit = self.params["limit"]
|
||||
pages = num // limit
|
||||
self.pagestart += pages
|
||||
return pages * limit
|
||||
self.reset_page()
|
||||
while True:
|
||||
images, count = self.parse_response(
|
||||
self.request(self.api_url, params=self.params))
|
||||
|
||||
def items_impl(self):
|
||||
pass
|
||||
for data in images:
|
||||
try:
|
||||
url = data["file_url"]
|
||||
if url.startswith("/"):
|
||||
url = urljoin(self.api_url, url)
|
||||
yield Message.Url, url, text.nameext_from_url(url, data)
|
||||
except KeyError:
|
||||
continue
|
||||
|
||||
def setup(self):
|
||||
pass
|
||||
if count < self.per_page:
|
||||
return
|
||||
self.update_page(data)
|
||||
|
||||
def update_page(self, reset=False):
|
||||
"""Update the value of the 'page' parameter"""
|
||||
# Override this method in derived classes if necessary.
|
||||
# It is usually enough to just adjust the 'page' attribute
|
||||
if reset is False:
|
||||
self.params[self.pagekey] += 1
|
||||
else:
|
||||
self.params[self.pagekey] = self.pagestart
|
||||
def reset_page(self):
|
||||
"""Initialize params to point to the first page"""
|
||||
self.params["page"] = self.page_start
|
||||
|
||||
def get_job_metadata(self):
|
||||
def update_page(self, data):
|
||||
"""Update params to point to the next page"""
|
||||
|
||||
def get_metadata(self):
|
||||
"""Collect metadata for extractor-job"""
|
||||
# Override this method in derived classes
|
||||
return {}
|
||||
|
||||
def get_file_metadata(self, data):
|
||||
"""Collect metadata for a downloadable file"""
|
||||
return text.nameext_from_url(self.get_file_url(data), data)
|
||||
|
||||
def get_file_url(self, data):
|
||||
"""Extract download-url from 'data'"""
|
||||
url = data["file_url"]
|
||||
if url.startswith("/"):
|
||||
url = urllib.parse.urljoin(self.api_url, url)
|
||||
return url
|
||||
|
||||
|
||||
class JSONBooruExtractor(BooruExtractor):
|
||||
"""Base class for JSON based API responses"""
|
||||
class JsonParserMixin():
|
||||
"""Class for JSON based API responses"""
|
||||
sort = False
|
||||
|
||||
def items_impl(self):
|
||||
self.update_page(reset=True)
|
||||
while True:
|
||||
images = self.request(self.api_url, params=self.params).json()
|
||||
if self.sort:
|
||||
images.sort(key=operator.itemgetter("score", "id"),
|
||||
reverse=True)
|
||||
yield from images
|
||||
if len(images) < self.params["limit"]:
|
||||
return
|
||||
self.update_page()
|
||||
def parse_response(self, response):
|
||||
images = response.json()
|
||||
if self.sort:
|
||||
images.sort(key=operator.itemgetter("score", "id"),
|
||||
reverse=True)
|
||||
return images, len(images)
|
||||
|
||||
|
||||
class XMLBooruExtractor(BooruExtractor):
|
||||
"""Base class for XML based API responses"""
|
||||
def items_impl(self):
|
||||
self.update_page(reset=True)
|
||||
while True:
|
||||
root = ET.fromstring(
|
||||
self.request(self.api_url, params=self.params).text
|
||||
)
|
||||
for item in root:
|
||||
yield item.attrib
|
||||
if len(root) < self.params["limit"]:
|
||||
return
|
||||
self.update_page()
|
||||
class XmlParserMixin():
    """Class for XML based API responses"""

    def parse_response(self, response):
        """Parse an XML API response into post data.

        'response' must provide the XML document as its 'text' attribute.
        Returns a tuple of (list of attribute dicts, one per child
        element of the document root; number of those elements).
        """
        root = ElementTree.fromstring(response.text)
        # Build a real list (like the JSON parser does) instead of a
        # one-shot map object, so the result can be iterated repeatedly.
        images = [post.attrib for post in root]
        return images, len(images)
|
||||
|
||||
|
||||
class BooruTagExtractor(BooruExtractor):
|
||||
"""Extractor for images based on search-tags"""
|
||||
class DanbooruPageMixin():
|
||||
"""Pagination for Danbooru v2"""
|
||||
def update_page(self, data):
|
||||
self.params["page"] = "b{}".format(data["id"])
|
||||
|
||||
|
||||
class MoebooruPageMixin():
    """Pagination for Moebooru and Danbooru v1"""

    def update_page(self, data):
        """Update params to point to the next page.

        'data' is the last post of the current page. If the extractor
        defines a 'page_limit', numbered pages are abandoned: 'page' is
        set to None and 'before_id' is set to that post's id. Otherwise
        the 'page' number is simply incremented.
        """
        if self.page_limit:
            self.params["page"] = None
            self.params["before_id"] = data["id"]
        else:
            self.params["page"] += 1
|
||||
|
||||
|
||||
class GelbooruPageMixin():
|
||||
"""Pagination for Gelbooru-like sites"""
|
||||
def reset_page(self):
|
||||
self.params["pid"] = self.page_start - 1
|
||||
|
||||
def update_page(self, data):
|
||||
self.params["pid"] += 1
|
||||
|
||||
|
||||
class TagMixin():
|
||||
"""Extraction of images based on search-tags"""
|
||||
subcategory = "tag"
|
||||
directory_fmt = ["{category}", "{tags}"]
|
||||
|
||||
def __init__(self, match):
|
||||
BooruExtractor.__init__(self)
|
||||
self.tags = text.unquote(match.group(1).replace("+", " "))
|
||||
super().__init__(match)
|
||||
self.tags = text.unquote(match.group("tags").replace("+", " "))
|
||||
self.params["tags"] = self.tags
|
||||
self.params["limit"] = self.per_page
|
||||
|
||||
def get_job_metadata(self):
|
||||
def get_metadata(self):
|
||||
return {"tags": self.tags}
|
||||
|
||||
|
||||
class BooruPoolExtractor(BooruExtractor):
|
||||
"""Extractor for image-pools"""
|
||||
class PoolMixin():
|
||||
"""Extraction of image-pools"""
|
||||
subcategory = "pool"
|
||||
directory_fmt = ["{category}", "pool", "{pool}"]
|
||||
|
||||
def __init__(self, match):
|
||||
BooruExtractor.__init__(self)
|
||||
self.pool = match.group(1)
|
||||
super().__init__(match)
|
||||
self.pool = match.group("pool")
|
||||
self.params["tags"] = "pool:" + self.pool
|
||||
self.params["limit"] = self.per_page
|
||||
|
||||
def get_job_metadata(self):
|
||||
def get_metadata(self):
|
||||
return {"pool": self.pool}
|
||||
|
||||
|
||||
class BooruPostExtractor(BooruExtractor):
|
||||
"""Extractor for single images"""
|
||||
class PostMixin():
|
||||
"""Extraction of a single image-post"""
|
||||
subcategory = "post"
|
||||
|
||||
def __init__(self, match):
|
||||
BooruExtractor.__init__(self)
|
||||
self.post = match.group(1)
|
||||
super().__init__(match)
|
||||
self.post = match.group("post")
|
||||
self.params["tags"] = "id:" + self.post
|
||||
|
||||
def get_metadata(self):
|
||||
return {}
|
||||
|
||||
class BooruPopularExtractor(BooruExtractor):
|
||||
"""Extractor for popular images"""
|
||||
|
||||
class PopularMixin():
|
||||
"""Extraction and metadata handling for Danbooru v2"""
|
||||
subcategory = "popular"
|
||||
directory_fmt = ["{category}", "popular", "{scale}", "{date}"]
|
||||
page_start = None
|
||||
|
||||
def __init__(self, match):
|
||||
BooruExtractor.__init__(self)
|
||||
super().__init__(match)
|
||||
self.sort = True
|
||||
self.scale = match.group(1)
|
||||
self.params.update(text.parse_query(match.group(2)))
|
||||
self.params.update(text.parse_query(match.group("query")))
|
||||
|
||||
def get_job_metadata(self, fmt="%Y-%m-%d"):
|
||||
if "scale" in self.params:
|
||||
scale = self.params["scale"]
|
||||
elif self.scale:
|
||||
scale = self.scale
|
||||
if scale.startswith("by_"):
|
||||
scale = scale[3:]
|
||||
else:
|
||||
scale = "day"
|
||||
|
||||
if "date" in self.params:
|
||||
date = self.params["date"][:10]
|
||||
elif "year" in self.params:
|
||||
date = "{:>04}-{:>02}-{:>02}".format(
|
||||
self.params["year"],
|
||||
self.params.get("month", "01"),
|
||||
self.params.get("day", "01"))
|
||||
else:
|
||||
date = datetime.datetime.utcnow().strftime(fmt)
|
||||
def get_metadata(self, fmt="%Y-%m-%d"):
|
||||
date = self.get_date() or datetime.datetime.utcnow().strftime(fmt)
|
||||
scale = self.get_scale() or "day"
|
||||
|
||||
if scale == "week":
|
||||
dt = datetime.datetime.strptime(date, fmt)
|
||||
@@ -189,3 +181,33 @@ class BooruPopularExtractor(BooruExtractor):
|
||||
date = date[:-3]
|
||||
|
||||
return {"date": date, "scale": scale}
|
||||
|
||||
def get_scale(self):
|
||||
if "scale" in self.params:
|
||||
return self.params["scale"]
|
||||
return None
|
||||
|
||||
def get_date(self):
|
||||
if "date" in self.params:
|
||||
return self.params["date"][:10]
|
||||
return None
|
||||
|
||||
|
||||
class MoebooruPopularMixin(PopularMixin):
|
||||
"""Extraction and metadata handling for Moebooru and Danbooru v1"""
|
||||
def __init__(self, match):
|
||||
super().__init__(match)
|
||||
self.scale = match.group("scale")
|
||||
|
||||
def get_date(self):
|
||||
if "year" in self.params:
|
||||
return "{:>04}-{:>02}-{:>02}".format(
|
||||
self.params["year"],
|
||||
self.params.get("month", "01"),
|
||||
self.params.get("day", "01"))
|
||||
return None
|
||||
|
||||
def get_scale(self):
|
||||
if self.scale and self.scale.startswith("by_"):
|
||||
return self.scale[3:]
|
||||
return self.scale
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
# Copyright 2014-2017 Mike Fährmann
|
||||
# Copyright 2014-2018 Mike Fährmann
|
||||
#
|
||||
# This program is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License version 2 as
|
||||
@@ -11,16 +11,19 @@
|
||||
from . import booru
|
||||
|
||||
|
||||
class DanbooruExtractor(booru.JSONBooruExtractor):
|
||||
class DanbooruExtractor(booru.JsonParserMixin,
|
||||
booru.DanbooruPageMixin,
|
||||
booru.BooruExtractor):
|
||||
"""Base class for danbooru extractors"""
|
||||
category = "danbooru"
|
||||
api_url = "https://danbooru.donmai.us/posts.json"
|
||||
page_limit = 1000
|
||||
|
||||
|
||||
class DanbooruTagExtractor(DanbooruExtractor, booru.BooruTagExtractor):
|
||||
class DanbooruTagExtractor(booru.TagMixin, DanbooruExtractor):
|
||||
"""Extractor for images from danbooru based on search-tags"""
|
||||
pattern = [r"(?:https?://)?(?:danbooru|hijiribe|sonohara)\.donmai\.us"
|
||||
r"/posts\?(?:[^&#]*&)*tags=([^&#]+)"]
|
||||
r"/posts\?(?:[^&#]*&)*tags=(?P<tags>[^&#]+)"]
|
||||
test = [
|
||||
("https://danbooru.donmai.us/posts?tags=bonocho", {
|
||||
"content": "b196fb9f1668109d7774a0a82efea3ffdda07746",
|
||||
@@ -30,28 +33,28 @@ class DanbooruTagExtractor(DanbooruExtractor, booru.BooruTagExtractor):
|
||||
]
|
||||
|
||||
|
||||
class DanbooruPoolExtractor(DanbooruExtractor, booru.BooruPoolExtractor):
|
||||
class DanbooruPoolExtractor(booru.PoolMixin, DanbooruExtractor):
|
||||
"""Extractor for image-pools from danbooru"""
|
||||
pattern = [r"(?:https?://)?(?:danbooru|hijiribe|sonohara)\.donmai\.us"
|
||||
r"/pools/(\d+)"]
|
||||
r"/pools/(?P<pool>\d+)"]
|
||||
test = [("https://danbooru.donmai.us/pools/7659", {
|
||||
"content": "b16bab12bea5f7ea9e0a836bf8045f280e113d99",
|
||||
})]
|
||||
|
||||
|
||||
class DanbooruPostExtractor(DanbooruExtractor, booru.BooruPostExtractor):
|
||||
class DanbooruPostExtractor(booru.PostMixin, DanbooruExtractor):
|
||||
"""Extractor for single images from danbooru"""
|
||||
pattern = [r"(?:https?://)?(?:danbooru|hijiribe|sonohara)\.donmai\.us"
|
||||
r"/posts/(\d+)"]
|
||||
r"/posts/(?P<post>\d+)"]
|
||||
test = [("https://danbooru.donmai.us/posts/294929", {
|
||||
"content": "5e255713cbf0a8e0801dc423563c34d896bb9229",
|
||||
})]
|
||||
|
||||
|
||||
class DanbooruPopularExtractor(DanbooruExtractor, booru.BooruPopularExtractor):
|
||||
class DanbooruPopularExtractor(booru.PopularMixin, DanbooruExtractor):
|
||||
"""Extractor for popular images from danbooru"""
|
||||
pattern = [r"(?:https?://)?(?:danbooru|hijiribe|sonohara)\.donmai\.us"
|
||||
r"/explore/posts/popular()(?:\?([^#]*))?"]
|
||||
r"/explore/posts/popular(?:\?(?P<query>[^#]*))?"]
|
||||
test = [
|
||||
("https://danbooru.donmai.us/explore/posts/popular", None),
|
||||
(("https://danbooru.donmai.us/explore/posts/popular"
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
# Copyright 2014-2017 Mike Fährmann
|
||||
# Copyright 2014-2018 Mike Fährmann
|
||||
#
|
||||
# This program is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License version 2 as
|
||||
@@ -11,50 +11,58 @@
|
||||
from . import booru
|
||||
|
||||
|
||||
class E621Extractor(booru.JSONBooruExtractor):
|
||||
class E621Extractor(booru.JsonParserMixin,
|
||||
booru.MoebooruPageMixin,
|
||||
booru.BooruExtractor):
|
||||
"""Base class for e621 extractors"""
|
||||
category = "e621"
|
||||
api_url = "https://e621.net/post/index.json"
|
||||
page_limit = 750
|
||||
|
||||
|
||||
class E621TagExtractor(E621Extractor, booru.BooruTagExtractor):
|
||||
class E621TagExtractor(booru.TagMixin, E621Extractor):
|
||||
"""Extractor for images from e621.net based on search-tags"""
|
||||
pattern = [
|
||||
r"(?:https?://)?(?:www\.)?e621\.net/post/index/\d+/([^?]+)",
|
||||
r"(?:https?://)?(?:www\.)?e621\.net/post\?tags=([^&]+)",
|
||||
r"(?:https?://)?(?:www\.)?e621\.net/post/index/\d+/(?P<tags>[^/?&#]+)",
|
||||
r"(?:https?://)?(?:www\.)?e621\.net/post\?tags=(?P<tags>[^&#]+)",
|
||||
]
|
||||
test = [
|
||||
("https://e621.net/post/index/1/anry", {
|
||||
"url": "8021e5ea28d47c474c1ffc9bd44863c4d45700ba",
|
||||
"content": "501d1e5d922da20ee8ff9806f5ed3ce3a684fd58",
|
||||
}),
|
||||
("https://e621.net/post?tags=anry", None),
|
||||
]
|
||||
test = [("https://e621.net/post/index/1/anry", {
|
||||
"url": "8021e5ea28d47c474c1ffc9bd44863c4d45700ba",
|
||||
"content": "501d1e5d922da20ee8ff9806f5ed3ce3a684fd58",
|
||||
})]
|
||||
|
||||
|
||||
class E621PoolExtractor(E621Extractor, booru.BooruPoolExtractor):
|
||||
class E621PoolExtractor(booru.PoolMixin, E621Extractor):
|
||||
"""Extractor for image-pools from e621.net"""
|
||||
pattern = [r"(?:https?://)?(?:www\.)?e621\.net/pool/show/(\d+)"]
|
||||
pattern = [r"(?:https?://)?(?:www\.)?e621\.net/pool/show/(?P<pool>\d+)"]
|
||||
test = [("https://e621.net/pool/show/73", {
|
||||
"url": "842f2fb065c7c339486a9b1d689020b8569888ed",
|
||||
"content": "c2c87b7a9150509496cddc75ccab08109922876a",
|
||||
})]
|
||||
|
||||
|
||||
class E621PostExtractor(E621Extractor, booru.BooruPostExtractor):
|
||||
class E621PostExtractor(booru.PostMixin, E621Extractor):
|
||||
"""Extractor for single images from e621.net"""
|
||||
pattern = [r"(?:https?://)?(?:www\.)?e621\.net/post/show/(\d+)"]
|
||||
pattern = [r"(?:https?://)?(?:www\.)?e621\.net/post/show/(?P<post>\d+)"]
|
||||
test = [("https://e621.net/post/show/535", {
|
||||
"url": "f7f78b44c9b88f8f09caac080adc8d6d9fdaa529",
|
||||
"content": "66f46e96a893fba8e694c4e049b23c2acc9af462",
|
||||
})]
|
||||
|
||||
|
||||
class E621PopularExtractor(E621Extractor, booru.BooruPopularExtractor):
|
||||
class E621PopularExtractor(booru.MoebooruPopularMixin, E621Extractor):
|
||||
"""Extractor for popular images from 621.net"""
|
||||
pattern = [r"(?:https?://)?(?:www\.)?e621\.net/post/popular_by_"
|
||||
r"(day|week|month)(?:\?([^#]*))?"]
|
||||
pattern = [r"(?:https?://)?(?:www\.)?e621\.net"
|
||||
r"/post/popular_by_(?P<scale>day|week|month)"
|
||||
r"(?:\?(?P<query>[^#]*))?"]
|
||||
test = [("https://e621.net/post/popular_by_month?month=6&year=2013", {
|
||||
"count": 32,
|
||||
})]
|
||||
|
||||
@property
|
||||
def api_url(self):
|
||||
return "https://e621.net/post/popular_by_" + self.scale + ".json"
|
||||
def __init__(self, match):
|
||||
super().__init__(match)
|
||||
self.api_url = "https://e621.net/post/popular_by_{scale}.json".format(
|
||||
scale=self.scale)
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
# Copyright 2015-2017 Mike Fährmann
|
||||
# Copyright 2015-2018 Mike Fährmann
|
||||
#
|
||||
# This program is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License version 2 as
|
||||
@@ -11,44 +11,69 @@
|
||||
from . import booru
|
||||
|
||||
|
||||
class KonachanExtractor(booru.JSONBooruExtractor):
|
||||
class KonachanExtractor(booru.JsonParserMixin,
|
||||
booru.MoebooruPageMixin,
|
||||
booru.BooruExtractor):
|
||||
"""Base class for konachan extractors"""
|
||||
category = "konachan"
|
||||
api_url = "https://konachan.com/post.json"
|
||||
|
||||
def __init__(self, match):
|
||||
super().__init__(match)
|
||||
self.api_url = "https://konachan.{tld}/post.json".format(
|
||||
tld=match.group("tld"))
|
||||
|
||||
|
||||
class KonachanTagExtractor(KonachanExtractor, booru.BooruTagExtractor):
|
||||
class KonachanTagExtractor(booru.TagMixin, KonachanExtractor):
|
||||
"""Extractor for images from konachan.com based on search-tags"""
|
||||
pattern = [r"(?:https?://)?(?:www\.)?konachan\.com/post\?tags=([^&]+)"]
|
||||
test = [("http://konachan.com/post?tags=patata", {
|
||||
"content": "838cfb815e31f48160855435655ddf7bfc4ecb8d",
|
||||
})]
|
||||
pattern = [r"(?:https?://)?(?:www\.)?konachan\.(?P<tld>com|net)"
|
||||
r"/post\?(?:[^&#]*&)*tags=(?P<tags>[^&#]+)"]
|
||||
test = [
|
||||
("http://konachan.com/post?tags=patata", {
|
||||
"content": "838cfb815e31f48160855435655ddf7bfc4ecb8d",
|
||||
}),
|
||||
("http://konachan.net/post?tags=patata", None),
|
||||
]
|
||||
|
||||
|
||||
class KonachanPoolExtractor(KonachanExtractor, booru.BooruPoolExtractor):
|
||||
class KonachanPoolExtractor(booru.PoolMixin, KonachanExtractor):
|
||||
"""Extractor for image-pools from konachan.com"""
|
||||
pattern = [r"(?:https?://)?(?:www\.)?konachan\.com/pool/show/(\d+)"]
|
||||
test = [("http://konachan.com/pool/show/95", {
|
||||
"content": "cf0546e38a93c2c510a478f8744e60687b7a8426",
|
||||
})]
|
||||
pattern = [r"(?:https?://)?(?:www\.)?konachan\.(?P<tld>com|net)"
|
||||
r"/pool/show/(?P<pool>\d+)"]
|
||||
test = [
|
||||
("http://konachan.com/pool/show/95", {
|
||||
"content": "cf0546e38a93c2c510a478f8744e60687b7a8426",
|
||||
}),
|
||||
("http://konachan.net/pool/show/95", None),
|
||||
]
|
||||
|
||||
|
||||
class KonachanPostExtractor(KonachanExtractor, booru.BooruPostExtractor):
|
||||
class KonachanPostExtractor(booru.PostMixin, KonachanExtractor):
|
||||
"""Extractor for single images from konachan.com"""
|
||||
pattern = [r"(?:https?://)?(?:www\.)?konachan\.com/post/show/(\d+)"]
|
||||
test = [("http://konachan.com/post/show/205189", {
|
||||
"content": "674e75a753df82f5ad80803f575818b8e46e4b65",
|
||||
})]
|
||||
pattern = [r"(?:https?://)?(?:www\.)?konachan\.(?P<tld>com|net)"
|
||||
r"/post/show/(?P<post>\d+)"]
|
||||
# One downloadable sample on konachan.com plus a pattern-only check
# for the konachan.net mirror handled by the 'tld' group.
test = [
    ("http://konachan.com/post/show/205189", {
        "content": "674e75a753df82f5ad80803f575818b8e46e4b65",
    }),
    ("http://konachan.net/post/show/205189", None),
]
|
||||
|
||||
|
||||
class KonachanPopularExtractor(KonachanExtractor, booru.BooruPopularExtractor):
|
||||
class KonachanPopularExtractor(booru.MoebooruPopularMixin, KonachanExtractor):
|
||||
"""Extractor for popular images from konachan.com"""
|
||||
pattern = [r"(?:https?://)?(?:www\.)?konachan\.com/post/popular_"
|
||||
r"(by_(?:day|week|month)|recent)(?:\?([^#]*))?"]
|
||||
test = [("https://konachan.com/post/popular_by_month?month=11&year=2010", {
|
||||
"count": 20,
|
||||
})]
|
||||
pattern = [r"(?:https?://)?(?:www\.)?konachan\.(?P<tld>com|net)"
|
||||
r"/post/popular_(?P<scale>by_(?:day|week|month)|recent)"
|
||||
r"(?:\?(?P<query>[^#]*))?"]
|
||||
test = [
|
||||
("https://konachan.com/post/popular_by_month?month=11&year=2010", {
|
||||
"count": 20,
|
||||
}),
|
||||
("https://konachan.com/post/popular_recent", None),
|
||||
("https://konachan.net/post/popular_recent", None),
|
||||
]
|
||||
|
||||
@property
|
||||
def api_url(self):
|
||||
return "https://konachan.com/post/popular_" + self.scale + ".json"
|
||||
def __init__(self, match):
|
||||
super().__init__(match)
|
||||
self.api_url = (
|
||||
"https://konachan.{tld}/post/popular_{scale}.json".format(
|
||||
tld=match.group("tld"), scale=self.scale))
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
# Copyright 2016-2017 Mike Fährmann
|
||||
# Copyright 2016-2018 Mike Fährmann
|
||||
#
|
||||
# This program is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License version 2 as
|
||||
@@ -11,31 +11,33 @@
|
||||
from . import booru
|
||||
|
||||
|
||||
class Rule34Extractor(booru.XMLBooruExtractor):
|
||||
class Rule34Extractor(booru.XmlParserMixin,
|
||||
booru.GelbooruPageMixin,
|
||||
booru.BooruExtractor):
|
||||
"""Base class for rule34 extractors"""
|
||||
category = "rule34"
|
||||
api_url = "https://rule34.xxx/index.php"
|
||||
pagestart = 0
|
||||
pagekey = "pid"
|
||||
page_limit = 4000
|
||||
|
||||
def setup(self):
|
||||
def __init__(self, match):
|
||||
super().__init__(match)
|
||||
self.params.update({"page": "dapi", "s": "post", "q": "index"})
|
||||
|
||||
|
||||
class Rule34TagExtractor(Rule34Extractor, booru.BooruTagExtractor):
|
||||
class Rule34TagExtractor(booru.TagMixin, Rule34Extractor):
|
||||
"""Extractor for images from rule34.xxx based on search-tags"""
|
||||
pattern = [(r"(?:https?://)?(?:www\.)?rule34\.xxx/(?:index\.php)?"
|
||||
r"\?page=post&s=list&tags=([^&]+)")]
|
||||
r"\?page=post&s=list&tags=(?P<tags>[^&#]+)")]
|
||||
test = [("http://rule34.xxx/index.php?page=post&s=list&tags=danraku", {
|
||||
"url": "104094495973edfe7e764c8f2dd42017163322aa",
|
||||
"content": "a01768c6f86f32eb7ebbdeb87c30b0d9968d7f97",
|
||||
})]
|
||||
|
||||
|
||||
class Rule34PostExtractor(Rule34Extractor, booru.BooruPostExtractor):
|
||||
class Rule34PostExtractor(booru.PostMixin, Rule34Extractor):
|
||||
"""Extractor for single images from rule34.xxx"""
|
||||
pattern = [(r"(?:https?://)?(?:www\.)?rule34\.xxx/(?:index\.php)?"
|
||||
r"\?page=post&s=view&id=(\d+)")]
|
||||
r"\?page=post&s=view&id=(?P<post>\d+)")]
|
||||
test = [("http://rule34.xxx/index.php?page=post&s=view&id=1974854", {
|
||||
"url": "3b1f9817785868d1cd94d5376d20478eed591965",
|
||||
"content": "fd2820df78fb937532da0a46f7af6cefc4dc94be",
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
# Copyright 2015-2017 Mike Fährmann
|
||||
# Copyright 2015-2018 Mike Fährmann
|
||||
#
|
||||
# This program is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License version 2 as
|
||||
@@ -11,31 +11,32 @@
|
||||
from . import booru
|
||||
|
||||
|
||||
class SafebooruExtractor(booru.XMLBooruExtractor):
|
||||
class SafebooruExtractor(booru.XmlParserMixin,
|
||||
booru.GelbooruPageMixin,
|
||||
booru.BooruExtractor):
|
||||
"""Base class for safebooru extractors"""
|
||||
category = "safebooru"
|
||||
api_url = "https://safebooru.org/index.php"
|
||||
pagestart = 0
|
||||
pagekey = "pid"
|
||||
|
||||
def setup(self):
|
||||
def __init__(self, match):
|
||||
super().__init__(match)
|
||||
self.params.update({"page": "dapi", "s": "post", "q": "index"})
|
||||
|
||||
|
||||
class SafebooruTagExtractor(SafebooruExtractor, booru.BooruTagExtractor):
|
||||
class SafebooruTagExtractor(booru.TagMixin, SafebooruExtractor):
|
||||
"""Extractor for images from safebooru.org based on search-tags"""
|
||||
pattern = [(r"(?:https?://)?(?:www\.)?safebooru\.org/(?:index\.php)?"
|
||||
r"\?page=post&s=list&tags=([^&]+)")]
|
||||
r"\?page=post&s=list&tags=(?P<tags>[^&#]+)")]
|
||||
test = [("http://safebooru.org/index.php?page=post&s=list&tags=bonocho", {
|
||||
"url": "17c61b386530cf4c30842c9f580d15ef1cd09586",
|
||||
"content": "e5ad4c5bf241b1def154958535bef6c2f6b733eb",
|
||||
})]
|
||||
|
||||
|
||||
class SafebooruPostExtractor(SafebooruExtractor, booru.BooruPostExtractor):
|
||||
class SafebooruPostExtractor(booru.PostMixin, SafebooruExtractor):
|
||||
"""Extractor for single images from safebooru.org"""
|
||||
pattern = [(r"(?:https?://)?(?:www\.)?safebooru\.org/(?:index\.php)?"
|
||||
r"\?page=post&s=view&id=(\d+)")]
|
||||
r"\?page=post&s=view&id=(?P<post>\d+)")]
|
||||
test = [("http://safebooru.org/index.php?page=post&s=view&id=1169132", {
|
||||
"url": "cf05e37a3c62b2d55788e2080b8eabedb00f999b",
|
||||
"content": "93b293b27dabd198afafabbaf87c49863ac82f27",
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
# Copyright 2015-2017 Mike Fährmann
|
||||
# Copyright 2015-2018 Mike Fährmann
|
||||
#
|
||||
# This program is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License version 2 as
|
||||
@@ -11,40 +11,44 @@
|
||||
from . import booru
|
||||
|
||||
|
||||
class YandereExtractor(booru.JSONBooruExtractor):
|
||||
class YandereExtractor(booru.JsonParserMixin,
|
||||
booru.MoebooruPageMixin,
|
||||
booru.BooruExtractor):
|
||||
"""Base class for yandere extractors"""
|
||||
category = "yandere"
|
||||
api_url = "https://yande.re/post.json"
|
||||
|
||||
|
||||
class YandereTagExtractor(YandereExtractor, booru.BooruTagExtractor):
|
||||
class YandereTagExtractor(booru.TagMixin, YandereExtractor):
|
||||
"""Extractor for images from yande.re based on search-tags"""
|
||||
pattern = [r"(?:https?://)?(?:www\.)?yande\.re/post\?tags=([^&]+)"]
|
||||
pattern = [r"(?:https?://)?(?:www\.)?yande\.re"
|
||||
r"/post\?(?:[^&#]*&)*tags=(?P<tags>[^&#]+)"]
|
||||
test = [("https://yande.re/post?tags=ouzoku+armor", {
|
||||
"content": "59201811c728096b2d95ce6896fd0009235fe683",
|
||||
})]
|
||||
|
||||
|
||||
class YanderePoolExtractor(YandereExtractor, booru.BooruPoolExtractor):
|
||||
class YanderePoolExtractor(booru.PoolMixin, YandereExtractor):
|
||||
"""Extractor for image-pools from yande.re"""
|
||||
pattern = [r"(?:https?://)?(?:www\.)?yande\.re/pool/show/(\d+)"]
|
||||
pattern = [r"(?:https?://)?(?:www\.)?yande\.re/pool/show/(?P<pool>\d+)"]
|
||||
test = [("https://yande.re/pool/show/318", {
|
||||
"content": "2a35b9d6edecce11cc2918c6dce4de2198342b68",
|
||||
})]
|
||||
|
||||
|
||||
class YanderePostExtractor(YandereExtractor, booru.BooruPostExtractor):
|
||||
class YanderePostExtractor(booru.PostMixin, YandereExtractor):
|
||||
"""Extractor for single images from yande.re"""
|
||||
pattern = [r"(?:https?://)?(?:www\.)?yande\.re/post/show/(\d+)"]
|
||||
pattern = [r"(?:https?://)?(?:www\.)?yande\.re/post/show/(?P<post>\d+)"]
|
||||
test = [("https://yande.re/post/show/51824", {
|
||||
"content": "59201811c728096b2d95ce6896fd0009235fe683",
|
||||
})]
|
||||
|
||||
|
||||
class YanderePopularExtractor(YandereExtractor, booru.BooruPopularExtractor):
|
||||
class YanderePopularExtractor(booru.MoebooruPopularMixin, YandereExtractor):
|
||||
"""Extractor for popular images from yande.re"""
|
||||
pattern = [r"(?:https?://)?(?:www\.)?yande\.re/post/popular_"
|
||||
r"(by_(?:day|week|month)|recent)(?:\?([^#]*))?"]
|
||||
pattern = [r"(?:https?://)?(?:www\.)?yande\.re"
|
||||
r"/post/popular_(?P<scale>by_(?:day|week|month)|recent)"
|
||||
r"(?:\?(?P<query>[^#]*))?"]
|
||||
test = [
|
||||
("https://yande.re/post/popular_by_month?month=6&year=2014", {
|
||||
"count": 40,
|
||||
@@ -52,6 +56,7 @@ class YanderePopularExtractor(YandereExtractor, booru.BooruPopularExtractor):
|
||||
("https://yande.re/post/popular_recent", None),
|
||||
]
|
||||
|
||||
@property
|
||||
def api_url(self):
|
||||
return "https://yande.re/post/popular_" + self.scale + ".json"
|
||||
def __init__(self, match):
|
||||
super().__init__(match)
|
||||
self.api_url = "https://yande.re/post/popular_{scale}.json".format(
|
||||
scale=self.scale)
|
||||
|
||||
Reference in New Issue
Block a user