[booru] rewrite using Mixin classes (#59)

- improved code structure
- improved URL patterns
- better pagination to work around page limits on
  - Danbooru
  - e621
  - 3dbooru
This commit is contained in:
Mike Fährmann
2018-01-03 23:52:01 +01:00
parent 0876541e43
commit 9e8a84ab6c
8 changed files with 298 additions and 224 deletions

View File

@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
# Copyright 2015-2017 Mike Fährmann
# Copyright 2015-2018 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -11,57 +11,65 @@
from . import booru
class ThreedeebooruExtractor(booru.JSONBooruExtractor):
class ThreedeebooruExtractor(booru.JsonParserMixin,
booru.MoebooruPageMixin,
booru.BooruExtractor):
"""Base class for 3dbooru extractors"""
category = "3dbooru"
api_url = "http://behoimi.org/post/index.json"
headers = {
"Referer": "http://behoimi.org/post/show/",
"Accept-Encoding": "identity",
}
page_limit = 1000
def __init__(self, match):
super().__init__(match)
self.session.headers.update({
"Referer": "http://behoimi.org/post/show/",
"Accept-Encoding": "identity",
})
class ThreedeebooruTagExtractor(ThreedeebooruExtractor,
booru.BooruTagExtractor):
class ThreedeebooruTagExtractor(booru.TagMixin,
ThreedeebooruExtractor):
"""Extractor for images from behoimi.org based on search-tags"""
pattern = [r"(?:https?://)?(?:www\.)?behoimi\.org/post"
r"(?:/(?:index)?)?\?tags=([^&]+)"]
r"(?:/(?:index)?)?\?tags=(?P<tags>[^&#]+)"]
test = [("http://behoimi.org/post?tags=himekawa_azuru+dress", {
"url": "ecb30c6aaaf8a6ff8f55255737a9840832a483c1",
"content": "11cbda40c287e026c1ce4ca430810f761f2d0b2a",
})]
class ThreedeebooruPoolExtractor(ThreedeebooruExtractor,
booru.BooruPoolExtractor):
class ThreedeebooruPoolExtractor(booru.PoolMixin,
ThreedeebooruExtractor):
"""Extractor for image-pools from behoimi.org"""
pattern = [r"(?:https?://)?(?:www\.)?behoimi\.org/pool/show/(\d+)"]
pattern = [r"(?:https?://)?(?:www\.)?behoimi\.org/pool/show/(?P<pool>\d+)"]
test = [("http://behoimi.org/pool/show/27", {
"url": "da75d2d1475449d5ef0c266cb612683b110a30f2",
"content": "fd5b37c5c6c2de4b4d6f1facffdefa1e28176554",
})]
class ThreedeebooruPostExtractor(ThreedeebooruExtractor,
booru.BooruPostExtractor):
class ThreedeebooruPostExtractor(booru.PostMixin,
ThreedeebooruExtractor):
"""Extractor for single images from behoimi.org"""
pattern = [r"(?:https?://)?(?:www\.)?behoimi\.org/post/show/(\d+)"]
pattern = [r"(?:https?://)?(?:www\.)?behoimi\.org/post/show/(?P<post>\d+)"]
test = [("http://behoimi.org/post/show/140852", {
"url": "ce874ea26f01d6c94795f3cc3aaaaa9bc325f2f6",
"content": "26549d55b82aa9a6c1686b96af8bfcfa50805cd4",
})]
class ThreedeebooruPopularExtractor(ThreedeebooruExtractor,
booru.BooruPopularExtractor):
class ThreedeebooruPopularExtractor(booru.MoebooruPopularMixin,
ThreedeebooruExtractor):
"""Extractor for popular images from behoimi.org"""
pattern = [r"(?:https?://)?(?:www\.)?behoimi\.org/post/popular_"
r"(by_(?:day|week|month)|recent)(?:\?([^#]*))?"]
pattern = [r"(?:https?://)?(?:www\.)?behoimi\.org"
r"/post/popular_(?P<scale>by_(?:day|week|month)|recent)"
r"(?:\?(?P<query>[^#]*))?"]
test = [("http://behoimi.org/post/popular_by_month?month=2&year=2013", {
"url": "a447e115fdab60c25ab71c4fdb1b9f509bc23f99",
"count": 20,
})]
@property
def api_url(self):
return "http://behoimi.org/post/popular_" + self.scale + ".json"
def __init__(self, match):
super().__init__(match)
self.api_url = "http://behoimi.org/post/popular_{scale}.json".format(
scale=self.scale)

View File

@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
# Copyright 2015-2017 Mike Fährmann
# Copyright 2015-2018 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -10,8 +10,8 @@
from .common import SharedConfigExtractor, Message
from .. import text
import xml.etree.ElementTree as ET
import urllib.parse
from urllib.parse import urljoin
from xml.etree import ElementTree
import datetime
import operator
@@ -20,166 +20,158 @@ class BooruExtractor(SharedConfigExtractor):
"""Base class for all booru extractors"""
basecategory = "booru"
filename_fmt = "{category}_{id}_{md5}.{extension}"
headers = {}
pagestart = 1
pagekey = "page"
api_url = ""
per_page = 50
page_start = 1
page_limit = None
sort = False
def __init__(self):
SharedConfigExtractor.__init__(self)
self.session.headers.update(self.headers)
self.params = {"limit": 50}
self.setup()
def __init__(self, match):
super().__init__()
self.params = {}
def skip(self, num):
    """Skip over 'num' posts without downloading them.

    Only whole pages are skipped; the number of posts actually
    skipped (a multiple of per_page) is returned.  For sites with a
    hard page limit, page_start is clamped so it never runs past
    page_limit.
    """
    full_pages = num // self.per_page
    if self.page_limit:
        # do not advance past the highest page the site will serve
        remaining = self.page_limit - self.page_start
        full_pages = min(full_pages, remaining)
    self.page_start += full_pages
    return full_pages * self.per_page
def items(self):
yield Message.Version, 1
yield Message.Directory, self.get_job_metadata()
for data in self.items_impl():
try:
url = self.get_file_url(data)
data = self.get_file_metadata(data)
yield Message.Url, url, data
except KeyError:
continue
yield Message.Directory, self.get_metadata()
def skip(self, num):
limit = self.params["limit"]
pages = num // limit
self.pagestart += pages
return pages * limit
self.reset_page()
while True:
images, count = self.parse_response(
self.request(self.api_url, params=self.params))
def items_impl(self):
pass
for data in images:
try:
url = data["file_url"]
if url.startswith("/"):
url = urljoin(self.api_url, url)
yield Message.Url, url, text.nameext_from_url(url, data)
except KeyError:
continue
def setup(self):
pass
if count < self.per_page:
return
self.update_page(data)
def update_page(self, reset=False):
"""Update the value of the 'page' parameter"""
# Override this method in derived classes if necessary.
# It is usually enough to just adjust the 'page' attribute
if reset is False:
self.params[self.pagekey] += 1
else:
self.params[self.pagekey] = self.pagestart
def reset_page(self):
    """Initialize params to point to the first page"""
    # NOTE(review): page_start may be None for popular extractors
    # (PopularMixin sets page_start = None) -- presumably the API then
    # falls back to its first page; confirm against the sites' APIs.
    self.params["page"] = self.page_start
def get_job_metadata(self):
def update_page(self, data):
"""Update params to point to the next page"""
def get_metadata(self):
"""Collect metadata for extractor-job"""
# Override this method in derived classes
return {}
def get_file_metadata(self, data):
"""Collect metadata for a downloadable file"""
return text.nameext_from_url(self.get_file_url(data), data)
def get_file_url(self, data):
"""Extract download-url from 'data'"""
url = data["file_url"]
if url.startswith("/"):
url = urllib.parse.urljoin(self.api_url, url)
return url
class JSONBooruExtractor(BooruExtractor):
"""Base class for JSON based API responses"""
class JsonParserMixin():
"""Class for JSON based API responses"""
sort = False
def items_impl(self):
self.update_page(reset=True)
while True:
images = self.request(self.api_url, params=self.params).json()
if self.sort:
images.sort(key=operator.itemgetter("score", "id"),
reverse=True)
yield from images
if len(images) < self.params["limit"]:
return
self.update_page()
def parse_response(self, response):
    """Decode a JSON API response.

    Returns a (images, count) pair, where 'images' is the decoded
    list of post dicts (sorted best-first when self.sort is set) and
    'count' is its length.
    """
    images = response.json()
    if self.sort:
        # highest score first; post id breaks ties
        images.sort(key=lambda image: (image["score"], image["id"]),
                    reverse=True)
    return images, len(images)
class XMLBooruExtractor(BooruExtractor):
"""Base class for XML based API responses"""
def items_impl(self):
self.update_page(reset=True)
while True:
root = ET.fromstring(
self.request(self.api_url, params=self.params).text
)
for item in root:
yield item.attrib
if len(root) < self.params["limit"]:
return
self.update_page()
class XmlParserMixin():
    """Mixin for XML based API responses"""

    def parse_response(self, response):
        """Parse an XML API response.

        Returns a (posts, count) pair, where 'posts' is a list of the
        attribute dicts of the root element's children and 'count' is
        the number of children.
        """
        root = ElementTree.fromstring(response.text)
        # a comprehension is more idiomatic than map(lambda x: ..., root)
        # and yields a concrete list instead of a one-shot iterator
        return [element.attrib for element in root], len(root)
class BooruTagExtractor(BooruExtractor):
"""Extractor for images based on search-tags"""
class DanbooruPageMixin():
    """Pagination for Danbooru v2"""

    def update_page(self, data):
        """Update params to point to the next page.

        A page value of 'b<id>' asks the Danbooru v2 API for the posts
        before the last post id seen on the current page.
        """
        self.params["page"] = "b" + str(data["id"])
class MoebooruPageMixin():
    """Pagination for Moebooru and Danbooru v1"""

    def update_page(self, data):
        """Update params to point to the next page.

        Sites with a hard page limit are paginated by 'before_id'
        (keyed on the last seen post id) so that results past the
        limit stay reachable; otherwise the page number is simply
        incremented.
        """
        # (removed a stray debug print of self.params left over
        # from development)
        if self.page_limit:
            self.params["page"] = None
            self.params["before_id"] = data["id"]
        else:
            self.params["page"] += 1
class GelbooruPageMixin():
    """Pagination for Gelbooru-like sites"""

    def reset_page(self):
        """Point params at the first page of results."""
        # the 'pid' parameter is zero-based, unlike page_start
        self.params["pid"] = self.page_start - 1

    def update_page(self, data):
        """Advance params to the next page of results."""
        self.params["pid"] = self.params["pid"] + 1
class TagMixin():
"""Extraction of images based on search-tags"""
subcategory = "tag"
directory_fmt = ["{category}", "{tags}"]
def __init__(self, match):
BooruExtractor.__init__(self)
self.tags = text.unquote(match.group(1).replace("+", " "))
super().__init__(match)
self.tags = text.unquote(match.group("tags").replace("+", " "))
self.params["tags"] = self.tags
self.params["limit"] = self.per_page
def get_job_metadata(self):
def get_metadata(self):
return {"tags": self.tags}
class BooruPoolExtractor(BooruExtractor):
"""Extractor for image-pools"""
class PoolMixin():
"""Extraction of image-pools"""
subcategory = "pool"
directory_fmt = ["{category}", "pool", "{pool}"]
def __init__(self, match):
BooruExtractor.__init__(self)
self.pool = match.group(1)
super().__init__(match)
self.pool = match.group("pool")
self.params["tags"] = "pool:" + self.pool
self.params["limit"] = self.per_page
def get_job_metadata(self):
def get_metadata(self):
return {"pool": self.pool}
class BooruPostExtractor(BooruExtractor):
"""Extractor for single images"""
class PostMixin():
"""Extraction of a single image-post"""
subcategory = "post"
def __init__(self, match):
BooruExtractor.__init__(self)
self.post = match.group(1)
super().__init__(match)
self.post = match.group("post")
self.params["tags"] = "id:" + self.post
def get_metadata(self):
return {}
class BooruPopularExtractor(BooruExtractor):
"""Extractor for popular images"""
class PopularMixin():
"""Extraction and metadata handling for Danbooru v2"""
subcategory = "popular"
directory_fmt = ["{category}", "popular", "{scale}", "{date}"]
page_start = None
def __init__(self, match):
BooruExtractor.__init__(self)
super().__init__(match)
self.sort = True
self.scale = match.group(1)
self.params.update(text.parse_query(match.group(2)))
self.params.update(text.parse_query(match.group("query")))
def get_job_metadata(self, fmt="%Y-%m-%d"):
if "scale" in self.params:
scale = self.params["scale"]
elif self.scale:
scale = self.scale
if scale.startswith("by_"):
scale = scale[3:]
else:
scale = "day"
if "date" in self.params:
date = self.params["date"][:10]
elif "year" in self.params:
date = "{:>04}-{:>02}-{:>02}".format(
self.params["year"],
self.params.get("month", "01"),
self.params.get("day", "01"))
else:
date = datetime.datetime.utcnow().strftime(fmt)
def get_metadata(self, fmt="%Y-%m-%d"):
date = self.get_date() or datetime.datetime.utcnow().strftime(fmt)
scale = self.get_scale() or "day"
if scale == "week":
dt = datetime.datetime.strptime(date, fmt)
@@ -189,3 +181,33 @@ class BooruPopularExtractor(BooruExtractor):
date = date[:-3]
return {"date": date, "scale": scale}
def get_scale(self):
    """Return the 'scale' request parameter, or None if not given."""
    # dict.get replaces the contains-then-index pattern
    return self.params.get("scale")

def get_date(self):
    """Return the date ('YYYY-MM-DD') from request parameters, or None."""
    date = self.params.get("date")
    return date[:10] if date is not None else None
class MoebooruPopularMixin(PopularMixin):
    """Extraction and metadata handling for Moebooru and Danbooru v1"""

    def __init__(self, match):
        super().__init__(match)
        self.scale = match.group("scale")

    def get_date(self):
        """Build a 'YYYY-MM-DD' date from year/month/day parameters."""
        params = self.params
        if "year" not in params:
            return None
        return "{:>04}-{:>02}-{:>02}".format(
            params["year"],
            params.get("month", "01"),
            params.get("day", "01"),
        )

    def get_scale(self):
        """Return the timescale from the URL, without a 'by_' prefix."""
        scale = self.scale
        if scale and scale.startswith("by_"):
            return scale[3:]
        return scale

View File

@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
# Copyright 2014-2017 Mike Fährmann
# Copyright 2014-2018 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -11,16 +11,19 @@
from . import booru
class DanbooruExtractor(booru.JSONBooruExtractor):
class DanbooruExtractor(booru.JsonParserMixin,
booru.DanbooruPageMixin,
booru.BooruExtractor):
"""Base class for danbooru extractors"""
category = "danbooru"
api_url = "https://danbooru.donmai.us/posts.json"
page_limit = 1000
class DanbooruTagExtractor(DanbooruExtractor, booru.BooruTagExtractor):
class DanbooruTagExtractor(booru.TagMixin, DanbooruExtractor):
"""Extractor for images from danbooru based on search-tags"""
pattern = [r"(?:https?://)?(?:danbooru|hijiribe|sonohara)\.donmai\.us"
r"/posts\?(?:[^&#]*&)*tags=([^&#]+)"]
r"/posts\?(?:[^&#]*&)*tags=(?P<tags>[^&#]+)"]
test = [
("https://danbooru.donmai.us/posts?tags=bonocho", {
"content": "b196fb9f1668109d7774a0a82efea3ffdda07746",
@@ -30,28 +33,28 @@ class DanbooruTagExtractor(DanbooruExtractor, booru.BooruTagExtractor):
]
class DanbooruPoolExtractor(DanbooruExtractor, booru.BooruPoolExtractor):
class DanbooruPoolExtractor(booru.PoolMixin, DanbooruExtractor):
"""Extractor for image-pools from danbooru"""
pattern = [r"(?:https?://)?(?:danbooru|hijiribe|sonohara)\.donmai\.us"
r"/pools/(\d+)"]
r"/pools/(?P<pool>\d+)"]
test = [("https://danbooru.donmai.us/pools/7659", {
"content": "b16bab12bea5f7ea9e0a836bf8045f280e113d99",
})]
class DanbooruPostExtractor(DanbooruExtractor, booru.BooruPostExtractor):
class DanbooruPostExtractor(booru.PostMixin, DanbooruExtractor):
"""Extractor for single images from danbooru"""
pattern = [r"(?:https?://)?(?:danbooru|hijiribe|sonohara)\.donmai\.us"
r"/posts/(\d+)"]
r"/posts/(?P<post>\d+)"]
test = [("https://danbooru.donmai.us/posts/294929", {
"content": "5e255713cbf0a8e0801dc423563c34d896bb9229",
})]
class DanbooruPopularExtractor(DanbooruExtractor, booru.BooruPopularExtractor):
class DanbooruPopularExtractor(booru.PopularMixin, DanbooruExtractor):
"""Extractor for popular images from danbooru"""
pattern = [r"(?:https?://)?(?:danbooru|hijiribe|sonohara)\.donmai\.us"
r"/explore/posts/popular()(?:\?([^#]*))?"]
r"/explore/posts/popular(?:\?(?P<query>[^#]*))?"]
test = [
("https://danbooru.donmai.us/explore/posts/popular", None),
(("https://danbooru.donmai.us/explore/posts/popular"

View File

@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
# Copyright 2014-2017 Mike Fährmann
# Copyright 2014-2018 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -11,50 +11,58 @@
from . import booru
class E621Extractor(booru.JSONBooruExtractor):
class E621Extractor(booru.JsonParserMixin,
booru.MoebooruPageMixin,
booru.BooruExtractor):
"""Base class for e621 extractors"""
category = "e621"
api_url = "https://e621.net/post/index.json"
page_limit = 750
class E621TagExtractor(E621Extractor, booru.BooruTagExtractor):
class E621TagExtractor(booru.TagMixin, E621Extractor):
"""Extractor for images from e621.net based on search-tags"""
pattern = [
r"(?:https?://)?(?:www\.)?e621\.net/post/index/\d+/([^?]+)",
r"(?:https?://)?(?:www\.)?e621\.net/post\?tags=([^&]+)",
r"(?:https?://)?(?:www\.)?e621\.net/post/index/\d+/(?P<tags>[^/?&#]+)",
r"(?:https?://)?(?:www\.)?e621\.net/post\?tags=(?P<tags>[^&#]+)",
]
test = [
("https://e621.net/post/index/1/anry", {
"url": "8021e5ea28d47c474c1ffc9bd44863c4d45700ba",
"content": "501d1e5d922da20ee8ff9806f5ed3ce3a684fd58",
}),
("https://e621.net/post?tags=anry", None),
]
test = [("https://e621.net/post/index/1/anry", {
"url": "8021e5ea28d47c474c1ffc9bd44863c4d45700ba",
"content": "501d1e5d922da20ee8ff9806f5ed3ce3a684fd58",
})]
class E621PoolExtractor(E621Extractor, booru.BooruPoolExtractor):
class E621PoolExtractor(booru.PoolMixin, E621Extractor):
"""Extractor for image-pools from e621.net"""
pattern = [r"(?:https?://)?(?:www\.)?e621\.net/pool/show/(\d+)"]
pattern = [r"(?:https?://)?(?:www\.)?e621\.net/pool/show/(?P<pool>\d+)"]
test = [("https://e621.net/pool/show/73", {
"url": "842f2fb065c7c339486a9b1d689020b8569888ed",
"content": "c2c87b7a9150509496cddc75ccab08109922876a",
})]
class E621PostExtractor(E621Extractor, booru.BooruPostExtractor):
class E621PostExtractor(booru.PostMixin, E621Extractor):
"""Extractor for single images from e621.net"""
pattern = [r"(?:https?://)?(?:www\.)?e621\.net/post/show/(\d+)"]
pattern = [r"(?:https?://)?(?:www\.)?e621\.net/post/show/(?P<post>\d+)"]
test = [("https://e621.net/post/show/535", {
"url": "f7f78b44c9b88f8f09caac080adc8d6d9fdaa529",
"content": "66f46e96a893fba8e694c4e049b23c2acc9af462",
})]
class E621PopularExtractor(E621Extractor, booru.BooruPopularExtractor):
class E621PopularExtractor(booru.MoebooruPopularMixin, E621Extractor):
"""Extractor for popular images from 621.net"""
pattern = [r"(?:https?://)?(?:www\.)?e621\.net/post/popular_by_"
r"(day|week|month)(?:\?([^#]*))?"]
pattern = [r"(?:https?://)?(?:www\.)?e621\.net"
r"/post/popular_by_(?P<scale>day|week|month)"
r"(?:\?(?P<query>[^#]*))?"]
test = [("https://e621.net/post/popular_by_month?month=6&year=2013", {
"count": 32,
})]
@property
def api_url(self):
return "https://e621.net/post/popular_by_" + self.scale + ".json"
def __init__(self, match):
super().__init__(match)
self.api_url = "https://e621.net/post/popular_by_{scale}.json".format(
scale=self.scale)

View File

@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
# Copyright 2015-2017 Mike Fährmann
# Copyright 2015-2018 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -11,44 +11,69 @@
from . import booru
class KonachanExtractor(booru.JSONBooruExtractor):
class KonachanExtractor(booru.JsonParserMixin,
booru.MoebooruPageMixin,
booru.BooruExtractor):
"""Base class for konachan extractors"""
category = "konachan"
api_url = "https://konachan.com/post.json"
def __init__(self, match):
super().__init__(match)
self.api_url = "https://konachan.{tld}/post.json".format(
tld=match.group("tld"))
class KonachanTagExtractor(KonachanExtractor, booru.BooruTagExtractor):
class KonachanTagExtractor(booru.TagMixin, KonachanExtractor):
"""Extractor for images from konachan.com based on search-tags"""
pattern = [r"(?:https?://)?(?:www\.)?konachan\.com/post\?tags=([^&]+)"]
test = [("http://konachan.com/post?tags=patata", {
"content": "838cfb815e31f48160855435655ddf7bfc4ecb8d",
})]
pattern = [r"(?:https?://)?(?:www\.)?konachan\.(?P<tld>com|net)"
r"/post\?(?:[^&#]*&)*tags=(?P<tags>[^&#]+)"]
test = [
("http://konachan.com/post?tags=patata", {
"content": "838cfb815e31f48160855435655ddf7bfc4ecb8d",
}),
("http://konachan.net/post?tags=patata", None),
]
class KonachanPoolExtractor(KonachanExtractor, booru.BooruPoolExtractor):
class KonachanPoolExtractor(booru.PoolMixin, KonachanExtractor):
"""Extractor for image-pools from konachan.com"""
pattern = [r"(?:https?://)?(?:www\.)?konachan\.com/pool/show/(\d+)"]
test = [("http://konachan.com/pool/show/95", {
"content": "cf0546e38a93c2c510a478f8744e60687b7a8426",
})]
pattern = [r"(?:https?://)?(?:www\.)?konachan\.(?P<tld>com|net)"
r"/pool/show/(?P<pool>\d+)"]
test = [
("http://konachan.com/pool/show/95", {
"content": "cf0546e38a93c2c510a478f8744e60687b7a8426",
}),
("http://konachan.net/pool/show/95", None),
]
class KonachanPostExtractor(KonachanExtractor, booru.BooruPostExtractor):
class KonachanPostExtractor(booru.PostMixin, KonachanExtractor):
"""Extractor for single images from konachan.com"""
pattern = [r"(?:https?://)?(?:www\.)?konachan\.com/post/show/(\d+)"]
test = [("http://konachan.com/post/show/205189", {
"content": "674e75a753df82f5ad80803f575818b8e46e4b65",
})]
pattern = [r"(?:https?://)?(?:www\.)?konachan\.(?P<tld>com|net)"
r"/post/show/(?P<post>\d+)"]
test = [
("http://konachan.com/post/show/205189", {
"content": "674e75a753df82f5ad80803f575818b8e46e4b65",
}),
("http://konachan.com/post/show/205189", None),
]
class KonachanPopularExtractor(KonachanExtractor, booru.BooruPopularExtractor):
class KonachanPopularExtractor(booru.MoebooruPopularMixin, KonachanExtractor):
"""Extractor for popular images from konachan.com"""
pattern = [r"(?:https?://)?(?:www\.)?konachan\.com/post/popular_"
r"(by_(?:day|week|month)|recent)(?:\?([^#]*))?"]
test = [("https://konachan.com/post/popular_by_month?month=11&year=2010", {
"count": 20,
})]
pattern = [r"(?:https?://)?(?:www\.)?konachan\.(?P<tld>com|net)"
r"/post/popular_(?P<scale>by_(?:day|week|month)|recent)"
r"(?:\?(?P<query>[^#]*))?"]
test = [
("https://konachan.com/post/popular_by_month?month=11&year=2010", {
"count": 20,
}),
("https://konachan.com/post/popular_recent", None),
("https://konachan.net/post/popular_recent", None),
]
@property
def api_url(self):
return "https://konachan.com/post/popular_" + self.scale + ".json"
def __init__(self, match):
super().__init__(match)
self.api_url = (
"https://konachan.{tld}/post/popular_{scale}.json".format(
tld=match.group("tld"), scale=self.scale))

View File

@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
# Copyright 2016-2017 Mike Fährmann
# Copyright 2016-2018 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -11,31 +11,33 @@
from . import booru
class Rule34Extractor(booru.XMLBooruExtractor):
class Rule34Extractor(booru.XmlParserMixin,
booru.GelbooruPageMixin,
booru.BooruExtractor):
"""Base class for rule34 extractors"""
category = "rule34"
api_url = "https://rule34.xxx/index.php"
pagestart = 0
pagekey = "pid"
page_limit = 4000
def setup(self):
def __init__(self, match):
super().__init__(match)
self.params.update({"page": "dapi", "s": "post", "q": "index"})
class Rule34TagExtractor(Rule34Extractor, booru.BooruTagExtractor):
class Rule34TagExtractor(booru.TagMixin, Rule34Extractor):
"""Extractor for images from rule34.xxx based on search-tags"""
pattern = [(r"(?:https?://)?(?:www\.)?rule34\.xxx/(?:index\.php)?"
r"\?page=post&s=list&tags=([^&]+)")]
r"\?page=post&s=list&tags=(?P<tags>[^&#]+)")]
test = [("http://rule34.xxx/index.php?page=post&s=list&tags=danraku", {
"url": "104094495973edfe7e764c8f2dd42017163322aa",
"content": "a01768c6f86f32eb7ebbdeb87c30b0d9968d7f97",
})]
class Rule34PostExtractor(Rule34Extractor, booru.BooruPostExtractor):
class Rule34PostExtractor(booru.PostMixin, Rule34Extractor):
"""Extractor for single images from rule34.xxx"""
pattern = [(r"(?:https?://)?(?:www\.)?rule34\.xxx/(?:index\.php)?"
r"\?page=post&s=view&id=(\d+)")]
r"\?page=post&s=view&id=(?P<post>\d+)")]
test = [("http://rule34.xxx/index.php?page=post&s=view&id=1974854", {
"url": "3b1f9817785868d1cd94d5376d20478eed591965",
"content": "fd2820df78fb937532da0a46f7af6cefc4dc94be",

View File

@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
# Copyright 2015-2017 Mike Fährmann
# Copyright 2015-2018 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -11,31 +11,32 @@
from . import booru
class SafebooruExtractor(booru.XMLBooruExtractor):
class SafebooruExtractor(booru.XmlParserMixin,
booru.GelbooruPageMixin,
booru.BooruExtractor):
"""Base class for safebooru extractors"""
category = "safebooru"
api_url = "https://safebooru.org/index.php"
pagestart = 0
pagekey = "pid"
def setup(self):
def __init__(self, match):
super().__init__(match)
self.params.update({"page": "dapi", "s": "post", "q": "index"})
class SafebooruTagExtractor(SafebooruExtractor, booru.BooruTagExtractor):
class SafebooruTagExtractor(booru.TagMixin, SafebooruExtractor):
"""Extractor for images from safebooru.org based on search-tags"""
pattern = [(r"(?:https?://)?(?:www\.)?safebooru\.org/(?:index\.php)?"
r"\?page=post&s=list&tags=([^&]+)")]
r"\?page=post&s=list&tags=(?P<tags>[^&#]+)")]
test = [("http://safebooru.org/index.php?page=post&s=list&tags=bonocho", {
"url": "17c61b386530cf4c30842c9f580d15ef1cd09586",
"content": "e5ad4c5bf241b1def154958535bef6c2f6b733eb",
})]
class SafebooruPostExtractor(SafebooruExtractor, booru.BooruPostExtractor):
class SafebooruPostExtractor(booru.PostMixin, SafebooruExtractor):
"""Extractor for single images from safebooru.org"""
pattern = [(r"(?:https?://)?(?:www\.)?safebooru\.org/(?:index\.php)?"
r"\?page=post&s=view&id=(\d+)")]
r"\?page=post&s=view&id=(?P<post>\d+)")]
test = [("http://safebooru.org/index.php?page=post&s=view&id=1169132", {
"url": "cf05e37a3c62b2d55788e2080b8eabedb00f999b",
"content": "93b293b27dabd198afafabbaf87c49863ac82f27",

View File

@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
# Copyright 2015-2017 Mike Fährmann
# Copyright 2015-2018 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -11,40 +11,44 @@
from . import booru
class YandereExtractor(booru.JSONBooruExtractor):
class YandereExtractor(booru.JsonParserMixin,
booru.MoebooruPageMixin,
booru.BooruExtractor):
"""Base class for yandere extractors"""
category = "yandere"
api_url = "https://yande.re/post.json"
class YandereTagExtractor(YandereExtractor, booru.BooruTagExtractor):
class YandereTagExtractor(booru.TagMixin, YandereExtractor):
"""Extractor for images from yande.re based on search-tags"""
pattern = [r"(?:https?://)?(?:www\.)?yande\.re/post\?tags=([^&]+)"]
pattern = [r"(?:https?://)?(?:www\.)?yande\.re"
r"/post\?(?:[^&#]*&)*tags=(?P<tags>[^&#]+)"]
test = [("https://yande.re/post?tags=ouzoku+armor", {
"content": "59201811c728096b2d95ce6896fd0009235fe683",
})]
class YanderePoolExtractor(YandereExtractor, booru.BooruPoolExtractor):
class YanderePoolExtractor(booru.PoolMixin, YandereExtractor):
"""Extractor for image-pools from yande.re"""
pattern = [r"(?:https?://)?(?:www\.)?yande\.re/pool/show/(\d+)"]
pattern = [r"(?:https?://)?(?:www\.)?yande\.re/pool/show/(?P<pool>\d+)"]
test = [("https://yande.re/pool/show/318", {
"content": "2a35b9d6edecce11cc2918c6dce4de2198342b68",
})]
class YanderePostExtractor(YandereExtractor, booru.BooruPostExtractor):
class YanderePostExtractor(booru.PostMixin, YandereExtractor):
"""Extractor for single images from yande.re"""
pattern = [r"(?:https?://)?(?:www\.)?yande\.re/post/show/(\d+)"]
pattern = [r"(?:https?://)?(?:www\.)?yande\.re/post/show/(?P<post>\d+)"]
test = [("https://yande.re/post/show/51824", {
"content": "59201811c728096b2d95ce6896fd0009235fe683",
})]
class YanderePopularExtractor(YandereExtractor, booru.BooruPopularExtractor):
class YanderePopularExtractor(booru.MoebooruPopularMixin, YandereExtractor):
"""Extractor for popular images from yande.re"""
pattern = [r"(?:https?://)?(?:www\.)?yande\.re/post/popular_"
r"(by_(?:day|week|month)|recent)(?:\?([^#]*))?"]
pattern = [r"(?:https?://)?(?:www\.)?yande\.re"
r"/post/popular_(?P<scale>by_(?:day|week|month)|recent)"
r"(?:\?(?P<query>[^#]*))?"]
test = [
("https://yande.re/post/popular_by_month?month=6&year=2014", {
"count": 40,
@@ -52,6 +56,7 @@ class YanderePopularExtractor(YandereExtractor, booru.BooruPopularExtractor):
("https://yande.re/post/popular_recent", None),
]
@property
def api_url(self):
return "https://yande.re/post/popular_" + self.scale + ".json"
def __init__(self, match):
super().__init__(match)
self.api_url = "https://yande.re/post/popular_{scale}.json".format(
scale=self.scale)