[reactor] inherit from BaseExtractor
This commit is contained in:
@@ -379,12 +379,6 @@ Consider all sites to be NSFW unless otherwise known.
|
|||||||
<td>Publications, User Profiles</td>
|
<td>Publications, User Profiles</td>
|
||||||
<td></td>
|
<td></td>
|
||||||
</tr>
|
</tr>
|
||||||
<tr>
|
|
||||||
<td>Joyreactor</td>
|
|
||||||
<td>http://joyreactor.cc/</td>
|
|
||||||
<td>Posts, Search Results, Tag Searches, User Profiles</td>
|
|
||||||
<td></td>
|
|
||||||
</tr>
|
|
||||||
<tr>
|
<tr>
|
||||||
<td>Keenspot</td>
|
<td>Keenspot</td>
|
||||||
<td>http://www.keenspot.com/</td>
|
<td>http://www.keenspot.com/</td>
|
||||||
@@ -619,12 +613,6 @@ Consider all sites to be NSFW unless otherwise known.
|
|||||||
<td>Galleries, User Profiles</td>
|
<td>Galleries, User Profiles</td>
|
||||||
<td></td>
|
<td></td>
|
||||||
</tr>
|
</tr>
|
||||||
<tr>
|
|
||||||
<td>Pornreactor</td>
|
|
||||||
<td>http://pornreactor.cc/</td>
|
|
||||||
<td>Posts, Search Results, Tag Searches, User Profiles</td>
|
|
||||||
<td></td>
|
|
||||||
</tr>
|
|
||||||
<tr>
|
<tr>
|
||||||
<td>Postimg</td>
|
<td>Postimg</td>
|
||||||
<td>https://postimages.org/</td>
|
<td>https://postimages.org/</td>
|
||||||
@@ -950,6 +938,28 @@ Consider all sites to be NSFW unless otherwise known.
|
|||||||
<td>API Key</td>
|
<td>API Key</td>
|
||||||
</tr>
|
</tr>
|
||||||
|
|
||||||
|
<tr>
|
||||||
|
<td colspan="4"><strong>Reactor Instances</strong></td>
|
||||||
|
</tr>
|
||||||
|
<tr>
|
||||||
|
<td>JoyReactor</td>
|
||||||
|
<td>http://joyreactor.cc/</td>
|
||||||
|
<td>Posts, Search Results, Tag Searches, User Profiles</td>
|
||||||
|
<td></td>
|
||||||
|
</tr>
|
||||||
|
<tr>
|
||||||
|
<td>PornReactor</td>
|
||||||
|
<td>http://pornreactor.cc/</td>
|
||||||
|
<td>Posts, Search Results, Tag Searches, User Profiles</td>
|
||||||
|
<td></td>
|
||||||
|
</tr>
|
||||||
|
<tr>
|
||||||
|
<td>Reactor</td>
|
||||||
|
<td>http://reactor.cc/</td>
|
||||||
|
<td>Posts, Search Results, Tag Searches, User Profiles</td>
|
||||||
|
<td></td>
|
||||||
|
</tr>
|
||||||
|
|
||||||
<tr>
|
<tr>
|
||||||
<td colspan="4"><strong>Moebooru and MyImouto</strong></td>
|
<td colspan="4"><strong>Moebooru and MyImouto</strong></td>
|
||||||
</tr>
|
</tr>
|
||||||
|
|||||||
@@ -8,29 +8,29 @@
|
|||||||
|
|
||||||
"""Generic extractors for *reactor sites"""
|
"""Generic extractors for *reactor sites"""
|
||||||
|
|
||||||
from .common import Extractor, Message
|
from .common import BaseExtractor, Message
|
||||||
from .. import text
|
from .. import text
|
||||||
import urllib.parse
|
import urllib.parse
|
||||||
import json
|
import json
|
||||||
|
|
||||||
BASE_PATTERN = r"(?:https?://)?((?:[^/.]+\.)?reactor\.cc)"
|
|
||||||
|
|
||||||
|
class ReactorExtractor(BaseExtractor):
|
||||||
class ReactorExtractor(Extractor):
|
|
||||||
"""Base class for *reactor.cc extractors"""
|
"""Base class for *reactor.cc extractors"""
|
||||||
basecategory = "reactor"
|
basecategory = "reactor"
|
||||||
filename_fmt = "{post_id}_{num:>02}{title[:100]:?_//}.{extension}"
|
filename_fmt = "{post_id}_{num:>02}{title[:100]:?_//}.{extension}"
|
||||||
archive_fmt = "{post_id}_{num}"
|
archive_fmt = "{post_id}_{num}"
|
||||||
instances = ()
|
|
||||||
request_interval = 5.0
|
request_interval = 5.0
|
||||||
|
|
||||||
def __init__(self, match):
|
def __init__(self, match):
|
||||||
Extractor.__init__(self, match)
|
BaseExtractor.__init__(self, match)
|
||||||
self.root = "http://" + match.group(1)
|
url = text.ensure_http_scheme(match.group(0), "http://")
|
||||||
|
pos = url.index("/", 10)
|
||||||
|
|
||||||
|
self.root, self.path = url[:pos], url[pos:]
|
||||||
self.session.headers["Referer"] = self.root
|
self.session.headers["Referer"] = self.root
|
||||||
self.gif = self.config("gif", False)
|
self.gif = self.config("gif", False)
|
||||||
|
|
||||||
if not self.category:
|
if self.category == "reactor":
|
||||||
# set category based on domain name
|
# set category based on domain name
|
||||||
netloc = urllib.parse.urlsplit(self.root).netloc
|
netloc = urllib.parse.urlsplit(self.root).netloc
|
||||||
self.category = netloc.rpartition(".")[0]
|
self.category = netloc.rpartition(".")[0]
|
||||||
@@ -50,7 +50,7 @@ class ReactorExtractor(Extractor):
|
|||||||
|
|
||||||
def posts(self):
|
def posts(self):
|
||||||
"""Return all relevant post-objects"""
|
"""Return all relevant post-objects"""
|
||||||
return self._pagination(self.url)
|
return self._pagination(self.root + self.path)
|
||||||
|
|
||||||
def _pagination(self, url):
|
def _pagination(self, url):
|
||||||
while True:
|
while True:
|
||||||
@@ -145,29 +145,78 @@ class ReactorExtractor(Extractor):
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
BASE_PATTERN = ReactorExtractor.update({
|
||||||
|
"reactor" : {
|
||||||
|
"root": "http://reactor.cc",
|
||||||
|
"pattern": r"(?:[^/.]+\.)?reactor\.cc",
|
||||||
|
},
|
||||||
|
"joyreactor" : {
|
||||||
|
"root": "http://joyreactor.cc",
|
||||||
|
"pattern": r"(?:www\.)?joyreactor\.c(?:c|om)",
|
||||||
|
},
|
||||||
|
"pornreactor": {
|
||||||
|
"root": "http://pornreactor.cc",
|
||||||
|
"pattern": r"(?:www\.)?(?:pornreactor\.cc|fapreactor.com)",
|
||||||
|
},
|
||||||
|
})
|
||||||
|
|
||||||
|
|
||||||
class ReactorTagExtractor(ReactorExtractor):
|
class ReactorTagExtractor(ReactorExtractor):
|
||||||
"""Extractor for tag searches on *reactor.cc sites"""
|
"""Extractor for tag searches on *reactor.cc sites"""
|
||||||
subcategory = "tag"
|
subcategory = "tag"
|
||||||
directory_fmt = ("{category}", "{search_tags}")
|
directory_fmt = ("{category}", "{search_tags}")
|
||||||
archive_fmt = "{search_tags}_{post_id}_{num}"
|
archive_fmt = "{search_tags}_{post_id}_{num}"
|
||||||
pattern = BASE_PATTERN + r"/tag/([^/?#]+)"
|
pattern = BASE_PATTERN + r"/tag/([^/?#]+)"
|
||||||
test = ("http://anime.reactor.cc/tag/Anime+Art",)
|
test = (
|
||||||
|
("http://reactor.cc/tag/gif"),
|
||||||
|
("http://anime.reactor.cc/tag/Anime+Art"),
|
||||||
|
("http://joyreactor.cc/tag/Advent+Cirno", {
|
||||||
|
"count": ">= 15",
|
||||||
|
}),
|
||||||
|
("http://joyreactor.com/tag/Cirno", {
|
||||||
|
"url": "aa59090590b26f4654881301fe8fe748a51625a8",
|
||||||
|
}),
|
||||||
|
("http://pornreactor.cc/tag/RiceGnat", {
|
||||||
|
"range": "1-25",
|
||||||
|
"count": ">= 25",
|
||||||
|
}),
|
||||||
|
("http://fapreactor.com/tag/RiceGnat"),
|
||||||
|
)
|
||||||
|
|
||||||
def __init__(self, match):
|
def __init__(self, match):
|
||||||
ReactorExtractor.__init__(self, match)
|
ReactorExtractor.__init__(self, match)
|
||||||
self.tag = match.group(2)
|
self.tag = match.group(match.lastindex)
|
||||||
|
|
||||||
def metadata(self):
|
def metadata(self):
|
||||||
return {"search_tags": text.unescape(self.tag).replace("+", " ")}
|
return {"search_tags": text.unescape(self.tag).replace("+", " ")}
|
||||||
|
|
||||||
|
|
||||||
class ReactorSearchExtractor(ReactorTagExtractor):
|
class ReactorSearchExtractor(ReactorExtractor):
|
||||||
"""Extractor for search results on *reactor.cc sites"""
|
"""Extractor for search results on *reactor.cc sites"""
|
||||||
subcategory = "search"
|
subcategory = "search"
|
||||||
directory_fmt = ("{category}", "search", "{search_tags}")
|
directory_fmt = ("{category}", "search", "{search_tags}")
|
||||||
archive_fmt = "s_{search_tags}_{post_id}_{num}"
|
archive_fmt = "s_{search_tags}_{post_id}_{num}"
|
||||||
pattern = BASE_PATTERN + r"/search(?:/|\?q=)([^/?#]+)"
|
pattern = BASE_PATTERN + r"/search(?:/|\?q=)([^/?#]+)"
|
||||||
test = ("http://anime.reactor.cc/search?q=Art",)
|
test = (
|
||||||
|
("http://reactor.cc/search?q=Art"),
|
||||||
|
("http://joyreactor.cc/search/Nature", {
|
||||||
|
"range": "1-25",
|
||||||
|
"count": ">= 20",
|
||||||
|
}),
|
||||||
|
("http://joyreactor.com/search?q=Nature", {
|
||||||
|
"range": "1-25",
|
||||||
|
"count": ">= 20",
|
||||||
|
}),
|
||||||
|
("http://pornreactor.cc/search?q=ecchi+hentai"),
|
||||||
|
("http://fapreactor.com/search/ecchi+hentai"),
|
||||||
|
)
|
||||||
|
|
||||||
|
def __init__(self, match):
|
||||||
|
ReactorExtractor.__init__(self, match)
|
||||||
|
self.tag = match.group(match.lastindex)
|
||||||
|
|
||||||
|
def metadata(self):
|
||||||
|
return {"search_tags": text.unescape(self.tag).replace("+", " ")}
|
||||||
|
|
||||||
|
|
||||||
class ReactorUserExtractor(ReactorExtractor):
|
class ReactorUserExtractor(ReactorExtractor):
|
||||||
@@ -175,11 +224,23 @@ class ReactorUserExtractor(ReactorExtractor):
|
|||||||
subcategory = "user"
|
subcategory = "user"
|
||||||
directory_fmt = ("{category}", "user", "{user}")
|
directory_fmt = ("{category}", "user", "{user}")
|
||||||
pattern = BASE_PATTERN + r"/user/([^/?#]+)"
|
pattern = BASE_PATTERN + r"/user/([^/?#]+)"
|
||||||
test = ("http://anime.reactor.cc/user/Shuster",)
|
test = (
|
||||||
|
("http://reactor.cc/user/Dioklet"),
|
||||||
|
("http://anime.reactor.cc/user/Shuster"),
|
||||||
|
("http://joyreactor.cc/user/hemantic"),
|
||||||
|
("http://joyreactor.com/user/Tacoman123", {
|
||||||
|
"url": "60ce9a3e3db791a0899f7fb7643b5b87d09ae3b5",
|
||||||
|
}),
|
||||||
|
("http://pornreactor.cc/user/Disillusion", {
|
||||||
|
"range": "1-25",
|
||||||
|
"count": ">= 20",
|
||||||
|
}),
|
||||||
|
("http://fapreactor.com/user/Disillusion"),
|
||||||
|
)
|
||||||
|
|
||||||
def __init__(self, match):
|
def __init__(self, match):
|
||||||
ReactorExtractor.__init__(self, match)
|
ReactorExtractor.__init__(self, match)
|
||||||
self.user = match.group(2)
|
self.user = match.group(match.lastindex)
|
||||||
|
|
||||||
def metadata(self):
|
def metadata(self):
|
||||||
return {"user": text.unescape(self.user).replace("+", " ")}
|
return {"user": text.unescape(self.user).replace("+", " ")}
|
||||||
@@ -189,75 +250,11 @@ class ReactorPostExtractor(ReactorExtractor):
|
|||||||
"""Extractor for single posts on *reactor.cc sites"""
|
"""Extractor for single posts on *reactor.cc sites"""
|
||||||
subcategory = "post"
|
subcategory = "post"
|
||||||
pattern = BASE_PATTERN + r"/post/(\d+)"
|
pattern = BASE_PATTERN + r"/post/(\d+)"
|
||||||
test = ("http://anime.reactor.cc/post/3576250",)
|
|
||||||
|
|
||||||
def __init__(self, match):
|
|
||||||
ReactorExtractor.__init__(self, match)
|
|
||||||
self.post_id = match.group(2)
|
|
||||||
|
|
||||||
def items(self):
|
|
||||||
post = self.request(self.url).text
|
|
||||||
pos = post.find('class="uhead">')
|
|
||||||
for image in self._parse_post(post[pos:]):
|
|
||||||
if image["num"] == 1:
|
|
||||||
yield Message.Directory, image
|
|
||||||
url = image["url"]
|
|
||||||
yield Message.Url, url, text.nameext_from_url(url, image)
|
|
||||||
|
|
||||||
|
|
||||||
# --------------------------------------------------------------------
|
|
||||||
# JoyReactor
|
|
||||||
|
|
||||||
JR_BASE_PATTERN = r"(?:https?://)?(?:www\.)?(joyreactor\.c(?:c|om))"
|
|
||||||
|
|
||||||
|
|
||||||
class JoyreactorTagExtractor(ReactorTagExtractor):
|
|
||||||
"""Extractor for tag searches on joyreactor.cc"""
|
|
||||||
category = "joyreactor"
|
|
||||||
pattern = JR_BASE_PATTERN + r"/tag/([^/?#]+)"
|
|
||||||
test = (
|
test = (
|
||||||
("http://joyreactor.cc/tag/Advent+Cirno", {
|
("http://reactor.cc/post/4999736", {
|
||||||
"count": ">= 15",
|
"url": "dfc74d150d7267384d8c229c4b82aa210755daa0",
|
||||||
}),
|
}),
|
||||||
("http://joyreactor.com/tag/Cirno", {
|
("http://anime.reactor.cc/post/3576250"),
|
||||||
"url": "aa59090590b26f4654881301fe8fe748a51625a8",
|
|
||||||
}),
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
class JoyreactorSearchExtractor(ReactorSearchExtractor):
|
|
||||||
"""Extractor for search results on joyreactor.cc"""
|
|
||||||
category = "joyreactor"
|
|
||||||
pattern = JR_BASE_PATTERN + r"/search(?:/|\?q=)([^/?#]+)"
|
|
||||||
test = (
|
|
||||||
("http://joyreactor.cc/search/Nature", {
|
|
||||||
"range": "1-25",
|
|
||||||
"count": ">= 20",
|
|
||||||
}),
|
|
||||||
("http://joyreactor.com/search?q=Nature", {
|
|
||||||
"range": "1-25",
|
|
||||||
"count": ">= 20",
|
|
||||||
}),
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
class JoyreactorUserExtractor(ReactorUserExtractor):
|
|
||||||
"""Extractor for all posts of a user on joyreactor.cc"""
|
|
||||||
category = "joyreactor"
|
|
||||||
pattern = JR_BASE_PATTERN + r"/user/([^/?#]+)"
|
|
||||||
test = (
|
|
||||||
("http://joyreactor.cc/user/hemantic"),
|
|
||||||
("http://joyreactor.com/user/Tacoman123", {
|
|
||||||
"url": "60ce9a3e3db791a0899f7fb7643b5b87d09ae3b5",
|
|
||||||
}),
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
class JoyreactorPostExtractor(ReactorPostExtractor):
|
|
||||||
"""Extractor for single posts on joyreactor.cc"""
|
|
||||||
category = "joyreactor"
|
|
||||||
pattern = JR_BASE_PATTERN + r"/post/(\d+)"
|
|
||||||
test = (
|
|
||||||
("http://joyreactor.com/post/3721876", { # single image
|
("http://joyreactor.com/post/3721876", { # single image
|
||||||
"pattern": r"http://img\d\.joyreactor\.com/pics/post/full"
|
"pattern": r"http://img\d\.joyreactor\.com/pics/post/full"
|
||||||
r"/cartoon-painting-monster-lake-4841316.jpeg",
|
r"/cartoon-painting-monster-lake-4841316.jpeg",
|
||||||
@@ -281,57 +278,6 @@ class JoyreactorPostExtractor(ReactorPostExtractor):
|
|||||||
("http://joyreactor.cc/post/1299", { # "malformed" JSON
|
("http://joyreactor.cc/post/1299", { # "malformed" JSON
|
||||||
"url": "ab02c6eb7b4035ad961b29ee0770ee41be2fcc39",
|
"url": "ab02c6eb7b4035ad961b29ee0770ee41be2fcc39",
|
||||||
}),
|
}),
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
# --------------------------------------------------------------------
|
|
||||||
# PornReactor
|
|
||||||
|
|
||||||
PR_BASE_PATTERN = r"(?:https?://)?(?:www\.)?(pornreactor\.cc|fapreactor.com)"
|
|
||||||
|
|
||||||
|
|
||||||
class PornreactorTagExtractor(ReactorTagExtractor):
|
|
||||||
"""Extractor for tag searches on pornreactor.cc"""
|
|
||||||
category = "pornreactor"
|
|
||||||
pattern = PR_BASE_PATTERN + r"/tag/([^/?#]+)"
|
|
||||||
test = (
|
|
||||||
("http://pornreactor.cc/tag/RiceGnat", {
|
|
||||||
"range": "1-25",
|
|
||||||
"count": ">= 25",
|
|
||||||
}),
|
|
||||||
("http://fapreactor.com/tag/RiceGnat"),
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
class PornreactorSearchExtractor(ReactorSearchExtractor):
|
|
||||||
"""Extractor for search results on pornreactor.cc"""
|
|
||||||
category = "pornreactor"
|
|
||||||
pattern = PR_BASE_PATTERN + r"/search(?:/|\?q=)([^/?#]+)"
|
|
||||||
test = (
|
|
||||||
("http://pornreactor.cc/search?q=ecchi+hentai"),
|
|
||||||
("http://fapreactor.com/search/ecchi+hentai"),
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
class PornreactorUserExtractor(ReactorUserExtractor):
|
|
||||||
"""Extractor for all posts of a user on pornreactor.cc"""
|
|
||||||
category = "pornreactor"
|
|
||||||
pattern = PR_BASE_PATTERN + r"/user/([^/?#]+)"
|
|
||||||
test = (
|
|
||||||
("http://pornreactor.cc/user/Disillusion", {
|
|
||||||
"range": "1-25",
|
|
||||||
"count": ">= 20",
|
|
||||||
}),
|
|
||||||
("http://fapreactor.com/user/Disillusion"),
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
class PornreactorPostExtractor(ReactorPostExtractor):
|
|
||||||
"""Extractor for single posts on pornreactor.cc"""
|
|
||||||
category = "pornreactor"
|
|
||||||
subcategory = "post"
|
|
||||||
pattern = PR_BASE_PATTERN + r"/post/(\d+)"
|
|
||||||
test = (
|
|
||||||
("http://pornreactor.cc/post/863166", {
|
("http://pornreactor.cc/post/863166", {
|
||||||
"url": "a09fb0577489e1f9564c25d0ad576f81b19c2ef3",
|
"url": "a09fb0577489e1f9564c25d0ad576f81b19c2ef3",
|
||||||
"content": "ec6b0568bfb1803648744077da082d14de844340",
|
"content": "ec6b0568bfb1803648744077da082d14de844340",
|
||||||
@@ -340,3 +286,16 @@ class PornreactorPostExtractor(ReactorPostExtractor):
|
|||||||
"url": "2a956ce0c90e8bc47b4392db4fa25ad1342f3e54",
|
"url": "2a956ce0c90e8bc47b4392db4fa25ad1342f3e54",
|
||||||
}),
|
}),
|
||||||
)
|
)
|
||||||
|
|
||||||
|
def __init__(self, match):
|
||||||
|
ReactorExtractor.__init__(self, match)
|
||||||
|
self.post_id = match.group(match.lastindex)
|
||||||
|
|
||||||
|
def items(self):
|
||||||
|
post = self.request(self.root + self.path).text
|
||||||
|
pos = post.find('class="uhead">')
|
||||||
|
for image in self._parse_post(post[pos:]):
|
||||||
|
if image["num"] == 1:
|
||||||
|
yield Message.Directory, image
|
||||||
|
url = image["url"]
|
||||||
|
yield Message.Url, url, text.nameext_from_url(url, image)
|
||||||
|
|||||||
@@ -54,6 +54,7 @@ CATEGORY_MAP = {
|
|||||||
"imagechest" : "ImageChest",
|
"imagechest" : "ImageChest",
|
||||||
"imgth" : "imgth",
|
"imgth" : "imgth",
|
||||||
"imgur" : "imgur",
|
"imgur" : "imgur",
|
||||||
|
"joyreactor" : "JoyReactor",
|
||||||
"kabeuchi" : "かべうち",
|
"kabeuchi" : "かべうち",
|
||||||
"kireicake" : "Kirei Cake",
|
"kireicake" : "Kirei Cake",
|
||||||
"lineblog" : "LINE BLOG",
|
"lineblog" : "LINE BLOG",
|
||||||
@@ -78,6 +79,7 @@ CATEGORY_MAP = {
|
|||||||
"paheal" : "rule #34",
|
"paheal" : "rule #34",
|
||||||
"photovogue" : "PhotoVogue",
|
"photovogue" : "PhotoVogue",
|
||||||
"pornimagesxxx" : "Porn Image",
|
"pornimagesxxx" : "Porn Image",
|
||||||
|
"pornreactor" : "PornReactor",
|
||||||
"powermanga" : "PowerManga",
|
"powermanga" : "PowerManga",
|
||||||
"readcomiconline": "Read Comic Online",
|
"readcomiconline": "Read Comic Online",
|
||||||
"rbt" : "RebeccaBlackTech",
|
"rbt" : "RebeccaBlackTech",
|
||||||
|
|||||||
Reference in New Issue
Block a user