@@ -4131,6 +4131,21 @@ Description
|
|||||||
Note: This requires 1-2 additional HTTP requests per post.
|
Note: This requires 1-2 additional HTTP requests per post.
|
||||||
|
|
||||||
|
|
||||||
|
extractor.zerochan.pagination
|
||||||
|
-----------------------------
|
||||||
|
Type
|
||||||
|
``string``
|
||||||
|
Default
|
||||||
|
``"api"``
|
||||||
|
Description
|
||||||
|
Controls how to paginate over tag search results.
|
||||||
|
|
||||||
|
* ``"api"``: Use the `JSON API <https://www.zerochan.net/api>`__
|
||||||
|
(no ``extension`` metadata)
|
||||||
|
* ``"html"``: Parse HTML pages
|
||||||
|
(limited to 100 pages * 24 posts)
|
||||||
|
|
||||||
|
|
||||||
extractor.[booru].tags
|
extractor.[booru].tags
|
||||||
----------------------
|
----------------------
|
||||||
Type
|
Type
|
||||||
|
|||||||
@@ -10,7 +10,7 @@
|
|||||||
|
|
||||||
from .booru import BooruExtractor
|
from .booru import BooruExtractor
|
||||||
from ..cache import cache
|
from ..cache import cache
|
||||||
from .. import text, exception
|
from .. import text, util, exception
|
||||||
|
|
||||||
BASE_PATTERN = r"(?:https?://)?(?:www\.)?zerochan\.net"
|
BASE_PATTERN = r"(?:https?://)?(?:www\.)?zerochan\.net"
|
||||||
|
|
||||||
@@ -21,8 +21,11 @@ class ZerochanExtractor(BooruExtractor):
|
|||||||
root = "https://www.zerochan.net"
|
root = "https://www.zerochan.net"
|
||||||
filename_fmt = "{id}.{extension}"
|
filename_fmt = "{id}.{extension}"
|
||||||
archive_fmt = "{id}"
|
archive_fmt = "{id}"
|
||||||
|
page_start = 1
|
||||||
|
per_page = 250
|
||||||
cookies_domain = ".zerochan.net"
|
cookies_domain = ".zerochan.net"
|
||||||
cookies_names = ("z_id", "z_hash")
|
cookies_names = ("z_id", "z_hash")
|
||||||
|
request_interval = (0.5, 1.5)
|
||||||
|
|
||||||
def login(self):
|
def login(self):
|
||||||
self._logged_in = True
|
self._logged_in = True
|
||||||
@@ -86,7 +89,7 @@ class ZerochanExtractor(BooruExtractor):
|
|||||||
|
|
||||||
return data
|
return data
|
||||||
|
|
||||||
def _parse_entry_json(self, entry_id):
|
def _parse_entry_api(self, entry_id):
|
||||||
url = "{}/{}?json".format(self.root, entry_id)
|
url = "{}/{}?json".format(self.root, entry_id)
|
||||||
item = self.request(url).json()
|
item = self.request(url).json()
|
||||||
|
|
||||||
@@ -117,14 +120,22 @@ class ZerochanTagExtractor(ZerochanExtractor):
|
|||||||
ZerochanExtractor.__init__(self, match)
|
ZerochanExtractor.__init__(self, match)
|
||||||
self.search_tag, self.query = match.groups()
|
self.search_tag, self.query = match.groups()
|
||||||
|
|
||||||
|
def _init(self):
    """Select the pagination backend from the ``pagination`` option.

    ``"html"`` binds ``self.posts`` to ``posts_html`` and sets
    ``per_page`` to 24.  Any other value binds ``self.posts`` to
    ``posts_api`` and replaces the session's ``User-Agent`` header
    with ``util.USERAGENT``.
    """
    use_html = self.config("pagination") == "html"

    if not use_html:
        # Default ("api") path.
        self.posts = self.posts_api
        self.session.headers["User-Agent"] = util.USERAGENT
        return

    self.posts = self.posts_html
    self.per_page = 24
|
||||||
|
|
||||||
def metadata(self):
|
def metadata(self):
|
||||||
return {"search_tags": text.unquote(
|
return {"search_tags": text.unquote(
|
||||||
self.search_tag.replace("+", " "))}
|
self.search_tag.replace("+", " "))}
|
||||||
|
|
||||||
def posts(self):
|
def posts_html(self):
|
||||||
url = self.root + "/" + self.search_tag
|
url = self.root + "/" + self.search_tag
|
||||||
params = text.parse_query(self.query)
|
params = text.parse_query(self.query)
|
||||||
params["p"] = text.parse_int(params.get("p"), 1)
|
params["p"] = text.parse_int(params.get("p"), self.page_start)
|
||||||
metadata = self.config("metadata")
|
metadata = self.config("metadata")
|
||||||
|
|
||||||
while True:
|
while True:
|
||||||
@@ -140,7 +151,7 @@ class ZerochanTagExtractor(ZerochanExtractor):
|
|||||||
if metadata:
|
if metadata:
|
||||||
entry_id = extr('href="/', '"')
|
entry_id = extr('href="/', '"')
|
||||||
post = self._parse_entry_html(entry_id)
|
post = self._parse_entry_html(entry_id)
|
||||||
post.update(self._parse_entry_json(entry_id))
|
post.update(self._parse_entry_api(entry_id))
|
||||||
yield post
|
yield post
|
||||||
else:
|
else:
|
||||||
yield {
|
yield {
|
||||||
@@ -157,6 +168,41 @@ class ZerochanTagExtractor(ZerochanExtractor):
|
|||||||
break
|
break
|
||||||
params["p"] += 1
|
params["p"] += 1
|
||||||
|
|
||||||
|
def posts_api(self):
    """Yield posts for the search tag using the site's JSON API.

    Requests ``<root>/<search_tag>`` with the query parameters
    ``json=1``, ``l`` (``self.per_page``) and ``p`` (page number,
    starting at ``self.page_start``).  The page number is advanced by
    one for as long as the response carries a truthy ``"next"`` value.

    With the ``metadata`` option enabled, every post is updated with
    the results of ``_parse_entry_html()`` and ``_parse_entry_api()``.
    Otherwise ``file_url`` is guessed as the ``.jpg`` file on the
    static host, and the ``.png`` variant is stored under
    ``_fallback``.

    Iteration ends silently when a response has no usable ``"items"``
    entry.
    """
    url = self.root + "/" + self.search_tag
    metadata = self.config("metadata")
    params = {
        "json": "1",
        "l"   : self.per_page,
        "p"   : self.page_start,
    }

    static = "https://static.zerochan.net/.full."

    while True:
        data = self.request(url, params=params).json()
        try:
            posts = data["items"]
        except (KeyError, TypeError):
            # KeyError: JSON object without an "items" member.
            # TypeError: payload is not a JSON object (list, null, ...).
            # A failed subscription never raises ValueError, so catching
            # that alone let both cases escape as a crash.
            return

        if metadata:
            for post in posts:
                post_id = post["id"]
                post.update(self._parse_entry_html(post_id))
                post.update(self._parse_entry_api(post_id))
        else:
            for post in posts:
                # The listing does not include the file extension;
                # guess ".jpg" and keep ".png" as the alternative.
                base = static + str(post["id"])
                post["file_url"] = base + ".jpg"
                post["_fallback"] = (base + ".png",)

        yield from posts

        if not data.get("next"):
            return
        params["p"] += 1
|
||||||
|
|
||||||
|
|
||||||
class ZerochanImageExtractor(ZerochanExtractor):
|
class ZerochanImageExtractor(ZerochanExtractor):
|
||||||
subcategory = "image"
|
subcategory = "image"
|
||||||
@@ -170,5 +216,5 @@ class ZerochanImageExtractor(ZerochanExtractor):
|
|||||||
def posts(self):
|
def posts(self):
|
||||||
post = self._parse_entry_html(self.image_id)
|
post = self._parse_entry_html(self.image_id)
|
||||||
if self.config("metadata"):
|
if self.config("metadata"):
|
||||||
post.update(self._parse_entry_json(self.image_id))
|
post.update(self._parse_entry_api(self.image_id))
|
||||||
return (post,)
|
return (post,)
|
||||||
|
|||||||
@@ -12,8 +12,27 @@ __tests__ = (
|
|||||||
"#url" : "https://www.zerochan.net/Perth+%28Kantai+Collection%29",
|
"#url" : "https://www.zerochan.net/Perth+%28Kantai+Collection%29",
|
||||||
"#category": ("booru", "zerochan", "tag"),
|
"#category": ("booru", "zerochan", "tag"),
|
||||||
"#class" : zerochan.ZerochanTagExtractor,
|
"#class" : zerochan.ZerochanTagExtractor,
|
||||||
|
"#pattern" : r"https://static\.zerochan\.net/\.full\.\d+\.jpg",
|
||||||
|
"#count" : "> 50",
|
||||||
|
|
||||||
|
"extension" : r"jpg",
|
||||||
|
"file_url" : r"re:https://static\.zerochan\.net/\.full\.\d+\.jpg",
|
||||||
|
"filename" : r"re:\.full\.\d+",
|
||||||
|
"height" : int,
|
||||||
|
"id" : int,
|
||||||
|
"search_tags": "Perth (Kantai Collection)",
|
||||||
|
"tag" : r"re:(Perth \(Kantai Collection\)|Kantai Collection)",
|
||||||
|
"tags" : list,
|
||||||
|
"width" : int,
|
||||||
|
},
|
||||||
|
|
||||||
|
{
|
||||||
|
"#url" : "https://www.zerochan.net/Perth+%28Kantai+Collection%29",
|
||||||
|
"#category": ("booru", "zerochan", "tag"),
|
||||||
|
"#class" : zerochan.ZerochanTagExtractor,
|
||||||
|
"#options" : {"pagination": "html"},
|
||||||
"#pattern" : r"https://static\.zerochan\.net/.+\.full\.\d+\.(jpg|png)",
|
"#pattern" : r"https://static\.zerochan\.net/.+\.full\.\d+\.(jpg|png)",
|
||||||
"#count" : "> 24",
|
"#count" : "> 45",
|
||||||
|
|
||||||
"extension" : r"re:jpg|png",
|
"extension" : r"re:jpg|png",
|
||||||
"file_url" : r"re:https://static\.zerochan\.net/.+\.full\.\d+\.(jpg|png)",
|
"file_url" : r"re:https://static\.zerochan\.net/.+\.full\.\d+\.(jpg|png)",
|
||||||
@@ -58,8 +77,37 @@ __tests__ = (
|
|||||||
"Theme:Personification",
|
"Theme:Personification",
|
||||||
"Theme:Pins",
|
"Theme:Pins",
|
||||||
"Theme:Ribbon",
|
"Theme:Ribbon",
|
||||||
"Theme:Shirt",
|
|
||||||
"Theme:Short Hair",
|
"Theme:Short Hair",
|
||||||
|
"Theme:Top",
|
||||||
|
],
|
||||||
|
"uploader": "YukinoTokisaki",
|
||||||
|
"width" : 1920,
|
||||||
|
},
|
||||||
|
|
||||||
|
{
|
||||||
|
"#url" : "https://www.zerochan.net/2920445",
|
||||||
|
"#category": ("booru", "zerochan", "image"),
|
||||||
|
"#class" : zerochan.ZerochanImageExtractor,
|
||||||
|
"#pattern" : r"https://static\.zerochan\.net/Perth\.%28Kantai\.Collection%29\.full.2920445\.jpg",
|
||||||
|
"#auth" : False,
|
||||||
|
|
||||||
|
"author" : "YeFan 葉凡",
|
||||||
|
"date" : "dt:2020-04-24 21:33:44",
|
||||||
|
"file_url": "https://static.zerochan.net/Perth.%28Kantai.Collection%29.full.2920445.jpg",
|
||||||
|
"filename": "Perth.(Kantai.Collection).full.2920445",
|
||||||
|
"height" : 1366,
|
||||||
|
"id" : 2920445,
|
||||||
|
"path" : [
|
||||||
|
"Kantai Collection",
|
||||||
|
"Perth (Kantai Collection)",
|
||||||
|
],
|
||||||
|
"size" : 1975296,
|
||||||
|
"tags" : [
|
||||||
|
"Mangaka:YeFan 葉凡",
|
||||||
|
"Game:Kantai Collection",
|
||||||
|
"Character:Perth (Kantai Collection)",
|
||||||
|
"Theme:Firefighter Outfit",
|
||||||
|
"Theme:Pins",
|
||||||
],
|
],
|
||||||
"uploader": "YukinoTokisaki",
|
"uploader": "YukinoTokisaki",
|
||||||
"width" : 1920,
|
"width" : 1920,
|
||||||
|
|||||||
Reference in New Issue
Block a user