update 'match.lastindex' usage
This commit is contained in:
@@ -102,12 +102,8 @@ class BloggerPostExtractor(BloggerExtractor):
|
|||||||
pattern = BASE_PATTERN + r"(/\d\d\d\d/\d\d/[^/?#]+\.html)"
|
pattern = BASE_PATTERN + r"(/\d\d\d\d/\d\d/[^/?#]+\.html)"
|
||||||
example = "https://BLOG.blogspot.com/1970/01/TITLE.html"
|
example = "https://BLOG.blogspot.com/1970/01/TITLE.html"
|
||||||
|
|
||||||
def __init__(self, match):
|
|
||||||
BloggerExtractor.__init__(self, match)
|
|
||||||
self.path = match[match.lastindex]
|
|
||||||
|
|
||||||
def posts(self, blog):
|
def posts(self, blog):
|
||||||
return (self.api.post_by_path(blog["id"], self.path),)
|
return (self.api.post_by_path(blog["id"], self.groups[-1]),)
|
||||||
|
|
||||||
|
|
||||||
class BloggerBlogExtractor(BloggerExtractor):
|
class BloggerBlogExtractor(BloggerExtractor):
|
||||||
@@ -126,16 +122,13 @@ class BloggerSearchExtractor(BloggerExtractor):
|
|||||||
pattern = BASE_PATTERN + r"/search/?\?q=([^&#]+)"
|
pattern = BASE_PATTERN + r"/search/?\?q=([^&#]+)"
|
||||||
example = "https://BLOG.blogspot.com/search?q=QUERY"
|
example = "https://BLOG.blogspot.com/search?q=QUERY"
|
||||||
|
|
||||||
def __init__(self, match):
|
def metadata(self):
|
||||||
BloggerExtractor.__init__(self, match)
|
self.query = query = text.unquote(self.groups[-1])
|
||||||
self.query = text.unquote(match[match.lastindex])
|
return {"query": query}
|
||||||
|
|
||||||
def posts(self, blog):
|
def posts(self, blog):
|
||||||
return self.api.blog_search(blog["id"], self.query)
|
return self.api.blog_search(blog["id"], self.query)
|
||||||
|
|
||||||
def metadata(self):
|
|
||||||
return {"query": self.query}
|
|
||||||
|
|
||||||
|
|
||||||
class BloggerLabelExtractor(BloggerExtractor):
|
class BloggerLabelExtractor(BloggerExtractor):
|
||||||
"""Extractor for Blogger posts by label"""
|
"""Extractor for Blogger posts by label"""
|
||||||
@@ -143,21 +136,18 @@ class BloggerLabelExtractor(BloggerExtractor):
|
|||||||
pattern = BASE_PATTERN + r"/search/label/([^/?#]+)"
|
pattern = BASE_PATTERN + r"/search/label/([^/?#]+)"
|
||||||
example = "https://BLOG.blogspot.com/search/label/LABEL"
|
example = "https://BLOG.blogspot.com/search/label/LABEL"
|
||||||
|
|
||||||
def __init__(self, match):
|
def metadata(self):
|
||||||
BloggerExtractor.__init__(self, match)
|
self.label = label = text.unquote(self.groups[-1])
|
||||||
self.label = text.unquote(match[match.lastindex])
|
return {"label": label}
|
||||||
|
|
||||||
def posts(self, blog):
|
def posts(self, blog):
|
||||||
return self.api.blog_posts(blog["id"], self.label)
|
return self.api.blog_posts(blog["id"], self.label)
|
||||||
|
|
||||||
def metadata(self):
|
|
||||||
return {"label": self.label}
|
|
||||||
|
|
||||||
|
|
||||||
class BloggerAPI():
|
class BloggerAPI():
|
||||||
"""Minimal interface for the Blogger v3 API
|
"""Minimal interface for the Blogger API v3
|
||||||
|
|
||||||
Ref: https://developers.google.com/blogger
|
https://developers.google.com/blogger
|
||||||
"""
|
"""
|
||||||
API_KEY = "AIzaSyCN9ax34oMMyM07g_M-5pjeDp_312eITK8"
|
API_KEY = "AIzaSyCN9ax34oMMyM07g_M-5pjeDp_312eITK8"
|
||||||
|
|
||||||
@@ -166,27 +156,27 @@ class BloggerAPI():
|
|||||||
self.api_key = extractor.config("api-key") or self.API_KEY
|
self.api_key = extractor.config("api-key") or self.API_KEY
|
||||||
|
|
||||||
def blog_by_url(self, url):
|
def blog_by_url(self, url):
|
||||||
return self._call("blogs/byurl", {"url": url}, "blog")
|
return self._call("/blogs/byurl", {"url": url}, "blog")
|
||||||
|
|
||||||
def blog_posts(self, blog_id, label=None):
|
def blog_posts(self, blog_id, label=None):
|
||||||
endpoint = "blogs/{}/posts".format(blog_id)
|
endpoint = f"/blogs/{blog_id}/posts"
|
||||||
params = {"labels": label}
|
params = {"labels": label}
|
||||||
return self._pagination(endpoint, params)
|
return self._pagination(endpoint, params)
|
||||||
|
|
||||||
def blog_search(self, blog_id, query):
|
def blog_search(self, blog_id, query):
|
||||||
endpoint = "blogs/{}/posts/search".format(blog_id)
|
endpoint = f"/blogs/{blog_id}/posts/search"
|
||||||
params = {"q": query}
|
params = {"q": query}
|
||||||
return self._pagination(endpoint, params)
|
return self._pagination(endpoint, params)
|
||||||
|
|
||||||
def post_by_path(self, blog_id, path):
|
def post_by_path(self, blog_id, path):
|
||||||
endpoint = "blogs/{}/posts/bypath".format(blog_id)
|
endpoint = f"/blogs/{blog_id}/posts/bypath"
|
||||||
return self._call(endpoint, {"path": path}, "post")
|
return self._call(endpoint, {"path": path}, "post")
|
||||||
|
|
||||||
def _call(self, endpoint, params, notfound=None):
|
def _call(self, endpoint, params, notfound=None):
|
||||||
url = "https://www.googleapis.com/blogger/v3/" + endpoint
|
url = "https://www.googleapis.com/blogger/v3" + endpoint
|
||||||
params["key"] = self.api_key
|
params["key"] = self.api_key
|
||||||
return self.extractor.request(
|
return self.extractor.request_json(
|
||||||
url, params=params, notfound=notfound).json()
|
url, params=params, notfound=notfound)
|
||||||
|
|
||||||
def _pagination(self, endpoint, params):
|
def _pagination(self, endpoint, params):
|
||||||
while True:
|
while True:
|
||||||
|
|||||||
@@ -184,13 +184,12 @@ class FoolfuukaBoardExtractor(FoolfuukaExtractor):
|
|||||||
self.page = self.groups[-1]
|
self.page = self.groups[-1]
|
||||||
|
|
||||||
def items(self):
|
def items(self):
|
||||||
index_base = "{}/_/api/chan/index/?board={}&page=".format(
|
index_base = f"{self.root}/_/api/chan/index/?board={self.board}&page="
|
||||||
self.root, self.board)
|
thread_base = f"{self.root}/{self.board}/thread/"
|
||||||
thread_base = "{}/{}/thread/".format(self.root, self.board)
|
|
||||||
|
|
||||||
page = self.page
|
page = self.page
|
||||||
for pnum in itertools.count(text.parse_int(page, 1)):
|
for pnum in itertools.count(text.parse_int(page, 1)):
|
||||||
with self.request(index_base + format(pnum)) as response:
|
with self.request(index_base + str(pnum)) as response:
|
||||||
try:
|
try:
|
||||||
threads = response.json()
|
threads = response.json()
|
||||||
except ValueError:
|
except ValueError:
|
||||||
@@ -270,27 +269,17 @@ class FoolfuukaGalleryExtractor(FoolfuukaExtractor):
|
|||||||
pattern = BASE_PATTERN + r"/([^/?#]+)/gallery(?:/(\d+))?"
|
pattern = BASE_PATTERN + r"/([^/?#]+)/gallery(?:/(\d+))?"
|
||||||
example = "https://archived.moe/a/gallery"
|
example = "https://archived.moe/a/gallery"
|
||||||
|
|
||||||
def __init__(self, match):
|
|
||||||
FoolfuukaExtractor.__init__(self, match)
|
|
||||||
|
|
||||||
board = match[match.lastindex]
|
|
||||||
if board.isdecimal():
|
|
||||||
self.board = match[match.lastindex-1]
|
|
||||||
self.pages = (board,)
|
|
||||||
else:
|
|
||||||
self.board = board
|
|
||||||
self.pages = map(format, itertools.count(1))
|
|
||||||
|
|
||||||
def metadata(self):
|
def metadata(self):
|
||||||
return {"board": self.board}
|
self.board = board = self.groups[-2]
|
||||||
|
return {"board": board}
|
||||||
|
|
||||||
def posts(self):
|
def posts(self):
|
||||||
base = "{}/_/api/chan/gallery/?board={}&page=".format(
|
pnum = self.groups[-1]
|
||||||
self.root, self.board)
|
pages = itertools.count(1) if pnum is None else (pnum,)
|
||||||
|
base = f"{self.root}/_/api/chan/gallery/?board={self.board}&page="
|
||||||
|
|
||||||
for page in self.pages:
|
for pnum in pages:
|
||||||
with self.request(base + page) as response:
|
posts = self.request_json(f"{base}{pnum}")
|
||||||
posts = response.json()
|
|
||||||
if not posts:
|
if not posts:
|
||||||
return
|
return
|
||||||
yield from posts
|
yield from posts
|
||||||
|
|||||||
@@ -18,7 +18,7 @@ class FoolslideExtractor(BaseExtractor):
|
|||||||
|
|
||||||
def __init__(self, match):
|
def __init__(self, match):
|
||||||
BaseExtractor.__init__(self, match)
|
BaseExtractor.__init__(self, match)
|
||||||
self.gallery_url = self.root + match[match.lastindex]
|
self.gallery_url = self.root + self.groups[-1]
|
||||||
|
|
||||||
def request(self, url):
|
def request(self, url):
|
||||||
return BaseExtractor.request(
|
return BaseExtractor.request(
|
||||||
|
|||||||
@@ -92,16 +92,12 @@ class GelbooruV01TagExtractor(GelbooruV01Extractor):
|
|||||||
pattern = BASE_PATTERN + r"/index\.php\?page=post&s=list&tags=([^&#]+)"
|
pattern = BASE_PATTERN + r"/index\.php\?page=post&s=list&tags=([^&#]+)"
|
||||||
example = "https://allgirl.booru.org/index.php?page=post&s=list&tags=TAG"
|
example = "https://allgirl.booru.org/index.php?page=post&s=list&tags=TAG"
|
||||||
|
|
||||||
def __init__(self, match):
|
|
||||||
GelbooruV01Extractor.__init__(self, match)
|
|
||||||
self.tags = match[match.lastindex]
|
|
||||||
|
|
||||||
def metadata(self):
|
def metadata(self):
|
||||||
return {"search_tags": text.unquote(self.tags.replace("+", " "))}
|
self.tags = tags = self.groups[-1]
|
||||||
|
return {"search_tags": text.unquote(tags.replace("+", " "))}
|
||||||
|
|
||||||
def posts(self):
|
def posts(self):
|
||||||
url = "{}/index.php?page=post&s=list&tags={}&pid=".format(
|
url = f"{self.root}/index.php?page=post&s=list&tags={self.tags}&pid="
|
||||||
self.root, self.tags)
|
|
||||||
return self._pagination(url, 'class="thumb"><a id="p', '"')
|
return self._pagination(url, 'class="thumb"><a id="p', '"')
|
||||||
|
|
||||||
|
|
||||||
@@ -113,16 +109,13 @@ class GelbooruV01FavoriteExtractor(GelbooruV01Extractor):
|
|||||||
pattern = BASE_PATTERN + r"/index\.php\?page=favorites&s=view&id=(\d+)"
|
pattern = BASE_PATTERN + r"/index\.php\?page=favorites&s=view&id=(\d+)"
|
||||||
example = "https://allgirl.booru.org/index.php?page=favorites&s=view&id=1"
|
example = "https://allgirl.booru.org/index.php?page=favorites&s=view&id=1"
|
||||||
|
|
||||||
def __init__(self, match):
|
|
||||||
GelbooruV01Extractor.__init__(self, match)
|
|
||||||
self.favorite_id = match[match.lastindex]
|
|
||||||
|
|
||||||
def metadata(self):
|
def metadata(self):
|
||||||
return {"favorite_id": text.parse_int(self.favorite_id)}
|
self.favorite_id = fav_id = self.groups[-1]
|
||||||
|
return {"favorite_id": text.parse_int(fav_id)}
|
||||||
|
|
||||||
def posts(self):
|
def posts(self):
|
||||||
url = "{}/index.php?page=favorites&s=view&id={}&pid=".format(
|
url = (f"{self.root}/index.php"
|
||||||
self.root, self.favorite_id)
|
f"?page=favorites&s=view&id={self.favorite_id}&pid=")
|
||||||
return self._pagination(url, "posts[", "]")
|
return self._pagination(url, "posts[", "]")
|
||||||
|
|
||||||
|
|
||||||
@@ -132,9 +125,5 @@ class GelbooruV01PostExtractor(GelbooruV01Extractor):
|
|||||||
pattern = BASE_PATTERN + r"/index\.php\?page=post&s=view&id=(\d+)"
|
pattern = BASE_PATTERN + r"/index\.php\?page=post&s=view&id=(\d+)"
|
||||||
example = "https://allgirl.booru.org/index.php?page=post&s=view&id=12345"
|
example = "https://allgirl.booru.org/index.php?page=post&s=view&id=12345"
|
||||||
|
|
||||||
def __init__(self, match):
|
|
||||||
GelbooruV01Extractor.__init__(self, match)
|
|
||||||
self.post_id = match[match.lastindex]
|
|
||||||
|
|
||||||
def posts(self):
|
def posts(self):
|
||||||
return (self._parse_post(self.post_id),)
|
return (self._parse_post(self.groups[-1]),)
|
||||||
|
|||||||
@@ -95,8 +95,8 @@ class GelbooruV02Extractor(booru.BooruExtractor):
|
|||||||
post["created_at"], "%a %b %d %H:%M:%S %z %Y")
|
post["created_at"], "%a %b %d %H:%M:%S %z %Y")
|
||||||
|
|
||||||
def _html(self, post):
|
def _html(self, post):
|
||||||
return self.request("{}/index.php?page=post&s=view&id={}".format(
|
url = f"{self.root}/index.php?page=post&s=view&id={post['id']}"
|
||||||
self.root, post["id"])).text
|
return self.request(url).text
|
||||||
|
|
||||||
def _tags(self, post, page):
|
def _tags(self, post, page):
|
||||||
tag_container = (text.extr(page, '<ul id="tag-', '</ul>') or
|
tag_container = (text.extr(page, '<ul id="tag-', '</ul>') or
|
||||||
@@ -161,13 +161,9 @@ class GelbooruV02TagExtractor(GelbooruV02Extractor):
|
|||||||
pattern = BASE_PATTERN + r"/index\.php\?page=post&s=list&tags=([^&#]*)"
|
pattern = BASE_PATTERN + r"/index\.php\?page=post&s=list&tags=([^&#]*)"
|
||||||
example = "https://safebooru.org/index.php?page=post&s=list&tags=TAG"
|
example = "https://safebooru.org/index.php?page=post&s=list&tags=TAG"
|
||||||
|
|
||||||
def __init__(self, match):
|
|
||||||
GelbooruV02Extractor.__init__(self, match)
|
|
||||||
tags = match[match.lastindex]
|
|
||||||
self.tags = text.unquote(tags.replace("+", " "))
|
|
||||||
|
|
||||||
def metadata(self):
|
def metadata(self):
|
||||||
return {"search_tags": self.tags}
|
self.tags = tags = text.unquote(self.groups[-1].replace("+", " "))
|
||||||
|
return {"search_tags": tags}
|
||||||
|
|
||||||
def posts(self):
|
def posts(self):
|
||||||
if self.tags == "all":
|
if self.tags == "all":
|
||||||
@@ -184,7 +180,7 @@ class GelbooruV02PoolExtractor(GelbooruV02Extractor):
|
|||||||
|
|
||||||
def __init__(self, match):
|
def __init__(self, match):
|
||||||
GelbooruV02Extractor.__init__(self, match)
|
GelbooruV02Extractor.__init__(self, match)
|
||||||
self.pool_id = match[match.lastindex]
|
self.pool_id = self.groups[-1]
|
||||||
|
|
||||||
if self.category == "rule34":
|
if self.category == "rule34":
|
||||||
self.posts = self._posts_pages
|
self.posts = self._posts_pages
|
||||||
@@ -197,8 +193,7 @@ class GelbooruV02PoolExtractor(GelbooruV02Extractor):
|
|||||||
return num
|
return num
|
||||||
|
|
||||||
def metadata(self):
|
def metadata(self):
|
||||||
url = "{}/index.php?page=pool&s=show&id={}".format(
|
url = f"{self.root}/index.php?page=pool&s=show&id={self.pool_id}"
|
||||||
self.root, self.pool_id)
|
|
||||||
page = self.request(url).text
|
page = self.request(url).text
|
||||||
|
|
||||||
name, pos = text.extract(page, "<h4>Pool: ", "</h4>")
|
name, pos = text.extract(page, "<h4>Pool: ", "</h4>")
|
||||||
@@ -234,12 +229,9 @@ class GelbooruV02FavoriteExtractor(GelbooruV02Extractor):
|
|||||||
pattern = BASE_PATTERN + r"/index\.php\?page=favorites&s=view&id=(\d+)"
|
pattern = BASE_PATTERN + r"/index\.php\?page=favorites&s=view&id=(\d+)"
|
||||||
example = "https://safebooru.org/index.php?page=favorites&s=view&id=12345"
|
example = "https://safebooru.org/index.php?page=favorites&s=view&id=12345"
|
||||||
|
|
||||||
def __init__(self, match):
|
|
||||||
GelbooruV02Extractor.__init__(self, match)
|
|
||||||
self.favorite_id = match[match.lastindex]
|
|
||||||
|
|
||||||
def metadata(self):
|
def metadata(self):
|
||||||
return {"favorite_id": text.parse_int(self.favorite_id)}
|
self.favorite_id = fav_id = self.groups[-1]
|
||||||
|
return {"favorite_id": text.parse_int(fav_id)}
|
||||||
|
|
||||||
def posts(self):
|
def posts(self):
|
||||||
return self._pagination_html({
|
return self._pagination_html({
|
||||||
@@ -255,9 +247,5 @@ class GelbooruV02PostExtractor(GelbooruV02Extractor):
|
|||||||
pattern = BASE_PATTERN + r"/index\.php\?page=post&s=view&id=(\d+)"
|
pattern = BASE_PATTERN + r"/index\.php\?page=post&s=view&id=(\d+)"
|
||||||
example = "https://safebooru.org/index.php?page=post&s=view&id=12345"
|
example = "https://safebooru.org/index.php?page=post&s=view&id=12345"
|
||||||
|
|
||||||
def __init__(self, match):
|
|
||||||
GelbooruV02Extractor.__init__(self, match)
|
|
||||||
self.post_id = match[match.lastindex]
|
|
||||||
|
|
||||||
def posts(self):
|
def posts(self):
|
||||||
return self._pagination({"id": self.post_id})
|
return self._pagination({"id": self.groups[-1]})
|
||||||
|
|||||||
@@ -33,16 +33,9 @@ class JschanThreadExtractor(JschanExtractor):
|
|||||||
pattern = BASE_PATTERN + r"/([^/?#]+)/thread/(\d+)\.html"
|
pattern = BASE_PATTERN + r"/([^/?#]+)/thread/(\d+)\.html"
|
||||||
example = "https://94chan.org/a/thread/12345.html"
|
example = "https://94chan.org/a/thread/12345.html"
|
||||||
|
|
||||||
def __init__(self, match):
|
|
||||||
JschanExtractor.__init__(self, match)
|
|
||||||
index = match.lastindex
|
|
||||||
self.board = match[index-1]
|
|
||||||
self.thread = match[index]
|
|
||||||
|
|
||||||
def items(self):
|
def items(self):
|
||||||
url = "{}/{}/thread/{}.json".format(
|
url = f"{self.root}/{self.groups[-2]}/thread/{self.groups[-1]}.json"
|
||||||
self.root, self.board, self.thread)
|
thread = self.request_json(url)
|
||||||
thread = self.request(url).json()
|
|
||||||
thread["threadId"] = thread["postId"]
|
thread["threadId"] = thread["postId"]
|
||||||
posts = thread.pop("replies", ())
|
posts = thread.pop("replies", ())
|
||||||
|
|
||||||
@@ -53,7 +46,7 @@ class JschanThreadExtractor(JschanExtractor):
|
|||||||
thread.update(post)
|
thread.update(post)
|
||||||
thread["count"] = len(files)
|
thread["count"] = len(files)
|
||||||
for num, file in enumerate(files):
|
for num, file in enumerate(files):
|
||||||
url = self.root + "/file/" + file["filename"]
|
url = f"{self.root}/file/{file['filename']}"
|
||||||
file.update(thread)
|
file.update(thread)
|
||||||
file["num"] = num
|
file["num"] = num
|
||||||
file["siteFilename"] = file["filename"]
|
file["siteFilename"] = file["filename"]
|
||||||
@@ -68,14 +61,10 @@ class JschanBoardExtractor(JschanExtractor):
|
|||||||
r"(?:/index\.html|/catalog\.html|/\d+\.html|/?$)")
|
r"(?:/index\.html|/catalog\.html|/\d+\.html|/?$)")
|
||||||
example = "https://94chan.org/a/"
|
example = "https://94chan.org/a/"
|
||||||
|
|
||||||
def __init__(self, match):
|
|
||||||
JschanExtractor.__init__(self, match)
|
|
||||||
self.board = match[match.lastindex]
|
|
||||||
|
|
||||||
def items(self):
|
def items(self):
|
||||||
url = "{}/{}/catalog.json".format(self.root, self.board)
|
board = self.groups[-1]
|
||||||
for thread in self.request(url).json():
|
url = f"{self.root}/{board}/catalog.json"
|
||||||
url = "{}/{}/thread/{}.html".format(
|
for thread in self.request_json(url):
|
||||||
self.root, self.board, thread["postId"])
|
url = f"{self.root}/{board}/thread/{thread['postId']}.html"
|
||||||
thread["_extractor"] = JschanThreadExtractor
|
thread["_extractor"] = JschanThreadExtractor
|
||||||
yield Message.Queue, url, thread
|
yield Message.Queue, url, thread
|
||||||
|
|||||||
@@ -42,22 +42,15 @@ class LynxchanThreadExtractor(LynxchanExtractor):
|
|||||||
pattern = BASE_PATTERN + r"/([^/?#]+)/res/(\d+)"
|
pattern = BASE_PATTERN + r"/([^/?#]+)/res/(\d+)"
|
||||||
example = "https://endchan.org/a/res/12345.html"
|
example = "https://endchan.org/a/res/12345.html"
|
||||||
|
|
||||||
def __init__(self, match):
|
|
||||||
LynxchanExtractor.__init__(self, match)
|
|
||||||
index = match.lastindex
|
|
||||||
self.board = match[index-1]
|
|
||||||
self.thread = match[index]
|
|
||||||
|
|
||||||
def items(self):
|
def items(self):
|
||||||
url = "{}/{}/res/{}.json".format(self.root, self.board, self.thread)
|
url = f"{self.root}/{self.groups[-2]}/res/{self.groups[-1]}.json"
|
||||||
thread = self.request(url).json()
|
thread = self.request_json(url)
|
||||||
thread["postId"] = thread["threadId"]
|
thread["postId"] = thread["threadId"]
|
||||||
posts = thread.pop("posts", ())
|
posts = thread.pop("posts", ())
|
||||||
|
|
||||||
yield Message.Directory, thread
|
yield Message.Directory, thread
|
||||||
for post in itertools.chain((thread,), posts):
|
for post in itertools.chain((thread,), posts):
|
||||||
files = post.pop("files", ())
|
if files := post.pop("files", ()):
|
||||||
if files:
|
|
||||||
thread.update(post)
|
thread.update(post)
|
||||||
for num, file in enumerate(files):
|
for num, file in enumerate(files):
|
||||||
file.update(thread)
|
file.update(thread)
|
||||||
@@ -73,14 +66,10 @@ class LynxchanBoardExtractor(LynxchanExtractor):
|
|||||||
pattern = BASE_PATTERN + r"/([^/?#]+)(?:/index|/catalog|/\d+|/?$)"
|
pattern = BASE_PATTERN + r"/([^/?#]+)(?:/index|/catalog|/\d+|/?$)"
|
||||||
example = "https://endchan.org/a/"
|
example = "https://endchan.org/a/"
|
||||||
|
|
||||||
def __init__(self, match):
|
|
||||||
LynxchanExtractor.__init__(self, match)
|
|
||||||
self.board = match[match.lastindex]
|
|
||||||
|
|
||||||
def items(self):
|
def items(self):
|
||||||
url = "{}/{}/catalog.json".format(self.root, self.board)
|
board = self.groups[-1]
|
||||||
for thread in self.request(url).json():
|
url = f"{self.root}/{board}/catalog.json"
|
||||||
url = "{}/{}/res/{}.html".format(
|
for thread in self.request_json(url):
|
||||||
self.root, self.board, thread["threadId"])
|
url = f"{self.root}/{board}/res/{thread['threadId']}.html"
|
||||||
thread["_extractor"] = LynxchanThreadExtractor
|
thread["_extractor"] = LynxchanThreadExtractor
|
||||||
yield Message.Queue, url, thread
|
yield Message.Queue, url, thread
|
||||||
|
|||||||
@@ -22,7 +22,7 @@ class MastodonExtractor(BaseExtractor):
|
|||||||
|
|
||||||
def __init__(self, match):
|
def __init__(self, match):
|
||||||
BaseExtractor.__init__(self, match)
|
BaseExtractor.__init__(self, match)
|
||||||
self.item = match[match.lastindex]
|
self.item = self.groups[-1]
|
||||||
|
|
||||||
def _init(self):
|
def _init(self):
|
||||||
self.instance = self.root.partition("://")[2]
|
self.instance = self.root.partition("://")[2]
|
||||||
|
|||||||
@@ -20,7 +20,7 @@ class MisskeyExtractor(BaseExtractor):
|
|||||||
|
|
||||||
def __init__(self, match):
|
def __init__(self, match):
|
||||||
BaseExtractor.__init__(self, match)
|
BaseExtractor.__init__(self, match)
|
||||||
self.item = match[match.lastindex]
|
self.item = self.groups[-1]
|
||||||
|
|
||||||
def _init(self):
|
def _init(self):
|
||||||
self.api = MisskeyAPI(self)
|
self.api = MisskeyAPI(self)
|
||||||
@@ -110,7 +110,7 @@ class MisskeyUserExtractor(Dispatch, MisskeyExtractor):
|
|||||||
example = "https://misskey.io/@USER"
|
example = "https://misskey.io/@USER"
|
||||||
|
|
||||||
def items(self):
|
def items(self):
|
||||||
base = "{}/@{}/".format(self.root, self.item)
|
base = f"{self.root}/@{self.item}/"
|
||||||
return self._dispatch_extractors((
|
return self._dispatch_extractors((
|
||||||
(MisskeyInfoExtractor , base + "info"),
|
(MisskeyInfoExtractor , base + "info"),
|
||||||
(MisskeyAvatarExtractor , base + "avatar"),
|
(MisskeyAvatarExtractor , base + "avatar"),
|
||||||
@@ -174,10 +174,9 @@ class MisskeyFollowingExtractor(MisskeyExtractor):
|
|||||||
user_id = self.api.user_id_by_username(self.item)
|
user_id = self.api.user_id_by_username(self.item)
|
||||||
for user in self.api.users_following(user_id):
|
for user in self.api.users_following(user_id):
|
||||||
user = user["followee"]
|
user = user["followee"]
|
||||||
url = self.root + "/@" + user["username"]
|
url = f"{self.root}/@{user['username']}"
|
||||||
host = user["host"]
|
if (host := user["host"]) is not None:
|
||||||
if host is not None:
|
url = f"{url}@{host}"
|
||||||
url += "@" + host
|
|
||||||
user["_extractor"] = MisskeyUserExtractor
|
user["_extractor"] = MisskeyUserExtractor
|
||||||
yield Message.Queue, url, user
|
yield Message.Queue, url, user
|
||||||
|
|
||||||
@@ -248,7 +247,7 @@ class MisskeyAPI():
|
|||||||
return self._pagination(endpoint, data)
|
return self._pagination(endpoint, data)
|
||||||
|
|
||||||
def _call(self, endpoint, data):
|
def _call(self, endpoint, data):
|
||||||
url = self.root + "/api" + endpoint
|
url = f"{self.root}/api{endpoint}"
|
||||||
return self.extractor.request_json(url, method="POST", json=data)
|
return self.extractor.request_json(url, method="POST", json=data)
|
||||||
|
|
||||||
def _pagination(self, endpoint, data):
|
def _pagination(self, endpoint, data):
|
||||||
|
|||||||
@@ -24,8 +24,8 @@ class MoebooruExtractor(BooruExtractor):
|
|||||||
post["date"] = text.parse_timestamp(post["created_at"])
|
post["date"] = text.parse_timestamp(post["created_at"])
|
||||||
|
|
||||||
def _html(self, post):
|
def _html(self, post):
|
||||||
return self.request("{}/post/show/{}".format(
|
url = f"{self.root}/post/show/{post['id']}"
|
||||||
self.root, post["id"])).text
|
return self.request(url).text
|
||||||
|
|
||||||
def _tags(self, post, page):
|
def _tags(self, post, page):
|
||||||
tag_container = text.extr(page, '<ul id="tag-', '</ul>')
|
tag_container = text.extr(page, '<ul id="tag-', '</ul>')
|
||||||
@@ -61,7 +61,7 @@ class MoebooruExtractor(BooruExtractor):
|
|||||||
params["limit"] = self.per_page
|
params["limit"] = self.per_page
|
||||||
|
|
||||||
while True:
|
while True:
|
||||||
posts = self.request(url, params=params).json()
|
posts = self.request_json(url, params=params)
|
||||||
yield from posts
|
yield from posts
|
||||||
|
|
||||||
if len(posts) < self.per_page:
|
if len(posts) < self.per_page:
|
||||||
@@ -98,15 +98,14 @@ class MoebooruTagExtractor(MoebooruExtractor):
|
|||||||
|
|
||||||
def __init__(self, match):
|
def __init__(self, match):
|
||||||
MoebooruExtractor.__init__(self, match)
|
MoebooruExtractor.__init__(self, match)
|
||||||
tags = match[match.lastindex]
|
self.tags = text.unquote(self.groups[-1].replace("+", " "))
|
||||||
self.tags = text.unquote(tags.replace("+", " "))
|
|
||||||
|
|
||||||
def metadata(self):
|
def metadata(self):
|
||||||
return {"search_tags": self.tags}
|
return {"search_tags": self.tags}
|
||||||
|
|
||||||
def posts(self):
|
def posts(self):
|
||||||
params = {"tags": self.tags}
|
params = {"tags": self.tags}
|
||||||
return self._pagination(self.root + "/post.json", params)
|
return self._pagination(f"{self.root}/post.json", params)
|
||||||
|
|
||||||
|
|
||||||
class MoebooruPoolExtractor(MoebooruExtractor):
|
class MoebooruPoolExtractor(MoebooruExtractor):
|
||||||
@@ -118,12 +117,12 @@ class MoebooruPoolExtractor(MoebooruExtractor):
|
|||||||
|
|
||||||
def __init__(self, match):
|
def __init__(self, match):
|
||||||
MoebooruExtractor.__init__(self, match)
|
MoebooruExtractor.__init__(self, match)
|
||||||
self.pool_id = match[match.lastindex]
|
self.pool_id = self.groups[-1]
|
||||||
|
|
||||||
def metadata(self):
|
def metadata(self):
|
||||||
if self.config("metadata"):
|
if self.config("metadata"):
|
||||||
url = "{}/pool/show/{}.json".format(self.root, self.pool_id)
|
url = f"{self.root}/pool/show/{self.pool_id}.json"
|
||||||
pool = self.request(url).json()
|
pool = self.request_json(url)
|
||||||
pool["name"] = pool["name"].replace("_", " ")
|
pool["name"] = pool["name"].replace("_", " ")
|
||||||
pool.pop("posts", None)
|
pool.pop("posts", None)
|
||||||
return {"pool": pool}
|
return {"pool": pool}
|
||||||
@@ -131,7 +130,7 @@ class MoebooruPoolExtractor(MoebooruExtractor):
|
|||||||
|
|
||||||
def posts(self):
|
def posts(self):
|
||||||
params = {"tags": "pool:" + self.pool_id}
|
params = {"tags": "pool:" + self.pool_id}
|
||||||
return self._pagination(self.root + "/post.json", params)
|
return self._pagination(f"{self.root}/post.json", params)
|
||||||
|
|
||||||
|
|
||||||
class MoebooruPostExtractor(MoebooruExtractor):
|
class MoebooruPostExtractor(MoebooruExtractor):
|
||||||
@@ -140,13 +139,9 @@ class MoebooruPostExtractor(MoebooruExtractor):
|
|||||||
pattern = BASE_PATTERN + r"/post/show/(\d+)"
|
pattern = BASE_PATTERN + r"/post/show/(\d+)"
|
||||||
example = "https://yande.re/post/show/12345"
|
example = "https://yande.re/post/show/12345"
|
||||||
|
|
||||||
def __init__(self, match):
|
|
||||||
MoebooruExtractor.__init__(self, match)
|
|
||||||
self.post_id = match[match.lastindex]
|
|
||||||
|
|
||||||
def posts(self):
|
def posts(self):
|
||||||
params = {"tags": "id:" + self.post_id}
|
params = {"tags": "id:" + self.groups[-1]}
|
||||||
return self.request(self.root + "/post.json", params=params).json()
|
return self.request_json(f"{self.root}/post.json", params=params)
|
||||||
|
|
||||||
|
|
||||||
class MoebooruPopularExtractor(MoebooruExtractor):
|
class MoebooruPopularExtractor(MoebooruExtractor):
|
||||||
@@ -159,8 +154,8 @@ class MoebooruPopularExtractor(MoebooruExtractor):
|
|||||||
|
|
||||||
def __init__(self, match):
|
def __init__(self, match):
|
||||||
MoebooruExtractor.__init__(self, match)
|
MoebooruExtractor.__init__(self, match)
|
||||||
self.scale = match[match.lastindex-1]
|
self.scale = self.groups[-2]
|
||||||
self.query = match[match.lastindex]
|
self.query = self.groups[-1]
|
||||||
|
|
||||||
def metadata(self):
|
def metadata(self):
|
||||||
self.params = params = text.parse_query(self.query)
|
self.params = params = text.parse_query(self.query)
|
||||||
@@ -186,5 +181,5 @@ class MoebooruPopularExtractor(MoebooruExtractor):
|
|||||||
return {"date": date, "scale": scale}
|
return {"date": date, "scale": scale}
|
||||||
|
|
||||||
def posts(self):
|
def posts(self):
|
||||||
url = "{}/post/popular_{}.json".format(self.root, self.scale)
|
url = f"{self.root}/post/popular_{self.scale}.json"
|
||||||
return self.request(url, params=self.params).json()
|
return self.request_json(url, params=self.params)
|
||||||
|
|||||||
@@ -23,7 +23,7 @@ class NijieExtractor(AsynchronousMixin, BaseExtractor):
|
|||||||
|
|
||||||
def __init__(self, match):
|
def __init__(self, match):
|
||||||
BaseExtractor.__init__(self, match)
|
BaseExtractor.__init__(self, match)
|
||||||
self.user_id = text.parse_int(match[match.lastindex])
|
self.user_id = text.parse_int(self.groups[-1])
|
||||||
|
|
||||||
def initialize(self):
|
def initialize(self):
|
||||||
self.cookies_domain = "." + self.root.rpartition("/")[2]
|
self.cookies_domain = "." + self.root.rpartition("/")[2]
|
||||||
@@ -294,9 +294,5 @@ class NijieImageExtractor(NijieExtractor):
|
|||||||
pattern = BASE_PATTERN + r"/view(?:_popup)?\.php\?id=(\d+)"
|
pattern = BASE_PATTERN + r"/view(?:_popup)?\.php\?id=(\d+)"
|
||||||
example = "https://nijie.info/view.php?id=12345"
|
example = "https://nijie.info/view.php?id=12345"
|
||||||
|
|
||||||
def __init__(self, match):
|
|
||||||
NijieExtractor.__init__(self, match)
|
|
||||||
self.image_id = match[match.lastindex]
|
|
||||||
|
|
||||||
def image_ids(self):
|
def image_ids(self):
|
||||||
return (self.image_id,)
|
return (self.groups[-1],)
|
||||||
|
|||||||
@@ -24,9 +24,8 @@ class NitterExtractor(BaseExtractor):
|
|||||||
self.cookies_domain = self.root.partition("://")[2]
|
self.cookies_domain = self.root.partition("://")[2]
|
||||||
BaseExtractor.__init__(self, match)
|
BaseExtractor.__init__(self, match)
|
||||||
|
|
||||||
lastindex = match.lastindex
|
self.user = self.groups[-2]
|
||||||
self.user = match[lastindex]
|
self.user_id = self.groups[-1]
|
||||||
self.user_id = match[lastindex + 1]
|
|
||||||
self.user_obj = None
|
self.user_obj = None
|
||||||
|
|
||||||
def items(self):
|
def items(self):
|
||||||
|
|||||||
@@ -65,12 +65,8 @@ class PhilomenaPostExtractor(PhilomenaExtractor):
|
|||||||
pattern = BASE_PATTERN + r"/(?:images/)?(\d+)"
|
pattern = BASE_PATTERN + r"/(?:images/)?(\d+)"
|
||||||
example = "https://derpibooru.org/images/12345"
|
example = "https://derpibooru.org/images/12345"
|
||||||
|
|
||||||
def __init__(self, match):
|
|
||||||
PhilomenaExtractor.__init__(self, match)
|
|
||||||
self.image_id = match[match.lastindex]
|
|
||||||
|
|
||||||
def posts(self):
|
def posts(self):
|
||||||
return (self.api.image(self.image_id),)
|
return (self.api.image(self.groups[-1]),)
|
||||||
|
|
||||||
|
|
||||||
class PhilomenaSearchExtractor(PhilomenaExtractor):
|
class PhilomenaSearchExtractor(PhilomenaExtractor):
|
||||||
@@ -82,9 +78,9 @@ class PhilomenaSearchExtractor(PhilomenaExtractor):
|
|||||||
|
|
||||||
def __init__(self, match):
|
def __init__(self, match):
|
||||||
PhilomenaExtractor.__init__(self, match)
|
PhilomenaExtractor.__init__(self, match)
|
||||||
groups = match.groups()
|
|
||||||
if groups[-1]:
|
if q := self.groups[-1]:
|
||||||
q = groups[-1].replace("+", " ")
|
q = q.replace("+", " ")
|
||||||
for old, new in (
|
for old, new in (
|
||||||
("-colon-" , ":"),
|
("-colon-" , ":"),
|
||||||
("-dash-" , "-"),
|
("-dash-" , "-"),
|
||||||
@@ -97,7 +93,7 @@ class PhilomenaSearchExtractor(PhilomenaExtractor):
|
|||||||
q = q.replace(old, new)
|
q = q.replace(old, new)
|
||||||
self.params = {"q": text.unquote(text.unquote(q))}
|
self.params = {"q": text.unquote(text.unquote(q))}
|
||||||
else:
|
else:
|
||||||
self.params = text.parse_query(groups[-2])
|
self.params = text.parse_query(self.groups[-2])
|
||||||
|
|
||||||
def metadata(self):
|
def metadata(self):
|
||||||
return {"search_tags": self.params.get("q", "")}
|
return {"search_tags": self.params.get("q", "")}
|
||||||
@@ -114,18 +110,14 @@ class PhilomenaGalleryExtractor(PhilomenaExtractor):
|
|||||||
pattern = BASE_PATTERN + r"/galleries/(\d+)"
|
pattern = BASE_PATTERN + r"/galleries/(\d+)"
|
||||||
example = "https://derpibooru.org/galleries/12345"
|
example = "https://derpibooru.org/galleries/12345"
|
||||||
|
|
||||||
def __init__(self, match):
|
|
||||||
PhilomenaExtractor.__init__(self, match)
|
|
||||||
self.gallery_id = match[match.lastindex]
|
|
||||||
|
|
||||||
def metadata(self):
|
def metadata(self):
|
||||||
try:
|
try:
|
||||||
return {"gallery": self.api.gallery(self.gallery_id)}
|
return {"gallery": self.api.gallery(self.groups[-1])}
|
||||||
except IndexError:
|
except IndexError:
|
||||||
raise exception.NotFoundError("gallery")
|
raise exception.NotFoundError("gallery")
|
||||||
|
|
||||||
def posts(self):
|
def posts(self):
|
||||||
gallery_id = "gallery_id:" + self.gallery_id
|
gallery_id = f"gallery_id:{self.groups[-1]}"
|
||||||
params = {"sd": "desc", "sf": gallery_id, "q": gallery_id}
|
params = {"sd": "desc", "sf": gallery_id, "q": gallery_id}
|
||||||
return self.api.search(params)
|
return self.api.search(params)
|
||||||
|
|
||||||
|
|||||||
@@ -176,7 +176,7 @@ class ReactorTagExtractor(ReactorExtractor):
|
|||||||
|
|
||||||
def __init__(self, match):
|
def __init__(self, match):
|
||||||
ReactorExtractor.__init__(self, match)
|
ReactorExtractor.__init__(self, match)
|
||||||
self.tag = match[match.lastindex]
|
self.tag = self.groups[-1]
|
||||||
|
|
||||||
def metadata(self):
|
def metadata(self):
|
||||||
return {"search_tags": text.unescape(self.tag).replace("+", " ")}
|
return {"search_tags": text.unescape(self.tag).replace("+", " ")}
|
||||||
@@ -192,7 +192,7 @@ class ReactorSearchExtractor(ReactorExtractor):
|
|||||||
|
|
||||||
def __init__(self, match):
|
def __init__(self, match):
|
||||||
ReactorExtractor.__init__(self, match)
|
ReactorExtractor.__init__(self, match)
|
||||||
self.tag = match[match.lastindex]
|
self.tag = self.groups[-1]
|
||||||
|
|
||||||
def metadata(self):
|
def metadata(self):
|
||||||
return {"search_tags": text.unescape(self.tag).replace("+", " ")}
|
return {"search_tags": text.unescape(self.tag).replace("+", " ")}
|
||||||
@@ -207,7 +207,7 @@ class ReactorUserExtractor(ReactorExtractor):
|
|||||||
|
|
||||||
def __init__(self, match):
|
def __init__(self, match):
|
||||||
ReactorExtractor.__init__(self, match)
|
ReactorExtractor.__init__(self, match)
|
||||||
self.user = match[match.lastindex]
|
self.user = self.groups[-1]
|
||||||
|
|
||||||
def metadata(self):
|
def metadata(self):
|
||||||
return {"user": text.unescape(self.user).replace("+", " ")}
|
return {"user": text.unescape(self.user).replace("+", " ")}
|
||||||
@@ -221,7 +221,7 @@ class ReactorPostExtractor(ReactorExtractor):
|
|||||||
|
|
||||||
def __init__(self, match):
|
def __init__(self, match):
|
||||||
ReactorExtractor.__init__(self, match)
|
ReactorExtractor.__init__(self, match)
|
||||||
self.post_id = match[match.lastindex]
|
self.post_id = self.groups[-1]
|
||||||
|
|
||||||
def items(self):
|
def items(self):
|
||||||
post = self.request(self.root + self.path).text
|
post = self.request(self.root + self.path).text
|
||||||
|
|||||||
@@ -18,10 +18,6 @@ class ShopifyExtractor(BaseExtractor):
|
|||||||
filename_fmt = "{product[title]}_{num:>02}_{id}.{extension}"
|
filename_fmt = "{product[title]}_{num:>02}_{id}.{extension}"
|
||||||
archive_fmt = "{id}"
|
archive_fmt = "{id}"
|
||||||
|
|
||||||
def __init__(self, match):
|
|
||||||
BaseExtractor.__init__(self, match)
|
|
||||||
self.item_url = self.root + match[match.lastindex]
|
|
||||||
|
|
||||||
def items(self):
|
def items(self):
|
||||||
data = self.metadata()
|
data = self.metadata()
|
||||||
yield Message.Directory, data
|
yield Message.Directory, data
|
||||||
@@ -98,14 +94,15 @@ class ShopifyCollectionExtractor(ShopifyExtractor):
|
|||||||
example = "https://www.fashionnova.com/collections/TITLE"
|
example = "https://www.fashionnova.com/collections/TITLE"
|
||||||
|
|
||||||
def metadata(self):
|
def metadata(self):
|
||||||
return self.request(self.item_url + ".json").json()
|
url = f"{self.root}{self.groups[-1]}.json"
|
||||||
|
return self.request_json(url)
|
||||||
|
|
||||||
def products(self):
|
def products(self):
|
||||||
url = self.item_url + "/products.json"
|
url = f"{self.root}{self.groups[-1]}/products.json"
|
||||||
params = {"page": 1}
|
params = {"page": 1}
|
||||||
|
|
||||||
while True:
|
while True:
|
||||||
data = self.request(url, params=params).json()["products"]
|
data = self.request_json(url, params=params)["products"]
|
||||||
if not data:
|
if not data:
|
||||||
return
|
return
|
||||||
yield from data
|
yield from data
|
||||||
@@ -120,6 +117,7 @@ class ShopifyProductExtractor(ShopifyExtractor):
|
|||||||
example = "https://www.fashionnova.com/collections/TITLE/products/NAME"
|
example = "https://www.fashionnova.com/collections/TITLE/products/NAME"
|
||||||
|
|
||||||
def products(self):
|
def products(self):
|
||||||
product = self.request(self.item_url + ".json").json()["product"]
|
url = f"{self.root}{self.groups[-1]}.json"
|
||||||
|
product = self.request_json(url)["product"]
|
||||||
del product["image"]
|
del product["image"]
|
||||||
return (product,)
|
return (product,)
|
||||||
|
|||||||
@@ -38,21 +38,18 @@ class VichanThreadExtractor(VichanExtractor):
|
|||||||
pattern = BASE_PATTERN + r"/([^/?#]+)/res/(\d+)"
|
pattern = BASE_PATTERN + r"/([^/?#]+)/res/(\d+)"
|
||||||
example = "https://8kun.top/a/res/12345.html"
|
example = "https://8kun.top/a/res/12345.html"
|
||||||
|
|
||||||
def __init__(self, match):
|
|
||||||
VichanExtractor.__init__(self, match)
|
|
||||||
index = match.lastindex
|
|
||||||
self.board = match[index-1]
|
|
||||||
self.thread = match[index]
|
|
||||||
|
|
||||||
def items(self):
|
def items(self):
|
||||||
url = "{}/{}/res/{}.json".format(self.root, self.board, self.thread)
|
board = self.groups[-2]
|
||||||
posts = self.request(url).json()["posts"]
|
thread = self.groups[-1]
|
||||||
|
url = f"{self.root}/{board}/res/{thread}.json"
|
||||||
|
posts = self.request_json(url)["posts"]
|
||||||
|
|
||||||
title = posts[0].get("sub") or text.remove_html(posts[0]["com"])
|
title = posts[0].get("sub") or text.remove_html(posts[0]["com"])
|
||||||
process = (self._process_8kun if self.category == "8kun" else
|
process = (self._process_8kun if self.category == "8kun" else
|
||||||
self._process)
|
self._process)
|
||||||
data = {
|
data = {
|
||||||
"board" : self.board,
|
"board" : board,
|
||||||
"thread": self.thread,
|
"thread": thread,
|
||||||
"title" : text.unescape(title)[:50],
|
"title" : text.unescape(title)[:50],
|
||||||
"num" : 0,
|
"num" : 0,
|
||||||
}
|
}
|
||||||
@@ -68,24 +65,25 @@ class VichanThreadExtractor(VichanExtractor):
|
|||||||
|
|
||||||
def _process(self, post, data):
|
def _process(self, post, data):
|
||||||
post.update(data)
|
post.update(data)
|
||||||
post["extension"] = post["ext"][1:]
|
ext = post["ext"]
|
||||||
post["url"] = "{}/{}/src/{}{}".format(
|
post["extension"] = ext[1:]
|
||||||
self.root, post["board"], post["tim"], post["ext"])
|
post["url"] = url = \
|
||||||
return Message.Url, post["url"], post
|
f"{self.root}/{post['board']}/src/{post['tim']}{ext}"
|
||||||
|
return Message.Url, url, post
|
||||||
|
|
||||||
def _process_8kun(self, post, data):
|
def _process_8kun(self, post, data):
|
||||||
post.update(data)
|
post.update(data)
|
||||||
post["extension"] = post["ext"][1:]
|
ext = post["ext"]
|
||||||
|
|
||||||
tim = post["tim"]
|
tim = post["tim"]
|
||||||
if len(tim) > 16:
|
|
||||||
post["url"] = "https://media.128ducks.com/file_store/{}{}".format(
|
|
||||||
tim, post["ext"])
|
|
||||||
else:
|
|
||||||
post["url"] = "https://media.128ducks.com/{}/src/{}{}".format(
|
|
||||||
post["board"], tim, post["ext"])
|
|
||||||
|
|
||||||
return Message.Url, post["url"], post
|
if len(tim) > 16:
|
||||||
|
url = f"https://media.128ducks.com/file_store/{tim}{ext}"
|
||||||
|
else:
|
||||||
|
url = f"https://media.128ducks.com/{post['board']}/src/{tim}{ext}"
|
||||||
|
|
||||||
|
post["url"] = url
|
||||||
|
post["extension"] = ext[1:]
|
||||||
|
return Message.Url, url, post
|
||||||
|
|
||||||
|
|
||||||
class VichanBoardExtractor(VichanExtractor):
|
class VichanBoardExtractor(VichanExtractor):
|
||||||
@@ -94,18 +92,14 @@ class VichanBoardExtractor(VichanExtractor):
|
|||||||
pattern = BASE_PATTERN + r"/([^/?#]+)(?:/index|/catalog|/\d+|/?$)"
|
pattern = BASE_PATTERN + r"/([^/?#]+)(?:/index|/catalog|/\d+|/?$)"
|
||||||
example = "https://8kun.top/a/"
|
example = "https://8kun.top/a/"
|
||||||
|
|
||||||
def __init__(self, match):
|
|
||||||
VichanExtractor.__init__(self, match)
|
|
||||||
self.board = match[match.lastindex]
|
|
||||||
|
|
||||||
def items(self):
|
def items(self):
|
||||||
url = "{}/{}/threads.json".format(self.root, self.board)
|
board = self.groups[-1]
|
||||||
threads = self.request(url).json()
|
url = f"{self.root}/{board}/threads.json"
|
||||||
|
threads = self.request_json(url)
|
||||||
|
|
||||||
for page in threads:
|
for page in threads:
|
||||||
for thread in page["threads"]:
|
for thread in page["threads"]:
|
||||||
url = "{}/{}/res/{}.html".format(
|
url = f"{self.root}/{board}/res/{thread['no']}.html"
|
||||||
self.root, self.board, thread["no"])
|
|
||||||
thread["page"] = page["page"]
|
thread["page"] = page["page"]
|
||||||
thread["_extractor"] = VichanThreadExtractor
|
thread["_extractor"] = VichanThreadExtractor
|
||||||
yield Message.Queue, url, thread
|
yield Message.Queue, url, thread
|
||||||
|
|||||||
Reference in New Issue
Block a user