Merge branch '1.17.0'
This commit is contained in:
@@ -1,6 +1,6 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
# Copyright 2020 Mike Fährmann
|
||||
# Copyright 2015-2021 Mike Fährmann
|
||||
#
|
||||
# This program is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License version 2 as
|
||||
@@ -8,7 +8,7 @@
|
||||
|
||||
"""Extractors for *booru sites"""
|
||||
|
||||
from .common import Extractor, Message, generate_extractors
|
||||
from .common import BaseExtractor, Message
|
||||
from .. import text, util, exception
|
||||
|
||||
from xml.etree import ElementTree
|
||||
@@ -17,7 +17,7 @@ import operator
|
||||
import re
|
||||
|
||||
|
||||
class BooruExtractor(Extractor):
|
||||
class BooruExtractor(BaseExtractor):
|
||||
"""Base class for *booru extractors"""
|
||||
basecategory = "booru"
|
||||
filename_fmt = "{category}_{id}_{md5}.{extension}"
|
||||
@@ -104,14 +104,55 @@ class BooruExtractor(Extractor):
|
||||
params["pid"] += 1
|
||||
|
||||
|
||||
BASE_PATTERN = BooruExtractor.update({
|
||||
"rule34": {
|
||||
"root": "https://rule34.xxx",
|
||||
},
|
||||
"safebooru": {
|
||||
"root": "https://safebooru.org",
|
||||
},
|
||||
"realbooru": {
|
||||
"root": "https://realbooru.com",
|
||||
},
|
||||
})
|
||||
|
||||
|
||||
class BooruPostExtractor(BooruExtractor):
|
||||
subcategory = "post"
|
||||
archive_fmt = "{id}"
|
||||
pattern_fmt = r"/index\.php\?page=post&s=view&id=(\d+)"
|
||||
pattern = BASE_PATTERN + r"/index\.php\?page=post&s=view&id=(\d+)"
|
||||
test = (
|
||||
("https://rule34.xxx/index.php?page=post&s=view&id=1995545", {
|
||||
"content": "97e4bbf86c3860be18de384d02d544251afe1d45",
|
||||
"options": (("tags", True),),
|
||||
"keyword": {
|
||||
"tags_artist": "danraku",
|
||||
"tags_character": "kashima_(kantai_collection)",
|
||||
"tags_copyright": "kantai_collection",
|
||||
"tags_general": str,
|
||||
"tags_metadata": str,
|
||||
},
|
||||
}),
|
||||
("https://safebooru.org/index.php?page=post&s=view&id=1169132", {
|
||||
"url": "cf05e37a3c62b2d55788e2080b8eabedb00f999b",
|
||||
"content": "93b293b27dabd198afafabbaf87c49863ac82f27",
|
||||
"options": (("tags", True),),
|
||||
"keyword": {
|
||||
"tags_artist": "kawanakajima",
|
||||
"tags_character": "heath_ledger ronald_mcdonald the_joker",
|
||||
"tags_copyright": "dc_comics mcdonald's the_dark_knight",
|
||||
"tags_general": str,
|
||||
},
|
||||
}),
|
||||
("https://realbooru.com/index.php?page=post&s=view&id=668483", {
|
||||
"url": "2421b5b0e15d5e20f9067090a8b0fd4114d3e7d9",
|
||||
"content": "7f5873ce3b6cd295ea2e81fcb49583098ea9c8da",
|
||||
}),
|
||||
)
|
||||
|
||||
def __init__(self, match):
|
||||
BooruExtractor.__init__(self, match)
|
||||
self.post_id = match.group(1)
|
||||
self.post_id = match.group(match.lastindex)
|
||||
|
||||
def posts(self):
|
||||
return self._pagination({"id": self.post_id})
|
||||
@@ -121,11 +162,26 @@ class BooruTagExtractor(BooruExtractor):
|
||||
subcategory = "tag"
|
||||
directory_fmt = ("{category}", "{search_tags}")
|
||||
archive_fmt = "t_{search_tags}_{id}"
|
||||
pattern_fmt = r"/index\.php\?page=post&s=list&tags=([^&#]+)"
|
||||
pattern = BASE_PATTERN + r"/index\.php\?page=post&s=list&tags=([^&#]+)"
|
||||
test = (
|
||||
("https://rule34.xxx/index.php?page=post&s=list&tags=danraku", {
|
||||
"content": "97e4bbf86c3860be18de384d02d544251afe1d45",
|
||||
"pattern": r"https?://.*rule34\.xxx/images/\d+/[0-9a-f]+\.jpg",
|
||||
"count": 1,
|
||||
}),
|
||||
("https://safebooru.org/index.php?page=post&s=list&tags=bonocho", {
|
||||
"url": "17c61b386530cf4c30842c9f580d15ef1cd09586",
|
||||
"content": "e5ad4c5bf241b1def154958535bef6c2f6b733eb",
|
||||
}),
|
||||
("https://realbooru.com/index.php?page=post&s=list&tags=wine", {
|
||||
"count": ">= 64",
|
||||
}),
|
||||
)
|
||||
|
||||
def __init__(self, match):
|
||||
BooruExtractor.__init__(self, match)
|
||||
self.tags = text.unquote(match.group(1).replace("+", " "))
|
||||
tags = match.group(match.lastindex)
|
||||
self.tags = text.unquote(tags.replace("+", " "))
|
||||
|
||||
def metadata(self):
|
||||
return {"search_tags": self.tags}
|
||||
@@ -138,11 +194,22 @@ class BooruPoolExtractor(BooruExtractor):
|
||||
subcategory = "pool"
|
||||
directory_fmt = ("{category}", "pool", "{pool}")
|
||||
archive_fmt = "p_{pool}_{id}"
|
||||
pattern_fmt = r"/index\.php\?page=pool&s=show&id=(\d+)"
|
||||
pattern = BASE_PATTERN + r"/index\.php\?page=pool&s=show&id=(\d+)"
|
||||
test = (
|
||||
("https://rule34.xxx/index.php?page=pool&s=show&id=179", {
|
||||
"count": 3,
|
||||
}),
|
||||
("https://safebooru.org/index.php?page=pool&s=show&id=11", {
|
||||
"count": 5,
|
||||
}),
|
||||
("https://realbooru.com/index.php?page=pool&s=show&id=1", {
|
||||
"count": 3,
|
||||
}),
|
||||
)
|
||||
|
||||
def __init__(self, match):
|
||||
BooruExtractor.__init__(self, match)
|
||||
self.pool_id = match.group(1)
|
||||
self.pool_id = match.group(match.lastindex)
|
||||
self.post_ids = ()
|
||||
|
||||
def skip(self, num):
|
||||
@@ -170,87 +237,3 @@ class BooruPoolExtractor(BooruExtractor):
|
||||
for params["id"] in util.advance(self.post_ids, self.page_start):
|
||||
for post in self._api_request(params):
|
||||
yield post.attrib
|
||||
|
||||
|
||||
EXTRACTORS = {
|
||||
"rule34": {
|
||||
"root": "https://rule34.xxx",
|
||||
"test-tag": (
|
||||
("https://rule34.xxx/index.php?page=post&s=list&tags=danraku", {
|
||||
"content": "97e4bbf86c3860be18de384d02d544251afe1d45",
|
||||
"pattern": r"https?://.*rule34\.xxx/images/\d+/[0-9a-f]+\.jpg",
|
||||
"count": 1,
|
||||
}),
|
||||
),
|
||||
"test-pool": (
|
||||
("https://rule34.xxx/index.php?page=pool&s=show&id=179", {
|
||||
"count": 3,
|
||||
}),
|
||||
),
|
||||
"test-post": (
|
||||
("https://rule34.xxx/index.php?page=post&s=view&id=1995545", {
|
||||
"content": "97e4bbf86c3860be18de384d02d544251afe1d45",
|
||||
"options": (("tags", True),),
|
||||
"keyword": {
|
||||
"tags_artist": "danraku",
|
||||
"tags_character": "kashima_(kantai_collection)",
|
||||
"tags_copyright": "kantai_collection",
|
||||
"tags_general": str,
|
||||
"tags_metadata": str,
|
||||
},
|
||||
}),
|
||||
),
|
||||
},
|
||||
"safebooru": {
|
||||
"root": "https://safebooru.org",
|
||||
"test-tag": (
|
||||
("https://safebooru.org/index.php?page=post&s=list&tags=bonocho", {
|
||||
"url": "17c61b386530cf4c30842c9f580d15ef1cd09586",
|
||||
"content": "e5ad4c5bf241b1def154958535bef6c2f6b733eb",
|
||||
}),
|
||||
),
|
||||
"test-pool": (
|
||||
("https://safebooru.org/index.php?page=pool&s=show&id=11", {
|
||||
"count": 5,
|
||||
}),
|
||||
),
|
||||
"test-post": (
|
||||
("https://safebooru.org/index.php?page=post&s=view&id=1169132", {
|
||||
"url": "cf05e37a3c62b2d55788e2080b8eabedb00f999b",
|
||||
"content": "93b293b27dabd198afafabbaf87c49863ac82f27",
|
||||
"options": (("tags", True),),
|
||||
"keyword": {
|
||||
"tags_artist": "kawanakajima",
|
||||
"tags_character": "heath_ledger ronald_mcdonald the_joker",
|
||||
"tags_copyright": "dc_comics mcdonald's the_dark_knight",
|
||||
"tags_general": str,
|
||||
},
|
||||
}),
|
||||
),
|
||||
},
|
||||
"realbooru": {
|
||||
"root": "https://realbooru.com",
|
||||
"test-tag": (
|
||||
("https://realbooru.com/index.php?page=post&s=list&tags=wine", {
|
||||
"count": ">= 64",
|
||||
}),
|
||||
),
|
||||
"test-pool": (
|
||||
("https://realbooru.com/index.php?page=pool&s=show&id=1", {
|
||||
"count": 3,
|
||||
}),
|
||||
),
|
||||
"test-post": (
|
||||
("https://realbooru.com/index.php?page=post&s=view&id=668483", {
|
||||
"url": "2421b5b0e15d5e20f9067090a8b0fd4114d3e7d9",
|
||||
"content": "7f5873ce3b6cd295ea2e81fcb49583098ea9c8da",
|
||||
}),
|
||||
),
|
||||
},
|
||||
}
|
||||
|
||||
generate_extractors(EXTRACTORS, globals(), (
|
||||
BooruTagExtractor,
|
||||
BooruPoolExtractor,
|
||||
BooruPostExtractor,
|
||||
))
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
# Copyright 2014-2020 Mike Fährmann
|
||||
# Copyright 2014-2021 Mike Fährmann
|
||||
#
|
||||
# This program is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License version 2 as
|
||||
@@ -527,46 +527,37 @@ class AsynchronousMixin():
|
||||
messages.put(None)
|
||||
|
||||
|
||||
def generate_extractors(extractor_data, symtable, classes):
|
||||
"""Dynamically generate Extractor classes"""
|
||||
extractors = config.get(("extractor",), classes[0].basecategory)
|
||||
ckey = extractor_data.get("_ckey")
|
||||
prev = None
|
||||
class BaseExtractor(Extractor):
|
||||
instances = None
|
||||
|
||||
if extractors:
|
||||
extractor_data.update(extractors)
|
||||
def __init__(self, match):
|
||||
if not self.category:
|
||||
for index, group in enumerate(match.groups()):
|
||||
if group is not None:
|
||||
self.category, self.root = self.instances[index]
|
||||
break
|
||||
Extractor.__init__(self, match)
|
||||
|
||||
for category, info in extractor_data.items():
|
||||
@classmethod
|
||||
def update(cls, instances):
|
||||
extra_instances = config.get(("extractor",), cls.basecategory)
|
||||
if extra_instances:
|
||||
for category, info in extra_instances.items():
|
||||
if isinstance(info, dict) and "root" in info:
|
||||
instances[category] = info
|
||||
|
||||
if not isinstance(info, dict) or "root" not in info:
|
||||
continue
|
||||
pattern_list = []
|
||||
instance_list = cls.instances = []
|
||||
for category, info in instances.items():
|
||||
root = info["root"]
|
||||
instance_list.append((category, root))
|
||||
|
||||
root = info["root"]
|
||||
domain = root[root.index(":") + 3:]
|
||||
pattern = info.get("pattern") or re.escape(domain)
|
||||
name = (info.get("name") or category).capitalize()
|
||||
pattern = info.get("pattern")
|
||||
if not pattern:
|
||||
pattern = re.escape(root[root.index(":") + 3:])
|
||||
pattern_list.append(pattern + "()")
|
||||
|
||||
for cls in classes:
|
||||
|
||||
class Extr(cls):
|
||||
pass
|
||||
Extr.__module__ = cls.__module__
|
||||
Extr.__name__ = Extr.__qualname__ = \
|
||||
name + cls.subcategory.capitalize() + "Extractor"
|
||||
Extr.__doc__ = \
|
||||
"Extractor for " + cls.subcategory + "s from " + domain
|
||||
Extr.category = category
|
||||
Extr.pattern = r"(?:https?://)?" + pattern + cls.pattern_fmt
|
||||
Extr.test = info.get("test-" + cls.subcategory)
|
||||
Extr.root = root
|
||||
|
||||
if "extra" in info:
|
||||
for key, value in info["extra"].items():
|
||||
setattr(Extr, key, value)
|
||||
if prev and ckey:
|
||||
setattr(Extr, ckey, prev)
|
||||
|
||||
symtable[Extr.__name__] = prev = Extr
|
||||
return r"(?:https?://)?(?:" + "|".join(pattern_list) + r")"
|
||||
|
||||
|
||||
# Undo automatic pyOpenSSL injection by requests
|
||||
|
||||
@@ -8,21 +8,21 @@
|
||||
|
||||
"""Extractors for 4chan archives based on FoolFuuka"""
|
||||
|
||||
from .common import Extractor, Message, generate_extractors
|
||||
from .common import BaseExtractor, Message
|
||||
from .. import text
|
||||
import itertools
|
||||
|
||||
|
||||
class FoolfuukaExtractor(Extractor):
|
||||
class FoolfuukaExtractor(BaseExtractor):
|
||||
"""Base extractor for FoolFuuka based boards/archives"""
|
||||
basecategory = "foolfuuka"
|
||||
archive_fmt = "{board[shortname]}_{num}_{timestamp}"
|
||||
external = "default"
|
||||
|
||||
def __init__(self, match):
|
||||
Extractor.__init__(self, match)
|
||||
BaseExtractor.__init__(self, match)
|
||||
self.session.headers["Referer"] = self.root
|
||||
if self.external == "direct":
|
||||
if self.category == "b4k":
|
||||
self.remote = self._remote_direct
|
||||
|
||||
def items(self):
|
||||
@@ -43,7 +43,7 @@ class FoolfuukaExtractor(Extractor):
|
||||
yield Message.Url, url, post
|
||||
|
||||
def metadata(self):
|
||||
""" """
|
||||
"""Return general metadata"""
|
||||
|
||||
def posts(self):
|
||||
"""Return an iterable with all relevant posts"""
|
||||
@@ -59,16 +59,90 @@ class FoolfuukaExtractor(Extractor):
|
||||
return media["remote_media_link"]
|
||||
|
||||
|
||||
BASE_PATTERN = FoolfuukaExtractor.update({
|
||||
"4plebs": {
|
||||
"root": "https://archive.4plebs.org",
|
||||
"pattern": r"(?:archive\.)?4plebs\.org",
|
||||
},
|
||||
"archivedmoe": {
|
||||
"root": "https://archived.moe",
|
||||
},
|
||||
"archiveofsins": {
|
||||
"root": "https://archiveofsins.com",
|
||||
"pattern": r"(?:www\.)?archiveofsins\.com",
|
||||
},
|
||||
"b4k": {
|
||||
"root": "https://arch.b4k.co",
|
||||
},
|
||||
"desuarchive": {
|
||||
"root": "https://desuarchive.org",
|
||||
},
|
||||
"fireden": {
|
||||
"root": "https://boards.fireden.net",
|
||||
},
|
||||
"nyafuu": {
|
||||
"root": "https://archive.nyafuu.org",
|
||||
"pattern": r"(?:archive\.)?nyafuu\.org",
|
||||
},
|
||||
"rbt": {
|
||||
"root": "https://rbt.asia",
|
||||
"pattern": r"(?:rbt\.asia|(?:archive\.)?rebeccablacktech\.com)",
|
||||
},
|
||||
"thebarchive": {
|
||||
"root": "https://thebarchive.com",
|
||||
"pattern": r"thebarchive\.com",
|
||||
},
|
||||
})
|
||||
|
||||
|
||||
class FoolfuukaThreadExtractor(FoolfuukaExtractor):
|
||||
"""Base extractor for threads on FoolFuuka based boards/archives"""
|
||||
subcategory = "thread"
|
||||
directory_fmt = ("{category}", "{board[shortname]}",
|
||||
"{thread_num}{title:? - //}")
|
||||
pattern_fmt = r"/([^/?#]+)/thread/(\d+)"
|
||||
pattern = BASE_PATTERN + r"/([^/?#]+)/thread/(\d+)"
|
||||
test = (
|
||||
("https://archive.4plebs.org/tg/thread/54059290", {
|
||||
"url": "07452944164b602502b02b24521f8cee5c484d2a",
|
||||
}),
|
||||
("https://archived.moe/gd/thread/309639/", {
|
||||
"url": "fdd533840e2d535abd162c02d6dfadbc12e2dcd8",
|
||||
"content": "c27e2a7be3bc989b5dd859f7789cc854db3f5573",
|
||||
}),
|
||||
("https://archived.moe/a/thread/159767162/", {
|
||||
"url": "ffec05a1a1b906b5ca85992513671c9155ee9e87",
|
||||
}),
|
||||
("https://archiveofsins.com/h/thread/4668813/", {
|
||||
"url": "f612d287087e10a228ef69517cf811539db9a102",
|
||||
"content": "0dd92d0d8a7bf6e2f7d1f5ac8954c1bcf18c22a4",
|
||||
}),
|
||||
("https://arch.b4k.co/meta/thread/196/", {
|
||||
"url": "d309713d2f838797096b3e9cb44fe514a9c9d07a",
|
||||
}),
|
||||
("https://desuarchive.org/a/thread/159542679/", {
|
||||
"url": "3ae1473f6916ac831efe5cc4d4e7d3298ce79406",
|
||||
}),
|
||||
("https://boards.fireden.net/sci/thread/11264294/", {
|
||||
"url": "3adfe181ee86a8c23021c705f623b3657a9b0a43",
|
||||
}),
|
||||
("https://archive.nyafuu.org/c/thread/2849220/", {
|
||||
"url": "bbe6f82944a45e359f5c8daf53f565913dc13e4f",
|
||||
}),
|
||||
("https://rbt.asia/g/thread/61487650/", {
|
||||
"url": "61896d9d9a2edb556b619000a308a984307b6d30",
|
||||
}),
|
||||
("https://archive.rebeccablacktech.com/g/thread/61487650/", {
|
||||
"url": "61896d9d9a2edb556b619000a308a984307b6d30",
|
||||
}),
|
||||
("https://thebarchive.com/b/thread/739772332/", {
|
||||
"url": "e8b18001307d130d67db31740ce57c8561b5d80c",
|
||||
}),
|
||||
)
|
||||
|
||||
def __init__(self, match):
|
||||
FoolfuukaExtractor.__init__(self, match)
|
||||
self.board, self.thread = match.groups()
|
||||
self.board = match.group(match.lastindex-1)
|
||||
self.thread = match.group(match.lastindex)
|
||||
self.data = None
|
||||
|
||||
def metadata(self):
|
||||
@@ -78,23 +152,34 @@ class FoolfuukaThreadExtractor(FoolfuukaExtractor):
|
||||
return self.data["op"]
|
||||
|
||||
def posts(self):
|
||||
op = (self.data["op"],)
|
||||
posts = self.data.get("posts")
|
||||
if posts:
|
||||
posts = list(posts.values())
|
||||
posts.sort(key=lambda p: p["timestamp"])
|
||||
else:
|
||||
posts = ()
|
||||
return itertools.chain((self.data["op"],), posts)
|
||||
return itertools.chain(op, posts)
|
||||
return op
|
||||
|
||||
|
||||
class FoolfuukaBoardExtractor(FoolfuukaExtractor):
|
||||
"""Base extractor for FoolFuuka based boards/archives"""
|
||||
subcategory = "board"
|
||||
pattern_fmt = r"/([^/?#]+)/\d*$"
|
||||
pattern = BASE_PATTERN + r"/([^/?#]+)/\d*$"
|
||||
test = (
|
||||
("https://archive.4plebs.org/tg/"),
|
||||
("https://archived.moe/gd/"),
|
||||
("https://archiveofsins.com/h/"),
|
||||
("https://arch.b4k.co/meta/"),
|
||||
("https://desuarchive.org/a/"),
|
||||
("https://boards.fireden.net/sci/"),
|
||||
("https://archive.nyafuu.org/c/"),
|
||||
("https://rbt.asia/g/"),
|
||||
("https://thebarchive.com/b/"),
|
||||
)
|
||||
|
||||
def __init__(self, match):
|
||||
FoolfuukaExtractor.__init__(self, match)
|
||||
self.board = match.group(1)
|
||||
self.board = match.group(match.lastindex)
|
||||
|
||||
def items(self):
|
||||
index_base = "{}/_/api/chan/index/?board={}&page=".format(
|
||||
@@ -113,7 +198,7 @@ class FoolfuukaBoardExtractor(FoolfuukaExtractor):
|
||||
|
||||
for num, thread in threads.items():
|
||||
thread["url"] = thread_base + format(num)
|
||||
thread["_extractor"] = self.childclass
|
||||
thread["_extractor"] = FoolfuukaThreadExtractor
|
||||
yield Message.Queue, thread["url"], thread
|
||||
|
||||
|
||||
@@ -121,15 +206,24 @@ class FoolfuukaSearchExtractor(FoolfuukaExtractor):
|
||||
"""Base extractor for search results on FoolFuuka based boards/archives"""
|
||||
subcategory = "search"
|
||||
directory_fmt = ("{category}", "search", "{search}")
|
||||
pattern_fmt = r"/([^/?#]+)/search((?:/[^/?#]+/[^/?#]+)+)"
|
||||
pattern = BASE_PATTERN + r"/([^/?#]+)/search((?:/[^/?#]+/[^/?#]+)+)"
|
||||
request_interval = 1.0
|
||||
test = (
|
||||
("https://archive.4plebs.org/_/search/text/test/"),
|
||||
("https://archived.moe/_/search/text/test/"),
|
||||
("https://archiveofsins.com/_/search/text/test/"),
|
||||
("https://archiveofsins.com/_/search/text/test/"),
|
||||
("https://desuarchive.org/_/search/text/test/"),
|
||||
("https://boards.fireden.net/_/search/text/test/"),
|
||||
("https://archive.nyafuu.org/_/search/text/test/"),
|
||||
("https://rbt.asia/_/search/text/test/"),
|
||||
("https://thebarchive.com/_/search/text/test/"),
|
||||
)
|
||||
|
||||
def __init__(self, match):
|
||||
FoolfuukaExtractor.__init__(self, match)
|
||||
board, search = match.groups()
|
||||
|
||||
self.params = params = {}
|
||||
args = search.split("/")
|
||||
args = match.group(match.lastindex).split("/")
|
||||
key = None
|
||||
|
||||
for arg in args:
|
||||
@@ -138,6 +232,8 @@ class FoolfuukaSearchExtractor(FoolfuukaExtractor):
|
||||
key = None
|
||||
else:
|
||||
key = arg
|
||||
|
||||
board = match.group(match.lastindex-1)
|
||||
if board != "_":
|
||||
params["boards"] = board
|
||||
|
||||
@@ -170,105 +266,3 @@ class FoolfuukaSearchExtractor(FoolfuukaExtractor):
|
||||
if len(posts) <= 3:
|
||||
return
|
||||
params["page"] += 1
|
||||
|
||||
|
||||
EXTRACTORS = {
|
||||
"4plebs": {
|
||||
"name": "_4plebs",
|
||||
"root": "https://archive.4plebs.org",
|
||||
"pattern": r"(?:archive\.)?4plebs\.org",
|
||||
"test-thread": ("https://archive.4plebs.org/tg/thread/54059290", {
|
||||
"url": "07452944164b602502b02b24521f8cee5c484d2a",
|
||||
}),
|
||||
"test-board": ("https://archive.4plebs.org/tg/",),
|
||||
"test-search": ("https://archive.4plebs.org/_/search/text/test/",),
|
||||
},
|
||||
"archivedmoe": {
|
||||
"root": "https://archived.moe",
|
||||
"test-thread": (
|
||||
("https://archived.moe/gd/thread/309639/", {
|
||||
"url": "fdd533840e2d535abd162c02d6dfadbc12e2dcd8",
|
||||
"content": "c27e2a7be3bc989b5dd859f7789cc854db3f5573",
|
||||
}),
|
||||
("https://archived.moe/a/thread/159767162/", {
|
||||
"url": "ffec05a1a1b906b5ca85992513671c9155ee9e87",
|
||||
}),
|
||||
),
|
||||
"test-board": ("https://archived.moe/gd/",),
|
||||
"test-search": ("https://archived.moe/_/search/text/test/",),
|
||||
},
|
||||
"archiveofsins": {
|
||||
"root": "https://archiveofsins.com",
|
||||
"pattern": r"(?:www\.)?archiveofsins\.com",
|
||||
"test-thread": ("https://archiveofsins.com/h/thread/4668813/", {
|
||||
"url": "f612d287087e10a228ef69517cf811539db9a102",
|
||||
"content": "0dd92d0d8a7bf6e2f7d1f5ac8954c1bcf18c22a4",
|
||||
}),
|
||||
"test-board": ("https://archiveofsins.com/h/",),
|
||||
"test-search": ("https://archiveofsins.com/_/search/text/test/",),
|
||||
},
|
||||
"b4k": {
|
||||
"root": "https://arch.b4k.co",
|
||||
"extra": {"external": "direct"},
|
||||
"test-thread": ("https://arch.b4k.co/meta/thread/196/", {
|
||||
"url": "d309713d2f838797096b3e9cb44fe514a9c9d07a",
|
||||
}),
|
||||
"test-board": ("https://arch.b4k.co/meta/",),
|
||||
"test-search": ("https://arch.b4k.co/_/search/text/test/",),
|
||||
},
|
||||
"desuarchive": {
|
||||
"root": "https://desuarchive.org",
|
||||
"test-thread": ("https://desuarchive.org/a/thread/159542679/", {
|
||||
"url": "3ae1473f6916ac831efe5cc4d4e7d3298ce79406",
|
||||
}),
|
||||
"test-board": ("https://desuarchive.org/a/",),
|
||||
"test-search": ("https://desuarchive.org/_/search/text/test/",),
|
||||
},
|
||||
"fireden": {
|
||||
"root": "https://boards.fireden.net",
|
||||
"test-thread": ("https://boards.fireden.net/sci/thread/11264294/", {
|
||||
"url": "3adfe181ee86a8c23021c705f623b3657a9b0a43",
|
||||
}),
|
||||
"test-board": ("https://boards.fireden.net/sci/",),
|
||||
"test-search": ("https://boards.fireden.net/_/search/text/test/",),
|
||||
},
|
||||
"nyafuu": {
|
||||
"root": "https://archive.nyafuu.org",
|
||||
"pattern": r"(?:archive\.)?nyafuu\.org",
|
||||
"test-thread": ("https://archive.nyafuu.org/c/thread/2849220/", {
|
||||
"url": "bbe6f82944a45e359f5c8daf53f565913dc13e4f",
|
||||
}),
|
||||
"test-board": ("https://archive.nyafuu.org/c/",),
|
||||
"test-search": ("https://archive.nyafuu.org/_/search/text/test/",),
|
||||
},
|
||||
"rbt": {
|
||||
"root": "https://rbt.asia",
|
||||
"pattern": r"(?:rbt\.asia|(?:archive\.)?rebeccablacktech\.com)",
|
||||
"test-thread": (
|
||||
("https://rbt.asia/g/thread/61487650/", {
|
||||
"url": "61896d9d9a2edb556b619000a308a984307b6d30",
|
||||
}),
|
||||
("https://archive.rebeccablacktech.com/g/thread/61487650/", {
|
||||
"url": "61896d9d9a2edb556b619000a308a984307b6d30",
|
||||
}),
|
||||
),
|
||||
"test-board": ("https://rbt.asia/g/",),
|
||||
"test-search": ("https://rbt.asia/_/search/text/test/",),
|
||||
},
|
||||
"thebarchive": {
|
||||
"root": "https://thebarchive.com",
|
||||
"pattern": r"thebarchive\.com",
|
||||
"test-thread": ("https://thebarchive.com/b/thread/739772332/", {
|
||||
"url": "e8b18001307d130d67db31740ce57c8561b5d80c",
|
||||
}),
|
||||
"test-board": ("https://thebarchive.com/b/",),
|
||||
"test-search": ("https://thebarchive.com/_/search/text/test/",),
|
||||
},
|
||||
"_ckey": "childclass",
|
||||
}
|
||||
|
||||
generate_extractors(EXTRACTORS, globals(), (
|
||||
FoolfuukaThreadExtractor,
|
||||
FoolfuukaBoardExtractor,
|
||||
FoolfuukaSearchExtractor,
|
||||
))
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
# Copyright 2016-2020 Mike Fährmann
|
||||
# Copyright 2016-2021 Mike Fährmann
|
||||
#
|
||||
# This program is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License version 2 as
|
||||
@@ -8,23 +8,21 @@
|
||||
|
||||
"""Extractors for FoOlSlide based sites"""
|
||||
|
||||
from .common import (
|
||||
Extractor,
|
||||
ChapterExtractor,
|
||||
MangaExtractor,
|
||||
Message,
|
||||
generate_extractors,
|
||||
)
|
||||
from .common import BaseExtractor, Message
|
||||
from .. import text, util
|
||||
import json
|
||||
|
||||
|
||||
class FoolslideBase():
|
||||
class FoolslideExtractor(BaseExtractor):
|
||||
"""Base class for FoOlSlide extractors"""
|
||||
basecategory = "foolslide"
|
||||
|
||||
def __init__(self, match):
|
||||
BaseExtractor.__init__(self, match)
|
||||
self.gallery_url = self.root + match.group(match.lastindex)
|
||||
|
||||
def request(self, url):
|
||||
return Extractor.request(
|
||||
return BaseExtractor.request(
|
||||
self, url, encoding="utf-8", method="POST", data={"adult": "true"})
|
||||
|
||||
@staticmethod
|
||||
@@ -40,12 +38,53 @@ class FoolslideBase():
|
||||
return data
|
||||
|
||||
|
||||
class FoolslideChapterExtractor(FoolslideBase, ChapterExtractor):
|
||||
BASE_PATTERN = FoolslideExtractor.update({
|
||||
"dokireader": {
|
||||
"root": "https://kobato.hologfx.com/reader",
|
||||
},
|
||||
"kireicake": {
|
||||
"root": "https://reader.kireicake.com",
|
||||
},
|
||||
"powermanga": {
|
||||
"root": "https://read.powermanga.org",
|
||||
"pattern": r"read(?:er)?\.powermanga\.org",
|
||||
},
|
||||
"sensescans": {
|
||||
"root": "https://sensescans.com/reader",
|
||||
"pattern": r"(?:(?:www\.)?sensescans\.com/reader"
|
||||
r"|reader\.sensescans\.com)",
|
||||
},
|
||||
})
|
||||
|
||||
|
||||
class FoolslideChapterExtractor(FoolslideExtractor):
|
||||
"""Base class for chapter extractors for FoOlSlide based sites"""
|
||||
subcategory = "chapter"
|
||||
directory_fmt = ("{category}", "{manga}", "{chapter_string}")
|
||||
filename_fmt = (
|
||||
"{manga}_c{chapter:>03}{chapter_minor:?//}_{page:>03}.{extension}")
|
||||
archive_fmt = "{id}"
|
||||
pattern_fmt = r"(/read/[^/?#]+/[a-z-]+/\d+/\d+(?:/\d+)?)"
|
||||
decode = "default"
|
||||
pattern = BASE_PATTERN + r"(/read/[^/?#]+/[a-z-]+/\d+/\d+(?:/\d+)?)"
|
||||
test = (
|
||||
(("https://kobato.hologfx.com/reader/read/"
|
||||
"hitoribocchi_no_oo_seikatsu/en/3/34"), {
|
||||
"keyword": "6e719ac86f0c6dab89390dd7e507e678459e0dbc",
|
||||
}),
|
||||
("https://reader.kireicake.com/read/wonderland/en/1/1/", {
|
||||
"url": "b2d36bc0bc67e4c461c3a4d6444a2fd339f5d07e",
|
||||
"keyword": "9f80947920a325e33aea7f5cd69ea669171903b6",
|
||||
}),
|
||||
(("https://read.powermanga.org"
|
||||
"/read/one_piece_digital_colour_comics/en/0/75/"), {
|
||||
"url": "854c5817f8f767e1bccd05fa9d58ffb5a4b09384",
|
||||
"keyword": "a60c42f2634b7387899299d411ff494ed0ad6dbe",
|
||||
}),
|
||||
("https://sensescans.com/reader/read/ao_no_orchestra/en/0/26/", {
|
||||
"url": "bbd428dc578f5055e9f86ad635b510386cd317cd",
|
||||
"keyword": "083ef6f8831c84127fe4096fa340a249be9d1424",
|
||||
}),
|
||||
("https://reader.sensescans.com/read/ao_no_orchestra/en/0/26/"),
|
||||
)
|
||||
|
||||
def items(self):
|
||||
page = self.request(self.gallery_url).text
|
||||
@@ -83,9 +122,51 @@ class FoolslideChapterExtractor(FoolslideBase, ChapterExtractor):
|
||||
return json.loads(text.extract(page, "var pages = ", ";")[0])
|
||||
|
||||
|
||||
class FoolslideMangaExtractor(FoolslideBase, MangaExtractor):
|
||||
class FoolslideMangaExtractor(FoolslideExtractor):
|
||||
"""Base class for manga extractors for FoOlSlide based sites"""
|
||||
pattern_fmt = r"(/series/[^/?#]+)"
|
||||
subcategory = "manga"
|
||||
categorytransfer = True
|
||||
pattern = BASE_PATTERN + r"(/series/[^/?#]+)"
|
||||
test = (
|
||||
(("https://kobato.hologfx.com/reader/series/"
|
||||
"boku_ha_ohimesama_ni_narenai/"), {
|
||||
"url": "1c1f5a7258ce4f631f5fc32be548d78a6a57990d",
|
||||
"keyword": "614d89a6045b85c822cbd3e67578ea7577dfc995",
|
||||
}),
|
||||
("https://reader.kireicake.com/series/wonderland/", {
|
||||
"url": "d067b649af1cc88fa8c8b698fde04a10909fd169",
|
||||
"keyword": "268f43772fb239888ca5c5f6a4f65f99ffb3eefb",
|
||||
}),
|
||||
(("https://read.powermanga.org"
|
||||
"/series/one_piece_digital_colour_comics/"), {
|
||||
"count": ">= 1",
|
||||
"keyword": {
|
||||
"chapter": int,
|
||||
"chapter_minor": str,
|
||||
"chapter_string": str,
|
||||
"group": "PowerManga",
|
||||
"lang": "en",
|
||||
"language": "English",
|
||||
"manga": "One Piece Digital Colour Comics",
|
||||
"title": str,
|
||||
"volume": int,
|
||||
},
|
||||
}),
|
||||
("https://sensescans.com/reader/series/yotsubato/", {
|
||||
"count": ">= 3",
|
||||
}),
|
||||
)
|
||||
|
||||
def items(self):
|
||||
page = self.request(self.gallery_url).text
|
||||
|
||||
chapters = self.chapters(page)
|
||||
if not self.config("chapter-reverse", False):
|
||||
chapters.reverse()
|
||||
|
||||
for chapter, data in chapters:
|
||||
data["_extractor"] = FoolslideChapterExtractor
|
||||
yield Message.Queue, chapter, data
|
||||
|
||||
def chapters(self, page):
|
||||
extr = text.extract_from(page)
|
||||
@@ -103,82 +184,3 @@ class FoolslideMangaExtractor(FoolslideBase, MangaExtractor):
|
||||
"chapter_string": extr('title="', '"'),
|
||||
"group" : extr('title="', '"'),
|
||||
})))
|
||||
|
||||
|
||||
EXTRACTORS = {
|
||||
"dokireader": {
|
||||
"root": "https://kobato.hologfx.com/reader",
|
||||
"test-chapter":
|
||||
(("https://kobato.hologfx.com/reader/read/"
|
||||
"hitoribocchi_no_oo_seikatsu/en/3/34"), {
|
||||
"keyword": "6e719ac86f0c6dab89390dd7e507e678459e0dbc",
|
||||
}),
|
||||
"test-manga":
|
||||
(("https://kobato.hologfx.com/reader/series/"
|
||||
"boku_ha_ohimesama_ni_narenai/"), {
|
||||
"url": "1c1f5a7258ce4f631f5fc32be548d78a6a57990d",
|
||||
"keyword": "614d89a6045b85c822cbd3e67578ea7577dfc995",
|
||||
}),
|
||||
},
|
||||
"kireicake": {
|
||||
"root": "https://reader.kireicake.com",
|
||||
"test-chapter":
|
||||
("https://reader.kireicake.com/read/wonderland/en/1/1/", {
|
||||
"url": "b2d36bc0bc67e4c461c3a4d6444a2fd339f5d07e",
|
||||
"keyword": "9f80947920a325e33aea7f5cd69ea669171903b6",
|
||||
}),
|
||||
"test-manga":
|
||||
("https://reader.kireicake.com/series/wonderland/", {
|
||||
"url": "d067b649af1cc88fa8c8b698fde04a10909fd169",
|
||||
"keyword": "268f43772fb239888ca5c5f6a4f65f99ffb3eefb",
|
||||
}),
|
||||
},
|
||||
"powermanga": {
|
||||
"root": "https://read.powermanga.org",
|
||||
"pattern": r"read(?:er)?\.powermanga\.org",
|
||||
"test-chapter":
|
||||
(("https://read.powermanga.org"
|
||||
"/read/one_piece_digital_colour_comics/en/0/75/"), {
|
||||
"url": "854c5817f8f767e1bccd05fa9d58ffb5a4b09384",
|
||||
"keyword": "a60c42f2634b7387899299d411ff494ed0ad6dbe",
|
||||
}),
|
||||
"test-manga":
|
||||
(("https://read.powermanga.org"
|
||||
"/series/one_piece_digital_colour_comics/"), {
|
||||
"count": ">= 1",
|
||||
"keyword": {
|
||||
"chapter": int,
|
||||
"chapter_minor": str,
|
||||
"chapter_string": str,
|
||||
"group": "PowerManga",
|
||||
"lang": "en",
|
||||
"language": "English",
|
||||
"manga": "One Piece Digital Colour Comics",
|
||||
"title": str,
|
||||
"volume": int,
|
||||
},
|
||||
}),
|
||||
},
|
||||
"sensescans": {
|
||||
"root": "https://sensescans.com/reader",
|
||||
"pattern": r"(?:(?:www\.)?sensescans\.com/reader"
|
||||
r"|reader\.sensescans\.com)",
|
||||
"test-chapter": (
|
||||
("https://sensescans.com/reader/read/ao_no_orchestra/en/0/26/", {
|
||||
"url": "bbd428dc578f5055e9f86ad635b510386cd317cd",
|
||||
"keyword": "083ef6f8831c84127fe4096fa340a249be9d1424",
|
||||
}),
|
||||
("https://reader.sensescans.com/read/ao_no_orchestra/en/0/26/"),
|
||||
),
|
||||
"test-manga":
|
||||
("https://sensescans.com/reader/series/yotsubato/", {
|
||||
"count": ">= 3",
|
||||
}),
|
||||
},
|
||||
"_ckey": "chapterclass",
|
||||
}
|
||||
|
||||
generate_extractors(EXTRACTORS, globals(), (
|
||||
FoolslideChapterExtractor,
|
||||
FoolslideMangaExtractor,
|
||||
))
|
||||
|
||||
@@ -366,13 +366,6 @@ class InstagramUserExtractor(InstagramExtractor):
|
||||
)
|
||||
|
||||
def items(self):
|
||||
if self.config("highlights"):
|
||||
self.log.warning("'highlights' is deprecated, "
|
||||
"use '\"include\": \"…,highlights\"' instead")
|
||||
default = ("highlights", "posts")
|
||||
else:
|
||||
default = ("posts",)
|
||||
|
||||
base = "{}/{}/".format(self.root, self.item)
|
||||
stories = "{}/stories/{}/".format(self.root, self.item)
|
||||
return self._dispatch_extractors((
|
||||
@@ -380,7 +373,7 @@ class InstagramUserExtractor(InstagramExtractor):
|
||||
(InstagramHighlightsExtractor, base + "highlights/"),
|
||||
(InstagramPostsExtractor , base + "posts/"),
|
||||
(InstagramChannelExtractor , base + "channel/"),
|
||||
), default)
|
||||
), ("posts",))
|
||||
|
||||
|
||||
class InstagramPostsExtractor(InstagramExtractor):
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
# Copyright 2019-2020 Mike Fährmann
|
||||
# Copyright 2019-2021 Mike Fährmann
|
||||
#
|
||||
# This program is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License version 2 as
|
||||
@@ -8,35 +8,25 @@
|
||||
|
||||
"""Extractors for mastodon instances"""
|
||||
|
||||
from .common import Extractor, Message
|
||||
from .. import text, util, config, exception
|
||||
import re
|
||||
from .common import BaseExtractor, Message
|
||||
from .. import text, exception
|
||||
from ..cache import cache
|
||||
|
||||
|
||||
class MastodonExtractor(Extractor):
|
||||
class MastodonExtractor(BaseExtractor):
|
||||
"""Base class for mastodon extractors"""
|
||||
basecategory = "mastodon"
|
||||
directory_fmt = ("mastodon", "{instance}", "{account[username]}")
|
||||
filename_fmt = "{category}_{id}_{media[id]}.{extension}"
|
||||
archive_fmt = "{media[id]}"
|
||||
cookiedomain = None
|
||||
instance = None
|
||||
root = None
|
||||
|
||||
def __init__(self, match):
|
||||
Extractor.__init__(self, match)
|
||||
self.api = MastodonAPI(self)
|
||||
|
||||
def config(self, key, default=None):
|
||||
return config.interpolate_common(
|
||||
("extractor",), (
|
||||
(self.category, self.subcategory),
|
||||
(self.basecategory, self.instance, self.subcategory),
|
||||
), key, default,
|
||||
)
|
||||
BaseExtractor.__init__(self, match)
|
||||
self.instance = self.root.partition("://")[2]
|
||||
self.item = match.group(match.lastindex)
|
||||
|
||||
def items(self):
|
||||
yield Message.Version, 1
|
||||
for status in self.statuses():
|
||||
attachments = status["media_attachments"]
|
||||
if attachments:
|
||||
@@ -60,34 +50,81 @@ class MastodonExtractor(Extractor):
|
||||
status["created_at"][:19], "%Y-%m-%dT%H:%M:%S")
|
||||
|
||||
|
||||
INSTANCES = {
|
||||
"mastodon.social": {
|
||||
"root" : "https://mastodon.social",
|
||||
"access-token" : "Y06R36SMvuXXN5_wiPKFAEFiQaMSQg0o_hGgc86Jj48",
|
||||
"client-id" : "dBSHdpsnOUZgxOnjKSQrWEPakO3ctM7HmsyoOd4FcRo",
|
||||
"client-secret": "DdrODTHs_XoeOsNVXnILTMabtdpWrWOAtrmw91wU1zI",
|
||||
},
|
||||
"pawoo": {
|
||||
"root" : "https://pawoo.net",
|
||||
"access-token" : "c12c9d275050bce0dc92169a28db09d7"
|
||||
"0d62d0a75a8525953098c167eacd3668",
|
||||
"client-id" : "978a25f843ec01e53d09be2c290cd75c"
|
||||
"782bc3b7fdbd7ea4164b9f3c3780c8ff",
|
||||
"client-secret": "9208e3d4a7997032cf4f1b0e12e5df38"
|
||||
"8428ef1fadb446dcfeb4f5ed6872d97b",
|
||||
},
|
||||
"baraag": {
|
||||
"root" : "https://baraag.net",
|
||||
"access-token" : "53P1Mdigf4EJMH-RmeFOOSM9gdSDztmrAYFgabOKKE0",
|
||||
"client-id" : "czxx2qilLElYHQ_sm-lO8yXuGwOHxLX9RYYaD0-nq1o",
|
||||
"client-secret": "haMaFdMBgK_-BIxufakmI2gFgkYjqmgXGEO2tB-R2xY",
|
||||
}
|
||||
}
|
||||
|
||||
BASE_PATTERN = MastodonExtractor.update(INSTANCES)
|
||||
|
||||
|
||||
class MastodonUserExtractor(MastodonExtractor):
|
||||
"""Extractor for all images of an account/user"""
|
||||
subcategory = "user"
|
||||
|
||||
def __init__(self, match):
|
||||
MastodonExtractor.__init__(self, match)
|
||||
self.account_name = match.group(1)
|
||||
pattern = BASE_PATTERN + r"/@([^/?#]+)(?:/media)?/?$"
|
||||
test = (
|
||||
("https://mastodon.social/@jk", {
|
||||
"pattern": r"https://files.mastodon.social/media_attachments"
|
||||
r"/files/(\d+/){3,}original/\w+",
|
||||
"range": "1-60",
|
||||
"count": 60,
|
||||
}),
|
||||
("https://pawoo.net/@yoru_nine/", {
|
||||
"range": "1-60",
|
||||
"count": 60,
|
||||
}),
|
||||
("https://baraag.net/@pumpkinnsfw"),
|
||||
)
|
||||
|
||||
def statuses(self):
|
||||
handle = "@{}@{}".format(self.account_name, self.instance)
|
||||
for account in self.api.account_search(handle, 1):
|
||||
if account["username"] == self.account_name:
|
||||
api = MastodonAPI(self)
|
||||
username = self.item
|
||||
handle = "@{}@{}".format(username, self.instance)
|
||||
for account in api.account_search(handle, 1):
|
||||
if account["username"] == username:
|
||||
break
|
||||
else:
|
||||
raise exception.NotFoundError("account")
|
||||
return self.api.account_statuses(account["id"])
|
||||
return api.account_statuses(account["id"])
|
||||
|
||||
|
||||
class MastodonStatusExtractor(MastodonExtractor):
|
||||
"""Extractor for images from a status"""
|
||||
subcategory = "status"
|
||||
|
||||
def __init__(self, match):
|
||||
MastodonExtractor.__init__(self, match)
|
||||
self.status_id = match.group(1)
|
||||
pattern = BASE_PATTERN + r"/@[^/?#]+/(\d+)"
|
||||
test = (
|
||||
("https://mastodon.social/@jk/103794036899778366", {
|
||||
"count": 4,
|
||||
}),
|
||||
("https://pawoo.net/@yoru_nine/105038878897832922", {
|
||||
"content": "b52e807f8ab548d6f896b09218ece01eba83987a",
|
||||
}),
|
||||
("https://baraag.net/@pumpkinnsfw/104364170556898443", {
|
||||
"content": "67748c1b828c58ad60d0fe5729b59fb29c872244",
|
||||
}),
|
||||
)
|
||||
|
||||
def statuses(self):
|
||||
return (self.api.status(self.status_id),)
|
||||
return (MastodonAPI(self).status(self.item),)
|
||||
|
||||
|
||||
class MastodonAPI():
|
||||
@@ -97,35 +134,46 @@ class MastodonAPI():
|
||||
https://github.com/tootsuite/documentation/blob/master/Using-the-API/API.md
|
||||
"""
|
||||
|
||||
def __init__(self, extractor, access_token=None):
|
||||
def __init__(self, extractor):
|
||||
self.root = extractor.root
|
||||
self.extractor = extractor
|
||||
|
||||
access_token = extractor.config("access-token")
|
||||
if access_token is None or access_token == "cache":
|
||||
access_token = _access_token_cache(extractor.instance)
|
||||
if not access_token:
|
||||
access_token = extractor.config(
|
||||
"access-token", extractor.access_token)
|
||||
self.headers = {"Authorization": "Bearer {}".format(access_token)}
|
||||
try:
|
||||
access_token = INSTANCES[extractor.category]["access-token"]
|
||||
except (KeyError, TypeError):
|
||||
raise exception.StopExtraction(
|
||||
"Missing access token.\n"
|
||||
"Run 'gallery-dl oauth:mastodon:%s' to obtain one.",
|
||||
extractor.instance)
|
||||
|
||||
self.headers = {"Authorization": "Bearer " + access_token}
|
||||
|
||||
def account_search(self, query, limit=40):
|
||||
"""Search for content"""
|
||||
endpoint = "/v1/accounts/search"
|
||||
params = {"q": query, "limit": limit}
|
||||
return self._call("accounts/search", params).json()
|
||||
return self._call(endpoint, params).json()
|
||||
|
||||
def account_statuses(self, account_id):
|
||||
"""Get an account's statuses"""
|
||||
endpoint = "accounts/{}/statuses".format(account_id)
|
||||
endpoint = "/v1/accounts/{}/statuses".format(account_id)
|
||||
params = {"only_media": "1"}
|
||||
return self._pagination(endpoint, params)
|
||||
|
||||
def status(self, status_id):
|
||||
"""Fetch a Status"""
|
||||
return self._call("statuses/" + status_id).json()
|
||||
"""Fetch a status"""
|
||||
endpoint = "/v1/statuses/" + status_id
|
||||
return self._call(endpoint).json()
|
||||
|
||||
def _call(self, endpoint, params=None):
|
||||
if endpoint.startswith("http"):
|
||||
url = endpoint
|
||||
else:
|
||||
url = "{}/api/v1/{}".format(self.root, endpoint)
|
||||
url = self.root + "/api" + endpoint
|
||||
|
||||
while True:
|
||||
response = self.extractor.request(
|
||||
@@ -145,7 +193,7 @@ class MastodonAPI():
|
||||
raise exception.StopExtraction(response.json().get("error"))
|
||||
|
||||
def _pagination(self, endpoint, params):
|
||||
url = "{}/api/v1/{}".format(self.root, endpoint)
|
||||
url = endpoint
|
||||
while url:
|
||||
response = self._call(url, params)
|
||||
yield from response.json()
|
||||
@@ -156,86 +204,6 @@ class MastodonAPI():
|
||||
url = url["url"]
|
||||
|
||||
|
||||
def generate_extractors():
|
||||
"""Dynamically generate Extractor classes for Mastodon instances"""
|
||||
|
||||
symtable = globals()
|
||||
extractors = config.get(("extractor",), "mastodon")
|
||||
if extractors:
|
||||
util.combine_dict(EXTRACTORS, extractors)
|
||||
config.set(("extractor",), "mastodon", EXTRACTORS)
|
||||
|
||||
for instance, info in EXTRACTORS.items():
|
||||
|
||||
if not isinstance(info, dict):
|
||||
continue
|
||||
|
||||
category = info.get("category") or instance.replace(".", "")
|
||||
root = info.get("root") or "https://" + instance
|
||||
name = (info.get("name") or category).capitalize()
|
||||
token = info.get("access-token")
|
||||
pattern = info.get("pattern") or re.escape(instance)
|
||||
|
||||
class Extr(MastodonUserExtractor):
|
||||
pass
|
||||
|
||||
Extr.__name__ = Extr.__qualname__ = name + "UserExtractor"
|
||||
Extr.__doc__ = "Extractor for all images of a user on " + instance
|
||||
Extr.category = category
|
||||
Extr.instance = instance
|
||||
Extr.pattern = (r"(?:https?://)?" + pattern +
|
||||
r"/@([^/?#]+)(?:/media)?/?$")
|
||||
Extr.test = info.get("test-user")
|
||||
Extr.root = root
|
||||
Extr.access_token = token
|
||||
symtable[Extr.__name__] = Extr
|
||||
|
||||
class Extr(MastodonStatusExtractor):
|
||||
pass
|
||||
|
||||
Extr.__name__ = Extr.__qualname__ = name + "StatusExtractor"
|
||||
Extr.__doc__ = "Extractor for images from a status on " + instance
|
||||
Extr.category = category
|
||||
Extr.instance = instance
|
||||
Extr.pattern = r"(?:https?://)?" + pattern + r"/@[^/?#]+/(\d+)"
|
||||
Extr.test = info.get("test-status")
|
||||
Extr.root = root
|
||||
Extr.access_token = token
|
||||
symtable[Extr.__name__] = Extr
|
||||
|
||||
|
||||
EXTRACTORS = {
|
||||
"mastodon.social": {
|
||||
"category" : "mastodon.social",
|
||||
"access-token" : "Y06R36SMvuXXN5_wiPKFAEFiQaMSQg0o_hGgc86Jj48",
|
||||
"client-id" : "dBSHdpsnOUZgxOnjKSQrWEPakO3ctM7HmsyoOd4FcRo",
|
||||
"client-secret": "DdrODTHs_XoeOsNVXnILTMabtdpWrWOAtrmw91wU1zI",
|
||||
"test-user" : ("https://mastodon.social/@jk", {
|
||||
"pattern": r"https://files.mastodon.social/media_attachments"
|
||||
r"/files/(\d+/){3,}original/\w+",
|
||||
"range": "1-60",
|
||||
"count": 60,
|
||||
}),
|
||||
"test-status" : ("https://mastodon.social/@jk/103794036899778366", {
|
||||
"count": 4,
|
||||
}),
|
||||
},
|
||||
"pawoo.net": {
|
||||
"category" : "pawoo",
|
||||
"access-token" : "c12c9d275050bce0dc92169a28db09d7"
|
||||
"0d62d0a75a8525953098c167eacd3668",
|
||||
"client-id" : "978a25f843ec01e53d09be2c290cd75c"
|
||||
"782bc3b7fdbd7ea4164b9f3c3780c8ff",
|
||||
"client-secret": "9208e3d4a7997032cf4f1b0e12e5df38"
|
||||
"8428ef1fadb446dcfeb4f5ed6872d97b",
|
||||
},
|
||||
"baraag.net": {
|
||||
"category" : "baraag",
|
||||
"access-token" : "53P1Mdigf4EJMH-RmeFOOSM9gdSDztmrAYFgabOKKE0",
|
||||
"client-id" : "czxx2qilLElYHQ_sm-lO8yXuGwOHxLX9RYYaD0-nq1o",
|
||||
"client-secret": "haMaFdMBgK_-BIxufakmI2gFgkYjqmgXGEO2tB-R2xY",
|
||||
},
|
||||
}
|
||||
|
||||
|
||||
generate_extractors()
|
||||
@cache(maxage=100*365*24*3600, keyarg=0)
|
||||
def _access_token_cache(instance):
|
||||
return None
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
# Copyright 2015-2018 Mike Fährmann
|
||||
# Copyright 2015-2021 Mike Fährmann
|
||||
#
|
||||
# This program is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License version 2 as
|
||||
@@ -52,4 +52,4 @@ class Message():
|
||||
# Cookies = 5
|
||||
Queue = 6
|
||||
# Urllist = 7
|
||||
Metadata = 8
|
||||
# Metadata = 8
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
# Copyright 2020 Mike Fährmann
|
||||
# Copyright 2020-2021 Mike Fährmann
|
||||
#
|
||||
# This program is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License version 2 as
|
||||
@@ -8,7 +8,6 @@
|
||||
|
||||
"""Extractors for Moebooru based sites"""
|
||||
|
||||
from .common import generate_extractors
|
||||
from .booru import BooruExtractor
|
||||
from .. import text
|
||||
|
||||
@@ -52,15 +51,93 @@ class MoebooruExtractor(BooruExtractor):
|
||||
params["page"] += 1
|
||||
|
||||
|
||||
BASE_PATTERN = MoebooruExtractor.update({
|
||||
"yandere": {
|
||||
"root": "https://yande.re",
|
||||
},
|
||||
"konachan": {
|
||||
"root": "https://konachan.com",
|
||||
"pattern": r"konachan\.(?:com|net)",
|
||||
},
|
||||
"hypnohub": {
|
||||
"root": "https://hypnohub.net",
|
||||
},
|
||||
"sakugabooru": {
|
||||
"root": "https://www.sakugabooru.com",
|
||||
"pattern": r"(?:www\.)?sakugabooru\.com",
|
||||
},
|
||||
"lolibooru": {
|
||||
"root": "https://lolibooru.moe",
|
||||
},
|
||||
})
|
||||
|
||||
|
||||
class MoebooruPostExtractor(MoebooruExtractor):
|
||||
subcategory = "post"
|
||||
archive_fmt = "{id}"
|
||||
pattern = BASE_PATTERN + r"/post/show/(\d+)"
|
||||
test = (
|
||||
("https://yande.re/post/show/51824", {
|
||||
"content": "59201811c728096b2d95ce6896fd0009235fe683",
|
||||
"options": (("tags", True),),
|
||||
"keyword": {
|
||||
"tags_artist": "sasaki_tamaru",
|
||||
"tags_circle": "softhouse_chara",
|
||||
"tags_copyright": "ouzoku",
|
||||
"tags_general": str,
|
||||
},
|
||||
}),
|
||||
("https://konachan.com/post/show/205189", {
|
||||
"content": "674e75a753df82f5ad80803f575818b8e46e4b65",
|
||||
"options": (("tags", True),),
|
||||
"keyword": {
|
||||
"tags_artist": "patata",
|
||||
"tags_character": "clownpiece",
|
||||
"tags_copyright": "touhou",
|
||||
"tags_general": str,
|
||||
},
|
||||
}),
|
||||
("https://konachan.net/post/show/205189"),
|
||||
("https://hypnohub.net/post/show/73964", {
|
||||
"content": "02d5f5a8396b621a6efc04c5f8ef1b7225dfc6ee",
|
||||
}),
|
||||
("https://www.sakugabooru.com/post/show/125570"),
|
||||
("https://lolibooru.moe/post/show/287835"),
|
||||
)
|
||||
|
||||
def __init__(self, match):
|
||||
MoebooruExtractor.__init__(self, match)
|
||||
self.post_id = match.group(match.lastindex)
|
||||
|
||||
def posts(self):
|
||||
params = {"tags": "id:" + self.post_id}
|
||||
return self.request(self.root + "/post.json", params=params).json()
|
||||
|
||||
|
||||
class MoebooruTagExtractor(MoebooruExtractor):
|
||||
subcategory = "tag"
|
||||
directory_fmt = ("{category}", "{search_tags}")
|
||||
archive_fmt = "t_{search_tags}_{id}"
|
||||
pattern_fmt = r"/post\?(?:[^&#]*&)*tags=([^&#]+)"
|
||||
pattern = BASE_PATTERN + r"/post\?(?:[^&#]*&)*tags=([^&#]+)"
|
||||
test = (
|
||||
("https://yande.re/post?tags=ouzoku+armor", {
|
||||
"content": "59201811c728096b2d95ce6896fd0009235fe683",
|
||||
}),
|
||||
("https://konachan.com/post?tags=patata", {
|
||||
"content": "838cfb815e31f48160855435655ddf7bfc4ecb8d",
|
||||
}),
|
||||
("https://konachan.net/post?tags=patata"),
|
||||
("https://hypnohub.net/post?tags=gonoike_biwa", {
|
||||
"url": "072330c34a1e773d0cafd00e64b8060d34b078b6",
|
||||
}),
|
||||
("https://www.sakugabooru.com/post?tags=nichijou"),
|
||||
("https://lolibooru.moe/post?tags=ruu_%28tksymkw%29"),
|
||||
)
|
||||
|
||||
def __init__(self, match):
|
||||
MoebooruExtractor.__init__(self, match)
|
||||
self.tags = text.unquote(match.group(1).replace("+", " "))
|
||||
tags = match.group(match.lastindex)
|
||||
self.tags = text.unquote(tags.replace("+", " "))
|
||||
|
||||
def metadata(self):
|
||||
return {"search_tags": self.tags}
|
||||
@@ -74,11 +151,25 @@ class MoebooruPoolExtractor(MoebooruExtractor):
|
||||
subcategory = "pool"
|
||||
directory_fmt = ("{category}", "pool", "{pool}")
|
||||
archive_fmt = "p_{pool}_{id}"
|
||||
pattern_fmt = r"/pool/show/(\d+)"
|
||||
pattern = BASE_PATTERN + r"/pool/show/(\d+)"
|
||||
test = (
|
||||
("https://yande.re/pool/show/318", {
|
||||
"content": "2a35b9d6edecce11cc2918c6dce4de2198342b68",
|
||||
}),
|
||||
("https://konachan.com/pool/show/95", {
|
||||
"content": "cf0546e38a93c2c510a478f8744e60687b7a8426",
|
||||
}),
|
||||
("https://konachan.net/pool/show/95"),
|
||||
("https://hypnohub.net/pool/show/61", {
|
||||
"url": "fd74991c8729e77acd3c35eb6ddc4128ff445adf",
|
||||
}),
|
||||
("https://www.sakugabooru.com/pool/show/54"),
|
||||
("https://lolibooru.moe/pool/show/239"),
|
||||
)
|
||||
|
||||
def __init__(self, match):
|
||||
MoebooruExtractor.__init__(self, match)
|
||||
self.pool_id = match.group(1)
|
||||
self.pool_id = match.group(match.lastindex)
|
||||
|
||||
def metadata(self):
|
||||
return {"pool": text.parse_int(self.pool_id)}
|
||||
@@ -88,29 +179,34 @@ class MoebooruPoolExtractor(MoebooruExtractor):
|
||||
return self._pagination(self.root + "/post.json", params)
|
||||
|
||||
|
||||
class MoebooruPostExtractor(MoebooruExtractor):
|
||||
subcategory = "post"
|
||||
archive_fmt = "{id}"
|
||||
pattern_fmt = r"/post/show/(\d+)"
|
||||
|
||||
def __init__(self, match):
|
||||
MoebooruExtractor.__init__(self, match)
|
||||
self.post_id = match.group(1)
|
||||
|
||||
def posts(self):
|
||||
params = {"tags": "id:" + self.post_id}
|
||||
return self.request(self.root + "/post.json", params=params).json()
|
||||
|
||||
|
||||
class MoebooruPopularExtractor(MoebooruExtractor):
|
||||
subcategory = "popular"
|
||||
directory_fmt = ("{category}", "popular", "{scale}", "{date}")
|
||||
archive_fmt = "P_{scale[0]}_{date}_{id}"
|
||||
pattern_fmt = r"/post/popular_(by_(?:day|week|month)|recent)(?:\?([^#]*))?"
|
||||
pattern = BASE_PATTERN + \
|
||||
r"/post/popular_(by_(?:day|week|month)|recent)(?:\?([^#]*))?"
|
||||
test = (
|
||||
("https://yande.re/post/popular_by_month?month=6&year=2014", {
|
||||
"count": 40,
|
||||
}),
|
||||
("https://yande.re/post/popular_recent"),
|
||||
("https://konachan.com/post/popular_by_month?month=11&year=2010", {
|
||||
"count": 20,
|
||||
}),
|
||||
("https://konachan.com/post/popular_recent"),
|
||||
("https://konachan.net/post/popular_recent"),
|
||||
("https://hypnohub.net/post/popular_by_month?month=6&year=2014", {
|
||||
"count": 20,
|
||||
}),
|
||||
("https://hypnohub.net/post/popular_recent"),
|
||||
("https://www.sakugabooru.com/post/popular_recent"),
|
||||
("https://lolibooru.moe/post/popular_recent"),
|
||||
)
|
||||
|
||||
def __init__(self, match):
|
||||
MoebooruExtractor.__init__(self, match)
|
||||
self.scale, self.query = match.groups()
|
||||
self.scale = match.group(match.lastindex-1)
|
||||
self.query = match.group(match.lastindex)
|
||||
|
||||
def metadata(self):
|
||||
self.params = params = text.parse_query(self.query)
|
||||
@@ -138,108 +234,3 @@ class MoebooruPopularExtractor(MoebooruExtractor):
|
||||
def posts(self):
|
||||
url = "{}/post/popular_{}.json".format(self.root, self.scale)
|
||||
return self.request(url, params=self.params).json()
|
||||
|
||||
|
||||
EXTRACTORS = {
|
||||
"yandere": {
|
||||
"root": "https://yande.re",
|
||||
"test-tag": ("https://yande.re/post?tags=ouzoku+armor", {
|
||||
"content": "59201811c728096b2d95ce6896fd0009235fe683",
|
||||
}),
|
||||
"test-pool": ("https://yande.re/pool/show/318", {
|
||||
"content": "2a35b9d6edecce11cc2918c6dce4de2198342b68",
|
||||
}),
|
||||
"test-post": ("https://yande.re/post/show/51824", {
|
||||
"content": "59201811c728096b2d95ce6896fd0009235fe683",
|
||||
"options": (("tags", True),),
|
||||
"keyword": {
|
||||
"tags_artist": "sasaki_tamaru",
|
||||
"tags_circle": "softhouse_chara",
|
||||
"tags_copyright": "ouzoku",
|
||||
"tags_general": str,
|
||||
},
|
||||
}),
|
||||
"test-popular": (
|
||||
("https://yande.re/post/popular_by_month?month=6&year=2014", {
|
||||
"count": 40,
|
||||
}),
|
||||
("https://yande.re/post/popular_recent"),
|
||||
),
|
||||
},
|
||||
"konachan": {
|
||||
"root": "https://konachan.com",
|
||||
"pattern": r"konachan\.(?:com|net)",
|
||||
"test-tag": (
|
||||
("https://konachan.com/post?tags=patata", {
|
||||
"content": "838cfb815e31f48160855435655ddf7bfc4ecb8d",
|
||||
}),
|
||||
("https://konachan.net/post?tags=patata"),
|
||||
),
|
||||
"test-pool": (
|
||||
("https://konachan.com/pool/show/95", {
|
||||
"content": "cf0546e38a93c2c510a478f8744e60687b7a8426",
|
||||
}),
|
||||
("https://konachan.net/pool/show/95"),
|
||||
),
|
||||
"test-post": (
|
||||
("https://konachan.com/post/show/205189", {
|
||||
"content": "674e75a753df82f5ad80803f575818b8e46e4b65",
|
||||
"options": (("tags", True),),
|
||||
"keyword": {
|
||||
"tags_artist": "patata",
|
||||
"tags_character": "clownpiece",
|
||||
"tags_copyright": "touhou",
|
||||
"tags_general": str,
|
||||
},
|
||||
}),
|
||||
("https://konachan.net/post/show/205189"),
|
||||
),
|
||||
"test-popular": (
|
||||
("https://konachan.com/post/popular_by_month?month=11&year=2010", {
|
||||
"count": 20,
|
||||
}),
|
||||
("https://konachan.com/post/popular_recent"),
|
||||
("https://konachan.net/post/popular_recent"),
|
||||
),
|
||||
},
|
||||
"hypnohub": {
|
||||
"root": "https://hypnohub.net",
|
||||
"test-tag": ("https://hypnohub.net/post?tags=gonoike_biwa", {
|
||||
"url": "072330c34a1e773d0cafd00e64b8060d34b078b6",
|
||||
}),
|
||||
"test-pool": ("https://hypnohub.net/pool/show/61", {
|
||||
"url": "fd74991c8729e77acd3c35eb6ddc4128ff445adf",
|
||||
}),
|
||||
"test-post": ("https://hypnohub.net/post/show/73964", {
|
||||
"content": "02d5f5a8396b621a6efc04c5f8ef1b7225dfc6ee",
|
||||
}),
|
||||
"test-popular": (
|
||||
("https://hypnohub.net/post/popular_by_month?month=6&year=2014", {
|
||||
"count": 20,
|
||||
}),
|
||||
("https://hypnohub.net/post/popular_recent"),
|
||||
),
|
||||
},
|
||||
"lolibooru": {
|
||||
"root": "https://lolibooru.moe",
|
||||
"test-tag" : ("https://lolibooru.moe/post?tags=ruu_%28tksymkw%29",),
|
||||
"test-pool" : ("https://lolibooru.moe/pool/show/239",),
|
||||
"test-post" : ("https://lolibooru.moe/post/show/287835",),
|
||||
"test-popular": ("https://lolibooru.moe/post/popular_recent",),
|
||||
},
|
||||
"sakugabooru": {
|
||||
"root": "https://www.sakugabooru.com",
|
||||
"pattern": r"(?:www\.)?sakugabooru\.com",
|
||||
"test-tag" : ("https://www.sakugabooru.com/post?tags=nichijou",),
|
||||
"test-pool" : ("https://www.sakugabooru.com/pool/show/54",),
|
||||
"test-post" : ("https://www.sakugabooru.com/post/show/125570",),
|
||||
"test-popular": ("https://www.sakugabooru.com/post/popular_recent",),
|
||||
},
|
||||
}
|
||||
|
||||
generate_extractors(EXTRACTORS, globals(), (
|
||||
MoebooruTagExtractor,
|
||||
MoebooruPoolExtractor,
|
||||
MoebooruPostExtractor,
|
||||
MoebooruPopularExtractor,
|
||||
))
|
||||
|
||||
@@ -9,7 +9,7 @@
|
||||
"""Utility classes to setup OAuth and link accounts to gallery-dl"""
|
||||
|
||||
from .common import Extractor, Message
|
||||
from . import deviantart, flickr, pixiv, reddit, smugmug, tumblr
|
||||
from . import deviantart, flickr, mastodon, pixiv, reddit, smugmug, tumblr
|
||||
from .. import text, oauth, util, config, exception
|
||||
from ..cache import cache
|
||||
import urllib.parse
|
||||
@@ -106,9 +106,9 @@ class OAuthBase(Extractor):
|
||||
))
|
||||
|
||||
def _oauth2_authorization_code_grant(
|
||||
self, client_id, client_secret, auth_url, token_url,
|
||||
self, client_id, client_secret, auth_url, token_url, *,
|
||||
scope="read", key="refresh_token", auth=True,
|
||||
message_template=None, cache=None):
|
||||
cache=None, instance=None):
|
||||
"""Perform an OAuth2 authorization code grant"""
|
||||
|
||||
state = "gallery-dl_{}_{}".format(
|
||||
@@ -117,12 +117,12 @@ class OAuthBase(Extractor):
|
||||
)
|
||||
|
||||
auth_params = {
|
||||
"client_id": client_id,
|
||||
"client_id" : client_id,
|
||||
"response_type": "code",
|
||||
"state": state,
|
||||
"redirect_uri": self.redirect_uri,
|
||||
"duration": "permanent",
|
||||
"scope": scope,
|
||||
"state" : state,
|
||||
"redirect_uri" : self.redirect_uri,
|
||||
"duration" : "permanent",
|
||||
"scope" : scope,
|
||||
}
|
||||
|
||||
# receive an authorization code
|
||||
@@ -140,8 +140,8 @@ class OAuthBase(Extractor):
|
||||
|
||||
# exchange the authorization code for a token
|
||||
data = {
|
||||
"grant_type": "authorization_code",
|
||||
"code": params["code"],
|
||||
"grant_type" : "authorization_code",
|
||||
"code" : params["code"],
|
||||
"redirect_uri": self.redirect_uri,
|
||||
}
|
||||
|
||||
@@ -159,27 +159,18 @@ class OAuthBase(Extractor):
|
||||
self.send(data["error"])
|
||||
return
|
||||
|
||||
token = data[key]
|
||||
token_name = key.replace("_", "-")
|
||||
|
||||
# write to cache
|
||||
if self.cache and cache:
|
||||
cache.update("#" + str(client_id), data[key])
|
||||
self.log.info("Writing 'refresh-token' to cache")
|
||||
cache.update(instance or ("#" + str(client_id)), token)
|
||||
self.log.info("Writing '%s' to cache", token_name)
|
||||
|
||||
# display token
|
||||
if message_template:
|
||||
msg = message_template.format(
|
||||
category=self.subcategory,
|
||||
key=key.partition("_")[0],
|
||||
token=data[key],
|
||||
instance=getattr(self, "instance", ""),
|
||||
client_id=client_id,
|
||||
client_secret=client_secret,
|
||||
)
|
||||
else:
|
||||
msg = self._generate_message(
|
||||
("refresh-token",),
|
||||
(data[key],),
|
||||
)
|
||||
self.send(msg)
|
||||
self.send(self._generate_message(
|
||||
(token_name,), (token,),
|
||||
))
|
||||
|
||||
def _generate_message(self, names, values):
|
||||
_vh, _va, _is, _it = (
|
||||
@@ -326,8 +317,10 @@ class OAuthMastodon(OAuthBase):
|
||||
def items(self):
|
||||
yield Message.Version, 1
|
||||
|
||||
application = self.oauth_config(self.instance)
|
||||
if not application:
|
||||
for application in mastodon.INSTANCES.values():
|
||||
if self.instance == application["root"].partition("://")[2]:
|
||||
break
|
||||
else:
|
||||
application = self._register(self.instance)
|
||||
|
||||
self._oauth2_authorization_code_grant(
|
||||
@@ -335,8 +328,9 @@ class OAuthMastodon(OAuthBase):
|
||||
application["client-secret"],
|
||||
"https://{}/oauth/authorize".format(self.instance),
|
||||
"https://{}/oauth/token".format(self.instance),
|
||||
instance=self.instance,
|
||||
key="access_token",
|
||||
message_template=MASTODON_MSG_TEMPLATE,
|
||||
cache=mastodon._access_token_cache,
|
||||
)
|
||||
|
||||
@cache(maxage=10*365*24*3600, keyarg=1)
|
||||
@@ -425,29 +419,3 @@ class OAuthPixiv(OAuthBase):
|
||||
""")
|
||||
code = input("code: ")
|
||||
return code.rpartition("=")[2].strip()
|
||||
|
||||
|
||||
MASTODON_MSG_TEMPLATE = """
|
||||
Your 'access-token' is
|
||||
|
||||
{token}
|
||||
|
||||
Put this value into your configuration file as
|
||||
'extractor.mastodon.{instance}.{key}-token'.
|
||||
|
||||
You can also add your 'client-id' and 'client-secret' values
|
||||
if you want to register another account in the future.
|
||||
|
||||
Example:
|
||||
{{
|
||||
"extractor": {{
|
||||
"mastodon": {{
|
||||
"{instance}": {{
|
||||
"{key}-token": "{token}",
|
||||
"client-id": "{client_id}",
|
||||
"client-secret": "{client_secret}"
|
||||
}}
|
||||
}}
|
||||
}}
|
||||
}}
|
||||
"""
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
# Copyright 2019-2020 Mike Fährmann
|
||||
# Copyright 2019-2021 Mike Fährmann
|
||||
#
|
||||
# This program is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License version 2 as
|
||||
@@ -42,8 +42,6 @@ class PatreonExtractor(Extractor):
|
||||
hashes = set()
|
||||
|
||||
yield Message.Directory, post
|
||||
yield Message.Metadata, post
|
||||
|
||||
for kind, url, name in itertools.chain(
|
||||
self._images(post),
|
||||
self._attachments(post),
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
# Copyright 2019-2020 Mike Fährmann
|
||||
# Copyright 2019-2021 Mike Fährmann
|
||||
#
|
||||
# This program is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License version 2 as
|
||||
@@ -8,28 +8,23 @@
|
||||
|
||||
"""Extractors for Shopify instances"""
|
||||
|
||||
from .common import Extractor, Message, generate_extractors
|
||||
from .common import BaseExtractor, Message
|
||||
from .. import text
|
||||
import re
|
||||
|
||||
|
||||
class ShopifyExtractor(Extractor):
|
||||
class ShopifyExtractor(BaseExtractor):
|
||||
"""Base class for Shopify extractors"""
|
||||
basecategory = "shopify"
|
||||
filename_fmt = "{product[title]}_{num:>02}_{id}.{extension}"
|
||||
archive_fmt = "{id}"
|
||||
|
||||
def __init__(self, match):
|
||||
Extractor.__init__(self, match)
|
||||
self.item_url = self.root + match.group(1)
|
||||
|
||||
def request(self, url, **kwargs):
|
||||
kwargs["retries"] = float("inf")
|
||||
return Extractor.request(self, url, **kwargs)
|
||||
BaseExtractor.__init__(self, match)
|
||||
self.item_url = self.root + match.group(match.lastindex)
|
||||
|
||||
def items(self):
|
||||
data = self.metadata()
|
||||
yield Message.Version, 1
|
||||
yield Message.Directory, data
|
||||
|
||||
headers = {"X-Requested-With": "XMLHttpRequest"}
|
||||
@@ -58,22 +53,34 @@ class ShopifyExtractor(Extractor):
|
||||
"""Return an iterable with all relevant product URLs"""
|
||||
|
||||
|
||||
BASE_PATTERN = ShopifyExtractor.update({
|
||||
"fashionnova": {
|
||||
"root": "https://www.fashionnova.com",
|
||||
"pattern": r"(?:www\.)?fashionnova\.com",
|
||||
},
|
||||
})
|
||||
|
||||
|
||||
class ShopifyCollectionExtractor(ShopifyExtractor):
|
||||
"""Base class for collection extractors for Shopify based sites"""
|
||||
subcategory = "collection"
|
||||
directory_fmt = ("{category}", "{collection[title]}")
|
||||
pattern_fmt = r"(/collections/[\w-]+)/?(?:\?([^#]+))?(?:$|#)"
|
||||
|
||||
def __init__(self, match):
|
||||
ShopifyExtractor.__init__(self, match)
|
||||
self.params = match.group(2)
|
||||
pattern = BASE_PATTERN + r"(/collections/[\w-]+)/?(?:$|[?#])"
|
||||
test = (
|
||||
("https://www.fashionnova.com/collections/mini-dresses", {
|
||||
"range": "1-20",
|
||||
"count": 20,
|
||||
"archive": False,
|
||||
}),
|
||||
("https://www.fashionnova.com/collections/mini-dresses/?page=1"),
|
||||
("https://www.fashionnova.com/collections/mini-dresses#1"),
|
||||
)
|
||||
|
||||
def metadata(self):
|
||||
return self.request(self.item_url + ".json").json()
|
||||
|
||||
def products(self):
|
||||
params = text.parse_query(self.params)
|
||||
params["page"] = text.parse_int(params.get("page"), 1)
|
||||
params = {"page": 1}
|
||||
fetch = True
|
||||
last = None
|
||||
|
||||
@@ -107,36 +114,14 @@ class ShopifyProductExtractor(ShopifyExtractor):
|
||||
"""Base class for product extractors for Shopify based sites"""
|
||||
subcategory = "product"
|
||||
directory_fmt = ("{category}", "Products")
|
||||
pattern_fmt = r"((?:/collections/[\w-]+)?/products/[\w-]+)"
|
||||
pattern = BASE_PATTERN + r"((?:/collections/[\w-]+)?/products/[\w-]+)"
|
||||
test = (
|
||||
("https://www.fashionnova.com/products/essential-slide-red", {
|
||||
"pattern": r"https?://cdn\d*\.shopify.com/",
|
||||
"count": 3,
|
||||
}),
|
||||
("https://www.fashionnova.com/collections/flats/products/name"),
|
||||
)
|
||||
|
||||
def products(self):
|
||||
return (self.item_url,)
|
||||
|
||||
|
||||
EXTRACTORS = {
|
||||
"fashionnova": {
|
||||
"root": "https://www.fashionnova.com",
|
||||
"pattern": r"(?:www\.)?fashionnova\.com",
|
||||
"test-product": (
|
||||
("https://www.fashionnova.com/products/essential-slide-red", {
|
||||
"pattern": r"https?://cdn\d*\.shopify.com/",
|
||||
"count": 3,
|
||||
}),
|
||||
("https://www.fashionnova.com/collections/flats/products/name"),
|
||||
),
|
||||
"test-collection": (
|
||||
("https://www.fashionnova.com/collections/mini-dresses", {
|
||||
"range": "1-20",
|
||||
"count": 20,
|
||||
"archive": False,
|
||||
}),
|
||||
("https://www.fashionnova.com/collections/mini-dresses/?page=1"),
|
||||
("https://www.fashionnova.com/collections/mini-dresses#1"),
|
||||
),
|
||||
},
|
||||
}
|
||||
|
||||
generate_extractors(EXTRACTORS, globals(), (
|
||||
ShopifyProductExtractor,
|
||||
ShopifyCollectionExtractor,
|
||||
))
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
# Copyright 2015-2020 Mike Fährmann
|
||||
# Copyright 2015-2021 Mike Fährmann
|
||||
#
|
||||
# This program is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License version 2 as
|
||||
@@ -111,10 +111,6 @@ class Job():
|
||||
if self.pred_queue(url, kwds):
|
||||
self.handle_queue(url, kwds)
|
||||
|
||||
elif msg[0] == Message.Metadata:
|
||||
self.update_kwdict(msg[1])
|
||||
self.handle_metadata(msg[1])
|
||||
|
||||
elif msg[0] == Message.Version:
|
||||
if msg[1] != 1:
|
||||
raise "unsupported message-version ({}, {})".format(
|
||||
@@ -128,9 +124,6 @@ class Job():
|
||||
def handle_directory(self, kwdict):
|
||||
"""Handle Message.Directory"""
|
||||
|
||||
def handle_metadata(self, kwdict):
|
||||
"""Handle Message.Metadata"""
|
||||
|
||||
def handle_queue(self, url, kwdict):
|
||||
"""Handle Message.Queue"""
|
||||
|
||||
@@ -280,15 +273,6 @@ class DownloadJob(Job):
|
||||
for callback in self.hooks["post"]:
|
||||
callback(self.pathfmt)
|
||||
|
||||
def handle_metadata(self, kwdict):
|
||||
"""Run postprocessors with metadata from 'kwdict'"""
|
||||
if "metadata" in self.hooks:
|
||||
kwdict["extension"] = "metadata"
|
||||
pathfmt = self.pathfmt
|
||||
pathfmt.set_filename(kwdict)
|
||||
for callback in self.hooks["metadata"]:
|
||||
callback(pathfmt)
|
||||
|
||||
def handle_queue(self, url, kwdict):
|
||||
if url in self.visited:
|
||||
return
|
||||
@@ -624,8 +608,5 @@ class DataJob(Job):
|
||||
def handle_directory(self, kwdict):
|
||||
self.data.append((Message.Directory, self.filter(kwdict)))
|
||||
|
||||
def handle_metadata(self, kwdict):
|
||||
self.data.append((Message.Metadata, self.filter(kwdict)))
|
||||
|
||||
def handle_queue(self, url, kwdict):
|
||||
self.data.append((Message.Queue, url, self.filter(kwdict)))
|
||||
|
||||
@@ -136,9 +136,9 @@ def build_parser():
|
||||
help="Print URLs instead of downloading",
|
||||
)
|
||||
output.add_argument(
|
||||
"-G",
|
||||
"-G", "--resolve-urls",
|
||||
dest="list_urls", action="store_const", const=128,
|
||||
help=argparse.SUPPRESS,
|
||||
help="Print URLs instead of downloading; resolve intermediary URLs",
|
||||
)
|
||||
output.add_argument(
|
||||
"-j", "--dump-json",
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
# Copyright 2018-2020 Mike Fährmann
|
||||
# Copyright 2018-2021 Mike Fährmann
|
||||
#
|
||||
# This program is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License version 2 as
|
||||
@@ -39,10 +39,6 @@ class ExecPP(PostProcessor):
|
||||
events = options.get("event")
|
||||
if events is None:
|
||||
events = ("after",)
|
||||
if options.get("final"):
|
||||
self.log.warning("'final' is deprecated, "
|
||||
"use '\"event\": \"finalize\"' instead")
|
||||
events = ("finalize",)
|
||||
elif isinstance(events, str):
|
||||
events = events.split(",")
|
||||
for event in events:
|
||||
|
||||
@@ -55,10 +55,6 @@ class MetadataPP(PostProcessor):
|
||||
events = options.get("event")
|
||||
if events is None:
|
||||
events = ("file",)
|
||||
if options.get("bypost"):
|
||||
self.log.warning("'bypost' is deprecated, use '\"event\": "
|
||||
"\"post\"' and 'filename' instead")
|
||||
events = ("metadata",)
|
||||
elif isinstance(events, str):
|
||||
events = events.split(",")
|
||||
for event in events:
|
||||
|
||||
@@ -6,4 +6,4 @@
|
||||
# it under the terms of the GNU General Public License version 2 as
|
||||
# published by the Free Software Foundation.
|
||||
|
||||
__version__ = "1.16.5"
|
||||
__version__ = "1.17.0-dev"
|
||||
|
||||
@@ -219,10 +219,6 @@ class TestExtractorWait(unittest.TestCase):
|
||||
|
||||
class TextExtractorOAuth(unittest.TestCase):
|
||||
|
||||
@classmethod
|
||||
def setUpClass(cls):
|
||||
mastodon.generate_extractors()
|
||||
|
||||
def test_oauth1(self):
|
||||
for category in ("flickr", "smugmug", "tumblr"):
|
||||
extr = extractor.find("oauth:" + category)
|
||||
|
||||
Reference in New Issue
Block a user