[foolfuuka] improve 'board' pattern & support pages (#5408)
This commit is contained in:
@@ -117,8 +117,8 @@ class FoolfuukaThreadExtractor(FoolfuukaExtractor):
|
|||||||
|
|
||||||
def __init__(self, match):
|
def __init__(self, match):
|
||||||
FoolfuukaExtractor.__init__(self, match)
|
FoolfuukaExtractor.__init__(self, match)
|
||||||
self.board = match.group(match.lastindex-1)
|
self.board = self.groups[-2]
|
||||||
self.thread = match.group(match.lastindex)
|
self.thread = self.groups[-1]
|
||||||
self.data = None
|
self.data = None
|
||||||
|
|
||||||
def metadata(self):
|
def metadata(self):
|
||||||
@@ -140,20 +140,22 @@ class FoolfuukaThreadExtractor(FoolfuukaExtractor):
|
|||||||
class FoolfuukaBoardExtractor(FoolfuukaExtractor):
|
class FoolfuukaBoardExtractor(FoolfuukaExtractor):
|
||||||
"""Base extractor for FoolFuuka based boards/archives"""
|
"""Base extractor for FoolFuuka based boards/archives"""
|
||||||
subcategory = "board"
|
subcategory = "board"
|
||||||
pattern = BASE_PATTERN + r"/([^/?#]+)/\d*$"
|
pattern = BASE_PATTERN + r"/([^/?#]+)(?:/(?:page/)?(\d*))?$"
|
||||||
example = "https://archived.moe/a/"
|
example = "https://archived.moe/a/"
|
||||||
|
|
||||||
def __init__(self, match):
|
def __init__(self, match):
|
||||||
FoolfuukaExtractor.__init__(self, match)
|
FoolfuukaExtractor.__init__(self, match)
|
||||||
self.board = match.group(match.lastindex)
|
self.board = self.groups[-2]
|
||||||
|
self.page = self.groups[-1]
|
||||||
|
|
||||||
def items(self):
|
def items(self):
|
||||||
index_base = "{}/_/api/chan/index/?board={}&page=".format(
|
index_base = "{}/_/api/chan/index/?board={}&page=".format(
|
||||||
self.root, self.board)
|
self.root, self.board)
|
||||||
thread_base = "{}/{}/thread/".format(self.root, self.board)
|
thread_base = "{}/{}/thread/".format(self.root, self.board)
|
||||||
|
|
||||||
for page in itertools.count(1):
|
page = self.page
|
||||||
with self.request(index_base + format(page)) as response:
|
for pnum in itertools.count(text.parse_int(page, 1)):
|
||||||
|
with self.request(index_base + format(pnum)) as response:
|
||||||
try:
|
try:
|
||||||
threads = response.json()
|
threads = response.json()
|
||||||
except ValueError:
|
except ValueError:
|
||||||
@@ -167,6 +169,9 @@ class FoolfuukaBoardExtractor(FoolfuukaExtractor):
|
|||||||
thread["_extractor"] = FoolfuukaThreadExtractor
|
thread["_extractor"] = FoolfuukaThreadExtractor
|
||||||
yield Message.Queue, thread["url"], thread
|
yield Message.Queue, thread["url"], thread
|
||||||
|
|
||||||
|
if page:
|
||||||
|
return
|
||||||
|
|
||||||
|
|
||||||
class FoolfuukaSearchExtractor(FoolfuukaExtractor):
|
class FoolfuukaSearchExtractor(FoolfuukaExtractor):
|
||||||
"""Base extractor for search results on FoolFuuka based boards/archives"""
|
"""Base extractor for search results on FoolFuuka based boards/archives"""
|
||||||
@@ -179,17 +184,16 @@ class FoolfuukaSearchExtractor(FoolfuukaExtractor):
|
|||||||
def __init__(self, match):
|
def __init__(self, match):
|
||||||
FoolfuukaExtractor.__init__(self, match)
|
FoolfuukaExtractor.__init__(self, match)
|
||||||
self.params = params = {}
|
self.params = params = {}
|
||||||
args = match.group(match.lastindex).split("/")
|
|
||||||
key = None
|
|
||||||
|
|
||||||
for arg in args:
|
key = None
|
||||||
|
for arg in self.groups[-1].split("/"):
|
||||||
if key:
|
if key:
|
||||||
params[key] = text.unescape(arg)
|
params[key] = text.unescape(arg)
|
||||||
key = None
|
key = None
|
||||||
else:
|
else:
|
||||||
key = arg
|
key = arg
|
||||||
|
|
||||||
board = match.group(match.lastindex-1)
|
board = self.groups[-2]
|
||||||
if board != "_":
|
if board != "_":
|
||||||
params["boards"] = board
|
params["boards"] = board
|
||||||
|
|
||||||
|
|||||||
@@ -15,12 +15,32 @@ __tests__ = (
|
|||||||
"#sha1_url": "e7d624aded15a069194e38dc731ec23217a422fb",
|
"#sha1_url": "e7d624aded15a069194e38dc731ec23217a422fb",
|
||||||
},
|
},
|
||||||
|
|
||||||
|
{
|
||||||
|
"#url" : "https://desuarchive.org/a",
|
||||||
|
"#category": ("foolfuuka", "desuarchive", "board"),
|
||||||
|
"#class" : foolfuuka.FoolfuukaBoardExtractor,
|
||||||
|
},
|
||||||
|
|
||||||
{
|
{
|
||||||
"#url" : "https://desuarchive.org/a/",
|
"#url" : "https://desuarchive.org/a/",
|
||||||
"#category": ("foolfuuka", "desuarchive", "board"),
|
"#category": ("foolfuuka", "desuarchive", "board"),
|
||||||
"#class" : foolfuuka.FoolfuukaBoardExtractor,
|
"#class" : foolfuuka.FoolfuukaBoardExtractor,
|
||||||
},
|
},
|
||||||
|
|
||||||
|
{
|
||||||
|
"#url" : "https://desuarchive.org/a/2",
|
||||||
|
"#category": ("foolfuuka", "desuarchive", "board"),
|
||||||
|
"#class" : foolfuuka.FoolfuukaBoardExtractor,
|
||||||
|
},
|
||||||
|
|
||||||
|
{
|
||||||
|
"#url" : "https://desuarchive.org/a/page/2",
|
||||||
|
"#category": ("foolfuuka", "desuarchive", "board"),
|
||||||
|
"#class" : foolfuuka.FoolfuukaBoardExtractor,
|
||||||
|
"#pattern" : foolfuuka.FoolfuukaThreadExtractor.pattern,
|
||||||
|
"#count" : 10,
|
||||||
|
},
|
||||||
|
|
||||||
{
|
{
|
||||||
"#url" : "https://desuarchive.org/_/search/text/test/",
|
"#url" : "https://desuarchive.org/_/search/text/test/",
|
||||||
"#category": ("foolfuuka", "desuarchive", "search"),
|
"#category": ("foolfuuka", "desuarchive", "search"),
|
||||||
|
|||||||
Reference in New Issue
Block a user