update/fix --list-extractors
This commit is contained in:
@@ -196,16 +196,15 @@ def main():
|
|||||||
|
|
||||||
elif args.list_extractors:
|
elif args.list_extractors:
|
||||||
write = sys.stdout.write
|
write = sys.stdout.write
|
||||||
fmt = "{}\n{}\nCategory: {} - Subcategory: {}{}\n\n".format
|
fmt = ("{}{}\nCategory: {} - Subcategory: {}"
|
||||||
|
"\nExample : {}\n\n").format
|
||||||
|
|
||||||
for extr in extractor.extractors():
|
for extr in extractor.extractors():
|
||||||
if not extr.__doc__:
|
|
||||||
continue
|
|
||||||
test = next(extr._get_tests(), None)
|
|
||||||
write(fmt(
|
write(fmt(
|
||||||
extr.__name__, extr.__doc__,
|
extr.__name__,
|
||||||
|
"\n" + extr.__doc__ if extr.__doc__ else "",
|
||||||
extr.category, extr.subcategory,
|
extr.category, extr.subcategory,
|
||||||
"\nExample : " + test[0] if test else "",
|
extr.example,
|
||||||
))
|
))
|
||||||
|
|
||||||
elif args.clear_cache:
|
elif args.clear_cache:
|
||||||
|
|||||||
@@ -34,7 +34,6 @@ class Extractor():
|
|||||||
archive_fmt = ""
|
archive_fmt = ""
|
||||||
cookies_domain = ""
|
cookies_domain = ""
|
||||||
browser = None
|
browser = None
|
||||||
example = ""
|
|
||||||
root = ""
|
root = ""
|
||||||
request_interval = 0.0
|
request_interval = 0.0
|
||||||
request_interval_min = 0.0
|
request_interval_min = 0.0
|
||||||
@@ -508,21 +507,6 @@ class Extractor():
|
|||||||
result.append((Message.Queue, url, {"_extractor": extr}))
|
result.append((Message.Queue, url, {"_extractor": extr}))
|
||||||
return iter(result)
|
return iter(result)
|
||||||
|
|
||||||
@classmethod
|
|
||||||
def _get_tests(cls):
|
|
||||||
"""Yield an extractor's test cases as (URL, RESULTS) tuples"""
|
|
||||||
tests = cls.test
|
|
||||||
if not tests:
|
|
||||||
return
|
|
||||||
|
|
||||||
if len(tests) == 2 and (not tests[1] or isinstance(tests[1], dict)):
|
|
||||||
tests = (tests,)
|
|
||||||
|
|
||||||
for test in tests:
|
|
||||||
if isinstance(test, str):
|
|
||||||
test = (test, None)
|
|
||||||
yield test
|
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def _dump(cls, obj):
|
def _dump(cls, obj):
|
||||||
util.dump_json(obj, ensure_ascii=False, indent=2)
|
util.dump_json(obj, ensure_ascii=False, indent=2)
|
||||||
|
|||||||
@@ -110,18 +110,7 @@ class FlickrAlbumExtractor(FlickrExtractor):
|
|||||||
"Albums", "{album[id]} {album[title]}")
|
"Albums", "{album[id]} {album[title]}")
|
||||||
archive_fmt = "a_{album[id]}_{id}"
|
archive_fmt = "a_{album[id]}_{id}"
|
||||||
pattern = BASE_PATTERN + r"/photos/([^/?#]+)/(?:album|set)s(?:/(\d+))?"
|
pattern = BASE_PATTERN + r"/photos/([^/?#]+)/(?:album|set)s(?:/(\d+))?"
|
||||||
test = (
|
example = "https://www.flickr.com/photos/USER/albums/12345"
|
||||||
(("https://www.flickr.com/photos/shona_s/albums/72157633471741607"), {
|
|
||||||
"pattern": FlickrImageExtractor.pattern,
|
|
||||||
"count": 6,
|
|
||||||
}),
|
|
||||||
("https://www.flickr.com/photos/shona_s/albums", {
|
|
||||||
"pattern": pattern,
|
|
||||||
"count": 2,
|
|
||||||
}),
|
|
||||||
("https://secure.flickr.com/photos/shona_s/albums"),
|
|
||||||
("https://m.flickr.com/photos/shona_s/albums"),
|
|
||||||
)
|
|
||||||
|
|
||||||
def __init__(self, match):
|
def __init__(self, match):
|
||||||
FlickrExtractor.__init__(self, match)
|
FlickrExtractor.__init__(self, match)
|
||||||
|
|||||||
Reference in New Issue
Block a user