remove test results in extractor modules

and add generic example URLs
This commit is contained in:
Mike Fährmann
2023-09-11 16:30:55 +02:00
parent a833c244c8
commit a453335a9f
176 changed files with 656 additions and 9554 deletions

View File

@@ -95,59 +95,8 @@ class BloggerExtractor(Extractor):
class BloggerPostExtractor(BloggerExtractor):
"""Extractor for a single blog post"""
subcategory = "post"
pattern = BASE_PATTERN + r"(/\d{4}/\d\d/[^/?#]+\.html)"
test = (
("https://julianbphotography.blogspot.com/2010/12/moon-rise.html", {
"url": "9928429fb62f712eb4de80f53625eccecc614aae",
"pattern": r"https://3.bp.blogspot.com/.*/s0/Icy-Moonrise-.*.jpg",
"keyword": {
"blog": {
"date" : "dt:2010-11-21 18:19:42",
"description": "",
"id" : "5623928067739466034",
"kind" : "blogger#blog",
"locale" : dict,
"name" : "Julian Bunker Photography",
"pages" : int,
"posts" : int,
"published" : "2010-11-21T10:19:42-08:00",
"updated" : str,
"url" : "http://julianbphotography.blogspot.com/",
},
"post": {
"author" : "Julian Bunker",
"content" : str,
"date" : "dt:2010-12-26 01:08:00",
"etag" : str,
"id" : "6955139236418998998",
"kind" : "blogger#post",
"published" : "2010-12-25T17:08:00-08:00",
"replies" : "0",
"title" : "Moon Rise",
"updated" : "2011-12-06T05:21:24-08:00",
"url" : "re:.+/2010/12/moon-rise.html$",
},
"num": int,
"url": str,
},
}),
("blogger:http://www.julianbunker.com/2010/12/moon-rise.html"),
# video (#587)
(("http://cfnmscenesinmovies.blogspot.com/2011/11/"
"cfnm-scene-jenna-fischer-in-office.html"), {
"pattern": r"https://.+\.googlevideo\.com/videoplayback",
}),
# image URLs with width/height (#1061)
# ("https://aaaninja.blogspot.com/2020/08/altera-boob-press-2.html", {
# "pattern": r"https://1.bp.blogspot.com/.+/s0/altera_.+png",
# }),
# new image domain (#2204)
(("https://randomthingsthroughmyletterbox.blogspot.com/2022/01"
"/bitter-flowers-by-gunnar-staalesen-blog.html"), {
"pattern": r"https://blogger.googleusercontent.com/img/a/.+=s0$",
"count": 8,
}),
)
pattern = BASE_PATTERN + r"(/\d\d\d\d/\d\d/[^/?#]+\.html)"
example = "https://BLOG.blogspot.com/YYYY/MM/TITLE.html"
def __init__(self, match):
BloggerExtractor.__init__(self, match)
@@ -161,17 +110,7 @@ class BloggerBlogExtractor(BloggerExtractor):
"""Extractor for an entire Blogger blog"""
subcategory = "blog"
pattern = BASE_PATTERN + r"/?$"
test = (
("https://julianbphotography.blogspot.com/", {
"range": "1-25",
"count": 25,
"pattern": r"https://\d\.bp\.blogspot\.com/.*/s0/[^.]+\.jpg",
}),
("blogger:https://www.kefblog.com.ng/", {
"range": "1-25",
"count": 25,
}),
)
example = "https://BLOG.blogspot.com/"
def posts(self, blog):
return self.api.blog_posts(blog["id"])
@@ -181,12 +120,7 @@ class BloggerSearchExtractor(BloggerExtractor):
"""Extractor for Blogger search results"""
subcategory = "search"
pattern = BASE_PATTERN + r"/search/?\?q=([^&#]+)"
test = (
("https://julianbphotography.blogspot.com/search?q=400mm", {
"count": "< 10",
"keyword": {"query": "400mm"},
}),
)
example = "https://BLOG.blogspot.com/search?q=QUERY"
def __init__(self, match):
BloggerExtractor.__init__(self, match)
@@ -203,13 +137,7 @@ class BloggerLabelExtractor(BloggerExtractor):
"""Extractor for Blogger posts by label"""
subcategory = "label"
pattern = BASE_PATTERN + r"/search/label/([^/?#]+)"
test = (
("https://dmmagazine.blogspot.com/search/label/D%26D", {
"range": "1-25",
"count": 25,
"keyword": {"label": "D&D"},
}),
)
example = "https://BLOG.blogspot.com/search/label/LABEL"
def __init__(self, match):
BloggerExtractor.__init__(self, match)