[blogger] add 'label' or 'query' metadata fields (#2930)

for '/search/label/…' or '/search?q=…' URLs
This commit is contained in:
Mike Fährmann
2022-09-20 11:37:39 +02:00
parent eef50c1f28
commit d699310fdf

View File

@@ -48,6 +48,7 @@ class BloggerExtractor(Extractor):
r'\d+\.bp\.blogspot\.com)/[^"]+)').findall r'\d+\.bp\.blogspot\.com)/[^"]+)').findall
findall_video = re.compile( findall_video = re.compile(
r'src="(https?://www\.blogger\.com/video\.g\?token=[^"]+)').findall r'src="(https?://www\.blogger\.com/video\.g\?token=[^"]+)').findall
metadata = self.metadata()
for post in self.posts(blog): for post in self.posts(blog):
content = post["content"] content = post["content"]
@@ -74,18 +75,21 @@ class BloggerExtractor(Extractor):
del post["selfLink"] del post["selfLink"]
del post["blog"] del post["blog"]
-yield Message.Directory, {"blog": blog, "post": post}
-for num, url in enumerate(files, 1):
-    yield Message.Url, url, text.nameext_from_url(url, {
-        "blog": blog,
-        "post": post,
-        "url" : url,
-        "num" : num,
-    })
+data = {"blog": blog, "post": post}
+if metadata:
+    data.update(metadata)
+yield Message.Directory, data
+
+for data["num"], url in enumerate(files, 1):
+    data["url"] = url
+    yield Message.Url, url, text.nameext_from_url(url, data)
def posts(self, blog): def posts(self, blog):
"""Return an iterable with all relevant post objects""" """Return an iterable with all relevant post objects"""
def metadata(self):
"""Return additional metadata"""
class BloggerPostExtractor(BloggerExtractor): class BloggerPostExtractor(BloggerExtractor):
"""Extractor for a single blog post""" """Extractor for a single blog post"""
@@ -178,7 +182,8 @@ class BloggerSearchExtractor(BloggerExtractor):
 pattern = BASE_PATTERN + r"/search/?\?q=([^&#]+)"
 test = (
     ("https://julianbphotography.blogspot.com/search?q=400mm", {
-        "count": "< 10"
+        "count": "< 10",
+        "keyword": {"query": "400mm"},
     }),
 )
@@ -189,6 +194,9 @@ class BloggerSearchExtractor(BloggerExtractor):
def posts(self, blog): def posts(self, blog):
return self.api.blog_search(blog["id"], self.query) return self.api.blog_search(blog["id"], self.query)
def metadata(self):
return {"query": self.query}
class BloggerLabelExtractor(BloggerExtractor): class BloggerLabelExtractor(BloggerExtractor):
"""Extractor for Blogger posts by label""" """Extractor for Blogger posts by label"""
@@ -198,6 +206,7 @@ class BloggerLabelExtractor(BloggerExtractor):
("https://dmmagazine.blogspot.com/search/label/D%26D", { ("https://dmmagazine.blogspot.com/search/label/D%26D", {
"range": "1-25", "range": "1-25",
"count": 25, "count": 25,
"keyword": {"label": "D&D"},
}), }),
) )
@@ -208,6 +217,9 @@ class BloggerLabelExtractor(BloggerExtractor):
def posts(self, blog): def posts(self, blog):
return self.api.blog_posts(blog["id"], self.label) return self.api.blog_posts(blog["id"], self.label)
def metadata(self):
return {"label": self.label}
class BloggerAPI(): class BloggerAPI():
"""Minimal interface for the Blogger v3 API """Minimal interface for the Blogger v3 API