[wikimedia] add ability to change directory before downloading revisions
This commit is contained in:
@@ -62,7 +62,10 @@ class WikimediaExtractor(BaseExtractor):
|
||||
return url
|
||||
raise exception.AbortExtraction("Unable to find API endpoint")
|
||||
|
||||
def prepare(self, image):
|
||||
def prepare_info(self, page):
|
||||
"""Adjust the content of an image info object"""
|
||||
|
||||
def prepare_image(self, image):
|
||||
"""Adjust the content of an image object"""
|
||||
image["metadata"] = {
|
||||
m["name"]: m["value"]
|
||||
@@ -80,14 +83,18 @@ class WikimediaExtractor(BaseExtractor):
|
||||
def items(self):
|
||||
for info in self._pagination(self.params):
|
||||
try:
|
||||
images = info["imageinfo"]
|
||||
images = info.pop("imageinfo")
|
||||
except KeyError:
|
||||
self.log.debug("Missing 'imageinfo' for %s", info)
|
||||
continue
|
||||
images = ()
|
||||
|
||||
for image in images:
|
||||
self.prepare(image)
|
||||
yield Message.Directory, image
|
||||
info["count"] = len(images)
|
||||
self.prepare_info(info)
|
||||
yield Message.Directory, info
|
||||
|
||||
for info["num"], image in enumerate(images, 1):
|
||||
self.prepare_image(image)
|
||||
image.update(info)
|
||||
yield Message.Url, image["url"], image
|
||||
|
||||
if self.subcategories:
|
||||
@@ -245,9 +252,8 @@ class WikimediaArticleExtractor(WikimediaExtractor):
|
||||
"titles" : path,
|
||||
}
|
||||
|
||||
def prepare(self, image):
|
||||
WikimediaExtractor.prepare(self, image)
|
||||
image["page"] = self.title
|
||||
def prepare_info(self, info):
|
||||
info["page"] = self.title
|
||||
|
||||
|
||||
class WikimediaWikiExtractor(WikimediaExtractor):
|
||||
|
||||
@@ -44,6 +44,19 @@ __tests__ = (
|
||||
"width" : int,
|
||||
},
|
||||
|
||||
{
|
||||
"#url" : "https://tl.wikipedia.org/wiki/Sitosol",
|
||||
"#comment" : "revisions of an image in an article",
|
||||
"#category": ("wikimedia", "wikipedia", "article"),
|
||||
"#class" : wikimedia.WikimediaArticleExtractor,
|
||||
"#options" : {"image-revisions": 10},
|
||||
"#count" : 2,
|
||||
"#pattern" : (
|
||||
r"https://upload.wikimedia.org/wikipedia/commons/2/2e/Crowded_cytosol.png",
|
||||
r"https://upload.wikimedia.org/wikipedia/commons/archive/2/2e/20080911161129%21Crowded_cytosol.png",
|
||||
),
|
||||
},
|
||||
|
||||
{
|
||||
"#url" : "https://en.wikipedia.org/wiki/Category:Physics",
|
||||
"#category": ("wikimedia", "wikipedia", "category"),
|
||||
|
||||
Reference in New Issue
Block a user