[wikimedia] Improved archive identifiers

This commit is contained in:
Ailothaen
2022-04-25 23:14:16 +02:00
committed by Mike Fährmann
parent e33056adcd
commit 221f54309c

View File

@@ -24,7 +24,7 @@ class WikimediaArticleExtractor(Extractor):
category = "wikimedia"
subcategory = "article"
filename_fmt = "{filename}.{extension}"
-archive_fmt = "{filename}"
+archive_fmt = "a_{sha1}"
pattern = r"https?://([a-z]{2,})\.wikipedia\.org/wiki/([^#/\?]+)"
directory_fmt = ("{category}", "{page}")
test = (
@@ -96,7 +96,7 @@ class WikimediaCategoryExtractor(Extractor):
category = "wikimedia"
subcategory = "category"
filename_fmt = "{filename}.{extension}"
-archive_fmt = "{filename}"
+archive_fmt = "c_{sha1}"
pattern = r"https?://commons.wikimedia.org/wiki/Category:([^#/\?]+)"
directory_fmt = ("{category}", "{page}")