[foolfuuka] match 4chan filenames (#2577)
Introduce two new metadata fields: - filename_media: original filename of the file uploaded to 4chan - timestamp_ms: timestamp with millisecond precision (tim)
This commit is contained in:
@@ -16,6 +16,7 @@ import itertools
|
|||||||
class FoolfuukaExtractor(BaseExtractor):
|
class FoolfuukaExtractor(BaseExtractor):
|
||||||
"""Base extractor for FoolFuuka based boards/archives"""
|
"""Base extractor for FoolFuuka based boards/archives"""
|
||||||
basecategory = "foolfuuka"
|
basecategory = "foolfuuka"
|
||||||
|
filename_fmt = "{timestamp_ms} {filename_media}.{extension}"
|
||||||
archive_fmt = "{board[shortname]}_{num}_{timestamp}"
|
archive_fmt = "{board[shortname]}_{num}_{timestamp}"
|
||||||
external = "default"
|
external = "default"
|
||||||
|
|
||||||
@@ -40,6 +41,9 @@ class FoolfuukaExtractor(BaseExtractor):
|
|||||||
|
|
||||||
post["filename"], _, post["extension"] = \
|
post["filename"], _, post["extension"] = \
|
||||||
media["media"].rpartition(".")
|
media["media"].rpartition(".")
|
||||||
|
post["filename_media"] = media["media_filename"].rpartition(".")[0]
|
||||||
|
post["timestamp_ms"] = text.parse_int(
|
||||||
|
media["media_orig"].rpartition(".")[0])
|
||||||
yield Message.Url, url, post
|
yield Message.Url, url, post
|
||||||
|
|
||||||
def metadata(self):
|
def metadata(self):
|
||||||
@@ -107,7 +111,7 @@ class FoolfuukaThreadExtractor(FoolfuukaExtractor):
|
|||||||
"""Base extractor for threads on FoolFuuka based boards/archives"""
|
"""Base extractor for threads on FoolFuuka based boards/archives"""
|
||||||
subcategory = "thread"
|
subcategory = "thread"
|
||||||
directory_fmt = ("{category}", "{board[shortname]}",
|
directory_fmt = ("{category}", "{board[shortname]}",
|
||||||
"{thread_num}{title:? - //}")
|
"{thread_num} {title|comment[:50]}")
|
||||||
pattern = BASE_PATTERN + r"/([^/?#]+)/thread/(\d+)"
|
pattern = BASE_PATTERN + r"/([^/?#]+)/thread/(\d+)"
|
||||||
test = (
|
test = (
|
||||||
("https://archive.4plebs.org/tg/thread/54059290", {
|
("https://archive.4plebs.org/tg/thread/54059290", {
|
||||||
|
|||||||
Reference in New Issue
Block a user