[dt] replace 'text.parse_datetime()' & 'text.parse_timestamp()'

This commit is contained in:
Mike Fährmann
2025-10-17 17:43:06 +02:00
parent d57dc48dcd
commit 085616e0a8
138 changed files with 220 additions and 260 deletions

View File

@@ -46,7 +46,7 @@ class _2chThreadExtractor(Extractor):
for post in posts: for post in posts:
if files := post.get("files"): if files := post.get("files"):
post["post_name"] = post["name"] post["post_name"] = post["name"]
post["date"] = text.parse_timestamp(post["timestamp"]) post["date"] = self.parse_timestamp(post["timestamp"])
del post["files"] del post["files"]
del post["name"] del post["name"]

View File

@@ -65,7 +65,7 @@ class _2chenThreadExtractor(Extractor):
extr = text.extract_from(post) extr = text.extract_from(post)
return { return {
"name" : text.unescape(extr("<span>", "</span>")), "name" : text.unescape(extr("<span>", "</span>")),
"date" : text.parse_datetime( "date" : self.parse_datetime(
extr("<time", "<").partition(">")[2], extr("<time", "<").partition(">")[2],
"%d %b %Y (%a) %H:%M:%S" "%d %b %Y (%a) %H:%M:%S"
), ),

View File

@@ -61,7 +61,7 @@ class _4archiveThreadExtractor(Extractor):
extr = text.extract_from(post) extr = text.extract_from(post)
data = { data = {
"name": extr('class="name">', "</span>"), "name": extr('class="name">', "</span>"),
"date": text.parse_datetime( "date": self.parse_datetime(
(extr('class="dateTime">', "<") or (extr('class="dateTime">', "<") or
extr('class="dateTime postNum" >', "<")).strip(), extr('class="dateTime postNum" >', "<")).strip(),
"%Y-%m-%d %H:%M:%S"), "%Y-%m-%d %H:%M:%S"),

View File

@@ -85,7 +85,7 @@ class _8musesAlbumExtractor(Extractor):
"parent" : text.parse_int(album["parentId"]), "parent" : text.parse_int(album["parentId"]),
"views" : text.parse_int(album["numberViews"]), "views" : text.parse_int(album["numberViews"]),
"likes" : text.parse_int(album["numberLikes"]), "likes" : text.parse_int(album["numberLikes"]),
"date" : text.parse_datetime( "date" : self.parse_datetime(
album["updatedAt"], "%Y-%m-%dT%H:%M:%S.%fZ"), album["updatedAt"], "%Y-%m-%dT%H:%M:%S.%fZ"),
} }

View File

@@ -33,7 +33,7 @@ class AdultempireGalleryExtractor(GalleryExtractor):
"gallery_id": text.parse_int(self.gallery_id), "gallery_id": text.parse_int(self.gallery_id),
"title" : text.unescape(extr('title="', '"')), "title" : text.unescape(extr('title="', '"')),
"studio" : extr(">studio</small>", "<").strip(), "studio" : extr(">studio</small>", "<").strip(),
"date" : text.parse_datetime(extr( "date" : self.parse_datetime(extr(
">released</small>", "<").strip(), "%m/%d/%Y"), ">released</small>", "<").strip(), "%m/%d/%Y"),
"actors" : sorted(text.split_html(extr( "actors" : sorted(text.split_html(extr(
'<ul class="item-details item-cast-list ', '</ul>'))[1:]), '<ul class="item-details item-cast-list ', '</ul>'))[1:]),

View File

@@ -33,7 +33,7 @@ class AgnphExtractor(booru.BooruExtractor):
self.cookies.set("confirmed_age", "true", domain="agn.ph") self.cookies.set("confirmed_age", "true", domain="agn.ph")
def _prepare(self, post): def _prepare(self, post):
post["date"] = text.parse_timestamp(post["created_at"]) post["date"] = self.parse_timestamp(post["created_at"])
post["status"] = post["status"].strip() post["status"] = post["status"].strip()
post["has_children"] = ("true" in post["has_children"]) post["has_children"] = ("true" in post["has_children"])

View File

@@ -182,11 +182,11 @@ class Ao3WorkExtractor(Ao3Extractor):
extr('<dd class="freeform tags">', "</dd>")), extr('<dd class="freeform tags">', "</dd>")),
"lang" : extr('<dd class="language" lang="', '"'), "lang" : extr('<dd class="language" lang="', '"'),
"series" : extr('<dd class="series">', "</dd>"), "series" : extr('<dd class="series">', "</dd>"),
"date" : text.parse_datetime( "date" : self.parse_datetime(
extr('<dd class="published">', "<"), "%Y-%m-%d"), extr('<dd class="published">', "<"), "%Y-%m-%d"),
"date_completed": text.parse_datetime( "date_completed": self.parse_datetime(
extr('>Completed:</dt><dd class="status">', "<"), "%Y-%m-%d"), extr('>Completed:</dt><dd class="status">', "<"), "%Y-%m-%d"),
"date_updated" : text.parse_timestamp( "date_updated" : self.parse_timestamp(
path.rpartition("updated_at=")[2]), path.rpartition("updated_at=")[2]),
"words" : text.parse_int( "words" : text.parse_int(
extr('<dd class="words">', "<").replace(",", "")), extr('<dd class="words">', "<").replace(",", "")),

View File

@@ -49,7 +49,7 @@ class ArcalivePostExtractor(ArcaliveExtractor):
files = self._extract_files(post) files = self._extract_files(post)
post["count"] = len(files) post["count"] = len(files)
post["date"] = text.parse_datetime( post["date"] = self.parse_datetime(
post["createdAt"][:19], "%Y-%m-%dT%H:%M:%S") post["createdAt"][:19], "%Y-%m-%dT%H:%M:%S")
post["post_url"] = post_url = \ post["post_url"] = post_url = \
f"{self.root}/b/{post['boardSlug']}/{post['id']}" f"{self.root}/b/{post['boardSlug']}/{post['id']}"

View File

@@ -126,7 +126,7 @@ class ArtstationExtractor(Extractor):
data["title"] = text.unescape(data["title"]) data["title"] = text.unescape(data["title"])
data["description"] = text.unescape(text.remove_html( data["description"] = text.unescape(text.remove_html(
data["description"])) data["description"]))
data["date"] = text.parse_datetime( data["date"] = self.parse_datetime(
data["created_at"], "%Y-%m-%dT%H:%M:%S.%f%z") data["created_at"], "%Y-%m-%dT%H:%M:%S.%f%z")
assets = data["assets"] assets = data["assets"]

View File

@@ -123,7 +123,7 @@ class BatotoChapterExtractor(BatotoBase, ChapterExtractor):
"chapter_minor" : minor, "chapter_minor" : minor,
"chapter_string": info, "chapter_string": info,
"chapter_id" : text.parse_int(self.chapter_id), "chapter_id" : text.parse_int(self.chapter_id),
"date" : text.parse_timestamp(extr(' time="', '"')[:-3]), "date" : self.parse_timestamp(extr(' time="', '"')[:-3]),
} }
def images(self, page): def images(self, page):
@@ -167,7 +167,7 @@ class BatotoMangaExtractor(BatotoBase, MangaExtractor):
data["chapter"] = text.parse_int(chapter) data["chapter"] = text.parse_int(chapter)
data["chapter_minor"] = sep + minor data["chapter_minor"] = sep + minor
data["date"] = text.parse_datetime( data["date"] = self.parse_datetime(
extr('time="', '"'), "%Y-%m-%dT%H:%M:%S.%fZ") extr('time="', '"'), "%Y-%m-%dT%H:%M:%S.%fZ")
url = f"{self.root}/title/{href}" url = f"{self.root}/title/{href}"
@@ -188,9 +188,9 @@ def _manga_info(self, manga_id, page=None):
"manga" : data["name"][1], "manga" : data["name"][1],
"manga_id" : text.parse_int(manga_id), "manga_id" : text.parse_int(manga_id),
"manga_slug" : data["slug"][1], "manga_slug" : data["slug"][1],
"manga_date" : text.parse_timestamp( "manga_date" : self.parse_timestamp(
data["dateCreate"][1] // 1000), data["dateCreate"][1] // 1000),
"manga_date_updated": text.parse_timestamp( "manga_date_updated": self.parse_timestamp(
data["dateUpdate"][1] / 1000), data["dateUpdate"][1] / 1000),
"author" : json_list(data["authors"]), "author" : json_list(data["authors"]),
"artist" : json_list(data["artists"]), "artist" : json_list(data["artists"]),

View File

@@ -67,7 +67,7 @@ class BehanceExtractor(Extractor):
tags = [tag["title"] for tag in tags] tags = [tag["title"] for tag in tags]
data["tags"] = tags data["tags"] = tags
data["date"] = text.parse_timestamp( data["date"] = self.parse_timestamp(
data.get("publishedOn") or data.get("conceived_on") or 0) data.get("publishedOn") or data.get("conceived_on") or 0)
if creator := data.get("creator"): if creator := data.get("creator"):

View File

@@ -141,8 +141,8 @@ class BellazonExtractor(Extractor):
"title": schema["headline"], "title": schema["headline"],
"views": stats[0]["userInteractionCount"], "views": stats[0]["userInteractionCount"],
"posts": stats[1]["userInteractionCount"], "posts": stats[1]["userInteractionCount"],
"date" : text.parse_datetime(schema["datePublished"]), "date" : self.parse_datetime(schema["datePublished"]),
"date_updated": text.parse_datetime(schema["dateModified"]), "date_updated": self.parse_datetime(schema["dateModified"]),
"description" : text.unescape(schema["text"]).strip(), "description" : text.unescape(schema["text"]).strip(),
"section" : path[-2], "section" : path[-2],
"author" : author["name"], "author" : author["name"],
@@ -162,7 +162,7 @@ class BellazonExtractor(Extractor):
post = { post = {
"id": extr('id="elComment_', '"'), "id": extr('id="elComment_', '"'),
"author_url": extr(" href='", "'"), "author_url": extr(" href='", "'"),
"date": text.parse_datetime(extr("datetime='", "'")), "date": self.parse_datetime(extr("datetime='", "'")),
"content": extr("<!-- Post content -->", "\n\t\t</div>"), "content": extr("<!-- Post content -->", "\n\t\t</div>"),
} }

View File

@@ -40,7 +40,7 @@ class BloggerExtractor(BaseExtractor):
blog = self.api.blog_by_url("http://" + self.blog) blog = self.api.blog_by_url("http://" + self.blog)
blog["pages"] = blog["pages"]["totalItems"] blog["pages"] = blog["pages"]["totalItems"]
blog["posts"] = blog["posts"]["totalItems"] blog["posts"] = blog["posts"]["totalItems"]
blog["date"] = text.parse_datetime(blog["published"]) blog["date"] = self.parse_datetime(blog["published"])
del blog["selfLink"] del blog["selfLink"]
findall_image = util.re( findall_image = util.re(
@@ -65,7 +65,7 @@ class BloggerExtractor(BaseExtractor):
post["author"] = post["author"]["displayName"] post["author"] = post["author"]["displayName"]
post["replies"] = post["replies"]["totalItems"] post["replies"] = post["replies"]["totalItems"]
post["content"] = text.remove_html(content) post["content"] = text.remove_html(content)
post["date"] = text.parse_datetime(post["published"]) post["date"] = self.parse_datetime(post["published"])
del post["selfLink"] del post["selfLink"]
del post["blog"] del post["blog"]

View File

@@ -135,7 +135,7 @@ class BlueskyExtractor(Extractor):
post["instance"] = self.instance post["instance"] = self.instance
post["post_id"] = self._pid(post) post["post_id"] = self._pid(post)
post["date"] = text.parse_datetime( post["date"] = self.parse_datetime(
post["createdAt"][:19], "%Y-%m-%dT%H:%M:%S") post["createdAt"][:19], "%Y-%m-%dT%H:%M:%S")
def _extract_files(self, post): def _extract_files(self, post):

View File

@@ -78,7 +78,7 @@ class BoostyExtractor(Extractor):
post["links"] = links = [] post["links"] = links = []
if "createdAt" in post: if "createdAt" in post:
post["date"] = text.parse_timestamp(post["createdAt"]) post["date"] = self.parse_timestamp(post["createdAt"])
for block in post["data"]: for block in post["data"]:
try: try:

View File

@@ -70,7 +70,7 @@ class BoothItemExtractor(BoothExtractor):
url + ".json", headers=headers, interval=False) url + ".json", headers=headers, interval=False)
item["booth_category"] = item.pop("category", None) item["booth_category"] = item.pop("category", None)
item["date"] = text.parse_datetime( item["date"] = self.parse_datetime(
item["published_at"], "%Y-%m-%dT%H:%M:%S.%f%z") item["published_at"], "%Y-%m-%dT%H:%M:%S.%f%z")
item["tags"] = [t["name"] for t in item["tags"]] item["tags"] = [t["name"] for t in item["tags"]]

View File

@@ -167,7 +167,7 @@ class BunkrAlbumExtractor(LolisafeAlbumExtractor):
item, 'name: "', ".") item, 'name: "', ".")
file["size"] = text.parse_int(text.extr( file["size"] = text.parse_int(text.extr(
item, "size: ", " ,\n")) item, "size: ", " ,\n"))
file["date"] = text.parse_datetime(text.extr( file["date"] = self.parse_datetime(text.extr(
item, 'timestamp: "', '"'), "%H:%M:%S %d/%m/%Y") item, 'timestamp: "', '"'), "%H:%M:%S %d/%m/%Y")
yield file yield file

View File

@@ -28,7 +28,7 @@ class CatboxAlbumExtractor(GalleryExtractor):
return { return {
"album_id" : self.page_url.rpartition("/")[2], "album_id" : self.page_url.rpartition("/")[2],
"album_name" : text.unescape(extr("<h1>", "<")), "album_name" : text.unescape(extr("<h1>", "<")),
"date" : text.parse_datetime(extr( "date" : self.parse_datetime(extr(
"<p>Created ", "<"), "%B %d %Y"), "<p>Created ", "<"), "%B %d %Y"),
"description": text.unescape(extr("<p>", "<")), "description": text.unescape(extr("<p>", "<")),
} }

View File

@@ -79,7 +79,7 @@ class CheveretoImageExtractor(CheveretoExtractor):
"url" : url, "url" : url,
"album": text.remove_html(extr( "album": text.remove_html(extr(
"Added to <a", "</a>").rpartition(">")[2]), "Added to <a", "</a>").rpartition(">")[2]),
"date" : text.parse_datetime(extr( "date" : self.parse_datetime(extr(
'<span title="', '"'), "%Y-%m-%d %H:%M:%S"), '<span title="', '"'), "%Y-%m-%d %H:%M:%S"),
"user" : extr('username: "', '"'), "user" : extr('username: "', '"'),
} }
@@ -116,7 +116,7 @@ class CheveretoVideoExtractor(CheveretoExtractor):
'class="far fa-clock"></i>', ""), 'class="far fa-clock"></i>', ""),
"album": text.remove_html(extr( "album": text.remove_html(extr(
"Added to <a", "</a>").rpartition(">")[2]), "Added to <a", "</a>").rpartition(">")[2]),
"date" : text.parse_datetime(extr( "date" : self.parse_datetime(extr(
'<span title="', '"'), "%Y-%m-%d %H:%M:%S"), '<span title="', '"'), "%Y-%m-%d %H:%M:%S"),
"user" : extr('username: "', '"'), "user" : extr('username: "', '"'),
} }

View File

@@ -61,7 +61,7 @@ class CienArticleExtractor(CienExtractor):
post["post_url"] = url post["post_url"] = url
post["post_id"] = text.parse_int(post_id) post["post_id"] = text.parse_int(post_id)
post["count"] = len(files) post["count"] = len(files)
post["date"] = text.parse_datetime(post["datePublished"]) post["date"] = self.parse_datetime(post["datePublished"])
try: try:
post["author"]["id"] = text.parse_int(author_id) post["author"]["id"] = text.parse_int(author_id)

View File

@@ -86,7 +86,7 @@ class CivitaiExtractor(Extractor):
images = self.api.images_post(post["id"]) images = self.api.images_post(post["id"])
post = self.api.post(post["id"]) post = self.api.post(post["id"])
post["date"] = text.parse_datetime( post["date"] = self.parse_datetime(
post["publishedAt"], "%Y-%m-%dT%H:%M:%S.%fZ") post["publishedAt"], "%Y-%m-%dT%H:%M:%S.%fZ")
data = { data = {
"post": post, "post": post,
@@ -122,7 +122,7 @@ class CivitaiExtractor(Extractor):
data["post"] = post = self._extract_meta_post(file) data["post"] = post = self._extract_meta_post(file)
if post: if post:
post.pop("user", None) post.pop("user", None)
file["date"] = text.parse_datetime( file["date"] = self.parse_datetime(
file["createdAt"], "%Y-%m-%dT%H:%M:%S.%fZ") file["createdAt"], "%Y-%m-%dT%H:%M:%S.%fZ")
data["url"] = url = self._url(file) data["url"] = url = self._url(file)
@@ -180,7 +180,7 @@ class CivitaiExtractor(Extractor):
if "id" not in file and data["filename"].isdecimal(): if "id" not in file and data["filename"].isdecimal():
file["id"] = text.parse_int(data["filename"]) file["id"] = text.parse_int(data["filename"])
if "date" not in file: if "date" not in file:
file["date"] = text.parse_datetime( file["date"] = self.parse_datetime(
file["createdAt"], "%Y-%m-%dT%H:%M:%S.%fZ") file["createdAt"], "%Y-%m-%dT%H:%M:%S.%fZ")
if self._meta_generation: if self._meta_generation:
file["generation"] = self._extract_meta_generation(file) file["generation"] = self._extract_meta_generation(file)
@@ -216,7 +216,7 @@ class CivitaiExtractor(Extractor):
def _extract_meta_post(self, image): def _extract_meta_post(self, image):
try: try:
post = self.api.post(image["postId"]) post = self.api.post(image["postId"])
post["date"] = text.parse_datetime( post["date"] = self.parse_datetime(
post["publishedAt"], "%Y-%m-%dT%H:%M:%S.%fZ") post["publishedAt"], "%Y-%m-%dT%H:%M:%S.%fZ")
return post return post
except Exception as exc: except Exception as exc:
@@ -278,7 +278,7 @@ class CivitaiModelExtractor(CivitaiExtractor):
versions = (version,) versions = (version,)
for version in versions: for version in versions:
version["date"] = text.parse_datetime( version["date"] = self.parse_datetime(
version["createdAt"], "%Y-%m-%dT%H:%M:%S.%fZ") version["createdAt"], "%Y-%m-%dT%H:%M:%S.%fZ")
data = { data = {
@@ -593,7 +593,7 @@ class CivitaiGeneratedExtractor(CivitaiExtractor):
self._require_auth() self._require_auth()
for gen in self.api.orchestrator_queryGeneratedImages(): for gen in self.api.orchestrator_queryGeneratedImages():
gen["date"] = text.parse_datetime( gen["date"] = self.parse_datetime(
gen["createdAt"], "%Y-%m-%dT%H:%M:%S.%fZ") gen["createdAt"], "%Y-%m-%dT%H:%M:%S.%fZ")
yield Message.Directory, gen yield Message.Directory, gen
for step in gen.pop("steps", ()): for step in gen.pop("steps", ()):

View File

@@ -114,9 +114,9 @@ class ComickChapterExtractor(ComickBase, ChapterExtractor):
"chapter_hid" : ch["hid"], "chapter_hid" : ch["hid"],
"chapter_string": chstr, "chapter_string": chstr,
"group" : ch["group_name"], "group" : ch["group_name"],
"date" : text.parse_datetime( "date" : self.parse_datetime(
ch["created_at"][:19], "%Y-%m-%dT%H:%M:%S"), ch["created_at"][:19], "%Y-%m-%dT%H:%M:%S"),
"date_updated" : text.parse_datetime( "date_updated" : self.parse_datetime(
ch["updated_at"][:19], "%Y-%m-%dT%H:%M:%S"), ch["updated_at"][:19], "%Y-%m-%dT%H:%M:%S"),
"lang" : ch["lang"], "lang" : ch["lang"],
} }

View File

@@ -60,6 +60,6 @@ class ComicvineTagExtractor(BooruExtractor):
_file_url = operator.itemgetter("original") _file_url = operator.itemgetter("original")
def _prepare(self, post): def _prepare(self, post):
post["date"] = text.parse_datetime( post["date"] = self.parse_datetime(
post["dateCreated"], "%a, %b %d %Y") post["dateCreated"], "%a, %b %d %Y")
post["tags"] = [tag["name"] for tag in post["tags"] if tag["name"]] post["tags"] = [tag["name"] for tag in post["tags"] if tag["name"]]

View File

@@ -63,6 +63,10 @@ class Extractor():
else: else:
self.category = CATEGORY_MAP[self.category] self.category = CATEGORY_MAP[self.category]
self.parse_datetime = dt.parse
self.parse_datetime_iso = dt.parse_iso
self.parse_timestamp = dt.parse_ts
self._cfgpath = ("extractor", self.category, self.subcategory) self._cfgpath = ("extractor", self.category, self.subcategory)
self._parentdir = "" self._parentdir = ""

View File

@@ -47,7 +47,7 @@ class CyberdropAlbumExtractor(lolisafe.LolisafeAlbumExtractor):
"album_name" : text.unescape(extr('title="', '"')), "album_name" : text.unescape(extr('title="', '"')),
"album_size" : text.parse_bytes(extr( "album_size" : text.parse_bytes(extr(
'<p class="title">', "B")), '<p class="title">', "B")),
"date" : text.parse_datetime(extr( "date" : self.parse_datetime(extr(
'<p class="title">', '<'), "%d.%m.%Y"), '<p class="title">', '<'), "%d.%m.%Y"),
"description": text.unescape(text.unescape( # double "description": text.unescape(text.unescape( # double
desc.rpartition(" [R")[0])), desc.rpartition(" [R")[0])),

View File

@@ -113,7 +113,7 @@ class CyberfileFileExtractor(CyberfileExtractor):
"Filesize:", "</tr>"))[:-1]), "Filesize:", "</tr>"))[:-1]),
"tags" : text.split_html(extr( "tags" : text.split_html(extr(
"Keywords:", "</tr>")), "Keywords:", "</tr>")),
"date" : text.parse_datetime(text.remove_html(extr( "date" : self.parse_datetime(text.remove_html(extr(
"Uploaded:", "</tr>")), "%d/%m/%Y %H:%M:%S"), "Uploaded:", "</tr>")), "%d/%m/%Y %H:%M:%S"),
"permissions": text.remove_html(extr( "permissions": text.remove_html(extr(
"Permissions:", "</tr>")).split(" &amp; "), "Permissions:", "</tr>")).split(" &amp; "),

View File

@@ -68,7 +68,7 @@ class DankefuerslesenChapterExtractor(DankefuerslesenBase, ChapterExtractor):
"chapter_minor": minor, "chapter_minor": minor,
"group" : manga["groups"][group_id].split(" & "), "group" : manga["groups"][group_id].split(" & "),
"group_id" : text.parse_int(group_id), "group_id" : text.parse_int(group_id),
"date" : text.parse_timestamp(data["release_date"][group_id]), "date" : self.parse_timestamp(data["release_date"][group_id]),
"lang" : util.NONE, "lang" : util.NONE,
"language" : util.NONE, "language" : util.NONE,
} }

View File

@@ -259,7 +259,7 @@ class DeviantartExtractor(Extractor):
deviation["published_time"] = text.parse_int( deviation["published_time"] = text.parse_int(
deviation["published_time"]) deviation["published_time"])
deviation["date"] = text.parse_timestamp( deviation["date"] = self.parse_timestamp(
deviation["published_time"]) deviation["published_time"])
if self.comments: if self.comments:
@@ -1187,7 +1187,7 @@ class DeviantartStatusExtractor(DeviantartExtractor):
deviation["username"] = deviation["author"]["username"] deviation["username"] = deviation["author"]["username"]
deviation["_username"] = deviation["username"].lower() deviation["_username"] = deviation["username"].lower()
deviation["date"] = d = text.parse_datetime(deviation["ts"]) deviation["date"] = d = self.parse_datetime(deviation["ts"])
deviation["published_time"] = int(dt.to_ts(d)) deviation["published_time"] = int(dt.to_ts(d))
deviation["da_category"] = "Status" deviation["da_category"] = "Status"

View File

@@ -72,7 +72,7 @@ class DiscordExtractor(Extractor):
"author_files": [], "author_files": [],
"message": self.extract_message_text(message), "message": self.extract_message_text(message),
"message_id": message["id"], "message_id": message["id"],
"date": text.parse_datetime( "date": self.parse_datetime(
message["timestamp"], "%Y-%m-%dT%H:%M:%S.%f%z" message["timestamp"], "%Y-%m-%dT%H:%M:%S.%f%z"
), ),
"files": [] "files": []

View File

@@ -62,7 +62,7 @@ class DynastyscansChapterExtractor(DynastyscansBase, ChapterExtractor):
"author" : text.remove_html(author), "author" : text.remove_html(author),
"group" : (text.remove_html(group) or "group" : (text.remove_html(group) or
text.extr(group, ' alt="', '"')), text.extr(group, ' alt="', '"')),
"date" : text.parse_datetime(extr( "date" : self.parse_datetime(extr(
'"icon-calendar"></i> ', '<'), "%b %d, %Y"), '"icon-calendar"></i> ', '<'), "%b %d, %Y"),
"tags" : text.split_html(extr( "tags" : text.split_html(extr(
"class='tags'>", "<div id='chapter-actions'")), "class='tags'>", "<div id='chapter-actions'")),
@@ -166,8 +166,8 @@ class DynastyscansAnthologyExtractor(DynastyscansSearchExtractor):
data["scanlator"] = content[1].text[11:] data["scanlator"] = content[1].text[11:]
data["tags"] = content[2].text[6:].lower().split(", ") data["tags"] = content[2].text[6:].lower().split(", ")
data["title"] = element[5].text data["title"] = element[5].text
data["date"] = text.parse_datetime( data["date"] = self.parse_datetime(
element[1].text, "%Y-%m-%dT%H:%M:%S%z") element[1].text, "%Y-%m-%dT%H:%M:%S%z")
data["date_updated"] = text.parse_datetime( data["date_updated"] = self.parse_datetime(
element[2].text, "%Y-%m-%dT%H:%M:%S%z") element[2].text, "%Y-%m-%dT%H:%M:%S%z")
yield Message.Queue, element[4].text, data yield Message.Queue, element[4].text, data

View File

@@ -51,7 +51,7 @@ class E621Extractor(danbooru.DanbooruExtractor):
post["filename"] = file["md5"] post["filename"] = file["md5"]
post["extension"] = file["ext"] post["extension"] = file["ext"]
post["date"] = text.parse_datetime( post["date"] = self.parse_datetime(
post["created_at"], "%Y-%m-%dT%H:%M:%S.%f%z") post["created_at"], "%Y-%m-%dT%H:%M:%S.%f%z")
post.update(data) post.update(data)

View File

@@ -96,7 +96,7 @@ class EromeAlbumExtractor(EromeExtractor):
if not date: if not date:
ts = text.extr(group, '?v=', '"') ts = text.extr(group, '?v=', '"')
if len(ts) > 1: if len(ts) > 1:
date = text.parse_timestamp(ts) date = self.parse_timestamp(ts)
data = { data = {
"album_id": album_id, "album_id": album_id,

View File

@@ -216,7 +216,7 @@ class ExhentaiGalleryExtractor(ExhentaiExtractor):
def _items_hitomi(self): def _items_hitomi(self):
if self.config("metadata", False): if self.config("metadata", False):
data = self.metadata_from_api() data = self.metadata_from_api()
data["date"] = text.parse_timestamp(data["posted"]) data["date"] = self.parse_timestamp(data["posted"])
else: else:
data = {} data = {}
@@ -233,7 +233,7 @@ class ExhentaiGalleryExtractor(ExhentaiExtractor):
data = self.metadata_from_page(page) data = self.metadata_from_page(page)
if self.config("metadata", False): if self.config("metadata", False):
data.update(self.metadata_from_api()) data.update(self.metadata_from_api())
data["date"] = text.parse_timestamp(data["posted"]) data["date"] = self.parse_timestamp(data["posted"])
if self.config("tags", False): if self.config("tags", False):
tags = collections.defaultdict(list) tags = collections.defaultdict(list)
for tag in data["tags"]: for tag in data["tags"]:
@@ -258,7 +258,7 @@ class ExhentaiGalleryExtractor(ExhentaiExtractor):
"_" : extr('<div id="gdc"><div class="cs ct', '"'), "_" : extr('<div id="gdc"><div class="cs ct', '"'),
"eh_category" : extr('>', '<'), "eh_category" : extr('>', '<'),
"uploader" : extr('<div id="gdn">', '</div>'), "uploader" : extr('<div id="gdn">', '</div>'),
"date" : text.parse_datetime(extr( "date" : self.parse_datetime(extr(
'>Posted:</td><td class="gdt2">', '</td>'), "%Y-%m-%d %H:%M"), '>Posted:</td><td class="gdt2">', '</td>'), "%Y-%m-%d %H:%M"),
"parent" : extr( "parent" : extr(
'>Parent:</td><td class="gdt2"><a href="', '"'), '>Parent:</td><td class="gdt2"><a href="', '"'),

View File

@@ -108,7 +108,7 @@ class FacebookExtractor(Extractor):
'"message":{"delight_ranges"', '"message":{"delight_ranges"',
'"},"message_preferred_body"' '"},"message_preferred_body"'
).rsplit('],"text":"', 1)[-1]), ).rsplit('],"text":"', 1)[-1]),
"date": text.parse_timestamp( "date": self.parse_timestamp(
text.extr(photo_page, '\\"publish_time\\":', ',') or text.extr(photo_page, '\\"publish_time\\":', ',') or
text.extr(photo_page, '"created_time":', ',') text.extr(photo_page, '"created_time":', ',')
), ),
@@ -172,7 +172,7 @@ class FacebookExtractor(Extractor):
"user_id": text.extr( "user_id": text.extr(
video_page, '"owner":{"__typename":"User","id":"', '"' video_page, '"owner":{"__typename":"User","id":"', '"'
), ),
"date": text.parse_timestamp(text.extr( "date": self.parse_timestamp(text.extr(
video_page, '\\"publish_time\\":', ',' video_page, '\\"publish_time\\":', ','
)), )),
"type": "video" "type": "video"

View File

@@ -128,7 +128,7 @@ class FanboxExtractor(Extractor):
if file.get("extension", "").lower() in exts if file.get("extension", "").lower() in exts
] ]
post["date"] = text.parse_datetime(post["publishedDatetime"]) post["date"] = self.parse_datetime(post["publishedDatetime"])
post["text"] = content_body.get("text") if content_body else None post["text"] = content_body.get("text") if content_body else None
post["isCoverImage"] = False post["isCoverImage"] = False

View File

@@ -35,7 +35,7 @@ class FanslyExtractor(Extractor):
for post in self.posts(): for post in self.posts():
files = self._extract_files(post) files = self._extract_files(post)
post["count"] = len(files) post["count"] = len(files)
post["date"] = text.parse_timestamp(post["createdAt"]) post["date"] = self.parse_timestamp(post["createdAt"])
yield Message.Directory, post yield Message.Directory, post
for post["num"], file in enumerate(files, 1): for post["num"], file in enumerate(files, 1):
@@ -117,8 +117,8 @@ class FanslyExtractor(Extractor):
file = { file = {
**variant, **variant,
"format": variant["type"], "format": variant["type"],
"date": text.parse_timestamp(media["createdAt"]), "date": self.parse_timestamp(media["createdAt"]),
"date_updated": text.parse_timestamp(media["updatedAt"]), "date_updated": self.parse_timestamp(media["updatedAt"]),
} }
if "metadata" in location: if "metadata" in location:

View File

@@ -101,7 +101,7 @@ class FantiaExtractor(Extractor):
"comment": resp["comment"], "comment": resp["comment"],
"rating": resp["rating"], "rating": resp["rating"],
"posted_at": resp["posted_at"], "posted_at": resp["posted_at"],
"date": text.parse_datetime( "date": self.parse_datetime(
resp["posted_at"], "%a, %d %b %Y %H:%M:%S %z"), resp["posted_at"], "%a, %d %b %Y %H:%M:%S %z"),
"fanclub_id": resp["fanclub"]["id"], "fanclub_id": resp["fanclub"]["id"],
"fanclub_user_id": resp["fanclub"]["user"]["id"], "fanclub_user_id": resp["fanclub"]["user"]["id"],

View File

@@ -98,7 +98,7 @@ class FlickrImageExtractor(FlickrExtractor):
photo["comments"] = text.parse_int(photo["comments"]["_content"]) photo["comments"] = text.parse_int(photo["comments"]["_content"])
photo["description"] = photo["description"]["_content"] photo["description"] = photo["description"]["_content"]
photo["tags"] = [t["raw"] for t in photo["tags"]["tag"]] photo["tags"] = [t["raw"] for t in photo["tags"]["tag"]]
photo["date"] = text.parse_timestamp(photo["dateuploaded"]) photo["date"] = self.parse_timestamp(photo["dateuploaded"])
photo["views"] = text.parse_int(photo["views"]) photo["views"] = text.parse_int(photo["views"])
photo["id"] = text.parse_int(photo["id"]) photo["id"] = text.parse_int(photo["id"])
@@ -489,7 +489,7 @@ class FlickrAPI(oauth.OAuth1API):
def _extract_format(self, photo): def _extract_format(self, photo):
photo["description"] = photo["description"]["_content"].strip() photo["description"] = photo["description"]["_content"].strip()
photo["views"] = text.parse_int(photo["views"]) photo["views"] = text.parse_int(photo["views"])
photo["date"] = text.parse_timestamp(photo["dateupload"]) photo["date"] = self.parse_timestamp(photo["dateupload"])
photo["tags"] = photo["tags"].split() photo["tags"] = photo["tags"].split()
self._extract_metadata(photo) self._extract_metadata(photo)

View File

@@ -143,7 +143,7 @@ class FuraffinityExtractor(Extractor):
data["folders"] = () # folders not present in old layout data["folders"] = () # folders not present in old layout
data["user"] = self.user or data["artist_url"] data["user"] = self.user or data["artist_url"]
data["date"] = text.parse_timestamp(data["filename"].partition(".")[0]) data["date"] = self.parse_timestamp(data["filename"].partition(".")[0])
data["description"] = self._process_description(data["_description"]) data["description"] = self._process_description(data["_description"])
data["thumbnail"] = (f"https://t.furaffinity.net/{post_id}@600-" data["thumbnail"] = (f"https://t.furaffinity.net/{post_id}@600-"
f"{path.rsplit('/', 2)[1]}.jpg") f"{path.rsplit('/', 2)[1]}.jpg")

View File

@@ -55,7 +55,7 @@ class Furry34Extractor(BooruExtractor):
def _prepare(self, post): def _prepare(self, post):
post.pop("files", None) post.pop("files", None)
post["date"] = text.parse_datetime( post["date"] = self.parse_datetime(
post["created"], "%Y-%m-%dT%H:%M:%S.%fZ") post["created"], "%Y-%m-%dT%H:%M:%S.%fZ")
post["filename"], _, post["format"] = post["filename"].rpartition(".") post["filename"], _, post["format"] = post["filename"].rpartition(".")
if "tags" in post: if "tags" in post:

View File

@@ -246,7 +246,7 @@ class GelbooruFavoriteExtractor(GelbooruBase,
for fav in favs: for fav in favs:
for post in self._api_request({"id": fav["favorite"]}): for post in self._api_request({"id": fav["favorite"]}):
post["date_favorited"] = text.parse_timestamp(fav["added"]) post["date_favorited"] = self.parse_timestamp(fav["added"])
yield post yield post
params["pid"] += 1 params["pid"] += 1
@@ -273,7 +273,7 @@ class GelbooruFavoriteExtractor(GelbooruBase,
for fav in favs: for fav in favs:
for post in self._api_request({"id": fav["favorite"]}): for post in self._api_request({"id": fav["favorite"]}):
post["date_favorited"] = text.parse_timestamp(fav["added"]) post["date_favorited"] = self.parse_timestamp(fav["added"])
yield post yield post
params["pid"] -= 1 params["pid"] -= 1

View File

@@ -35,7 +35,7 @@ class GelbooruV01Extractor(booru.BooruExtractor):
} }
post["md5"] = post["file_url"].rpartition("/")[2].partition(".")[0] post["md5"] = post["file_url"].rpartition("/")[2].partition(".")[0]
post["date"] = text.parse_datetime( post["date"] = self.parse_datetime(
post["created_at"], "%Y-%m-%d %H:%M:%S") post["created_at"], "%Y-%m-%d %H:%M:%S")
return post return post

View File

@@ -122,7 +122,7 @@ class GelbooruV02Extractor(booru.BooruExtractor):
def _prepare(self, post): def _prepare(self, post):
post["tags"] = post["tags"].strip() post["tags"] = post["tags"].strip()
post["date"] = text.parse_datetime( post["date"] = self.parse_datetime(
post["created_at"], "%a %b %d %H:%M:%S %z %Y") post["created_at"], "%a %b %d %H:%M:%S %z %Y")
def _html(self, post): def _html(self, post):

View File

@@ -52,7 +52,7 @@ class GirlsreleasedSetExtractor(GirlsreleasedExtractor):
"id": json["id"], "id": json["id"],
"site": json["site"], "site": json["site"],
"model": [model for _, model in json["models"]], "model": [model for _, model in json["models"]],
"date": text.parse_timestamp(json["date"]), "date": self.parse_timestamp(json["date"]),
"count": len(json["images"]), "count": len(json["images"]),
"url": "https://girlsreleased.com/set/" + json["id"], "url": "https://girlsreleased.com/set/" + json["id"],
} }

View File

@@ -101,7 +101,7 @@ class GirlswithmusclePostExtractor(GirlswithmuscleExtractor):
"model": model, "model": model,
"model_list": self._parse_model_list(model), "model_list": self._parse_model_list(model),
"tags": text.split_html(tags)[1::2], "tags": text.split_html(tags)[1::2],
"date": text.parse_datetime( "date": self.parse_datetime(
text.extr(page, 'class="hover-time" title="', '"')[:19], text.extr(page, 'class="hover-time" title="', '"')[:19],
"%Y-%m-%d %H:%M:%S"), "%Y-%m-%d %H:%M:%S"),
"is_favorite": self._parse_is_favorite(page), "is_favorite": self._parse_is_favorite(page),

View File

@@ -34,7 +34,7 @@ class HatenablogExtractor(Extractor):
def _handle_article(self, article: str): def _handle_article(self, article: str):
extr = text.extract_from(article) extr = text.extract_from(article)
date = text.parse_datetime(extr('<time datetime="', '"')) date = self.parse_datetime(extr('<time datetime="', '"'))
entry_link = text.unescape(extr('<a href="', '"')) entry_link = text.unescape(extr('<a href="', '"'))
entry = entry_link.partition("/entry/")[2] entry = entry_link.partition("/entry/")[2]
title = text.unescape(extr('>', '<')) title = text.unescape(extr('>', '<'))

View File

@@ -86,7 +86,7 @@ class HentaifoundryExtractor(Extractor):
.replace("\r\n", "\n")), .replace("\r\n", "\n")),
"ratings" : [text.unescape(r) for r in text.extract_iter(extr( "ratings" : [text.unescape(r) for r in text.extract_iter(extr(
"class='ratings_box'", "</div>"), "title='", "'")], "class='ratings_box'", "</div>"), "title='", "'")],
"date" : text.parse_datetime(extr("datetime='", "'")), "date" : self.parse_datetime(extr("datetime='", "'")),
"views" : text.parse_int(extr(">Views</span>", "<")), "views" : text.parse_int(extr(">Views</span>", "<")),
"score" : text.parse_int(extr(">Vote Score</span>", "<")), "score" : text.parse_int(extr(">Vote Score</span>", "<")),
"media" : text.unescape(extr(">Media</span>", "<").strip()), "media" : text.unescape(extr(">Media</span>", "<").strip()),
@@ -126,7 +126,7 @@ class HentaifoundryExtractor(Extractor):
"title" : text.unescape(extr( "title" : text.unescape(extr(
"<div class='titlebar'>", "</a>").rpartition(">")[2]), "<div class='titlebar'>", "</a>").rpartition(">")[2]),
"author" : text.unescape(extr('alt="', '"')), "author" : text.unescape(extr('alt="', '"')),
"date" : text.parse_datetime(extr( "date" : self.parse_datetime(extr(
">Updated<", "</span>").rpartition(">")[2], "%B %d, %Y"), ">Updated<", "</span>").rpartition(">")[2], "%B %d, %Y"),
"status" : extr("class='indent'>", "<"), "status" : extr("class='indent'>", "<"),
} }

View File

@@ -35,7 +35,7 @@ class HentaihandGalleryExtractor(GalleryExtractor):
"language" : info["language"]["name"], "language" : info["language"]["name"],
"lang" : util.language_to_code(info["language"]["name"]), "lang" : util.language_to_code(info["language"]["name"]),
"tags" : [t["slug"] for t in info["tags"]], "tags" : [t["slug"] for t in info["tags"]],
"date" : text.parse_datetime( "date" : self.parse_datetime(
info["uploaded_at"], "%Y-%m-%d"), info["uploaded_at"], "%Y-%m-%d"),
} }
for key in ("artists", "authors", "groups", "characters", for key in ("artists", "authors", "groups", "characters",

View File

@@ -84,7 +84,7 @@ class HitomiGalleryExtractor(HitomiExtractor, GalleryExtractor):
"type" : info["type"].capitalize(), "type" : info["type"].capitalize(),
"language" : language, "language" : language,
"lang" : util.language_to_code(language), "lang" : util.language_to_code(language),
"date" : text.parse_datetime(date, "%Y-%m-%d %H:%M:%S%z"), "date" : self.parse_datetime(date, "%Y-%m-%d %H:%M:%S%z"),
"tags" : tags, "tags" : tags,
"artist" : [o["artist"] for o in iget("artists") or ()], "artist" : [o["artist"] for o in iget("artists") or ()],
"group" : [o["group"] for o in iget("groups") or ()], "group" : [o["group"] for o in iget("groups") or ()],

View File

@@ -53,10 +53,10 @@ class ImagechestGalleryExtractor(GalleryExtractor):
def _metadata_api(self, page): def _metadata_api(self, page):
post = self.api.post(self.gallery_id) post = self.api.post(self.gallery_id)
post["date"] = text.parse_datetime( post["date"] = self.parse_datetime(
post["created"], "%Y-%m-%dT%H:%M:%S.%fZ") post["created"], "%Y-%m-%dT%H:%M:%S.%fZ")
for img in post["images"]: for img in post["images"]:
img["date"] = text.parse_datetime( img["date"] = self.parse_datetime(
img["created"], "%Y-%m-%dT%H:%M:%S.%fZ") img["created"], "%Y-%m-%dT%H:%M:%S.%fZ")
post["gallery_id"] = self.gallery_id post["gallery_id"] = self.gallery_id

View File

@@ -159,7 +159,7 @@ class ImgbbImageExtractor(ImgbbExtractor):
"width" : text.parse_int(extr('"og:image:width" content="', '"')), "width" : text.parse_int(extr('"og:image:width" content="', '"')),
"height": text.parse_int(extr('"og:image:height" content="', '"')), "height": text.parse_int(extr('"og:image:height" content="', '"')),
"album" : extr("Added to <a", "</a>"), "album" : extr("Added to <a", "</a>"),
"date" : text.parse_datetime(extr( "date" : self.parse_datetime(extr(
'<span title="', '"'), "%Y-%m-%d %H:%M:%S"), '<span title="', '"'), "%Y-%m-%d %H:%M:%S"),
"user" : util.json_loads(extr( "user" : util.json_loads(extr(
"CHV.obj.resource=", "};") + "}").get("user"), "CHV.obj.resource=", "};") + "}").get("user"),

View File

@@ -31,7 +31,7 @@ class ImgthGalleryExtractor(GalleryExtractor):
"title": text.unescape(extr("<h1>", "</h1>")), "title": text.unescape(extr("<h1>", "</h1>")),
"count": text.parse_int(extr( "count": text.parse_int(extr(
"total of images in this gallery: ", " ")), "total of images in this gallery: ", " ")),
"date" : text.parse_datetime( "date" : self.parse_datetime(
extr("created on ", " by <") extr("created on ", " by <")
.replace("th, ", " ", 1).replace("nd, ", " ", 1) .replace("th, ", " ", 1).replace("nd, ", " ", 1)
.replace("st, ", " ", 1), "%B %d %Y at %H:%M"), .replace("st, ", " ", 1), "%B %d %Y at %H:%M"),

View File

@@ -38,7 +38,7 @@ class ImgurExtractor(Extractor):
image["url"] = url = \ image["url"] = url = \
f"https://i.imgur.com/{image['id']}.{image['ext']}" f"https://i.imgur.com/{image['id']}.{image['ext']}"
image["date"] = text.parse_datetime(image["created_at"]) image["date"] = self.parse_datetime(image["created_at"])
image["_http_validate"] = self._validate image["_http_validate"] = self._validate
text.nameext_from_url(url, image) text.nameext_from_url(url, image)
@@ -106,7 +106,7 @@ class ImgurAlbumExtractor(ImgurExtractor):
del album["media"] del album["media"]
count = len(images) count = len(images)
album["date"] = text.parse_datetime(album["created_at"]) album["date"] = self.parse_datetime(album["created_at"])
try: try:
del album["ad_url"] del album["ad_url"]

View File

@@ -35,7 +35,7 @@ class InkbunnyExtractor(Extractor):
for post in self.posts(): for post in self.posts():
post.update(metadata) post.update(metadata)
post["date"] = text.parse_datetime( post["date"] = self.parse_datetime(
post["create_datetime"] + "00", "%Y-%m-%d %H:%M:%S.%f%z") post["create_datetime"] + "00", "%Y-%m-%d %H:%M:%S.%f%z")
post["tags"] = [kw["keyword_name"] for kw in post["keywords"]] post["tags"] = [kw["keyword_name"] for kw in post["keywords"]]
post["ratings"] = [r["name"] for r in post["ratings"]] post["ratings"] = [r["name"] for r in post["ratings"]]
@@ -52,7 +52,7 @@ class InkbunnyExtractor(Extractor):
for post["num"], file in enumerate(files, 1): for post["num"], file in enumerate(files, 1):
post.update(file) post.update(file)
post["deleted"] = (file["deleted"] == "t") post["deleted"] = (file["deleted"] == "t")
post["date"] = text.parse_datetime( post["date"] = self.parse_datetime(
file["create_datetime"] + "00", "%Y-%m-%d %H:%M:%S.%f%z") file["create_datetime"] + "00", "%Y-%m-%d %H:%M:%S.%f%z")
text.nameext_from_url(file["file_name"], post) text.nameext_from_url(file["file_name"], post)

View File

@@ -173,7 +173,7 @@ class InstagramExtractor(Extractor):
post_url = f"{self.root}/stories/highlights/{reel_id}/" post_url = f"{self.root}/stories/highlights/{reel_id}/"
data = { data = {
"user" : post.get("user"), "user" : post.get("user"),
"expires": text.parse_timestamp(expires), "expires": self.parse_timestamp(expires),
"post_id": reel_id, "post_id": reel_id,
"post_shortcode": shortcode_from_id(reel_id), "post_shortcode": shortcode_from_id(reel_id),
"post_url": post_url, "post_url": post_url,
@@ -224,7 +224,7 @@ class InstagramExtractor(Extractor):
data["owner_id"] = owner["pk"] data["owner_id"] = owner["pk"]
data["username"] = owner.get("username") data["username"] = owner.get("username")
data["fullname"] = owner.get("full_name") data["fullname"] = owner.get("full_name")
data["post_date"] = data["date"] = text.parse_timestamp( data["post_date"] = data["date"] = self.parse_timestamp(
post.get("taken_at") or post.get("created_at") or post.get("seen")) post.get("taken_at") or post.get("created_at") or post.get("seen"))
data["_files"] = files = [] data["_files"] = files = []
for num, item in enumerate(items, 1): for num, item in enumerate(items, 1):
@@ -269,7 +269,7 @@ class InstagramExtractor(Extractor):
media = { media = {
"num" : num, "num" : num,
"date" : text.parse_timestamp(item.get("taken_at") or "date" : self.parse_timestamp(item.get("taken_at") or
media.get("taken_at") or media.get("taken_at") or
post.get("taken_at")), post.get("taken_at")),
"media_id" : item["pk"], "media_id" : item["pk"],
@@ -288,7 +288,7 @@ class InstagramExtractor(Extractor):
if "reshared_story_media_author" in item: if "reshared_story_media_author" in item:
media["author"] = item["reshared_story_media_author"] media["author"] = item["reshared_story_media_author"]
if "expiring_at" in item: if "expiring_at" in item:
media["expires"] = text.parse_timestamp(post["expiring_at"]) media["expires"] = self.parse_timestamp(post["expiring_at"])
self._extract_tagged_users(item, media) self._extract_tagged_users(item, media)
files.append(media) files.append(media)
@@ -331,7 +331,7 @@ class InstagramExtractor(Extractor):
"post_id" : post["id"], "post_id" : post["id"],
"post_shortcode": post["shortcode"], "post_shortcode": post["shortcode"],
"post_url" : f"{self.root}/p/{post['shortcode']}/", "post_url" : f"{self.root}/p/{post['shortcode']}/",
"post_date" : text.parse_timestamp(post["taken_at_timestamp"]), "post_date" : self.parse_timestamp(post["taken_at_timestamp"]),
"description": text.parse_unicode_escapes("\n".join( "description": text.parse_unicode_escapes("\n".join(
edge["node"]["text"] edge["node"]["text"]
for edge in post["edge_media_to_caption"]["edges"] for edge in post["edge_media_to_caption"]["edges"]
@@ -623,7 +623,7 @@ class InstagramStoriesTrayExtractor(InstagramExtractor):
def items(self): def items(self):
base = f"{self.root}/stories/id:" base = f"{self.root}/stories/id:"
for story in self.api.reels_tray(): for story in self.api.reels_tray():
story["date"] = text.parse_timestamp(story["latest_reel_media"]) story["date"] = self.parse_timestamp(story["latest_reel_media"])
story["_extractor"] = InstagramStoriesExtractor story["_extractor"] = InstagramStoriesExtractor
yield Message.Queue, f"{base}{story['id']}/", story yield Message.Queue, f"{base}{story['id']}/", story

View File

@@ -36,7 +36,7 @@ class IssuuPublicationExtractor(IssuuBase, GalleryExtractor):
'{"":' + data.replace('\\"', '"'))) '{"":' + data.replace('\\"', '"')))
doc = data["initialDocumentData"]["document"] doc = data["initialDocumentData"]["document"]
doc["date"] = text.parse_datetime( doc["date"] = self.parse_datetime(
doc["originalPublishDateInISOString"], "%Y-%m-%dT%H:%M:%S.%fZ") doc["originalPublishDateInISOString"], "%Y-%m-%dT%H:%M:%S.%fZ")
self.count = text.parse_int(doc["pageCount"]) self.count = text.parse_int(doc["pageCount"])

View File

@@ -32,7 +32,7 @@ class ItakuExtractor(Extractor):
def items(self): def items(self):
if images := self.images(): if images := self.images():
for image in images: for image in images:
image["date"] = text.parse_datetime( image["date"] = self.parse_datetime(
image["date_added"], "%Y-%m-%dT%H:%M:%S.%fZ") image["date_added"], "%Y-%m-%dT%H:%M:%S.%fZ")
for category, tags in image.pop("categorized_tags").items(): for category, tags in image.pop("categorized_tags").items():
image[f"tags_{category.lower()}"] = [ image[f"tags_{category.lower()}"] = [
@@ -60,14 +60,14 @@ class ItakuExtractor(Extractor):
for post in posts: for post in posts:
images = post.pop("gallery_images") or () images = post.pop("gallery_images") or ()
post["count"] = len(images) post["count"] = len(images)
post["date"] = text.parse_datetime( post["date"] = self.parse_datetime(
post["date_added"], "%Y-%m-%dT%H:%M:%S.%fZ") post["date_added"], "%Y-%m-%dT%H:%M:%S.%fZ")
post["tags"] = [t["name"] for t in post["tags"]] post["tags"] = [t["name"] for t in post["tags"]]
yield Message.Directory, post yield Message.Directory, post
for post["num"], image in enumerate(images, 1): for post["num"], image in enumerate(images, 1):
post["file"] = image post["file"] = image
image["date"] = text.parse_datetime( image["date"] = self.parse_datetime(
image["date_added"], "%Y-%m-%dT%H:%M:%S.%fZ") image["date_added"], "%Y-%m-%dT%H:%M:%S.%fZ")
url = image["image"] url = image["image"]

View File

@@ -122,9 +122,9 @@ class IwaraExtractor(Extractor):
info["file_id"] = file_info.get("id") info["file_id"] = file_info.get("id")
info["filename"] = filename info["filename"] = filename
info["extension"] = extension info["extension"] = extension
info["date"] = text.parse_datetime( info["date"] = self.parse_datetime(
file_info.get("createdAt"), "%Y-%m-%dT%H:%M:%S.%fZ") file_info.get("createdAt"), "%Y-%m-%dT%H:%M:%S.%fZ")
info["date_updated"] = text.parse_datetime( info["date_updated"] = self.parse_datetime(
file_info.get("updatedAt"), "%Y-%m-%dT%H:%M:%S.%fZ") file_info.get("updatedAt"), "%Y-%m-%dT%H:%M:%S.%fZ")
info["mime"] = file_info.get("mime") info["mime"] = file_info.get("mime")
info["size"] = file_info.get("size") info["size"] = file_info.get("size")
@@ -144,7 +144,7 @@ class IwaraExtractor(Extractor):
"status" : user.get("status"), "status" : user.get("status"),
"role" : user.get("role"), "role" : user.get("role"),
"premium": user.get("premium"), "premium": user.get("premium"),
"date" : text.parse_datetime( "date" : self.parse_datetime(
user.get("createdAt"), "%Y-%m-%dT%H:%M:%S.000Z"), user.get("createdAt"), "%Y-%m-%dT%H:%M:%S.000Z"),
"description": profile.get("body"), "description": profile.get("body"),
} }

View File

@@ -32,7 +32,7 @@ class KabeuchiUserExtractor(Extractor):
if post.get("is_ad") or not post["image1"]: if post.get("is_ad") or not post["image1"]:
continue continue
post["date"] = text.parse_datetime( post["date"] = self.parse_datetime(
post["created_at"], "%Y-%m-%d %H:%M:%S") post["created_at"], "%Y-%m-%d %H:%M:%S")
yield Message.Directory, post yield Message.Directory, post

View File

@@ -238,7 +238,7 @@ class KemonoExtractor(Extractor):
def _parse_datetime(self, date_string): def _parse_datetime(self, date_string):
if len(date_string) > 19: if len(date_string) > 19:
date_string = date_string[:19] date_string = date_string[:19]
return text.parse_datetime(date_string, "%Y-%m-%dT%H:%M:%S") return self.parse_datetime(date_string, "%Y-%m-%dT%H:%M:%S")
def _revisions(self, posts): def _revisions(self, posts):
return itertools.chain.from_iterable( return itertools.chain.from_iterable(

View File

@@ -119,7 +119,7 @@ class LensdumpImageExtractor(LensdumpBase, Extractor):
'property="image:width" content="', '"')), 'property="image:width" content="', '"')),
"height": text.parse_int(extr( "height": text.parse_int(extr(
'property="image:height" content="', '"')), 'property="image:height" content="', '"')),
"date" : text.parse_datetime(extr( "date" : self.parse_datetime(extr(
'<span title="', '"'), "%Y-%m-%d %H:%M:%S"), '<span title="', '"'), "%Y-%m-%d %H:%M:%S"),
} }

View File

@@ -45,7 +45,7 @@ class LivedoorExtractor(Extractor):
"title" : text.unescape(extr('dc:title="', '"')), "title" : text.unescape(extr('dc:title="', '"')),
"categories" : extr('dc:subject="', '"').partition(",")[::2], "categories" : extr('dc:subject="', '"').partition(",")[::2],
"description": extr('dc:description="', '"'), "description": extr('dc:description="', '"'),
"date" : text.parse_datetime(extr('dc:date="', '"')), "date" : self.parse_datetime(extr('dc:date="', '"')),
"tags" : text.split_html(tags)[1:] if tags else [], "tags" : text.split_html(tags)[1:] if tags else [],
"user" : self.user, "user" : self.user,
"body" : body, "body" : body,

View File

@@ -29,7 +29,7 @@ class LofterExtractor(Extractor):
post = post["post"] post = post["post"]
post["blog_name"] = post["blogInfo"]["blogName"] post["blog_name"] = post["blogInfo"]["blogName"]
post["date"] = text.parse_timestamp(post["publishTime"] // 1000) post["date"] = self.parse_timestamp(post["publishTime"] // 1000)
post_type = post["type"] post_type = post["type"]
# Article # Article

View File

@@ -69,7 +69,7 @@ class LusciousAlbumExtractor(LusciousExtractor):
image["thumbnail"] = "" image["thumbnail"] = ""
image["tags"] = [item["text"] for item in image["tags"]] image["tags"] = [item["text"] for item in image["tags"]]
image["date"] = text.parse_timestamp(image["created"]) image["date"] = self.parse_timestamp(image["created"])
image["id"] = text.parse_int(image["id"]) image["id"] = text.parse_int(image["id"])
url = (image["url_to_original"] or image["url_to_video"] url = (image["url_to_original"] or image["url_to_video"]
@@ -188,7 +188,7 @@ fragment AlbumStandard on Album {
album["created_by"] = album["created_by"]["display_name"] album["created_by"] = album["created_by"]["display_name"]
album["id"] = text.parse_int(album["id"]) album["id"] = text.parse_int(album["id"])
album["date"] = text.parse_timestamp(album["created"]) album["date"] = self.parse_timestamp(album["created"])
return album return album

View File

@@ -47,7 +47,7 @@ class MadokamiMangaExtractor(MadokamiExtractor):
"path": text.unescape(extr('href="', '"')), "path": text.unescape(extr('href="', '"')),
"chapter_string": text.unescape(extr(">", "<")), "chapter_string": text.unescape(extr(">", "<")),
"size": text.parse_bytes(extr("<td>", "</td>")), "size": text.parse_bytes(extr("<td>", "</td>")),
"date": text.parse_datetime( "date": self.parse_datetime(
extr("<td>", "</td>").strip(), "%Y-%m-%d %H:%M"), extr("<td>", "</td>").strip(), "%Y-%m-%d %H:%M"),
}) })

View File

@@ -68,7 +68,7 @@ class MangadexExtractor(Extractor):
"chapter" : text.parse_int(chnum), "chapter" : text.parse_int(chnum),
"chapter_minor": f"{sep}{minor}", "chapter_minor": f"{sep}{minor}",
"chapter_id": chapter["id"], "chapter_id": chapter["id"],
"date" : text.parse_datetime(cattributes["publishAt"]), "date" : self.parse_datetime(cattributes["publishAt"]),
"group" : [group["attributes"]["name"] "group" : [group["attributes"]["name"]
for group in relationships["scanlation_group"]], for group in relationships["scanlation_group"]],
"lang" : lang, "lang" : lang,
@@ -109,8 +109,8 @@ class MangadexCoversExtractor(MangadexExtractor):
"cover" : cattributes["fileName"], "cover" : cattributes["fileName"],
"lang" : cattributes.get("locale"), "lang" : cattributes.get("locale"),
"volume" : text.parse_int(cattributes["volume"]), "volume" : text.parse_int(cattributes["volume"]),
"date" : text.parse_datetime(cattributes["createdAt"]), "date" : self.parse_datetime(cattributes["createdAt"]),
"date_updated": text.parse_datetime(cattributes["updatedAt"]), "date_updated": self.parse_datetime(cattributes["updatedAt"]),
} }
@@ -454,7 +454,7 @@ def _manga_info(self, uuid):
"manga_id": manga["id"], "manga_id": manga["id"],
"manga_titles": [t.popitem()[1] "manga_titles": [t.popitem()[1]
for t in mattr.get("altTitles") or ()], for t in mattr.get("altTitles") or ()],
"manga_date" : text.parse_datetime(mattr.get("createdAt")), "manga_date" : self.parse_datetime(mattr.get("createdAt")),
"description" : (mattr["description"].get("en") or "description" : (mattr["description"].get("en") or
next(iter(mattr["description"].values()), "")), next(iter(mattr["description"].values()), "")),
"demographic": mattr.get("publicationDemographic"), "demographic": mattr.get("publicationDemographic"),

View File

@@ -99,7 +99,7 @@ class MangafoxMangaExtractor(MangaExtractor):
"chapter" : text.parse_int(chapter), "chapter" : text.parse_int(chapter),
"chapter_minor" : minor or "", "chapter_minor" : minor or "",
"chapter_string": cstr, "chapter_string": cstr,
"date" : text.parse_datetime( "date" : self.parse_datetime(
extr('right">', '</span>'), "%b %d, %Y"), extr('right">', '</span>'), "%b %d, %Y"),
} }
chapter.update(data) chapter.update(data)

View File

@@ -50,9 +50,9 @@ class ManganeloChapterExtractor(ManganeloExtractor, ChapterExtractor):
extr = text.extract_from(page) extr = text.extract_from(page)
data = { data = {
"date" : text.parse_datetime(extr( "date" : self.parse_datetime(extr(
'"datePublished": "', '"')[:19], "%Y-%m-%dT%H:%M:%S"), '"datePublished": "', '"')[:19], "%Y-%m-%dT%H:%M:%S"),
"date_updated": text.parse_datetime(extr( "date_updated": self.parse_datetime(extr(
'"dateModified": "', '"')[:19], "%Y-%m-%dT%H:%M:%S"), '"dateModified": "', '"')[:19], "%Y-%m-%dT%H:%M:%S"),
"manga_id" : text.parse_int(extr("comic_id =", ";")), "manga_id" : text.parse_int(extr("comic_id =", ";")),
"chapter_id" : text.parse_int(extr("chapter_id =", ";")), "chapter_id" : text.parse_int(extr("chapter_id =", ";")),
@@ -99,7 +99,7 @@ class ManganeloMangaExtractor(ManganeloExtractor, MangaExtractor):
manga = text.unescape(extr("<h1>", "<")) manga = text.unescape(extr("<h1>", "<"))
author = text.remove_html(extr("<li>Author(s) :", "</a>")) author = text.remove_html(extr("<li>Author(s) :", "</a>"))
status = extr("<li>Status :", "<").strip() status = extr("<li>Status :", "<").strip()
update = text.parse_datetime(extr( update = self.parse_datetime(extr(
"<li>Last updated :", "<").strip(), "%b-%d-%Y %I:%M:%S %p") "<li>Last updated :", "<").strip(), "%b-%d-%Y %I:%M:%S %p")
tags = text.split_html(extr(">Genres :", "</li>"))[::2] tags = text.split_html(extr(">Genres :", "</li>"))[::2]
@@ -121,7 +121,7 @@ class ManganeloMangaExtractor(ManganeloExtractor, MangaExtractor):
"chapter" : text.parse_int(chapter), "chapter" : text.parse_int(chapter),
"chapter_minor": (sep and ".") + minor, "chapter_minor": (sep and ".") + minor,
"title" : title.partition(": ")[2], "title" : title.partition(": ")[2],
"date" : text.parse_datetime(date, "%b-%d-%Y %H:%M"), "date" : self.parse_datetime(date, "%b-%d-%Y %H:%M"),
"lang" : "en", "lang" : "en",
"language": "English", "language": "English",
})) }))

View File

@@ -101,7 +101,7 @@ class MangaparkChapterExtractor(MangaparkBase, ChapterExtractor):
"language" : util.code_to_language(lang), "language" : util.code_to_language(lang),
"source" : chapter["srcTitle"], "source" : chapter["srcTitle"],
"source_id" : chapter["sourceId"], "source_id" : chapter["sourceId"],
"date" : text.parse_timestamp(chapter["dateCreate"] // 1000), "date" : self.parse_timestamp(chapter["dateCreate"] // 1000),
} }
def images(self, _): def images(self, _):
@@ -138,7 +138,7 @@ class MangaparkMangaExtractor(MangaparkBase, Extractor):
"language" : util.code_to_language(lang), "language" : util.code_to_language(lang),
"source" : chapter["srcTitle"], "source" : chapter["srcTitle"],
"source_id" : chapter["sourceId"], "source_id" : chapter["sourceId"],
"date" : text.parse_timestamp( "date" : self.parse_timestamp(
chapter["dateCreate"] // 1000), chapter["dateCreate"] // 1000),
"_extractor": MangaparkChapterExtractor, "_extractor": MangaparkChapterExtractor,
} }

View File

@@ -40,9 +40,9 @@ class MangataroChapterExtractor(MangataroBase, ChapterExtractor):
"chapter_minor": str(round(minor, 5))[1:] if minor else "", "chapter_minor": str(round(minor, 5))[1:] if minor else "",
"chapter_id" : text.parse_int(chapter_id), "chapter_id" : text.parse_int(chapter_id),
"chapter_url" : comic["url"], "chapter_url" : comic["url"],
"date" : text.parse_datetime( "date" : self.parse_datetime(
comic["datePublished"], "%Y-%m-%dT%H:%M:%S%z"), comic["datePublished"], "%Y-%m-%dT%H:%M:%S%z"),
"date_updated" : text.parse_datetime( "date_updated" : self.parse_datetime(
comic["dateModified"], "%Y-%m-%dT%H:%M:%S%z"), comic["dateModified"], "%Y-%m-%dT%H:%M:%S%z"),
} }

View File

@@ -119,7 +119,7 @@ class MangoxoAlbumExtractor(MangoxoExtractor):
"album": { "album": {
"id": self.album_id, "id": self.album_id,
"name": text.unescape(title), "name": text.unescape(title),
"date": text.parse_datetime(date.strip(), "%Y.%m.%d %H:%M"), "date": self.parse_datetime(date.strip(), "%Y.%m.%d %H:%M"),
"description": text.unescape(descr), "description": text.unescape(descr),
}, },
"count": text.parse_int(count), "count": text.parse_int(count),

View File

@@ -64,7 +64,7 @@ class MastodonExtractor(BaseExtractor):
status["count"] = len(attachments) status["count"] = len(attachments)
status["tags"] = [tag["name"] for tag in status["tags"]] status["tags"] = [tag["name"] for tag in status["tags"]]
status["date"] = text.parse_datetime( status["date"] = self.parse_datetime(
status["created_at"][:19], "%Y-%m-%dT%H:%M:%S") status["created_at"][:19], "%Y-%m-%dT%H:%M:%S")
yield Message.Directory, status yield Message.Directory, status
@@ -319,7 +319,7 @@ class MastodonAPI():
if code == 404: if code == 404:
raise exception.NotFoundError() raise exception.NotFoundError()
if code == 429: if code == 429:
self.extractor.wait(until=text.parse_datetime( self.extractor.wait(until=self.extractor.parse_datetime(
response.headers["x-ratelimit-reset"], response.headers["x-ratelimit-reset"],
"%Y-%m-%dT%H:%M:%S.%fZ", "%Y-%m-%dT%H:%M:%S.%fZ",
)) ))

View File

@@ -48,12 +48,12 @@ class MisskeyExtractor(BaseExtractor):
note["instance"] = self.instance note["instance"] = self.instance
note["instance_remote"] = note["user"]["host"] note["instance_remote"] = note["user"]["host"]
note["count"] = len(files) note["count"] = len(files)
note["date"] = text.parse_datetime( note["date"] = self.parse_datetime(
note["createdAt"], "%Y-%m-%dT%H:%M:%S.%f%z") note["createdAt"], "%Y-%m-%dT%H:%M:%S.%f%z")
yield Message.Directory, note yield Message.Directory, note
for note["num"], file in enumerate(files, 1): for note["num"], file in enumerate(files, 1):
file["date"] = text.parse_datetime( file["date"] = self.parse_datetime(
file["createdAt"], "%Y-%m-%dT%H:%M:%S.%f%z") file["createdAt"], "%Y-%m-%dT%H:%M:%S.%f%z")
note["file"] = file note["file"] = file
url = file["url"] url = file["url"]

View File

@@ -31,16 +31,16 @@ class NaverChzzkExtractor(Extractor):
data["uid"] = data["objectId"] data["uid"] = data["objectId"]
data["user"] = comment["user"] data["user"] = comment["user"]
data["count"] = len(files) data["count"] = len(files)
data["date"] = text.parse_datetime( data["date"] = self.parse_datetime(
data["createdDate"], "%Y%m%d%H%M%S") data["createdDate"], "%Y%m%d%H%M%S")
yield Message.Directory, data yield Message.Directory, data
for data["num"], file in enumerate(files, 1): for data["num"], file in enumerate(files, 1):
if extra := file.get("extraJson"): if extra := file.get("extraJson"):
file.update(util.json_loads(extra)) file.update(util.json_loads(extra))
file["date"] = text.parse_datetime( file["date"] = self.parse_datetime(
file["createdDate"], "%Y-%m-%dT%H:%M:%S.%f%z") file["createdDate"], "%Y-%m-%dT%H:%M:%S.%f%z")
file["date_updated"] = text.parse_datetime( file["date_updated"] = self.parse_datetime(
file["updatedDate"], "%Y-%m-%dT%H:%M:%S.%f%z") file["updatedDate"], "%Y-%m-%dT%H:%M:%S.%f%z")
data["file"] = file data["file"] = file
url = file["attachValue"] url = file["attachValue"]

View File

@@ -59,7 +59,7 @@ class NekohousePostExtractor(NekohouseExtractor):
'class="scrape__user-name', '</').rpartition(">")[2].strip()), 'class="scrape__user-name', '</').rpartition(">")[2].strip()),
"title" : text.unescape(extr( "title" : text.unescape(extr(
'class="scrape__title', '</').rpartition(">")[2]), 'class="scrape__title', '</').rpartition(">")[2]),
"date" : text.parse_datetime(extr( "date" : self.parse_datetime(extr(
'datetime="', '"')[:19], "%Y-%m-%d %H:%M:%S"), 'datetime="', '"')[:19], "%Y-%m-%d %H:%M:%S"),
"content": text.unescape(extr( "content": text.unescape(extr(
'class="scrape__content">', "</div>").strip()), 'class="scrape__content">', "</div>").strip()),

View File

@@ -218,7 +218,7 @@ class NewgroundsExtractor(Extractor):
"description": text.unescape(extr(':description" content="', '"')), "description": text.unescape(extr(':description" content="', '"')),
"type" : "art", "type" : "art",
"_type" : "i", "_type" : "i",
"date" : text.parse_datetime(extr( "date" : self.parse_datetime(extr(
'itemprop="datePublished" content="', '"')), 'itemprop="datePublished" content="', '"')),
"rating" : extr('class="rated-', '"'), "rating" : extr('class="rated-', '"'),
"url" : full('src="', '"'), "url" : full('src="', '"'),
@@ -268,7 +268,7 @@ class NewgroundsExtractor(Extractor):
"description": text.unescape(extr(':description" content="', '"')), "description": text.unescape(extr(':description" content="', '"')),
"type" : "audio", "type" : "audio",
"_type" : "a", "_type" : "a",
"date" : text.parse_datetime(extr( "date" : self.parse_datetime(extr(
'itemprop="datePublished" content="', '"')), 'itemprop="datePublished" content="', '"')),
"url" : extr('{"url":"', '"').replace("\\/", "/"), "url" : extr('{"url":"', '"').replace("\\/", "/"),
"index" : text.parse_int(index), "index" : text.parse_int(index),
@@ -287,7 +287,7 @@ class NewgroundsExtractor(Extractor):
src = src.replace("\\/", "/") src = src.replace("\\/", "/")
formats = () formats = ()
type = extr(',"description":"', '"') type = extr(',"description":"', '"')
date = text.parse_datetime(extr( date = self.parse_datetime(extr(
'itemprop="datePublished" content="', '"')) 'itemprop="datePublished" content="', '"'))
if type: if type:
type = type.rpartition(" ")[2].lower() type = type.rpartition(" ")[2].lower()
@@ -302,7 +302,7 @@ class NewgroundsExtractor(Extractor):
sources = self.request_json(url, headers=headers)["sources"] sources = self.request_json(url, headers=headers)["sources"]
formats = self._video_formats(sources) formats = self._video_formats(sources)
src = next(formats, "") src = next(formats, "")
date = text.parse_timestamp(src.rpartition("?")[2]) date = self.parse_timestamp(src.rpartition("?")[2])
type = "movie" type = "movie"
return { return {

View File

@@ -82,7 +82,7 @@ class NijieExtractor(AsynchronousMixin, BaseExtractor):
"title" : keywords[0].strip(), "title" : keywords[0].strip(),
"description": text.unescape(extr( "description": text.unescape(extr(
'"description": "', '"').replace("&amp;", "&")), '"description": "', '"').replace("&amp;", "&")),
"date" : text.parse_datetime(extr( "date" : self.parse_datetime(extr(
'"datePublished": "', '"'), "%a %b %d %H:%M:%S %Y", 9), '"datePublished": "', '"'), "%a %b %d %H:%M:%S %Y", 9),
"artist_id" : text.parse_int(extr('/members.php?id=', '"')), "artist_id" : text.parse_int(extr('/members.php?id=', '"')),
"artist_name": keywords[1], "artist_name": keywords[1],
@@ -101,7 +101,7 @@ class NijieExtractor(AsynchronousMixin, BaseExtractor):
"artist_id" : text.parse_int(extr('members.php?id=', '"')), "artist_id" : text.parse_int(extr('members.php?id=', '"')),
"artist_name": keywords[1], "artist_name": keywords[1],
"tags" : keywords[2:-1], "tags" : keywords[2:-1],
"date" : text.parse_datetime(extr( "date" : self.parse_datetime(extr(
"itemprop='datePublished' content=", "<").rpartition(">")[2], "itemprop='datePublished' content=", "<").rpartition(">")[2],
"%Y-%m-%d %H:%M:%S", 9), "%Y-%m-%d %H:%M:%S", 9),
} }

View File

@@ -114,7 +114,7 @@ class NitterExtractor(BaseExtractor):
return { return {
"author" : author, "author" : author,
"user" : self.user_obj or author, "user" : self.user_obj or author,
"date" : text.parse_datetime( "date" : self.parse_datetime(
extr('title="', '"'), "%b %d, %Y · %I:%M %p %Z"), extr('title="', '"'), "%b %d, %Y · %I:%M %p %Z"),
"tweet_id": link.rpartition("/")[2].partition("#")[0], "tweet_id": link.rpartition("/")[2].partition("#")[0],
"content": extr('class="tweet-content', "</div").partition(">")[2], "content": extr('class="tweet-content', "</div").partition(">")[2],
@@ -142,7 +142,7 @@ class NitterExtractor(BaseExtractor):
return { return {
"author" : author, "author" : author,
"user" : self.user_obj or author, "user" : self.user_obj or author,
"date" : text.parse_datetime( "date" : self.parse_datetime(
extr('title="', '"'), "%b %d, %Y · %I:%M %p %Z"), extr('title="', '"'), "%b %d, %Y · %I:%M %p %Z"),
"tweet_id": link.rpartition("/")[2].partition("#")[0], "tweet_id": link.rpartition("/")[2].partition("#")[0],
"content" : extr('class="quote-text', "</div").partition(">")[2], "content" : extr('class="quote-text', "</div").partition(">")[2],
@@ -173,7 +173,7 @@ class NitterExtractor(BaseExtractor):
"nick" : extr('title="', '"'), "nick" : extr('title="', '"'),
"name" : extr('title="@', '"'), "name" : extr('title="@', '"'),
"description" : extr('<p dir="auto">', '<'), "description" : extr('<p dir="auto">', '<'),
"date" : text.parse_datetime( "date" : self.parse_datetime(
extr('class="profile-joindate"><span title="', '"'), extr('class="profile-joindate"><span title="', '"'),
"%I:%M %p - %d %b %Y"), "%I:%M %p - %d %b %Y"),
"statuses_count" : text.parse_int(extr( "statuses_count" : text.parse_int(extr(

View File

@@ -49,7 +49,7 @@ class NozomiExtractor(Extractor):
post["character"] = self._list(post.get("character")) post["character"] = self._list(post.get("character"))
try: try:
post["date"] = text.parse_datetime( post["date"] = self.parse_datetime(
post["date"] + ":00", "%Y-%m-%d %H:%M:%S%z") post["date"] + ":00", "%Y-%m-%d %H:%M:%S%z")
except Exception: except Exception:
post["date"] = None post["date"] = None

View File

@@ -53,7 +53,7 @@ class PahealExtractor(Extractor):
extr("<source src='", "'")), extr("<source src='", "'")),
"uploader": text.unquote(extr( "uploader": text.unquote(extr(
"class='username' href='/user/", "'")), "class='username' href='/user/", "'")),
"date" : text.parse_datetime( "date" : self.parse_datetime(
extr("datetime='", "'"), "%Y-%m-%dT%H:%M:%S%z"), extr("datetime='", "'"), "%Y-%m-%dT%H:%M:%S%z"),
"source" : text.unescape(text.extr( "source" : text.unescape(text.extr(
extr(">Source Link<", "</td>"), "href='", "'")), extr(">Source Link<", "</td>"), "href='", "'")),
@@ -133,7 +133,7 @@ class PahealTagExtractor(PahealExtractor):
"duration" : text.parse_float(duration[:-1]), "duration" : text.parse_float(duration[:-1]),
"tags" : text.unescape(tags), "tags" : text.unescape(tags),
"size" : text.parse_bytes(size[:-1]), "size" : text.parse_bytes(size[:-1]),
"date" : text.parse_datetime(date, "%B %d, %Y; %H:%M"), "date" : self.parse_datetime(date, "%B %d, %Y; %H:%M"),
"filename" : f"{pid} - {tags}", "filename" : f"{pid} - {tags}",
"extension": ext, "extension": ext,
} }

View File

@@ -177,7 +177,7 @@ class PatreonExtractor(Extractor):
post, included, "attachments") post, included, "attachments")
attr["attachments_media"] = self._files( attr["attachments_media"] = self._files(
post, included, "attachments_media") post, included, "attachments_media")
attr["date"] = text.parse_datetime( attr["date"] = self.parse_datetime(
attr["published_at"], "%Y-%m-%dT%H:%M:%S.%f%z") attr["published_at"], "%Y-%m-%dT%H:%M:%S.%f%z")
try: try:
@@ -226,7 +226,7 @@ class PatreonExtractor(Extractor):
user = response.json()["data"] user = response.json()["data"]
attr = user["attributes"] attr = user["attributes"]
attr["id"] = user["id"] attr["id"] = user["id"]
attr["date"] = text.parse_datetime( attr["date"] = self.parse_datetime(
attr["created"], "%Y-%m-%dT%H:%M:%S.%f%z") attr["created"], "%Y-%m-%dT%H:%M:%S.%f%z")
return attr return attr
@@ -236,7 +236,7 @@ class PatreonExtractor(Extractor):
coll = data["data"] coll = data["data"]
attr = coll["attributes"] attr = coll["attributes"]
attr["id"] = coll["id"] attr["id"] = coll["id"]
attr["date"] = text.parse_datetime( attr["date"] = self.parse_datetime(
attr["created_at"], "%Y-%m-%dT%H:%M:%S.%f%z") attr["created_at"], "%Y-%m-%dT%H:%M:%S.%f%z")
return attr return attr

View File

@@ -35,7 +35,7 @@ class PexelsExtractor(Extractor):
post["type"] = attr["type"] post["type"] = attr["type"]
post.update(metadata) post.update(metadata)
post["date"] = text.parse_datetime( post["date"] = self.parse_datetime(
post["created_at"][:-5], "%Y-%m-%dT%H:%M:%S") post["created_at"][:-5], "%Y-%m-%dT%H:%M:%S")
if "image" in post: if "image" in post:

View File

@@ -36,7 +36,7 @@ class PhilomenaExtractor(BooruExtractor):
return url return url
def _prepare(self, post): def _prepare(self, post):
post["date"] = text.parse_datetime( post["date"] = self.parse_datetime(
post["created_at"][:19], "%Y-%m-%dT%H:%M:%S") post["created_at"][:19], "%Y-%m-%dT%H:%M:%S")

View File

@@ -29,7 +29,7 @@ class PhotovogueUserExtractor(Extractor):
for photo in self.photos(): for photo in self.photos():
url = photo["gallery_image"] url = photo["gallery_image"]
photo["title"] = photo["title"].strip() photo["title"] = photo["title"].strip()
photo["date"] = text.parse_datetime( photo["date"] = self.parse_datetime(
photo["date"], "%Y-%m-%dT%H:%M:%S.%f%z") photo["date"], "%Y-%m-%dT%H:%M:%S.%f%z")
yield Message.Directory, photo yield Message.Directory, photo

View File

@@ -29,7 +29,7 @@ class PicartoGalleryExtractor(Extractor):
def items(self): def items(self):
for post in self.posts(): for post in self.posts():
post["date"] = text.parse_datetime( post["date"] = self.parse_datetime(
post["created_at"], "%Y-%m-%d %H:%M:%S") post["created_at"], "%Y-%m-%d %H:%M:%S")
variations = post.pop("variations", ()) variations = post.pop("variations", ())
yield Message.Directory, post yield Message.Directory, post

View File

@@ -26,7 +26,7 @@ class PiczelExtractor(Extractor):
def items(self): def items(self):
for post in self.posts(): for post in self.posts():
post["tags"] = [t["title"] for t in post["tags"] if t["title"]] post["tags"] = [t["title"] for t in post["tags"] if t["title"]]
post["date"] = text.parse_datetime( post["date"] = self.parse_datetime(
post["created_at"], "%Y-%m-%dT%H:%M:%S.%f%z") post["created_at"], "%Y-%m-%dT%H:%M:%S.%f%z")
if post["multi"]: if post["multi"]:

View File

@@ -48,7 +48,7 @@ class PillowfortExtractor(Extractor):
for url in inline(post["content"]): for url in inline(post["content"]):
files.append({"url": url}) files.append({"url": url})
post["date"] = text.parse_datetime( post["date"] = self.parse_datetime(
post["created_at"], "%Y-%m-%dT%H:%M:%S.%f%z") post["created_at"], "%Y-%m-%dT%H:%M:%S.%f%z")
post["post_id"] = post.pop("id") post["post_id"] = post.pop("id")
post["count"] = len(files) post["count"] = len(files)
@@ -76,7 +76,7 @@ class PillowfortExtractor(Extractor):
if "id" not in file: if "id" not in file:
post["id"] = post["hash"] post["id"] = post["hash"]
if "created_at" in file: if "created_at" in file:
post["date"] = text.parse_datetime( post["date"] = self.parse_datetime(
file["created_at"], "%Y-%m-%dT%H:%M:%S.%f%z") file["created_at"], "%Y-%m-%dT%H:%M:%S.%f%z")
yield msgtype, url, post yield msgtype, url, post

View File

@@ -24,8 +24,8 @@ class PixeldrainExtractor(Extractor):
if api_key := self.config("api-key"): if api_key := self.config("api-key"):
self.session.auth = util.HTTPBasicAuth("", api_key) self.session.auth = util.HTTPBasicAuth("", api_key)
def parse_datetime(self, date_string): def _parse_datetime(self, date_string):
return text.parse_datetime( return self.parse_datetime(
date_string, "%Y-%m-%dT%H:%M:%S.%fZ") date_string, "%Y-%m-%dT%H:%M:%S.%fZ")
@@ -45,7 +45,7 @@ class PixeldrainFileExtractor(PixeldrainExtractor):
file = self.request_json(url + "/info") file = self.request_json(url + "/info")
file["url"] = url + "?download" file["url"] = url + "?download"
file["date"] = self.parse_datetime(file["date_upload"]) file["date"] = self.parse_datetime_iso(file["date_upload"])
text.nameext_from_url(file["name"], file) text.nameext_from_url(file["name"], file)
yield Message.Directory, file yield Message.Directory, file
@@ -72,7 +72,7 @@ class PixeldrainAlbumExtractor(PixeldrainExtractor):
files = album["files"] files = album["files"]
album["count"] = album["file_count"] album["count"] = album["file_count"]
album["date"] = self.parse_datetime(album["date_created"]) album["date"] = self.parse_datetime_iso(album["date_created"])
if self.file_index: if self.file_index:
idx = text.parse_int(self.file_index) idx = text.parse_int(self.file_index)
@@ -91,7 +91,7 @@ class PixeldrainAlbumExtractor(PixeldrainExtractor):
file["album"] = album file["album"] = album
file["num"] = num file["num"] = num
file["url"] = url = f"{self.root}/api/file/{file['id']}?download" file["url"] = url = f"{self.root}/api/file/{file['id']}?download"
file["date"] = self.parse_datetime(file["date_upload"]) file["date"] = self.parse_datetime_iso(file["date_upload"])
text.nameext_from_url(file["name"], file) text.nameext_from_url(file["name"], file)
yield Message.Url, url, file yield Message.Url, url, file
@@ -112,7 +112,7 @@ class PixeldrainFolderExtractor(PixeldrainExtractor):
"mime_type" : data["file_type"], "mime_type" : data["file_type"],
"size" : data["file_size"], "size" : data["file_size"],
"hash_sha256": data["sha256_sum"], "hash_sha256": data["sha256_sum"],
"date" : self.parse_datetime(data["created"]), "date" : self.parse_datetime_iso(data["created"]),
} }
def items(self): def items(self):

View File

@@ -150,7 +150,7 @@ class PornhubGifExtractor(PornhubExtractor):
"tags" : extr("data-context-tag='", "'").split(","), "tags" : extr("data-context-tag='", "'").split(","),
"title": extr('"name": "', '"'), "title": extr('"name": "', '"'),
"url" : extr('"contentUrl": "', '"'), "url" : extr('"contentUrl": "', '"'),
"date" : text.parse_datetime( "date" : self.parse_datetime(
extr('"uploadDate": "', '"'), "%Y-%m-%d"), extr('"uploadDate": "', '"'), "%Y-%m-%d"),
"viewkey" : extr('From this video: ' "viewkey" : extr('From this video: '
'<a href="/view_video.php?viewkey=', '"'), '<a href="/view_video.php?viewkey=', '"'),

View File

@@ -31,7 +31,7 @@ class PostmillExtractor(BaseExtractor):
title = text.unescape(extr( title = text.unescape(extr(
'<meta property="og:title" content="', '">')) '<meta property="og:title" content="', '">'))
date = text.parse_datetime(extr( date = self.parse_datetime(extr(
'<meta property="og:article:published_time" content="', '">')) '<meta property="og:article:published_time" content="', '">'))
username = extr( username = extr(
'<meta property="og:article:author" content="', '">') '<meta property="og:article:author" content="', '">')

View File

@@ -42,7 +42,7 @@ class RawkumaChapterExtractor(RawkumaBase, ChapterExtractor):
"chapter_minor": sep + minor, "chapter_minor": sep + minor,
"chapter_id" : text.parse_int(item["cid"]), "chapter_id" : text.parse_int(item["cid"]),
"title" : text.unescape(title), "title" : text.unescape(title),
"date" : text.parse_datetime( "date" : self.parse_datetime(
date, "%Y-%m-%dWIB%H:%M:%S%z"), date, "%Y-%m-%dWIB%H:%M:%S%z"),
"thumbnail" : item.get("t"), "thumbnail" : item.get("t"),
"lang" : "ja", "lang" : "ja",

View File

@@ -97,7 +97,7 @@ class ReactorExtractor(BaseExtractor):
return return
num = 0 num = 0
date = text.parse_datetime(data["datePublished"]) date = self.parse_datetime(data["datePublished"])
user = data["author"]["name"] user = data["author"]["name"]
description = text.unescape(data["description"]) description = text.unescape(data["description"])
title, _, tags = text.unescape(data["headline"]).partition(" / ") title, _, tags = text.unescape(data["headline"]).partition(" / ")

View File

@@ -48,7 +48,7 @@ class RealbooruExtractor(booru.BooruExtractor):
return num return num
def _prepare(self, post): def _prepare(self, post):
post["date"] = text.parse_datetime(post["created_at"], "%b, %d %Y") post["date"] = self.parse_datetime(post["created_at"], "%b, %d %Y")
def _pagination(self, params, begin, end): def _pagination(self, params, begin, end):
url = self.root + "/index.php" url = self.root + "/index.php"

View File

@@ -90,7 +90,7 @@ class RedbustGalleryExtractor(GalleryExtractor, RedbustExtractor):
"categories" : text.split_html(extr( "categories" : text.split_html(extr(
'<li class="category">', "</li>"))[::2], '<li class="category">', "</li>"))[::2],
"title" : text.unescape(extr('class="post-title">', "<")), "title" : text.unescape(extr('class="post-title">', "<")),
"date" : text.parse_datetime( "date" : self.parse_datetime(
extr('class="post-byline">', "<").strip(), "%B %d, %Y"), extr('class="post-byline">', "<").strip(), "%B %d, %Y"),
"views" : text.parse_int(extr("</b>", "v").replace(",", "")), "views" : text.parse_int(extr("</b>", "v").replace(",", "")),
"tags" : text.split_html(extr( "tags" : text.split_html(extr(

View File

@@ -57,7 +57,7 @@ class RedditExtractor(Extractor):
if submission: if submission:
submission["comment"] = None submission["comment"] = None
submission["date"] = text.parse_timestamp( submission["date"] = self.parse_timestamp(
submission["created_utc"]) submission["created_utc"])
yield Message.Directory, submission yield Message.Directory, submission
visited.add(submission["id"]) visited.add(submission["id"])
@@ -124,7 +124,7 @@ class RedditExtractor(Extractor):
data = submission.copy() data = submission.copy()
data["comment"] = comment data["comment"] = comment
comment["date"] = text.parse_timestamp( comment["date"] = self.parse_timestamp(
comment["created_utc"]) comment["created_utc"])
if media: if media:

View File

@@ -51,7 +51,7 @@ class RedgifsExtractor(Extractor):
gif.update(metadata) gif.update(metadata)
gif["count"] = cnt gif["count"] = cnt
gif["date"] = text.parse_timestamp(gif.get("createDate")) gif["date"] = self.parse_timestamp(gif.get("createDate"))
yield Message.Directory, gif yield Message.Directory, gif
for num, gif in enumerate(gifs, enum): for num, gif in enumerate(gifs, enum):

View File

@@ -36,7 +36,7 @@ class Rule34vaultExtractor(BooruExtractor):
def _prepare(self, post): def _prepare(self, post):
post.pop("files", None) post.pop("files", None)
post["date"] = text.parse_datetime( post["date"] = self.parse_datetime(
post["created"], "%Y-%m-%dT%H:%M:%S.%fZ") post["created"], "%Y-%m-%dT%H:%M:%S.%fZ")
if "tags" in post: if "tags" in post:
post["tags"] = [t["value"] for t in post["tags"]] post["tags"] = [t["value"] for t in post["tags"]]

View File

@@ -68,7 +68,7 @@ class Rule34xyzExtractor(BooruExtractor):
def _prepare(self, post): def _prepare(self, post):
post.pop("files", None) post.pop("files", None)
post["date"] = text.parse_datetime( post["date"] = self.parse_datetime(
post["created"], "%Y-%m-%dT%H:%M:%S.%fZ") post["created"], "%Y-%m-%dT%H:%M:%S.%fZ")
post["filename"], _, post["format"] = post["filename"].rpartition(".") post["filename"], _, post["format"] = post["filename"].rpartition(".")
if "tags" in post: if "tags" in post:

View File

@@ -30,9 +30,9 @@ class S3ndpicsExtractor(Extractor):
for post in self.posts(): for post in self.posts():
post["id"] = post.pop("_id", None) post["id"] = post.pop("_id", None)
post["user"] = post.pop("userId", None) post["user"] = post.pop("userId", None)
post["date"] = text.parse_datetime( post["date"] = self.parse_datetime(
post["createdAt"], "%Y-%m-%dT%H:%M:%S.%fZ") post["createdAt"], "%Y-%m-%dT%H:%M:%S.%fZ")
post["date_updated"] = text.parse_datetime( post["date_updated"] = self.parse_datetime(
post["updatedAt"], "%Y-%m-%dT%H:%M:%S.%fZ") post["updatedAt"], "%Y-%m-%dT%H:%M:%S.%fZ")
files = post.pop("files", ()) files = post.pop("files", ())

View File

@@ -36,7 +36,7 @@ class SaintAlbumExtractor(LolisafeAlbumExtractor):
break break
files.append({ files.append({
"id2" : id2, "id2" : id2,
"date" : text.parse_timestamp(extr("", ".")), "date" : self.parse_timestamp(extr("", ".")),
"id" : extr("/embed/", '"'), "id" : extr("/embed/", '"'),
"size" : text.parse_int(extr('data="', '"')), "size" : text.parse_int(extr('data="', '"')),
"file" : text.unescape(extr( "file" : text.unescape(extr(
@@ -73,7 +73,7 @@ class SaintMediaExtractor(SaintAlbumExtractor):
file = { file = {
"id" : album_id, "id" : album_id,
"id2" : extr("/thumbs/", "-"), "id2" : extr("/thumbs/", "-"),
"date" : text.parse_timestamp(extr("", ".")), "date" : self.parse_timestamp(extr("", ".")),
"file" : text.unescape(extr('<source src="', '"')), "file" : text.unescape(extr('<source src="', '"')),
"id_dl": extr("/d/", "'"), "id_dl": extr("/d/", "'"),
} }

Some files were not shown because too many files have changed in this diff Show More