[dt] use 'parse_datetime_iso()' for ISO formats

This commit is contained in:
Mike Fährmann
2025-10-19 19:31:31 +02:00
parent 5802107cdf
commit c38856bd3f
57 changed files with 102 additions and 165 deletions

View File

@@ -85,8 +85,7 @@ class _8musesAlbumExtractor(Extractor):
"parent" : text.parse_int(album["parentId"]),
"views" : text.parse_int(album["numberViews"]),
"likes" : text.parse_int(album["numberLikes"]),
"date" : self.parse_datetime(
album["updatedAt"], "%Y-%m-%dT%H:%M:%S.%fZ"),
"date" : self.parse_datetime_iso(album["updatedAt"]),
}
def _unobfuscate(self, data):

View File

@@ -182,10 +182,10 @@ class Ao3WorkExtractor(Ao3Extractor):
extr('<dd class="freeform tags">', "</dd>")),
"lang" : extr('<dd class="language" lang="', '"'),
"series" : extr('<dd class="series">', "</dd>"),
"date" : self.parse_datetime(
extr('<dd class="published">', "<"), "%Y-%m-%d"),
"date_completed": self.parse_datetime(
extr('>Completed:</dt><dd class="status">', "<"), "%Y-%m-%d"),
"date" : self.parse_datetime_iso(extr(
'<dd class="published">', "<")),
"date_completed": self.parse_datetime_iso(extr(
'>Completed:</dt><dd class="status">', "<")),
"date_updated" : self.parse_timestamp(
path.rpartition("updated_at=")[2]),
"words" : text.parse_int(

View File

@@ -49,8 +49,7 @@ class ArcalivePostExtractor(ArcaliveExtractor):
files = self._extract_files(post)
post["count"] = len(files)
post["date"] = self.parse_datetime(
post["createdAt"][:19], "%Y-%m-%dT%H:%M:%S")
post["date"] = self.parse_datetime_iso(post["createdAt"][:19])
post["post_url"] = post_url = \
f"{self.root}/b/{post['boardSlug']}/{post['id']}"
post["_http_headers"] = {"Referer": post_url + "?p=1"}

View File

@@ -126,8 +126,7 @@ class ArtstationExtractor(Extractor):
data["title"] = text.unescape(data["title"])
data["description"] = text.unescape(text.remove_html(
data["description"]))
data["date"] = self.parse_datetime(
data["created_at"], "%Y-%m-%dT%H:%M:%S.%f%z")
data["date"] = self.parse_datetime_iso(data["created_at"])
assets = data["assets"]
del data["assets"]

View File

@@ -167,8 +167,7 @@ class BatotoMangaExtractor(BatotoBase, MangaExtractor):
data["chapter"] = text.parse_int(chapter)
data["chapter_minor"] = sep + minor
data["date"] = self.parse_datetime(
extr('time="', '"'), "%Y-%m-%dT%H:%M:%S.%fZ")
data["date"] = self.parse_datetime_iso(extr('time="', '"'))
url = f"{self.root}/title/{href}"
results.append((url, data.copy()))

View File

@@ -135,8 +135,7 @@ class BlueskyExtractor(Extractor):
post["instance"] = self.instance
post["post_id"] = self._pid(post)
post["date"] = self.parse_datetime(
post["createdAt"][:19], "%Y-%m-%dT%H:%M:%S")
post["date"] = self.parse_datetime_iso(post["createdAt"][:19])
def _extract_files(self, post):
if "embed" not in post:

View File

@@ -70,8 +70,7 @@ class BoothItemExtractor(BoothExtractor):
url + ".json", headers=headers, interval=False)
item["booth_category"] = item.pop("category", None)
item["date"] = self.parse_datetime(
item["published_at"], "%Y-%m-%dT%H:%M:%S.%f%z")
item["date"] = self.parse_datetime_iso(item["published_at"])
item["tags"] = [t["name"] for t in item["tags"]]
shop = item["shop"]

View File

@@ -79,8 +79,7 @@ class CheveretoImageExtractor(CheveretoExtractor):
"url" : url,
"album": text.remove_html(extr(
"Added to <a", "</a>").rpartition(">")[2]),
"date" : self.parse_datetime(extr(
'<span title="', '"'), "%Y-%m-%d %H:%M:%S"),
"date" : self.parse_datetime_iso(extr('<span title="', '"')),
"user" : extr('username: "', '"'),
}
@@ -116,8 +115,7 @@ class CheveretoVideoExtractor(CheveretoExtractor):
'class="far fa-clock"></i>', ""),
"album": text.remove_html(extr(
"Added to <a", "</a>").rpartition(">")[2]),
"date" : self.parse_datetime(extr(
'<span title="', '"'), "%Y-%m-%d %H:%M:%S"),
"date" : self.parse_datetime_iso(extr('<span title="', '"')),
"user" : extr('username: "', '"'),
}

View File

@@ -86,8 +86,7 @@ class CivitaiExtractor(Extractor):
images = self.api.images_post(post["id"])
post = self.api.post(post["id"])
post["date"] = self.parse_datetime(
post["publishedAt"], "%Y-%m-%dT%H:%M:%S.%fZ")
post["date"] = self.parse_datetime_iso(post["publishedAt"])
data = {
"post": post,
"user": post.pop("user"),
@@ -122,8 +121,7 @@ class CivitaiExtractor(Extractor):
data["post"] = post = self._extract_meta_post(file)
if post:
post.pop("user", None)
file["date"] = self.parse_datetime(
file["createdAt"], "%Y-%m-%dT%H:%M:%S.%fZ")
file["date"] = self.parse_datetime_iso(file["createdAt"])
data["url"] = url = self._url(file)
text.nameext_from_url(url, data)
@@ -180,8 +178,7 @@ class CivitaiExtractor(Extractor):
if "id" not in file and data["filename"].isdecimal():
file["id"] = text.parse_int(data["filename"])
if "date" not in file:
file["date"] = self.parse_datetime(
file["createdAt"], "%Y-%m-%dT%H:%M:%S.%fZ")
file["date"] = self.parse_datetime_iso(file["createdAt"])
if self._meta_generation:
file["generation"] = self._extract_meta_generation(file)
yield data
@@ -216,8 +213,7 @@ class CivitaiExtractor(Extractor):
def _extract_meta_post(self, image):
try:
post = self.api.post(image["postId"])
post["date"] = self.parse_datetime(
post["publishedAt"], "%Y-%m-%dT%H:%M:%S.%fZ")
post["date"] = self.parse_datetime_iso(post["publishedAt"])
return post
except Exception as exc:
return self.log.debug("", exc_info=exc)
@@ -278,8 +274,7 @@ class CivitaiModelExtractor(CivitaiExtractor):
versions = (version,)
for version in versions:
version["date"] = self.parse_datetime(
version["createdAt"], "%Y-%m-%dT%H:%M:%S.%fZ")
version["date"] = self.parse_datetime_iso(version["createdAt"])
data = {
"model" : model,
@@ -593,8 +588,7 @@ class CivitaiGeneratedExtractor(CivitaiExtractor):
self._require_auth()
for gen in self.api.orchestrator_queryGeneratedImages():
gen["date"] = self.parse_datetime(
gen["createdAt"], "%Y-%m-%dT%H:%M:%S.%fZ")
gen["date"] = self.parse_datetime_iso(gen["createdAt"])
yield Message.Directory, gen
for step in gen.pop("steps", ()):
for image in step.pop("images", ()):

View File

@@ -114,10 +114,8 @@ class ComickChapterExtractor(ComickBase, ChapterExtractor):
"chapter_hid" : ch["hid"],
"chapter_string": chstr,
"group" : ch["group_name"],
"date" : self.parse_datetime(
ch["created_at"][:19], "%Y-%m-%dT%H:%M:%S"),
"date_updated" : self.parse_datetime(
ch["updated_at"][:19], "%Y-%m-%dT%H:%M:%S"),
"date" : self.parse_datetime_iso(ch["created_at"][:19]),
"date_updated" : self.parse_datetime_iso(ch["updated_at"][:19]),
"lang" : ch["lang"],
}

View File

@@ -166,8 +166,6 @@ class DynastyscansAnthologyExtractor(DynastyscansSearchExtractor):
data["scanlator"] = content[1].text[11:]
data["tags"] = content[2].text[6:].lower().split(", ")
data["title"] = element[5].text
data["date"] = self.parse_datetime(
element[1].text, "%Y-%m-%dT%H:%M:%S%z")
data["date_updated"] = self.parse_datetime(
element[2].text, "%Y-%m-%dT%H:%M:%S%z")
data["date"] = self.parse_datetime_iso(element[1].text)
data["date_updated"] = self.parse_datetime_iso(element[2].text)
yield Message.Queue, element[4].text, data

View File

@@ -51,8 +51,7 @@ class E621Extractor(danbooru.DanbooruExtractor):
post["filename"] = file["md5"]
post["extension"] = file["ext"]
post["date"] = self.parse_datetime(
post["created_at"], "%Y-%m-%dT%H:%M:%S.%f%z")
post["date"] = self.parse_datetime_iso(post["created_at"])
post.update(data)
yield Message.Directory, post

View File

@@ -258,8 +258,8 @@ class ExhentaiGalleryExtractor(ExhentaiExtractor):
"_" : extr('<div id="gdc"><div class="cs ct', '"'),
"eh_category" : extr('>', '<'),
"uploader" : extr('<div id="gdn">', '</div>'),
"date" : self.parse_datetime(extr(
'>Posted:</td><td class="gdt2">', '</td>'), "%Y-%m-%d %H:%M"),
"date" : self.parse_datetime_iso(extr(
'>Posted:</td><td class="gdt2">', '</td>')),
"parent" : extr(
'>Parent:</td><td class="gdt2"><a href="', '"'),
"expunged" : "Yes" != extr(

View File

@@ -55,8 +55,7 @@ class Furry34Extractor(BooruExtractor):
def _prepare(self, post):
post.pop("files", None)
post["date"] = self.parse_datetime(
post["created"], "%Y-%m-%dT%H:%M:%S.%fZ")
post["date"] = self.parse_datetime_iso(post["created"])
post["filename"], _, post["format"] = post["filename"].rpartition(".")
if "tags" in post:
post["tags"] = [t["value"] for t in post["tags"]]

View File

@@ -35,8 +35,7 @@ class GelbooruV01Extractor(booru.BooruExtractor):
}
post["md5"] = post["file_url"].rpartition("/")[2].partition(".")[0]
post["date"] = self.parse_datetime(
post["created_at"], "%Y-%m-%d %H:%M:%S")
post["date"] = self.parse_datetime_iso(post["created_at"])
return post

View File

@@ -101,9 +101,8 @@ class GirlswithmusclePostExtractor(GirlswithmuscleExtractor):
"model": model,
"model_list": self._parse_model_list(model),
"tags": text.split_html(tags)[1::2],
"date": self.parse_datetime(
text.extr(page, 'class="hover-time" title="', '"')[:19],
"%Y-%m-%d %H:%M:%S"),
"date": self.parse_datetime_iso(text.extr(
page, 'class="hover-time" title="', '"')[:19]),
"is_favorite": self._parse_is_favorite(page),
"source_filename": source,
"uploader": uploader,

View File

@@ -35,8 +35,7 @@ class HentaihandGalleryExtractor(GalleryExtractor):
"language" : info["language"]["name"],
"lang" : util.language_to_code(info["language"]["name"]),
"tags" : [t["slug"] for t in info["tags"]],
"date" : self.parse_datetime(
info["uploaded_at"], "%Y-%m-%d"),
"date" : self.parse_datetime_iso(info["uploaded_at"]),
}
for key in ("artists", "authors", "groups", "characters",
"relationships", "parodies"):

View File

@@ -84,7 +84,7 @@ class HitomiGalleryExtractor(HitomiExtractor, GalleryExtractor):
"type" : info["type"].capitalize(),
"language" : language,
"lang" : util.language_to_code(language),
"date" : self.parse_datetime(date, "%Y-%m-%d %H:%M:%S%z"),
"date" : self.parse_datetime_iso(date),
"tags" : tags,
"artist" : [o["artist"] for o in iget("artists") or ()],
"group" : [o["group"] for o in iget("groups") or ()],

View File

@@ -53,11 +53,9 @@ class ImagechestGalleryExtractor(GalleryExtractor):
def _metadata_api(self, page):
post = self.api.post(self.gallery_id)
post["date"] = self.parse_datetime(
post["created"], "%Y-%m-%dT%H:%M:%S.%fZ")
post["date"] = self.parse_datetime_iso(post["created"])
for img in post["images"]:
img["date"] = self.parse_datetime(
img["created"], "%Y-%m-%dT%H:%M:%S.%fZ")
img["date"] = self.parse_datetime_iso(img["created"])
post["gallery_id"] = self.gallery_id
post.pop("image_count", None)

View File

@@ -159,8 +159,7 @@ class ImgbbImageExtractor(ImgbbExtractor):
"width" : text.parse_int(extr('"og:image:width" content="', '"')),
"height": text.parse_int(extr('"og:image:height" content="', '"')),
"album" : extr("Added to <a", "</a>"),
"date" : self.parse_datetime(extr(
'<span title="', '"'), "%Y-%m-%d %H:%M:%S"),
"date" : self.parse_datetime_iso(extr('<span title="', '"')),
"user" : util.json_loads(extr(
"CHV.obj.resource=", "};") + "}").get("user"),
}

View File

@@ -35,8 +35,8 @@ class InkbunnyExtractor(Extractor):
for post in self.posts():
post.update(metadata)
post["date"] = self.parse_datetime(
post["create_datetime"] + "00", "%Y-%m-%d %H:%M:%S.%f%z")
post["date"] = self.parse_datetime_iso(
post["create_datetime"][:19])
post["tags"] = [kw["keyword_name"] for kw in post["keywords"]]
post["ratings"] = [r["name"] for r in post["ratings"]]
files = post["files"]
@@ -52,8 +52,8 @@ class InkbunnyExtractor(Extractor):
for post["num"], file in enumerate(files, 1):
post.update(file)
post["deleted"] = (file["deleted"] == "t")
post["date"] = self.parse_datetime(
file["create_datetime"] + "00", "%Y-%m-%d %H:%M:%S.%f%z")
post["date"] = self.parse_datetime_iso(
file["create_datetime"][:19])
text.nameext_from_url(file["file_name"], post)
url = file["file_url_full"]

View File

@@ -36,8 +36,8 @@ class IssuuPublicationExtractor(IssuuBase, GalleryExtractor):
'{"":' + data.replace('\\"', '"')))
doc = data["initialDocumentData"]["document"]
doc["date"] = self.parse_datetime(
doc["originalPublishDateInISOString"], "%Y-%m-%dT%H:%M:%S.%fZ")
doc["date"] = self.parse_datetime_iso(
doc["originalPublishDateInISOString"])
self.count = text.parse_int(doc["pageCount"])
self.base = (f"https://image.isu.pub/{doc['revisionId']}-"

View File

@@ -32,8 +32,7 @@ class ItakuExtractor(Extractor):
def items(self):
if images := self.images():
for image in images:
image["date"] = self.parse_datetime(
image["date_added"], "%Y-%m-%dT%H:%M:%S.%fZ")
image["date"] = self.parse_datetime_iso(image["date_added"])
for category, tags in image.pop("categorized_tags").items():
image[f"tags_{category.lower()}"] = [
t["name"] for t in tags]
@@ -60,15 +59,14 @@ class ItakuExtractor(Extractor):
for post in posts:
images = post.pop("gallery_images") or ()
post["count"] = len(images)
post["date"] = self.parse_datetime(
post["date_added"], "%Y-%m-%dT%H:%M:%S.%fZ")
post["date"] = self.parse_datetime_iso(post["date_added"])
post["tags"] = [t["name"] for t in post["tags"]]
yield Message.Directory, post
for post["num"], image in enumerate(images, 1):
post["file"] = image
image["date"] = self.parse_datetime(
image["date_added"], "%Y-%m-%dT%H:%M:%S.%fZ")
image["date"] = self.parse_datetime_iso(
image["date_added"])
url = image["image"]
yield Message.Url, url, text.nameext_from_url(url, post)

View File

@@ -122,10 +122,10 @@ class IwaraExtractor(Extractor):
info["file_id"] = file_info.get("id")
info["filename"] = filename
info["extension"] = extension
info["date"] = self.parse_datetime(
file_info.get("createdAt"), "%Y-%m-%dT%H:%M:%S.%fZ")
info["date_updated"] = self.parse_datetime(
file_info.get("updatedAt"), "%Y-%m-%dT%H:%M:%S.%fZ")
info["date"] = self.parse_datetime_iso(
file_info.get("createdAt"))
info["date_updated"] = self.parse_datetime_iso(
file_info.get("updatedAt"))
info["mime"] = file_info.get("mime")
info["size"] = file_info.get("size")
info["width"] = file_info.get("width")
@@ -144,8 +144,7 @@ class IwaraExtractor(Extractor):
"status" : user.get("status"),
"role" : user.get("role"),
"premium": user.get("premium"),
"date" : self.parse_datetime(
user.get("createdAt"), "%Y-%m-%dT%H:%M:%S.000Z"),
"date" : self.parse_datetime_iso(user.get("createdAt")),
"description": profile.get("body"),
}

View File

@@ -32,8 +32,7 @@ class KabeuchiUserExtractor(Extractor):
if post.get("is_ad") or not post["image1"]:
continue
post["date"] = self.parse_datetime(
post["created_at"], "%Y-%m-%d %H:%M:%S")
post["date"] = self.parse_datetime_iso(post["created_at"])
yield Message.Directory, post
for key in keys:

View File

@@ -238,7 +238,7 @@ class KemonoExtractor(Extractor):
def _parse_datetime(self, date_string):
if len(date_string) > 19:
date_string = date_string[:19]
return self.parse_datetime(date_string, "%Y-%m-%dT%H:%M:%S")
return self.parse_datetime_iso(date_string)
def _revisions(self, posts):
return itertools.chain.from_iterable(

View File

@@ -119,8 +119,7 @@ class LensdumpImageExtractor(LensdumpBase, Extractor):
'property="image:width" content="', '"')),
"height": text.parse_int(extr(
'property="image:height" content="', '"')),
"date" : self.parse_datetime(extr(
'<span title="', '"'), "%Y-%m-%d %H:%M:%S"),
"date" : self.parse_datetime_iso(extr('<span title="', '"')),
}
text.nameext_from_url(data["url"], data)

View File

@@ -47,8 +47,7 @@ class MadokamiMangaExtractor(MadokamiExtractor):
"path": text.unescape(extr('href="', '"')),
"chapter_string": text.unescape(extr(">", "<")),
"size": text.parse_bytes(extr("<td>", "</td>")),
"date": self.parse_datetime(
extr("<td>", "</td>").strip(), "%Y-%m-%d %H:%M"),
"date": self.parse_datetime_iso(extr("<td>", "</td>").strip()),
})
if self.config("chapter-reverse"):

View File

@@ -50,10 +50,10 @@ class ManganeloChapterExtractor(ManganeloExtractor, ChapterExtractor):
extr = text.extract_from(page)
data = {
"date" : self.parse_datetime(extr(
'"datePublished": "', '"')[:19], "%Y-%m-%dT%H:%M:%S"),
"date_updated": self.parse_datetime(extr(
'"dateModified": "', '"')[:19], "%Y-%m-%dT%H:%M:%S"),
"date" : self.parse_datetime_iso(extr(
'"datePublished": "', '"')[:19]),
"date_updated": self.parse_datetime_iso(extr(
'"dateModified": "', '"')[:19]),
"manga_id" : text.parse_int(extr("comic_id =", ";")),
"chapter_id" : text.parse_int(extr("chapter_id =", ";")),
"manga" : extr("comic_name =", ";").strip('" '),

View File

@@ -40,10 +40,8 @@ class MangataroChapterExtractor(MangataroBase, ChapterExtractor):
"chapter_minor": str(round(minor, 5))[1:] if minor else "",
"chapter_id" : text.parse_int(chapter_id),
"chapter_url" : comic["url"],
"date" : self.parse_datetime(
comic["datePublished"], "%Y-%m-%dT%H:%M:%S%z"),
"date_updated" : self.parse_datetime(
comic["dateModified"], "%Y-%m-%dT%H:%M:%S%z"),
"date" : self.parse_datetime_iso(comic["datePublished"]),
"date_updated" : self.parse_datetime_iso(comic["dateModified"]),
}
def images(self, page):

View File

@@ -48,13 +48,11 @@ class MisskeyExtractor(BaseExtractor):
note["instance"] = self.instance
note["instance_remote"] = note["user"]["host"]
note["count"] = len(files)
note["date"] = self.parse_datetime(
note["createdAt"], "%Y-%m-%dT%H:%M:%S.%f%z")
note["date"] = self.parse_datetime_iso(note["createdAt"])
yield Message.Directory, note
for note["num"], file in enumerate(files, 1):
file["date"] = self.parse_datetime(
file["createdAt"], "%Y-%m-%dT%H:%M:%S.%f%z")
file["date"] = self.parse_datetime_iso(file["createdAt"])
note["file"] = file
url = file["url"]
yield Message.Url, url, text.nameext_from_url(url, note)

View File

@@ -38,10 +38,10 @@ class NaverChzzkExtractor(Extractor):
for data["num"], file in enumerate(files, 1):
if extra := file.get("extraJson"):
file.update(util.json_loads(extra))
file["date"] = self.parse_datetime(
file["createdDate"], "%Y-%m-%dT%H:%M:%S.%f%z")
file["date_updated"] = self.parse_datetime(
file["updatedDate"], "%Y-%m-%dT%H:%M:%S.%f%z")
file["date"] = self.parse_datetime_iso(
file["createdDate"])
file["date_updated"] = self.parse_datetime_iso(
file["updatedDate"])
data["file"] = file
url = file["attachValue"]
yield Message.Url, url, text.nameext_from_url(url, data)

View File

@@ -59,8 +59,8 @@ class NekohousePostExtractor(NekohouseExtractor):
'class="scrape__user-name', '</').rpartition(">")[2].strip()),
"title" : text.unescape(extr(
'class="scrape__title', '</').rpartition(">")[2]),
"date" : self.parse_datetime(extr(
'datetime="', '"')[:19], "%Y-%m-%d %H:%M:%S"),
"date" : self.parse_datetime_iso(extr(
'datetime="', '"')[:19]),
"content": text.unescape(extr(
'class="scrape__content">', "</div>").strip()),
}

View File

@@ -9,7 +9,7 @@
"""Extractors for https://nozomi.la/"""
from .common import Extractor, Message
from .. import text
from .. import text, dt
def decode_nozomi(n):
@@ -49,10 +49,9 @@ class NozomiExtractor(Extractor):
post["character"] = self._list(post.get("character"))
try:
post["date"] = self.parse_datetime(
post["date"] + ":00", "%Y-%m-%d %H:%M:%S%z")
post["date"] = dt.parse_iso(post["date"] + ":00")
except Exception:
post["date"] = None
post["date"] = dt.NONE
post.update(data)

View File

@@ -53,8 +53,7 @@ class PahealExtractor(Extractor):
extr("<source src='", "'")),
"uploader": text.unquote(extr(
"class='username' href='/user/", "'")),
"date" : self.parse_datetime(
extr("datetime='", "'"), "%Y-%m-%dT%H:%M:%S%z"),
"date" : self.parse_datetime_iso(extr("datetime='", "'")),
"source" : text.unescape(text.extr(
extr(">Source Link<", "</td>"), "href='", "'")),
}

View File

@@ -177,8 +177,7 @@ class PatreonExtractor(Extractor):
post, included, "attachments")
attr["attachments_media"] = self._files(
post, included, "attachments_media")
attr["date"] = self.parse_datetime(
attr["published_at"], "%Y-%m-%dT%H:%M:%S.%f%z")
attr["date"] = self.parse_datetime_iso(attr["published_at"])
try:
attr["campaign"] = (included["campaign"][
@@ -226,8 +225,7 @@ class PatreonExtractor(Extractor):
user = response.json()["data"]
attr = user["attributes"]
attr["id"] = user["id"]
attr["date"] = self.parse_datetime(
attr["created"], "%Y-%m-%dT%H:%M:%S.%f%z")
attr["date"] = self.parse_datetime_iso(attr["created"])
return attr
def _collection(self, collection_id):
@@ -236,8 +234,7 @@ class PatreonExtractor(Extractor):
coll = data["data"]
attr = coll["attributes"]
attr["id"] = coll["id"]
attr["date"] = self.parse_datetime(
attr["created_at"], "%Y-%m-%dT%H:%M:%S.%f%z")
attr["date"] = self.parse_datetime_iso(attr["created_at"])
return attr
def _filename(self, url):

View File

@@ -35,8 +35,7 @@ class PexelsExtractor(Extractor):
post["type"] = attr["type"]
post.update(metadata)
post["date"] = self.parse_datetime(
post["created_at"][:-5], "%Y-%m-%dT%H:%M:%S")
post["date"] = self.parse_datetime_iso(post["created_at"][:-5])
if "image" in post:
url, _, query = post["image"]["download_link"].partition("?")

View File

@@ -36,8 +36,7 @@ class PhilomenaExtractor(BooruExtractor):
return url
def _prepare(self, post):
post["date"] = self.parse_datetime(
post["created_at"][:19], "%Y-%m-%dT%H:%M:%S")
post["date"] = self.parse_datetime_iso(post["created_at"][:19])
BASE_PATTERN = PhilomenaExtractor.update({

View File

@@ -29,8 +29,7 @@ class PhotovogueUserExtractor(Extractor):
for photo in self.photos():
url = photo["gallery_image"]
photo["title"] = photo["title"].strip()
photo["date"] = self.parse_datetime(
photo["date"], "%Y-%m-%dT%H:%M:%S.%f%z")
photo["date"] = self.parse_datetime_iso(photo["date"])
yield Message.Directory, photo
yield Message.Url, url, text.nameext_from_url(url, photo)

View File

@@ -29,8 +29,7 @@ class PicartoGalleryExtractor(Extractor):
def items(self):
for post in self.posts():
post["date"] = self.parse_datetime(
post["created_at"], "%Y-%m-%d %H:%M:%S")
post["date"] = self.parse_datetime_iso(post["created_at"])
variations = post.pop("variations", ())
yield Message.Directory, post

View File

@@ -26,8 +26,7 @@ class PiczelExtractor(Extractor):
def items(self):
for post in self.posts():
post["tags"] = [t["title"] for t in post["tags"] if t["title"]]
post["date"] = self.parse_datetime(
post["created_at"], "%Y-%m-%dT%H:%M:%S.%f%z")
post["date"] = self.parse_datetime_iso(post["created_at"])
if post["multi"]:
images = post["images"]

View File

@@ -48,8 +48,7 @@ class PillowfortExtractor(Extractor):
for url in inline(post["content"]):
files.append({"url": url})
post["date"] = self.parse_datetime(
post["created_at"], "%Y-%m-%dT%H:%M:%S.%f%z")
post["date"] = self.parse_datetime_iso(post["created_at"])
post["post_id"] = post.pop("id")
post["count"] = len(files)
yield Message.Directory, post
@@ -76,8 +75,7 @@ class PillowfortExtractor(Extractor):
if "id" not in file:
post["id"] = post["hash"]
if "created_at" in file:
post["date"] = self.parse_datetime(
file["created_at"], "%Y-%m-%dT%H:%M:%S.%f%z")
post["date"] = self.parse_datetime_iso(file["created_at"])
yield msgtype, url, post

View File

@@ -24,10 +24,6 @@ class PixeldrainExtractor(Extractor):
if api_key := self.config("api-key"):
self.session.auth = util.HTTPBasicAuth("", api_key)
def _parse_datetime(self, date_string):
return self.parse_datetime(
date_string, "%Y-%m-%dT%H:%M:%S.%fZ")
class PixeldrainFileExtractor(PixeldrainExtractor):
"""Extractor for pixeldrain files"""

View File

@@ -150,8 +150,7 @@ class PornhubGifExtractor(PornhubExtractor):
"tags" : extr("data-context-tag='", "'").split(","),
"title": extr('"name": "', '"'),
"url" : extr('"contentUrl": "', '"'),
"date" : self.parse_datetime(
extr('"uploadDate": "', '"'), "%Y-%m-%d"),
"date" : self.parse_datetime_iso(extr('"uploadDate": "', '"')),
"viewkey" : extr('From this video: '
'<a href="/view_video.php?viewkey=', '"'),
"timestamp": extr('lass="directLink tstamp" rel="nofollow">', '<'),

View File

@@ -36,8 +36,7 @@ class Rule34vaultExtractor(BooruExtractor):
def _prepare(self, post):
post.pop("files", None)
post["date"] = self.parse_datetime(
post["created"], "%Y-%m-%dT%H:%M:%S.%fZ")
post["date"] = self.parse_datetime_iso(post["created"])
if "tags" in post:
post["tags"] = [t["value"] for t in post["tags"]]

View File

@@ -68,8 +68,7 @@ class Rule34xyzExtractor(BooruExtractor):
def _prepare(self, post):
post.pop("files", None)
post["date"] = self.parse_datetime(
post["created"], "%Y-%m-%dT%H:%M:%S.%fZ")
post["date"] = self.parse_datetime_iso(post["created"])
post["filename"], _, post["format"] = post["filename"].rpartition(".")
if "tags" in post:
post["tags"] = [t["value"] for t in post["tags"]]

View File

@@ -30,10 +30,8 @@ class S3ndpicsExtractor(Extractor):
for post in self.posts():
post["id"] = post.pop("_id", None)
post["user"] = post.pop("userId", None)
post["date"] = self.parse_datetime(
post["createdAt"], "%Y-%m-%dT%H:%M:%S.%fZ")
post["date_updated"] = self.parse_datetime(
post["updatedAt"], "%Y-%m-%dT%H:%M:%S.%fZ")
post["date"] = self.parse_datetime_iso(post["createdAt"])
post["date_updated"] = self.parse_datetime_iso(post["updatedAt"])
files = post.pop("files", ())
post["count"] = len(files)

View File

@@ -39,8 +39,8 @@ class SlidesharePresentationExtractor(GalleryExtractor):
"description" : slideshow["description"].strip(),
"views" : slideshow["views"],
"likes" : slideshow["likes"],
"date" : self.parse_datetime(
slideshow["createdAt"], "%Y-%m-%d %H:%M:%S %Z"),
"date" : self.parse_datetime_iso(
slideshow["createdAt"][:19]),
}
def images(self, page):

View File

@@ -57,8 +57,7 @@ class SzurubooruExtractor(booru.BooruExtractor):
return url
def _prepare(self, post):
post["date"] = self.parse_datetime(
post["creationTime"], "%Y-%m-%dT%H:%M:%S.%fZ")
post["date"] = self.parse_datetime_iso(post["creationTime"])
tags = []
tags_categories = collections.defaultdict(list)

View File

@@ -56,8 +56,7 @@ class ThehentaiworldExtractor(Extractor):
"id" : text.parse_int(extr(" postid-", " ")),
"slug" : extr(" post-", '"'),
"tags" : extr('id="tagsHead">', "</ul>"),
"date" : self.parse_datetime(extr(
"<li>Posted: ", "<"), "%Y-%m-%d"),
"date" : self.parse_datetime_iso(extr("<li>Posted: ", "<")),
}
if (c := url[27]) == "v":

View File

@@ -37,8 +37,7 @@ class TwibooruExtractor(BooruExtractor):
return post["view_url"]
def _prepare(self, post):
post["date"] = self.parse_datetime(
post["created_at"], "%Y-%m-%dT%H:%M:%S.%fZ")
post["date"] = self.parse_datetime_iso(post["created_at"])
if "name" in post:
name, sep, rest = post["name"].rpartition(".")
@@ -146,8 +145,8 @@ class TwibooruAPI():
return response.json()
if response.status_code == 429:
until = self.parse_datetime(
response.headers["X-RL-Reset"], "%Y-%m-%d %H:%M:%S %Z")
until = self.parse_datetime_iso(
response.headers["X-RL-Reset"][:19])
# wait an extra minute, just to be safe
self.extractor.wait(until=until, adjust=60.0)
continue

View File

@@ -47,8 +47,8 @@ class VanillarockPostExtractor(VanillarockExtractor):
"count": len(imgs),
"title": text.unescape(name),
"path" : self.path.strip("/"),
"date" : self.parse_datetime(extr(
'<div class="date">', '</div>'), "%Y-%m-%d %H:%M"),
"date" : self.parse_datetime_iso(extr(
'<div class="date">', '</div>')),
"tags" : text.split_html(extr(
'<div class="cat-tag">', '</div>'))[::2],
}

View File

@@ -43,8 +43,7 @@ class WallhavenExtractor(Extractor):
wp["url"] = wp.pop("path")
if "tags" in wp:
wp["tags"] = [t["name"] for t in wp["tags"]]
wp["date"] = self.parse_datetime(
wp.pop("created_at"), "%Y-%m-%d %H:%M:%S")
wp["date"] = self.parse_datetime_iso(wp.pop("created_at"))
wp["width"] = wp.pop("dimension_x")
wp["height"] = wp.pop("dimension_y")
wp["wh_category"] = wp["category"]

View File

@@ -127,8 +127,8 @@ class WeebcentralMangaExtractor(WeebcentralBase, MangaExtractor):
"chapter" : text.parse_int(chapter),
"chapter_minor": sep + minor,
"chapter_type" : type,
"date" : self.parse_datetime(
extr(' datetime="', '"')[:-5], "%Y-%m-%dT%H:%M:%S"),
"date" : self.parse_datetime_iso(extr(
' datetime="', '"')[:-5]),
}
chapter.update(data)
results.append((base + chapter_id, chapter))

View File

@@ -34,8 +34,8 @@ class WikifeetGalleryExtractor(GalleryExtractor):
"celeb" : self.celeb,
"type" : self.type,
"birthplace": text.unescape(extr('"bplace":"', '"')),
"birthday" : self.parse_datetime(text.unescape(
extr('"bdate":"', '"'))[:10], "%Y-%m-%d"),
"birthday" : self.parse_datetime_iso(text.unescape(extr(
'"bdate":"', '"'))[:10]),
"shoesize" : text.unescape(extr('"ssize":', ',')),
"rating" : text.parse_float(extr('"score":', ',')),
"celebrity" : text.unescape(extr('"cname":"', '"')),

View File

@@ -75,8 +75,7 @@ class WikimediaExtractor(BaseExtractor):
for m in image["commonmetadata"] or ()}
text.nameext_from_url(image["canonicaltitle"].partition(":")[2], image)
image["date"] = self.parse_datetime(
image["timestamp"], "%Y-%m-%dT%H:%M:%SZ")
image["date"] = self.parse_datetime_iso(image["timestamp"])
def items(self):
for info in self._pagination(self.params):

View File

@@ -55,8 +55,7 @@ class YiffverseExtractor(BooruExtractor):
def _prepare(self, post):
post.pop("files", None)
post["date"] = self.parse_datetime(
post["created"], "%Y-%m-%dT%H:%M:%S.%fZ")
post["date"] = self.parse_datetime_iso(post["created"])
post["filename"], _, post["format"] = post["filename"].rpartition(".")
if "tags" in post:
post["tags"] = [t["value"] for t in post["tags"]]