[dt] use 'parse_datetime_iso()' for ISO formats
This commit is contained in:
@@ -85,8 +85,7 @@ class _8musesAlbumExtractor(Extractor):
|
||||
"parent" : text.parse_int(album["parentId"]),
|
||||
"views" : text.parse_int(album["numberViews"]),
|
||||
"likes" : text.parse_int(album["numberLikes"]),
|
||||
"date" : self.parse_datetime(
|
||||
album["updatedAt"], "%Y-%m-%dT%H:%M:%S.%fZ"),
|
||||
"date" : self.parse_datetime_iso(album["updatedAt"]),
|
||||
}
|
||||
|
||||
def _unobfuscate(self, data):
|
||||
|
||||
@@ -182,10 +182,10 @@ class Ao3WorkExtractor(Ao3Extractor):
|
||||
extr('<dd class="freeform tags">', "</dd>")),
|
||||
"lang" : extr('<dd class="language" lang="', '"'),
|
||||
"series" : extr('<dd class="series">', "</dd>"),
|
||||
"date" : self.parse_datetime(
|
||||
extr('<dd class="published">', "<"), "%Y-%m-%d"),
|
||||
"date_completed": self.parse_datetime(
|
||||
extr('>Completed:</dt><dd class="status">', "<"), "%Y-%m-%d"),
|
||||
"date" : self.parse_datetime_iso(extr(
|
||||
'<dd class="published">', "<")),
|
||||
"date_completed": self.parse_datetime_iso(extr(
|
||||
'>Completed:</dt><dd class="status">', "<")),
|
||||
"date_updated" : self.parse_timestamp(
|
||||
path.rpartition("updated_at=")[2]),
|
||||
"words" : text.parse_int(
|
||||
|
||||
@@ -49,8 +49,7 @@ class ArcalivePostExtractor(ArcaliveExtractor):
|
||||
files = self._extract_files(post)
|
||||
|
||||
post["count"] = len(files)
|
||||
post["date"] = self.parse_datetime(
|
||||
post["createdAt"][:19], "%Y-%m-%dT%H:%M:%S")
|
||||
post["date"] = self.parse_datetime_iso(post["createdAt"][:19])
|
||||
post["post_url"] = post_url = \
|
||||
f"{self.root}/b/{post['boardSlug']}/{post['id']}"
|
||||
post["_http_headers"] = {"Referer": post_url + "?p=1"}
|
||||
|
||||
@@ -126,8 +126,7 @@ class ArtstationExtractor(Extractor):
|
||||
data["title"] = text.unescape(data["title"])
|
||||
data["description"] = text.unescape(text.remove_html(
|
||||
data["description"]))
|
||||
data["date"] = self.parse_datetime(
|
||||
data["created_at"], "%Y-%m-%dT%H:%M:%S.%f%z")
|
||||
data["date"] = self.parse_datetime_iso(data["created_at"])
|
||||
|
||||
assets = data["assets"]
|
||||
del data["assets"]
|
||||
|
||||
@@ -167,8 +167,7 @@ class BatotoMangaExtractor(BatotoBase, MangaExtractor):
|
||||
|
||||
data["chapter"] = text.parse_int(chapter)
|
||||
data["chapter_minor"] = sep + minor
|
||||
data["date"] = self.parse_datetime(
|
||||
extr('time="', '"'), "%Y-%m-%dT%H:%M:%S.%fZ")
|
||||
data["date"] = self.parse_datetime_iso(extr('time="', '"'))
|
||||
|
||||
url = f"{self.root}/title/{href}"
|
||||
results.append((url, data.copy()))
|
||||
|
||||
@@ -135,8 +135,7 @@ class BlueskyExtractor(Extractor):
|
||||
|
||||
post["instance"] = self.instance
|
||||
post["post_id"] = self._pid(post)
|
||||
post["date"] = self.parse_datetime(
|
||||
post["createdAt"][:19], "%Y-%m-%dT%H:%M:%S")
|
||||
post["date"] = self.parse_datetime_iso(post["createdAt"][:19])
|
||||
|
||||
def _extract_files(self, post):
|
||||
if "embed" not in post:
|
||||
|
||||
@@ -70,8 +70,7 @@ class BoothItemExtractor(BoothExtractor):
|
||||
url + ".json", headers=headers, interval=False)
|
||||
|
||||
item["booth_category"] = item.pop("category", None)
|
||||
item["date"] = self.parse_datetime(
|
||||
item["published_at"], "%Y-%m-%dT%H:%M:%S.%f%z")
|
||||
item["date"] = self.parse_datetime_iso(item["published_at"])
|
||||
item["tags"] = [t["name"] for t in item["tags"]]
|
||||
|
||||
shop = item["shop"]
|
||||
|
||||
@@ -79,8 +79,7 @@ class CheveretoImageExtractor(CheveretoExtractor):
|
||||
"url" : url,
|
||||
"album": text.remove_html(extr(
|
||||
"Added to <a", "</a>").rpartition(">")[2]),
|
||||
"date" : self.parse_datetime(extr(
|
||||
'<span title="', '"'), "%Y-%m-%d %H:%M:%S"),
|
||||
"date" : self.parse_datetime_iso(extr('<span title="', '"')),
|
||||
"user" : extr('username: "', '"'),
|
||||
}
|
||||
|
||||
@@ -116,8 +115,7 @@ class CheveretoVideoExtractor(CheveretoExtractor):
|
||||
'class="far fa-clock"></i>', "—"),
|
||||
"album": text.remove_html(extr(
|
||||
"Added to <a", "</a>").rpartition(">")[2]),
|
||||
"date" : self.parse_datetime(extr(
|
||||
'<span title="', '"'), "%Y-%m-%d %H:%M:%S"),
|
||||
"date" : self.parse_datetime_iso(extr('<span title="', '"')),
|
||||
"user" : extr('username: "', '"'),
|
||||
}
|
||||
|
||||
|
||||
@@ -86,8 +86,7 @@ class CivitaiExtractor(Extractor):
|
||||
images = self.api.images_post(post["id"])
|
||||
|
||||
post = self.api.post(post["id"])
|
||||
post["date"] = self.parse_datetime(
|
||||
post["publishedAt"], "%Y-%m-%dT%H:%M:%S.%fZ")
|
||||
post["date"] = self.parse_datetime_iso(post["publishedAt"])
|
||||
data = {
|
||||
"post": post,
|
||||
"user": post.pop("user"),
|
||||
@@ -122,8 +121,7 @@ class CivitaiExtractor(Extractor):
|
||||
data["post"] = post = self._extract_meta_post(file)
|
||||
if post:
|
||||
post.pop("user", None)
|
||||
file["date"] = self.parse_datetime(
|
||||
file["createdAt"], "%Y-%m-%dT%H:%M:%S.%fZ")
|
||||
file["date"] = self.parse_datetime_iso(file["createdAt"])
|
||||
|
||||
data["url"] = url = self._url(file)
|
||||
text.nameext_from_url(url, data)
|
||||
@@ -180,8 +178,7 @@ class CivitaiExtractor(Extractor):
|
||||
if "id" not in file and data["filename"].isdecimal():
|
||||
file["id"] = text.parse_int(data["filename"])
|
||||
if "date" not in file:
|
||||
file["date"] = self.parse_datetime(
|
||||
file["createdAt"], "%Y-%m-%dT%H:%M:%S.%fZ")
|
||||
file["date"] = self.parse_datetime_iso(file["createdAt"])
|
||||
if self._meta_generation:
|
||||
file["generation"] = self._extract_meta_generation(file)
|
||||
yield data
|
||||
@@ -216,8 +213,7 @@ class CivitaiExtractor(Extractor):
|
||||
def _extract_meta_post(self, image):
|
||||
try:
|
||||
post = self.api.post(image["postId"])
|
||||
post["date"] = self.parse_datetime(
|
||||
post["publishedAt"], "%Y-%m-%dT%H:%M:%S.%fZ")
|
||||
post["date"] = self.parse_datetime_iso(post["publishedAt"])
|
||||
return post
|
||||
except Exception as exc:
|
||||
return self.log.debug("", exc_info=exc)
|
||||
@@ -278,8 +274,7 @@ class CivitaiModelExtractor(CivitaiExtractor):
|
||||
versions = (version,)
|
||||
|
||||
for version in versions:
|
||||
version["date"] = self.parse_datetime(
|
||||
version["createdAt"], "%Y-%m-%dT%H:%M:%S.%fZ")
|
||||
version["date"] = self.parse_datetime_iso(version["createdAt"])
|
||||
|
||||
data = {
|
||||
"model" : model,
|
||||
@@ -593,8 +588,7 @@ class CivitaiGeneratedExtractor(CivitaiExtractor):
|
||||
self._require_auth()
|
||||
|
||||
for gen in self.api.orchestrator_queryGeneratedImages():
|
||||
gen["date"] = self.parse_datetime(
|
||||
gen["createdAt"], "%Y-%m-%dT%H:%M:%S.%fZ")
|
||||
gen["date"] = self.parse_datetime_iso(gen["createdAt"])
|
||||
yield Message.Directory, gen
|
||||
for step in gen.pop("steps", ()):
|
||||
for image in step.pop("images", ()):
|
||||
|
||||
@@ -114,10 +114,8 @@ class ComickChapterExtractor(ComickBase, ChapterExtractor):
|
||||
"chapter_hid" : ch["hid"],
|
||||
"chapter_string": chstr,
|
||||
"group" : ch["group_name"],
|
||||
"date" : self.parse_datetime(
|
||||
ch["created_at"][:19], "%Y-%m-%dT%H:%M:%S"),
|
||||
"date_updated" : self.parse_datetime(
|
||||
ch["updated_at"][:19], "%Y-%m-%dT%H:%M:%S"),
|
||||
"date" : self.parse_datetime_iso(ch["created_at"][:19]),
|
||||
"date_updated" : self.parse_datetime_iso(ch["updated_at"][:19]),
|
||||
"lang" : ch["lang"],
|
||||
}
|
||||
|
||||
|
||||
@@ -166,8 +166,6 @@ class DynastyscansAnthologyExtractor(DynastyscansSearchExtractor):
|
||||
data["scanlator"] = content[1].text[11:]
|
||||
data["tags"] = content[2].text[6:].lower().split(", ")
|
||||
data["title"] = element[5].text
|
||||
data["date"] = self.parse_datetime(
|
||||
element[1].text, "%Y-%m-%dT%H:%M:%S%z")
|
||||
data["date_updated"] = self.parse_datetime(
|
||||
element[2].text, "%Y-%m-%dT%H:%M:%S%z")
|
||||
data["date"] = self.parse_datetime_iso(element[1].text)
|
||||
data["date_updated"] = self.parse_datetime_iso(element[2].text)
|
||||
yield Message.Queue, element[4].text, data
|
||||
|
||||
@@ -51,8 +51,7 @@ class E621Extractor(danbooru.DanbooruExtractor):
|
||||
|
||||
post["filename"] = file["md5"]
|
||||
post["extension"] = file["ext"]
|
||||
post["date"] = self.parse_datetime(
|
||||
post["created_at"], "%Y-%m-%dT%H:%M:%S.%f%z")
|
||||
post["date"] = self.parse_datetime_iso(post["created_at"])
|
||||
|
||||
post.update(data)
|
||||
yield Message.Directory, post
|
||||
|
||||
@@ -258,8 +258,8 @@ class ExhentaiGalleryExtractor(ExhentaiExtractor):
|
||||
"_" : extr('<div id="gdc"><div class="cs ct', '"'),
|
||||
"eh_category" : extr('>', '<'),
|
||||
"uploader" : extr('<div id="gdn">', '</div>'),
|
||||
"date" : self.parse_datetime(extr(
|
||||
'>Posted:</td><td class="gdt2">', '</td>'), "%Y-%m-%d %H:%M"),
|
||||
"date" : self.parse_datetime_iso(extr(
|
||||
'>Posted:</td><td class="gdt2">', '</td>')),
|
||||
"parent" : extr(
|
||||
'>Parent:</td><td class="gdt2"><a href="', '"'),
|
||||
"expunged" : "Yes" != extr(
|
||||
|
||||
@@ -55,8 +55,7 @@ class Furry34Extractor(BooruExtractor):
|
||||
|
||||
def _prepare(self, post):
|
||||
post.pop("files", None)
|
||||
post["date"] = self.parse_datetime(
|
||||
post["created"], "%Y-%m-%dT%H:%M:%S.%fZ")
|
||||
post["date"] = self.parse_datetime_iso(post["created"])
|
||||
post["filename"], _, post["format"] = post["filename"].rpartition(".")
|
||||
if "tags" in post:
|
||||
post["tags"] = [t["value"] for t in post["tags"]]
|
||||
|
||||
@@ -35,8 +35,7 @@ class GelbooruV01Extractor(booru.BooruExtractor):
|
||||
}
|
||||
|
||||
post["md5"] = post["file_url"].rpartition("/")[2].partition(".")[0]
|
||||
post["date"] = self.parse_datetime(
|
||||
post["created_at"], "%Y-%m-%d %H:%M:%S")
|
||||
post["date"] = self.parse_datetime_iso(post["created_at"])
|
||||
|
||||
return post
|
||||
|
||||
|
||||
@@ -101,9 +101,8 @@ class GirlswithmusclePostExtractor(GirlswithmuscleExtractor):
|
||||
"model": model,
|
||||
"model_list": self._parse_model_list(model),
|
||||
"tags": text.split_html(tags)[1::2],
|
||||
"date": self.parse_datetime(
|
||||
text.extr(page, 'class="hover-time" title="', '"')[:19],
|
||||
"%Y-%m-%d %H:%M:%S"),
|
||||
"date": self.parse_datetime_iso(text.extr(
|
||||
page, 'class="hover-time" title="', '"')[:19]),
|
||||
"is_favorite": self._parse_is_favorite(page),
|
||||
"source_filename": source,
|
||||
"uploader": uploader,
|
||||
|
||||
@@ -35,8 +35,7 @@ class HentaihandGalleryExtractor(GalleryExtractor):
|
||||
"language" : info["language"]["name"],
|
||||
"lang" : util.language_to_code(info["language"]["name"]),
|
||||
"tags" : [t["slug"] for t in info["tags"]],
|
||||
"date" : self.parse_datetime(
|
||||
info["uploaded_at"], "%Y-%m-%d"),
|
||||
"date" : self.parse_datetime_iso(info["uploaded_at"]),
|
||||
}
|
||||
for key in ("artists", "authors", "groups", "characters",
|
||||
"relationships", "parodies"):
|
||||
|
||||
@@ -84,7 +84,7 @@ class HitomiGalleryExtractor(HitomiExtractor, GalleryExtractor):
|
||||
"type" : info["type"].capitalize(),
|
||||
"language" : language,
|
||||
"lang" : util.language_to_code(language),
|
||||
"date" : self.parse_datetime(date, "%Y-%m-%d %H:%M:%S%z"),
|
||||
"date" : self.parse_datetime_iso(date),
|
||||
"tags" : tags,
|
||||
"artist" : [o["artist"] for o in iget("artists") or ()],
|
||||
"group" : [o["group"] for o in iget("groups") or ()],
|
||||
|
||||
@@ -53,11 +53,9 @@ class ImagechestGalleryExtractor(GalleryExtractor):
|
||||
def _metadata_api(self, page):
|
||||
post = self.api.post(self.gallery_id)
|
||||
|
||||
post["date"] = self.parse_datetime(
|
||||
post["created"], "%Y-%m-%dT%H:%M:%S.%fZ")
|
||||
post["date"] = self.parse_datetime_iso(post["created"])
|
||||
for img in post["images"]:
|
||||
img["date"] = self.parse_datetime(
|
||||
img["created"], "%Y-%m-%dT%H:%M:%S.%fZ")
|
||||
img["date"] = self.parse_datetime_iso(img["created"])
|
||||
|
||||
post["gallery_id"] = self.gallery_id
|
||||
post.pop("image_count", None)
|
||||
|
||||
@@ -159,8 +159,7 @@ class ImgbbImageExtractor(ImgbbExtractor):
|
||||
"width" : text.parse_int(extr('"og:image:width" content="', '"')),
|
||||
"height": text.parse_int(extr('"og:image:height" content="', '"')),
|
||||
"album" : extr("Added to <a", "</a>"),
|
||||
"date" : self.parse_datetime(extr(
|
||||
'<span title="', '"'), "%Y-%m-%d %H:%M:%S"),
|
||||
"date" : self.parse_datetime_iso(extr('<span title="', '"')),
|
||||
"user" : util.json_loads(extr(
|
||||
"CHV.obj.resource=", "};") + "}").get("user"),
|
||||
}
|
||||
|
||||
@@ -35,8 +35,8 @@ class InkbunnyExtractor(Extractor):
|
||||
|
||||
for post in self.posts():
|
||||
post.update(metadata)
|
||||
post["date"] = self.parse_datetime(
|
||||
post["create_datetime"] + "00", "%Y-%m-%d %H:%M:%S.%f%z")
|
||||
post["date"] = self.parse_datetime_iso(
|
||||
post["create_datetime"][:19])
|
||||
post["tags"] = [kw["keyword_name"] for kw in post["keywords"]]
|
||||
post["ratings"] = [r["name"] for r in post["ratings"]]
|
||||
files = post["files"]
|
||||
@@ -52,8 +52,8 @@ class InkbunnyExtractor(Extractor):
|
||||
for post["num"], file in enumerate(files, 1):
|
||||
post.update(file)
|
||||
post["deleted"] = (file["deleted"] == "t")
|
||||
post["date"] = self.parse_datetime(
|
||||
file["create_datetime"] + "00", "%Y-%m-%d %H:%M:%S.%f%z")
|
||||
post["date"] = self.parse_datetime_iso(
|
||||
file["create_datetime"][:19])
|
||||
text.nameext_from_url(file["file_name"], post)
|
||||
|
||||
url = file["file_url_full"]
|
||||
|
||||
@@ -36,8 +36,8 @@ class IssuuPublicationExtractor(IssuuBase, GalleryExtractor):
|
||||
'{"":' + data.replace('\\"', '"')))
|
||||
|
||||
doc = data["initialDocumentData"]["document"]
|
||||
doc["date"] = self.parse_datetime(
|
||||
doc["originalPublishDateInISOString"], "%Y-%m-%dT%H:%M:%S.%fZ")
|
||||
doc["date"] = self.parse_datetime_iso(
|
||||
doc["originalPublishDateInISOString"])
|
||||
|
||||
self.count = text.parse_int(doc["pageCount"])
|
||||
self.base = (f"https://image.isu.pub/{doc['revisionId']}-"
|
||||
|
||||
@@ -32,8 +32,7 @@ class ItakuExtractor(Extractor):
|
||||
def items(self):
|
||||
if images := self.images():
|
||||
for image in images:
|
||||
image["date"] = self.parse_datetime(
|
||||
image["date_added"], "%Y-%m-%dT%H:%M:%S.%fZ")
|
||||
image["date"] = self.parse_datetime_iso(image["date_added"])
|
||||
for category, tags in image.pop("categorized_tags").items():
|
||||
image[f"tags_{category.lower()}"] = [
|
||||
t["name"] for t in tags]
|
||||
@@ -60,15 +59,14 @@ class ItakuExtractor(Extractor):
|
||||
for post in posts:
|
||||
images = post.pop("gallery_images") or ()
|
||||
post["count"] = len(images)
|
||||
post["date"] = self.parse_datetime(
|
||||
post["date_added"], "%Y-%m-%dT%H:%M:%S.%fZ")
|
||||
post["date"] = self.parse_datetime_iso(post["date_added"])
|
||||
post["tags"] = [t["name"] for t in post["tags"]]
|
||||
|
||||
yield Message.Directory, post
|
||||
for post["num"], image in enumerate(images, 1):
|
||||
post["file"] = image
|
||||
image["date"] = self.parse_datetime(
|
||||
image["date_added"], "%Y-%m-%dT%H:%M:%S.%fZ")
|
||||
image["date"] = self.parse_datetime_iso(
|
||||
image["date_added"])
|
||||
|
||||
url = image["image"]
|
||||
yield Message.Url, url, text.nameext_from_url(url, post)
|
||||
|
||||
@@ -122,10 +122,10 @@ class IwaraExtractor(Extractor):
|
||||
info["file_id"] = file_info.get("id")
|
||||
info["filename"] = filename
|
||||
info["extension"] = extension
|
||||
info["date"] = self.parse_datetime(
|
||||
file_info.get("createdAt"), "%Y-%m-%dT%H:%M:%S.%fZ")
|
||||
info["date_updated"] = self.parse_datetime(
|
||||
file_info.get("updatedAt"), "%Y-%m-%dT%H:%M:%S.%fZ")
|
||||
info["date"] = self.parse_datetime_iso(
|
||||
file_info.get("createdAt"))
|
||||
info["date_updated"] = self.parse_datetime_iso(
|
||||
file_info.get("updatedAt"))
|
||||
info["mime"] = file_info.get("mime")
|
||||
info["size"] = file_info.get("size")
|
||||
info["width"] = file_info.get("width")
|
||||
@@ -144,8 +144,7 @@ class IwaraExtractor(Extractor):
|
||||
"status" : user.get("status"),
|
||||
"role" : user.get("role"),
|
||||
"premium": user.get("premium"),
|
||||
"date" : self.parse_datetime(
|
||||
user.get("createdAt"), "%Y-%m-%dT%H:%M:%S.000Z"),
|
||||
"date" : self.parse_datetime_iso(user.get("createdAt")),
|
||||
"description": profile.get("body"),
|
||||
}
|
||||
|
||||
|
||||
@@ -32,8 +32,7 @@ class KabeuchiUserExtractor(Extractor):
|
||||
if post.get("is_ad") or not post["image1"]:
|
||||
continue
|
||||
|
||||
post["date"] = self.parse_datetime(
|
||||
post["created_at"], "%Y-%m-%d %H:%M:%S")
|
||||
post["date"] = self.parse_datetime_iso(post["created_at"])
|
||||
yield Message.Directory, post
|
||||
|
||||
for key in keys:
|
||||
|
||||
@@ -238,7 +238,7 @@ class KemonoExtractor(Extractor):
|
||||
def _parse_datetime(self, date_string):
|
||||
if len(date_string) > 19:
|
||||
date_string = date_string[:19]
|
||||
return self.parse_datetime(date_string, "%Y-%m-%dT%H:%M:%S")
|
||||
return self.parse_datetime_iso(date_string)
|
||||
|
||||
def _revisions(self, posts):
|
||||
return itertools.chain.from_iterable(
|
||||
|
||||
@@ -119,8 +119,7 @@ class LensdumpImageExtractor(LensdumpBase, Extractor):
|
||||
'property="image:width" content="', '"')),
|
||||
"height": text.parse_int(extr(
|
||||
'property="image:height" content="', '"')),
|
||||
"date" : self.parse_datetime(extr(
|
||||
'<span title="', '"'), "%Y-%m-%d %H:%M:%S"),
|
||||
"date" : self.parse_datetime_iso(extr('<span title="', '"')),
|
||||
}
|
||||
|
||||
text.nameext_from_url(data["url"], data)
|
||||
|
||||
@@ -47,8 +47,7 @@ class MadokamiMangaExtractor(MadokamiExtractor):
|
||||
"path": text.unescape(extr('href="', '"')),
|
||||
"chapter_string": text.unescape(extr(">", "<")),
|
||||
"size": text.parse_bytes(extr("<td>", "</td>")),
|
||||
"date": self.parse_datetime(
|
||||
extr("<td>", "</td>").strip(), "%Y-%m-%d %H:%M"),
|
||||
"date": self.parse_datetime_iso(extr("<td>", "</td>").strip()),
|
||||
})
|
||||
|
||||
if self.config("chapter-reverse"):
|
||||
|
||||
@@ -50,10 +50,10 @@ class ManganeloChapterExtractor(ManganeloExtractor, ChapterExtractor):
|
||||
extr = text.extract_from(page)
|
||||
|
||||
data = {
|
||||
"date" : self.parse_datetime(extr(
|
||||
'"datePublished": "', '"')[:19], "%Y-%m-%dT%H:%M:%S"),
|
||||
"date_updated": self.parse_datetime(extr(
|
||||
'"dateModified": "', '"')[:19], "%Y-%m-%dT%H:%M:%S"),
|
||||
"date" : self.parse_datetime_iso(extr(
|
||||
'"datePublished": "', '"')[:19]),
|
||||
"date_updated": self.parse_datetime_iso(extr(
|
||||
'"dateModified": "', '"')[:19]),
|
||||
"manga_id" : text.parse_int(extr("comic_id =", ";")),
|
||||
"chapter_id" : text.parse_int(extr("chapter_id =", ";")),
|
||||
"manga" : extr("comic_name =", ";").strip('" '),
|
||||
|
||||
@@ -40,10 +40,8 @@ class MangataroChapterExtractor(MangataroBase, ChapterExtractor):
|
||||
"chapter_minor": str(round(minor, 5))[1:] if minor else "",
|
||||
"chapter_id" : text.parse_int(chapter_id),
|
||||
"chapter_url" : comic["url"],
|
||||
"date" : self.parse_datetime(
|
||||
comic["datePublished"], "%Y-%m-%dT%H:%M:%S%z"),
|
||||
"date_updated" : self.parse_datetime(
|
||||
comic["dateModified"], "%Y-%m-%dT%H:%M:%S%z"),
|
||||
"date" : self.parse_datetime_iso(comic["datePublished"]),
|
||||
"date_updated" : self.parse_datetime_iso(comic["dateModified"]),
|
||||
}
|
||||
|
||||
def images(self, page):
|
||||
|
||||
@@ -48,13 +48,11 @@ class MisskeyExtractor(BaseExtractor):
|
||||
note["instance"] = self.instance
|
||||
note["instance_remote"] = note["user"]["host"]
|
||||
note["count"] = len(files)
|
||||
note["date"] = self.parse_datetime(
|
||||
note["createdAt"], "%Y-%m-%dT%H:%M:%S.%f%z")
|
||||
note["date"] = self.parse_datetime_iso(note["createdAt"])
|
||||
|
||||
yield Message.Directory, note
|
||||
for note["num"], file in enumerate(files, 1):
|
||||
file["date"] = self.parse_datetime(
|
||||
file["createdAt"], "%Y-%m-%dT%H:%M:%S.%f%z")
|
||||
file["date"] = self.parse_datetime_iso(file["createdAt"])
|
||||
note["file"] = file
|
||||
url = file["url"]
|
||||
yield Message.Url, url, text.nameext_from_url(url, note)
|
||||
|
||||
@@ -38,10 +38,10 @@ class NaverChzzkExtractor(Extractor):
|
||||
for data["num"], file in enumerate(files, 1):
|
||||
if extra := file.get("extraJson"):
|
||||
file.update(util.json_loads(extra))
|
||||
file["date"] = self.parse_datetime(
|
||||
file["createdDate"], "%Y-%m-%dT%H:%M:%S.%f%z")
|
||||
file["date_updated"] = self.parse_datetime(
|
||||
file["updatedDate"], "%Y-%m-%dT%H:%M:%S.%f%z")
|
||||
file["date"] = self.parse_datetime_iso(
|
||||
file["createdDate"])
|
||||
file["date_updated"] = self.parse_datetime_iso(
|
||||
file["updatedDate"])
|
||||
data["file"] = file
|
||||
url = file["attachValue"]
|
||||
yield Message.Url, url, text.nameext_from_url(url, data)
|
||||
|
||||
@@ -59,8 +59,8 @@ class NekohousePostExtractor(NekohouseExtractor):
|
||||
'class="scrape__user-name', '</').rpartition(">")[2].strip()),
|
||||
"title" : text.unescape(extr(
|
||||
'class="scrape__title', '</').rpartition(">")[2]),
|
||||
"date" : self.parse_datetime(extr(
|
||||
'datetime="', '"')[:19], "%Y-%m-%d %H:%M:%S"),
|
||||
"date" : self.parse_datetime_iso(extr(
|
||||
'datetime="', '"')[:19]),
|
||||
"content": text.unescape(extr(
|
||||
'class="scrape__content">', "</div>").strip()),
|
||||
}
|
||||
|
||||
@@ -9,7 +9,7 @@
|
||||
"""Extractors for https://nozomi.la/"""
|
||||
|
||||
from .common import Extractor, Message
|
||||
from .. import text
|
||||
from .. import text, dt
|
||||
|
||||
|
||||
def decode_nozomi(n):
|
||||
@@ -49,10 +49,9 @@ class NozomiExtractor(Extractor):
|
||||
post["character"] = self._list(post.get("character"))
|
||||
|
||||
try:
|
||||
post["date"] = self.parse_datetime(
|
||||
post["date"] + ":00", "%Y-%m-%d %H:%M:%S%z")
|
||||
post["date"] = dt.parse_iso(post["date"] + ":00")
|
||||
except Exception:
|
||||
post["date"] = None
|
||||
post["date"] = dt.NONE
|
||||
|
||||
post.update(data)
|
||||
|
||||
|
||||
@@ -53,8 +53,7 @@ class PahealExtractor(Extractor):
|
||||
extr("<source src='", "'")),
|
||||
"uploader": text.unquote(extr(
|
||||
"class='username' href='/user/", "'")),
|
||||
"date" : self.parse_datetime(
|
||||
extr("datetime='", "'"), "%Y-%m-%dT%H:%M:%S%z"),
|
||||
"date" : self.parse_datetime_iso(extr("datetime='", "'")),
|
||||
"source" : text.unescape(text.extr(
|
||||
extr(">Source Link<", "</td>"), "href='", "'")),
|
||||
}
|
||||
|
||||
@@ -177,8 +177,7 @@ class PatreonExtractor(Extractor):
|
||||
post, included, "attachments")
|
||||
attr["attachments_media"] = self._files(
|
||||
post, included, "attachments_media")
|
||||
attr["date"] = self.parse_datetime(
|
||||
attr["published_at"], "%Y-%m-%dT%H:%M:%S.%f%z")
|
||||
attr["date"] = self.parse_datetime_iso(attr["published_at"])
|
||||
|
||||
try:
|
||||
attr["campaign"] = (included["campaign"][
|
||||
@@ -226,8 +225,7 @@ class PatreonExtractor(Extractor):
|
||||
user = response.json()["data"]
|
||||
attr = user["attributes"]
|
||||
attr["id"] = user["id"]
|
||||
attr["date"] = self.parse_datetime(
|
||||
attr["created"], "%Y-%m-%dT%H:%M:%S.%f%z")
|
||||
attr["date"] = self.parse_datetime_iso(attr["created"])
|
||||
return attr
|
||||
|
||||
def _collection(self, collection_id):
|
||||
@@ -236,8 +234,7 @@ class PatreonExtractor(Extractor):
|
||||
coll = data["data"]
|
||||
attr = coll["attributes"]
|
||||
attr["id"] = coll["id"]
|
||||
attr["date"] = self.parse_datetime(
|
||||
attr["created_at"], "%Y-%m-%dT%H:%M:%S.%f%z")
|
||||
attr["date"] = self.parse_datetime_iso(attr["created_at"])
|
||||
return attr
|
||||
|
||||
def _filename(self, url):
|
||||
|
||||
@@ -35,8 +35,7 @@ class PexelsExtractor(Extractor):
|
||||
post["type"] = attr["type"]
|
||||
|
||||
post.update(metadata)
|
||||
post["date"] = self.parse_datetime(
|
||||
post["created_at"][:-5], "%Y-%m-%dT%H:%M:%S")
|
||||
post["date"] = self.parse_datetime_iso(post["created_at"][:-5])
|
||||
|
||||
if "image" in post:
|
||||
url, _, query = post["image"]["download_link"].partition("?")
|
||||
|
||||
@@ -36,8 +36,7 @@ class PhilomenaExtractor(BooruExtractor):
|
||||
return url
|
||||
|
||||
def _prepare(self, post):
|
||||
post["date"] = self.parse_datetime(
|
||||
post["created_at"][:19], "%Y-%m-%dT%H:%M:%S")
|
||||
post["date"] = self.parse_datetime_iso(post["created_at"][:19])
|
||||
|
||||
|
||||
BASE_PATTERN = PhilomenaExtractor.update({
|
||||
|
||||
@@ -29,8 +29,7 @@ class PhotovogueUserExtractor(Extractor):
|
||||
for photo in self.photos():
|
||||
url = photo["gallery_image"]
|
||||
photo["title"] = photo["title"].strip()
|
||||
photo["date"] = self.parse_datetime(
|
||||
photo["date"], "%Y-%m-%dT%H:%M:%S.%f%z")
|
||||
photo["date"] = self.parse_datetime_iso(photo["date"])
|
||||
|
||||
yield Message.Directory, photo
|
||||
yield Message.Url, url, text.nameext_from_url(url, photo)
|
||||
|
||||
@@ -29,8 +29,7 @@ class PicartoGalleryExtractor(Extractor):
|
||||
|
||||
def items(self):
|
||||
for post in self.posts():
|
||||
post["date"] = self.parse_datetime(
|
||||
post["created_at"], "%Y-%m-%d %H:%M:%S")
|
||||
post["date"] = self.parse_datetime_iso(post["created_at"])
|
||||
variations = post.pop("variations", ())
|
||||
yield Message.Directory, post
|
||||
|
||||
|
||||
@@ -26,8 +26,7 @@ class PiczelExtractor(Extractor):
|
||||
def items(self):
|
||||
for post in self.posts():
|
||||
post["tags"] = [t["title"] for t in post["tags"] if t["title"]]
|
||||
post["date"] = self.parse_datetime(
|
||||
post["created_at"], "%Y-%m-%dT%H:%M:%S.%f%z")
|
||||
post["date"] = self.parse_datetime_iso(post["created_at"])
|
||||
|
||||
if post["multi"]:
|
||||
images = post["images"]
|
||||
|
||||
@@ -48,8 +48,7 @@ class PillowfortExtractor(Extractor):
|
||||
for url in inline(post["content"]):
|
||||
files.append({"url": url})
|
||||
|
||||
post["date"] = self.parse_datetime(
|
||||
post["created_at"], "%Y-%m-%dT%H:%M:%S.%f%z")
|
||||
post["date"] = self.parse_datetime_iso(post["created_at"])
|
||||
post["post_id"] = post.pop("id")
|
||||
post["count"] = len(files)
|
||||
yield Message.Directory, post
|
||||
@@ -76,8 +75,7 @@ class PillowfortExtractor(Extractor):
|
||||
if "id" not in file:
|
||||
post["id"] = post["hash"]
|
||||
if "created_at" in file:
|
||||
post["date"] = self.parse_datetime(
|
||||
file["created_at"], "%Y-%m-%dT%H:%M:%S.%f%z")
|
||||
post["date"] = self.parse_datetime_iso(file["created_at"])
|
||||
|
||||
yield msgtype, url, post
|
||||
|
||||
|
||||
@@ -24,10 +24,6 @@ class PixeldrainExtractor(Extractor):
|
||||
if api_key := self.config("api-key"):
|
||||
self.session.auth = util.HTTPBasicAuth("", api_key)
|
||||
|
||||
def _parse_datetime(self, date_string):
|
||||
return self.parse_datetime(
|
||||
date_string, "%Y-%m-%dT%H:%M:%S.%fZ")
|
||||
|
||||
|
||||
class PixeldrainFileExtractor(PixeldrainExtractor):
|
||||
"""Extractor for pixeldrain files"""
|
||||
|
||||
@@ -150,8 +150,7 @@ class PornhubGifExtractor(PornhubExtractor):
|
||||
"tags" : extr("data-context-tag='", "'").split(","),
|
||||
"title": extr('"name": "', '"'),
|
||||
"url" : extr('"contentUrl": "', '"'),
|
||||
"date" : self.parse_datetime(
|
||||
extr('"uploadDate": "', '"'), "%Y-%m-%d"),
|
||||
"date" : self.parse_datetime_iso(extr('"uploadDate": "', '"')),
|
||||
"viewkey" : extr('From this video: '
|
||||
'<a href="/view_video.php?viewkey=', '"'),
|
||||
"timestamp": extr('lass="directLink tstamp" rel="nofollow">', '<'),
|
||||
|
||||
@@ -36,8 +36,7 @@ class Rule34vaultExtractor(BooruExtractor):
|
||||
|
||||
def _prepare(self, post):
|
||||
post.pop("files", None)
|
||||
post["date"] = self.parse_datetime(
|
||||
post["created"], "%Y-%m-%dT%H:%M:%S.%fZ")
|
||||
post["date"] = self.parse_datetime_iso(post["created"])
|
||||
if "tags" in post:
|
||||
post["tags"] = [t["value"] for t in post["tags"]]
|
||||
|
||||
|
||||
@@ -68,8 +68,7 @@ class Rule34xyzExtractor(BooruExtractor):
|
||||
|
||||
def _prepare(self, post):
|
||||
post.pop("files", None)
|
||||
post["date"] = self.parse_datetime(
|
||||
post["created"], "%Y-%m-%dT%H:%M:%S.%fZ")
|
||||
post["date"] = self.parse_datetime_iso(post["created"])
|
||||
post["filename"], _, post["format"] = post["filename"].rpartition(".")
|
||||
if "tags" in post:
|
||||
post["tags"] = [t["value"] for t in post["tags"]]
|
||||
|
||||
@@ -30,10 +30,8 @@ class S3ndpicsExtractor(Extractor):
|
||||
for post in self.posts():
|
||||
post["id"] = post.pop("_id", None)
|
||||
post["user"] = post.pop("userId", None)
|
||||
post["date"] = self.parse_datetime(
|
||||
post["createdAt"], "%Y-%m-%dT%H:%M:%S.%fZ")
|
||||
post["date_updated"] = self.parse_datetime(
|
||||
post["updatedAt"], "%Y-%m-%dT%H:%M:%S.%fZ")
|
||||
post["date"] = self.parse_datetime_iso(post["createdAt"])
|
||||
post["date_updated"] = self.parse_datetime_iso(post["updatedAt"])
|
||||
|
||||
files = post.pop("files", ())
|
||||
post["count"] = len(files)
|
||||
|
||||
@@ -39,8 +39,8 @@ class SlidesharePresentationExtractor(GalleryExtractor):
|
||||
"description" : slideshow["description"].strip(),
|
||||
"views" : slideshow["views"],
|
||||
"likes" : slideshow["likes"],
|
||||
"date" : self.parse_datetime(
|
||||
slideshow["createdAt"], "%Y-%m-%d %H:%M:%S %Z"),
|
||||
"date" : self.parse_datetime_iso(
|
||||
slideshow["createdAt"][:19]),
|
||||
}
|
||||
|
||||
def images(self, page):
|
||||
|
||||
@@ -57,8 +57,7 @@ class SzurubooruExtractor(booru.BooruExtractor):
|
||||
return url
|
||||
|
||||
def _prepare(self, post):
|
||||
post["date"] = self.parse_datetime(
|
||||
post["creationTime"], "%Y-%m-%dT%H:%M:%S.%fZ")
|
||||
post["date"] = self.parse_datetime_iso(post["creationTime"])
|
||||
|
||||
tags = []
|
||||
tags_categories = collections.defaultdict(list)
|
||||
|
||||
@@ -56,8 +56,7 @@ class ThehentaiworldExtractor(Extractor):
|
||||
"id" : text.parse_int(extr(" postid-", " ")),
|
||||
"slug" : extr(" post-", '"'),
|
||||
"tags" : extr('id="tagsHead">', "</ul>"),
|
||||
"date" : self.parse_datetime(extr(
|
||||
"<li>Posted: ", "<"), "%Y-%m-%d"),
|
||||
"date" : self.parse_datetime_iso(extr("<li>Posted: ", "<")),
|
||||
}
|
||||
|
||||
if (c := url[27]) == "v":
|
||||
|
||||
@@ -37,8 +37,7 @@ class TwibooruExtractor(BooruExtractor):
|
||||
return post["view_url"]
|
||||
|
||||
def _prepare(self, post):
|
||||
post["date"] = self.parse_datetime(
|
||||
post["created_at"], "%Y-%m-%dT%H:%M:%S.%fZ")
|
||||
post["date"] = self.parse_datetime_iso(post["created_at"])
|
||||
|
||||
if "name" in post:
|
||||
name, sep, rest = post["name"].rpartition(".")
|
||||
@@ -146,8 +145,8 @@ class TwibooruAPI():
|
||||
return response.json()
|
||||
|
||||
if response.status_code == 429:
|
||||
until = self.parse_datetime(
|
||||
response.headers["X-RL-Reset"], "%Y-%m-%d %H:%M:%S %Z")
|
||||
until = self.parse_datetime_iso(
|
||||
response.headers["X-RL-Reset"][:19])
|
||||
# wait an extra minute, just to be safe
|
||||
self.extractor.wait(until=until, adjust=60.0)
|
||||
continue
|
||||
|
||||
@@ -47,8 +47,8 @@ class VanillarockPostExtractor(VanillarockExtractor):
|
||||
"count": len(imgs),
|
||||
"title": text.unescape(name),
|
||||
"path" : self.path.strip("/"),
|
||||
"date" : self.parse_datetime(extr(
|
||||
'<div class="date">', '</div>'), "%Y-%m-%d %H:%M"),
|
||||
"date" : self.parse_datetime_iso(extr(
|
||||
'<div class="date">', '</div>')),
|
||||
"tags" : text.split_html(extr(
|
||||
'<div class="cat-tag">', '</div>'))[::2],
|
||||
}
|
||||
|
||||
@@ -43,8 +43,7 @@ class WallhavenExtractor(Extractor):
|
||||
wp["url"] = wp.pop("path")
|
||||
if "tags" in wp:
|
||||
wp["tags"] = [t["name"] for t in wp["tags"]]
|
||||
wp["date"] = self.parse_datetime(
|
||||
wp.pop("created_at"), "%Y-%m-%d %H:%M:%S")
|
||||
wp["date"] = self.parse_datetime_iso(wp.pop("created_at"))
|
||||
wp["width"] = wp.pop("dimension_x")
|
||||
wp["height"] = wp.pop("dimension_y")
|
||||
wp["wh_category"] = wp["category"]
|
||||
|
||||
@@ -127,8 +127,8 @@ class WeebcentralMangaExtractor(WeebcentralBase, MangaExtractor):
|
||||
"chapter" : text.parse_int(chapter),
|
||||
"chapter_minor": sep + minor,
|
||||
"chapter_type" : type,
|
||||
"date" : self.parse_datetime(
|
||||
extr(' datetime="', '"')[:-5], "%Y-%m-%dT%H:%M:%S"),
|
||||
"date" : self.parse_datetime_iso(extr(
|
||||
' datetime="', '"')[:-5]),
|
||||
}
|
||||
chapter.update(data)
|
||||
results.append((base + chapter_id, chapter))
|
||||
|
||||
@@ -34,8 +34,8 @@ class WikifeetGalleryExtractor(GalleryExtractor):
|
||||
"celeb" : self.celeb,
|
||||
"type" : self.type,
|
||||
"birthplace": text.unescape(extr('"bplace":"', '"')),
|
||||
"birthday" : self.parse_datetime(text.unescape(
|
||||
extr('"bdate":"', '"'))[:10], "%Y-%m-%d"),
|
||||
"birthday" : self.parse_datetime_iso(text.unescape(extr(
|
||||
'"bdate":"', '"'))[:10]),
|
||||
"shoesize" : text.unescape(extr('"ssize":', ',')),
|
||||
"rating" : text.parse_float(extr('"score":', ',')),
|
||||
"celebrity" : text.unescape(extr('"cname":"', '"')),
|
||||
|
||||
@@ -75,8 +75,7 @@ class WikimediaExtractor(BaseExtractor):
|
||||
for m in image["commonmetadata"] or ()}
|
||||
|
||||
text.nameext_from_url(image["canonicaltitle"].partition(":")[2], image)
|
||||
image["date"] = self.parse_datetime(
|
||||
image["timestamp"], "%Y-%m-%dT%H:%M:%SZ")
|
||||
image["date"] = self.parse_datetime_iso(image["timestamp"])
|
||||
|
||||
def items(self):
|
||||
for info in self._pagination(self.params):
|
||||
|
||||
@@ -55,8 +55,7 @@ class YiffverseExtractor(BooruExtractor):
|
||||
|
||||
def _prepare(self, post):
|
||||
post.pop("files", None)
|
||||
post["date"] = self.parse_datetime(
|
||||
post["created"], "%Y-%m-%dT%H:%M:%S.%fZ")
|
||||
post["date"] = self.parse_datetime_iso(post["created"])
|
||||
post["filename"], _, post["format"] = post["filename"].rpartition(".")
|
||||
if "tags" in post:
|
||||
post["tags"] = [t["value"] for t in post["tags"]]
|
||||
|
||||
Reference in New Issue
Block a user