merge branch 'dt': move datetime utils into separate module

- use 'datetime.fromisoformat()' when possible (#7671)
- return a datetime-compatible object for invalid datetimes
  (instead of a 'str' value)
This commit is contained in:
Mike Fährmann
2025-10-20 09:30:05 +02:00
177 changed files with 652 additions and 708 deletions

115
gallery_dl/dt.py Normal file
View File

@@ -0,0 +1,115 @@
# -*- coding: utf-8 -*-
# Copyright 2025 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
"""Date/Time utilities"""
import sys
import time
from datetime import datetime, date, timedelta, timezone # noqa F401
class NullDatetime(datetime):
    """Falsy datetime subclass used as a placeholder for invalid dates.

    Instances evaluate as False in boolean context and render as
    "[Invalid DateTime]" in both str() and format()/f-string contexts,
    regardless of any strftime-style format spec.
    """

    def __bool__(self):
        return False

    def __str__(self):
        return "[Invalid DateTime]"

    def __format__(self, format_spec):
        # deliberately ignore 'format_spec'; date formatting makes
        # no sense for an invalid value
        return str(self)
# shared sentinel returned for invalid/unparsable datetimes
# (falsy, but still datetime-compatible)
NONE = NullDatetime(1, 1, 1)
# Unix epoch as a naive UTC datetime; reference point for timestamp math
EPOCH = datetime(1970, 1, 1)
# one second as a timedelta; divisor for datetime -> timestamp conversion
SECOND = timedelta(0, 1)
def normalize(dt):
    """Return 'dt' as a naive UTC datetime without microseconds

    Timezone-aware values are converted to UTC and stripped of their
    tzinfo; naive values are assumed to already be UTC. If no change
    is needed, the original object is returned unmodified.
    """
    if dt.tzinfo is None:
        # already naive; only drop sub-second precision if present
        return dt.replace(microsecond=0) if dt.microsecond else dt
    # aware: shift to UTC, then drop tzinfo and microseconds
    return dt.astimezone(timezone.utc).replace(tzinfo=None, microsecond=0)
def convert(value):
    """Convert 'value' to a naive UTC datetime object

    Accepts datetime objects, ISO 8601 strings, and Unix timestamps.
    Falsy input (None, "", 0, NONE) yields the NONE sentinel.
    """
    if not value:
        return NONE
    if isinstance(value, datetime):
        return normalize(value)
    if isinstance(value, str):
        result = parse_iso(value)
        if result is not NONE:
            return result
    # last resort: interpret 'value' as a Unix timestamp
    return parse_ts(value)
def parse(dt_string, format):
    """Parse 'dt_string' according to 'format'

    Returns a normalized (naive UTC, no microseconds) datetime,
    or the NONE sentinel if parsing fails for any reason.
    """
    try:
        parsed = datetime.strptime(dt_string, format)
        return normalize(parsed)
    except Exception:
        # bad input type, mismatched format, out-of-range fields, ...
        return NONE
# 0x30c0000 == 3.12.0; choose implementations based on interpreter version
if sys.hexversion < 0x30c0000:
    # Python <= 3.11
    def parse_iso(dt_string):
        """Parse 'dt_string' as ISO 8601 value"""
        try:
            if dt_string[-1] == "Z":
                # compat for Python < 3.11:
                # fromisoformat() does not accept a 'Z' suffix there
                dt_string = dt_string[:-1]
            elif dt_string[-5] in "+-":
                # compat for Python < 3.11:
                # insert ':' into colon-less UTC offsets (+HHMM -> +HH:MM)
                dt_string = f"{dt_string[:-2]}:{dt_string[-2:]}"
            return normalize(datetime.fromisoformat(dt_string))
        except Exception:
            # invalid input; return the falsy sentinel
            return NONE
    from_ts = datetime.utcfromtimestamp
    now = datetime.utcnow
else:
    # Python >= 3.12
    def parse_iso(dt_string):
        """Parse 'dt_string' as ISO 8601 value"""
        # fromisoformat() handles 'Z' and '+HHMM' offsets natively here
        try:
            return normalize(datetime.fromisoformat(dt_string))
        except Exception:
            return NONE
    def from_ts(ts=None):
        """Convert Unix timestamp to naive UTC datetime"""
        # datetime.utcfromtimestamp()/utcnow() are deprecated since 3.12;
        # build the value from time.gmtime() instead
        # (time.gmtime(None) uses the current time)
        Y, m, d, H, M, S, _, _, _ = time.gmtime(ts)
        return datetime(Y, m, d, H, M, S)
    now = from_ts
def parse_ts(ts, default=NONE):
    """Create a datetime object from a Unix timestamp

    'ts' may be anything int() accepts (int, float, numeric str).
    Returns 'default' when conversion fails, e.g. for non-numeric
    input or out-of-range timestamp values.
    """
    try:
        seconds = int(ts)
        return from_ts(seconds)
    except Exception:
        return default
def to_ts(dt):
    """Convert naive UTC datetime to Unix timestamp

    Returns the timestamp as a float (seconds since 1970-01-01 UTC).
    """
    # timedelta.total_seconds() is exactly (dt - epoch) / one-second
    return (dt - datetime(1970, 1, 1)).total_seconds()
def to_ts_string(dt):
    """Convert naive UTC datetime to Unix timestamp string

    The timestamp is floored to whole seconds. Returns "" when 'dt'
    does not support datetime subtraction (e.g. None or a str).
    """
    try:
        # floor division of timedeltas yields an int
        seconds = (dt - datetime(1970, 1, 1)) // timedelta(0, 1)
    except Exception:
        return ""
    return str(seconds)

View File

@@ -46,7 +46,7 @@ class _2chThreadExtractor(Extractor):
for post in posts: for post in posts:
if files := post.get("files"): if files := post.get("files"):
post["post_name"] = post["name"] post["post_name"] = post["name"]
post["date"] = text.parse_timestamp(post["timestamp"]) post["date"] = self.parse_timestamp(post["timestamp"])
del post["files"] del post["files"]
del post["name"] del post["name"]

View File

@@ -65,7 +65,7 @@ class _2chenThreadExtractor(Extractor):
extr = text.extract_from(post) extr = text.extract_from(post)
return { return {
"name" : text.unescape(extr("<span>", "</span>")), "name" : text.unescape(extr("<span>", "</span>")),
"date" : text.parse_datetime( "date" : self.parse_datetime(
extr("<time", "<").partition(">")[2], extr("<time", "<").partition(">")[2],
"%d %b %Y (%a) %H:%M:%S" "%d %b %Y (%a) %H:%M:%S"
), ),

View File

@@ -7,7 +7,7 @@
"""Extractors for https://4archive.org/""" """Extractors for https://4archive.org/"""
from .common import Extractor, Message from .common import Extractor, Message
from .. import text, util from .. import text, dt
class _4archiveThreadExtractor(Extractor): class _4archiveThreadExtractor(Extractor):
@@ -37,7 +37,7 @@ class _4archiveThreadExtractor(Extractor):
for post in posts: for post in posts:
post.update(data) post.update(data)
post["time"] = int(util.datetime_to_timestamp(post["date"])) post["time"] = int(dt.to_ts(post["date"]))
yield Message.Directory, post yield Message.Directory, post
if "url" in post: if "url" in post:
yield Message.Url, post["url"], text.nameext_from_url( yield Message.Url, post["url"], text.nameext_from_url(
@@ -61,10 +61,9 @@ class _4archiveThreadExtractor(Extractor):
extr = text.extract_from(post) extr = text.extract_from(post)
data = { data = {
"name": extr('class="name">', "</span>"), "name": extr('class="name">', "</span>"),
"date": text.parse_datetime( "date": self.parse_datetime_iso(
(extr('class="dateTime">', "<") or (extr('class="dateTime">', "<") or
extr('class="dateTime postNum" >', "<")).strip(), extr('class="dateTime postNum" >', "<")).strip()),
"%Y-%m-%d %H:%M:%S"),
"no" : text.parse_int(extr(">Post No.", "<")), "no" : text.parse_int(extr(">Post No.", "<")),
} }
if 'class="file"' in post: if 'class="file"' in post:

View File

@@ -9,9 +9,8 @@
"""Extractors for https://8chan.moe/""" """Extractors for https://8chan.moe/"""
from .common import Extractor, Message from .common import Extractor, Message
from .. import text, util from .. import text, dt
from ..cache import memcache from ..cache import memcache
from datetime import timedelta
import itertools import itertools
BASE_PATTERN = r"(?:https?://)?8chan\.(moe|se|cc)" BASE_PATTERN = r"(?:https?://)?8chan\.(moe|se|cc)"
@@ -44,7 +43,7 @@ class _8chanExtractor(Extractor):
def cookies_prepare(self): def cookies_prepare(self):
# fetch captcha cookies # fetch captcha cookies
# (necessary to download without getting interrupted) # (necessary to download without getting interrupted)
now = util.datetime_utcnow() now = dt.now()
url = self.root + "/captcha.js" url = self.root + "/captcha.js"
params = {"d": now.strftime("%a %b %d %Y %H:%M:%S GMT+0000 (UTC)")} params = {"d": now.strftime("%a %b %d %Y %H:%M:%S GMT+0000 (UTC)")}
self.request(url, params=params).content self.request(url, params=params).content
@@ -57,7 +56,7 @@ class _8chanExtractor(Extractor):
if cookie.domain.endswith(domain): if cookie.domain.endswith(domain):
cookie.expires = None cookie.expires = None
if cookie.name == "captchaexpiration": if cookie.name == "captchaexpiration":
cookie.value = (now + timedelta(30, 300)).strftime( cookie.value = (now + dt.timedelta(30, 300)).strftime(
"%a, %d %b %Y %H:%M:%S GMT") "%a, %d %b %Y %H:%M:%S GMT")
return self.cookies return self.cookies

View File

@@ -85,8 +85,7 @@ class _8musesAlbumExtractor(Extractor):
"parent" : text.parse_int(album["parentId"]), "parent" : text.parse_int(album["parentId"]),
"views" : text.parse_int(album["numberViews"]), "views" : text.parse_int(album["numberViews"]),
"likes" : text.parse_int(album["numberLikes"]), "likes" : text.parse_int(album["numberLikes"]),
"date" : text.parse_datetime( "date" : self.parse_datetime_iso(album["updatedAt"]),
album["updatedAt"], "%Y-%m-%dT%H:%M:%S.%fZ"),
} }
def _unobfuscate(self, data): def _unobfuscate(self, data):

View File

@@ -33,7 +33,7 @@ class AdultempireGalleryExtractor(GalleryExtractor):
"gallery_id": text.parse_int(self.gallery_id), "gallery_id": text.parse_int(self.gallery_id),
"title" : text.unescape(extr('title="', '"')), "title" : text.unescape(extr('title="', '"')),
"studio" : extr(">studio</small>", "<").strip(), "studio" : extr(">studio</small>", "<").strip(),
"date" : text.parse_datetime(extr( "date" : self.parse_datetime(extr(
">released</small>", "<").strip(), "%m/%d/%Y"), ">released</small>", "<").strip(), "%m/%d/%Y"),
"actors" : sorted(text.split_html(extr( "actors" : sorted(text.split_html(extr(
'<ul class="item-details item-cast-list ', '</ul>'))[1:]), '<ul class="item-details item-cast-list ', '</ul>'))[1:]),

View File

@@ -33,7 +33,7 @@ class AgnphExtractor(booru.BooruExtractor):
self.cookies.set("confirmed_age", "true", domain="agn.ph") self.cookies.set("confirmed_age", "true", domain="agn.ph")
def _prepare(self, post): def _prepare(self, post):
post["date"] = text.parse_timestamp(post["created_at"]) post["date"] = self.parse_timestamp(post["created_at"])
post["status"] = post["status"].strip() post["status"] = post["status"].strip()
post["has_children"] = ("true" in post["has_children"]) post["has_children"] = ("true" in post["has_children"])

View File

@@ -182,11 +182,11 @@ class Ao3WorkExtractor(Ao3Extractor):
extr('<dd class="freeform tags">', "</dd>")), extr('<dd class="freeform tags">', "</dd>")),
"lang" : extr('<dd class="language" lang="', '"'), "lang" : extr('<dd class="language" lang="', '"'),
"series" : extr('<dd class="series">', "</dd>"), "series" : extr('<dd class="series">', "</dd>"),
"date" : text.parse_datetime( "date" : self.parse_datetime_iso(extr(
extr('<dd class="published">', "<"), "%Y-%m-%d"), '<dd class="published">', "<")),
"date_completed": text.parse_datetime( "date_completed": self.parse_datetime_iso(extr(
extr('>Completed:</dt><dd class="status">', "<"), "%Y-%m-%d"), '>Completed:</dt><dd class="status">', "<")),
"date_updated" : text.parse_timestamp( "date_updated" : self.parse_timestamp(
path.rpartition("updated_at=")[2]), path.rpartition("updated_at=")[2]),
"words" : text.parse_int( "words" : text.parse_int(
extr('<dd class="words">', "<").replace(",", "")), extr('<dd class="words">', "<").replace(",", "")),

View File

@@ -49,8 +49,7 @@ class ArcalivePostExtractor(ArcaliveExtractor):
files = self._extract_files(post) files = self._extract_files(post)
post["count"] = len(files) post["count"] = len(files)
post["date"] = text.parse_datetime( post["date"] = self.parse_datetime_iso(post["createdAt"][:19])
post["createdAt"][:19], "%Y-%m-%dT%H:%M:%S")
post["post_url"] = post_url = \ post["post_url"] = post_url = \
f"{self.root}/b/{post['boardSlug']}/{post['id']}" f"{self.root}/b/{post['boardSlug']}/{post['id']}"
post["_http_headers"] = {"Referer": post_url + "?p=1"} post["_http_headers"] = {"Referer": post_url + "?p=1"}

View File

@@ -126,8 +126,7 @@ class ArtstationExtractor(Extractor):
data["title"] = text.unescape(data["title"]) data["title"] = text.unescape(data["title"])
data["description"] = text.unescape(text.remove_html( data["description"] = text.unescape(text.remove_html(
data["description"])) data["description"]))
data["date"] = text.parse_datetime( data["date"] = self.parse_datetime_iso(data["created_at"])
data["created_at"], "%Y-%m-%dT%H:%M:%S.%f%z")
assets = data["assets"] assets = data["assets"]
del data["assets"] del data["assets"]

View File

@@ -9,10 +9,9 @@
"""Extractors for https://aryion.com/""" """Extractors for https://aryion.com/"""
from .common import Extractor, Message from .common import Extractor, Message
from .. import text, util, exception from .. import text, util, dt, exception
from ..cache import cache from ..cache import cache
from email.utils import parsedate_tz from email.utils import parsedate_tz
from datetime import datetime
BASE_PATTERN = r"(?:https?://)?(?:www\.)?aryion\.com/g4" BASE_PATTERN = r"(?:https?://)?(?:www\.)?aryion\.com/g4"
@@ -156,7 +155,7 @@ class AryionExtractor(Extractor):
"artist": artist, "artist": artist,
"path" : text.split_html(extr( "path" : text.split_html(extr(
"cookiecrumb'>", '</span'))[4:-1:2], "cookiecrumb'>", '</span'))[4:-1:2],
"date" : datetime(*parsedate_tz(lmod)[:6]), "date" : dt.datetime(*parsedate_tz(lmod)[:6]),
"size" : text.parse_int(clen), "size" : text.parse_int(clen),
"views" : text.parse_int(extr("Views</b>:", "<").replace(",", "")), "views" : text.parse_int(extr("Views</b>:", "<").replace(",", "")),
"width" : text.parse_int(extr("Resolution</b>:", "x")), "width" : text.parse_int(extr("Resolution</b>:", "x")),

View File

@@ -123,7 +123,7 @@ class BatotoChapterExtractor(BatotoBase, ChapterExtractor):
"chapter_minor" : minor, "chapter_minor" : minor,
"chapter_string": info, "chapter_string": info,
"chapter_id" : text.parse_int(self.chapter_id), "chapter_id" : text.parse_int(self.chapter_id),
"date" : text.parse_timestamp(extr(' time="', '"')[:-3]), "date" : self.parse_timestamp(extr(' time="', '"')[:-3]),
} }
def images(self, page): def images(self, page):
@@ -167,8 +167,7 @@ class BatotoMangaExtractor(BatotoBase, MangaExtractor):
data["chapter"] = text.parse_int(chapter) data["chapter"] = text.parse_int(chapter)
data["chapter_minor"] = sep + minor data["chapter_minor"] = sep + minor
data["date"] = text.parse_datetime( data["date"] = self.parse_datetime_iso(extr('time="', '"'))
extr('time="', '"'), "%Y-%m-%dT%H:%M:%S.%fZ")
url = f"{self.root}/title/{href}" url = f"{self.root}/title/{href}"
results.append((url, data.copy())) results.append((url, data.copy()))
@@ -188,9 +187,9 @@ def _manga_info(self, manga_id, page=None):
"manga" : data["name"][1], "manga" : data["name"][1],
"manga_id" : text.parse_int(manga_id), "manga_id" : text.parse_int(manga_id),
"manga_slug" : data["slug"][1], "manga_slug" : data["slug"][1],
"manga_date" : text.parse_timestamp( "manga_date" : self.parse_timestamp(
data["dateCreate"][1] // 1000), data["dateCreate"][1] // 1000),
"manga_date_updated": text.parse_timestamp( "manga_date_updated": self.parse_timestamp(
data["dateUpdate"][1] / 1000), data["dateUpdate"][1] / 1000),
"author" : json_list(data["authors"]), "author" : json_list(data["authors"]),
"artist" : json_list(data["artists"]), "artist" : json_list(data["artists"]),

View File

@@ -67,7 +67,7 @@ class BehanceExtractor(Extractor):
tags = [tag["title"] for tag in tags] tags = [tag["title"] for tag in tags]
data["tags"] = tags data["tags"] = tags
data["date"] = text.parse_timestamp( data["date"] = self.parse_timestamp(
data.get("publishedOn") or data.get("conceived_on") or 0) data.get("publishedOn") or data.get("conceived_on") or 0)
if creator := data.get("creator"): if creator := data.get("creator"):

View File

@@ -144,8 +144,8 @@ class BellazonExtractor(Extractor):
"title": schema["headline"], "title": schema["headline"],
"views": stats[0]["userInteractionCount"], "views": stats[0]["userInteractionCount"],
"posts": stats[1]["userInteractionCount"], "posts": stats[1]["userInteractionCount"],
"date" : text.parse_datetime(schema["datePublished"]), "date" : self.parse_datetime_iso(schema["datePublished"]),
"date_updated": text.parse_datetime(schema["dateModified"]), "date_updated": self.parse_datetime_iso(schema["dateModified"]),
"description" : text.unescape(schema["text"]).strip(), "description" : text.unescape(schema["text"]).strip(),
"section" : path[-2], "section" : path[-2],
"author" : author["name"], "author" : author["name"],
@@ -169,7 +169,7 @@ class BellazonExtractor(Extractor):
post = { post = {
"id": extr('id="elComment_', '"'), "id": extr('id="elComment_', '"'),
"author_url": extr(" href='", "'"), "author_url": extr(" href='", "'"),
"date": text.parse_datetime(extr("datetime='", "'")), "date": self.parse_datetime_iso(extr("datetime='", "'")),
"content": extr("<!-- Post content -->", "\n\t\t</div>"), "content": extr("<!-- Post content -->", "\n\t\t</div>"),
} }

View File

@@ -40,7 +40,7 @@ class BloggerExtractor(BaseExtractor):
blog = self.api.blog_by_url("http://" + self.blog) blog = self.api.blog_by_url("http://" + self.blog)
blog["pages"] = blog["pages"]["totalItems"] blog["pages"] = blog["pages"]["totalItems"]
blog["posts"] = blog["posts"]["totalItems"] blog["posts"] = blog["posts"]["totalItems"]
blog["date"] = text.parse_datetime(blog["published"]) blog["date"] = self.parse_datetime_iso(blog["published"])
del blog["selfLink"] del blog["selfLink"]
findall_image = util.re( findall_image = util.re(
@@ -65,7 +65,7 @@ class BloggerExtractor(BaseExtractor):
post["author"] = post["author"]["displayName"] post["author"] = post["author"]["displayName"]
post["replies"] = post["replies"]["totalItems"] post["replies"] = post["replies"]["totalItems"]
post["content"] = text.remove_html(content) post["content"] = text.remove_html(content)
post["date"] = text.parse_datetime(post["published"]) post["date"] = self.parse_datetime_iso(post["published"])
del post["selfLink"] del post["selfLink"]
del post["blog"] del post["blog"]

View File

@@ -135,8 +135,7 @@ class BlueskyExtractor(Extractor):
post["instance"] = self.instance post["instance"] = self.instance
post["post_id"] = self._pid(post) post["post_id"] = self._pid(post)
post["date"] = text.parse_datetime( post["date"] = self.parse_datetime_iso(post["createdAt"][:19])
post["createdAt"][:19], "%Y-%m-%dT%H:%M:%S")
def _extract_files(self, post): def _extract_files(self, post):
if "embed" not in post: if "embed" not in post:

View File

@@ -78,7 +78,7 @@ class BoostyExtractor(Extractor):
post["links"] = links = [] post["links"] = links = []
if "createdAt" in post: if "createdAt" in post:
post["date"] = text.parse_timestamp(post["createdAt"]) post["date"] = self.parse_timestamp(post["createdAt"])
for block in post["data"]: for block in post["data"]:
try: try:

View File

@@ -70,8 +70,7 @@ class BoothItemExtractor(BoothExtractor):
url + ".json", headers=headers, interval=False) url + ".json", headers=headers, interval=False)
item["booth_category"] = item.pop("category", None) item["booth_category"] = item.pop("category", None)
item["date"] = text.parse_datetime( item["date"] = self.parse_datetime_iso(item["published_at"])
item["published_at"], "%Y-%m-%dT%H:%M:%S.%f%z")
item["tags"] = [t["name"] for t in item["tags"]] item["tags"] = [t["name"] for t in item["tags"]]
shop = item["shop"] shop = item["shop"]

View File

@@ -168,7 +168,7 @@ class BunkrAlbumExtractor(LolisafeAlbumExtractor):
item, 'name: "', ".") item, 'name: "', ".")
file["size"] = text.parse_int(text.extr( file["size"] = text.parse_int(text.extr(
item, "size: ", " ,\n")) item, "size: ", " ,\n"))
file["date"] = text.parse_datetime(text.extr( file["date"] = self.parse_datetime(text.extr(
item, 'timestamp: "', '"'), "%H:%M:%S %d/%m/%Y") item, 'timestamp: "', '"'), "%H:%M:%S %d/%m/%Y")
yield file yield file

View File

@@ -28,7 +28,7 @@ class CatboxAlbumExtractor(GalleryExtractor):
return { return {
"album_id" : self.page_url.rpartition("/")[2], "album_id" : self.page_url.rpartition("/")[2],
"album_name" : text.unescape(extr("<h1>", "<")), "album_name" : text.unescape(extr("<h1>", "<")),
"date" : text.parse_datetime(extr( "date" : self.parse_datetime(extr(
"<p>Created ", "<"), "%B %d %Y"), "<p>Created ", "<"), "%B %d %Y"),
"description": text.unescape(extr("<p>", "<")), "description": text.unescape(extr("<p>", "<")),
} }

View File

@@ -79,8 +79,7 @@ class CheveretoImageExtractor(CheveretoExtractor):
"url" : url, "url" : url,
"album": text.remove_html(extr( "album": text.remove_html(extr(
"Added to <a", "</a>").rpartition(">")[2]), "Added to <a", "</a>").rpartition(">")[2]),
"date" : text.parse_datetime(extr( "date" : self.parse_datetime_iso(extr('<span title="', '"')),
'<span title="', '"'), "%Y-%m-%d %H:%M:%S"),
"user" : extr('username: "', '"'), "user" : extr('username: "', '"'),
} }
@@ -116,8 +115,7 @@ class CheveretoVideoExtractor(CheveretoExtractor):
'class="far fa-clock"></i>', ""), 'class="far fa-clock"></i>', ""),
"album": text.remove_html(extr( "album": text.remove_html(extr(
"Added to <a", "</a>").rpartition(">")[2]), "Added to <a", "</a>").rpartition(">")[2]),
"date" : text.parse_datetime(extr( "date" : self.parse_datetime_iso(extr('<span title="', '"')),
'<span title="', '"'), "%Y-%m-%d %H:%M:%S"),
"user" : extr('username: "', '"'), "user" : extr('username: "', '"'),
} }

View File

@@ -61,7 +61,7 @@ class CienArticleExtractor(CienExtractor):
post["post_url"] = url post["post_url"] = url
post["post_id"] = text.parse_int(post_id) post["post_id"] = text.parse_int(post_id)
post["count"] = len(files) post["count"] = len(files)
post["date"] = text.parse_datetime(post["datePublished"]) post["date"] = self.parse_datetime_iso(post["datePublished"])
try: try:
post["author"]["id"] = text.parse_int(author_id) post["author"]["id"] = text.parse_int(author_id)

View File

@@ -86,8 +86,7 @@ class CivitaiExtractor(Extractor):
images = self.api.images_post(post["id"]) images = self.api.images_post(post["id"])
post = self.api.post(post["id"]) post = self.api.post(post["id"])
post["date"] = text.parse_datetime( post["date"] = self.parse_datetime_iso(post["publishedAt"])
post["publishedAt"], "%Y-%m-%dT%H:%M:%S.%fZ")
data = { data = {
"post": post, "post": post,
"user": post.pop("user"), "user": post.pop("user"),
@@ -122,8 +121,7 @@ class CivitaiExtractor(Extractor):
data["post"] = post = self._extract_meta_post(file) data["post"] = post = self._extract_meta_post(file)
if post: if post:
post.pop("user", None) post.pop("user", None)
file["date"] = text.parse_datetime( file["date"] = self.parse_datetime_iso(file["createdAt"])
file["createdAt"], "%Y-%m-%dT%H:%M:%S.%fZ")
data["url"] = url = self._url(file) data["url"] = url = self._url(file)
text.nameext_from_url(url, data) text.nameext_from_url(url, data)
@@ -180,8 +178,7 @@ class CivitaiExtractor(Extractor):
if "id" not in file and data["filename"].isdecimal(): if "id" not in file and data["filename"].isdecimal():
file["id"] = text.parse_int(data["filename"]) file["id"] = text.parse_int(data["filename"])
if "date" not in file: if "date" not in file:
file["date"] = text.parse_datetime( file["date"] = self.parse_datetime_iso(file["createdAt"])
file["createdAt"], "%Y-%m-%dT%H:%M:%S.%fZ")
if self._meta_generation: if self._meta_generation:
file["generation"] = self._extract_meta_generation(file) file["generation"] = self._extract_meta_generation(file)
yield data yield data
@@ -216,8 +213,7 @@ class CivitaiExtractor(Extractor):
def _extract_meta_post(self, image): def _extract_meta_post(self, image):
try: try:
post = self.api.post(image["postId"]) post = self.api.post(image["postId"])
post["date"] = text.parse_datetime( post["date"] = self.parse_datetime_iso(post["publishedAt"])
post["publishedAt"], "%Y-%m-%dT%H:%M:%S.%fZ")
return post return post
except Exception as exc: except Exception as exc:
return self.log.traceback(exc) return self.log.traceback(exc)
@@ -278,8 +274,7 @@ class CivitaiModelExtractor(CivitaiExtractor):
versions = (version,) versions = (version,)
for version in versions: for version in versions:
version["date"] = text.parse_datetime( version["date"] = self.parse_datetime_iso(version["createdAt"])
version["createdAt"], "%Y-%m-%dT%H:%M:%S.%fZ")
data = { data = {
"model" : model, "model" : model,
@@ -593,8 +588,7 @@ class CivitaiGeneratedExtractor(CivitaiExtractor):
self._require_auth() self._require_auth()
for gen in self.api.orchestrator_queryGeneratedImages(): for gen in self.api.orchestrator_queryGeneratedImages():
gen["date"] = text.parse_datetime( gen["date"] = self.parse_datetime_iso(gen["createdAt"])
gen["createdAt"], "%Y-%m-%dT%H:%M:%S.%fZ")
yield Message.Directory, gen yield Message.Directory, gen
for step in gen.pop("steps", ()): for step in gen.pop("steps", ()):
for image in step.pop("images", ()): for image in step.pop("images", ()):

View File

@@ -114,10 +114,8 @@ class ComickChapterExtractor(ComickBase, ChapterExtractor):
"chapter_hid" : ch["hid"], "chapter_hid" : ch["hid"],
"chapter_string": chstr, "chapter_string": chstr,
"group" : ch["group_name"], "group" : ch["group_name"],
"date" : text.parse_datetime( "date" : self.parse_datetime_iso(ch["created_at"][:19]),
ch["created_at"][:19], "%Y-%m-%dT%H:%M:%S"), "date_updated" : self.parse_datetime_iso(ch["updated_at"][:19]),
"date_updated" : text.parse_datetime(
ch["updated_at"][:19], "%Y-%m-%dT%H:%M:%S"),
"lang" : ch["lang"], "lang" : ch["lang"],
} }

View File

@@ -60,6 +60,6 @@ class ComicvineTagExtractor(BooruExtractor):
_file_url = operator.itemgetter("original") _file_url = operator.itemgetter("original")
def _prepare(self, post): def _prepare(self, post):
post["date"] = text.parse_datetime( post["date"] = self.parse_datetime(
post["dateCreated"], "%a, %b %d %Y") post["dateCreated"], "%a, %b %d %Y")
post["tags"] = [tag["name"] for tag in post["tags"] if tag["name"]] post["tags"] = [tag["name"] for tag in post["tags"] if tag["name"]]

View File

@@ -19,11 +19,10 @@ import getpass
import logging import logging
import requests import requests
import threading import threading
from datetime import datetime
from xml.etree import ElementTree from xml.etree import ElementTree
from requests.adapters import HTTPAdapter from requests.adapters import HTTPAdapter
from .message import Message from .message import Message
from .. import config, output, text, util, cache, exception from .. import config, output, text, util, dt, cache, exception
urllib3 = requests.packages.urllib3 urllib3 = requests.packages.urllib3
@@ -64,6 +63,10 @@ class Extractor():
else: else:
self.category = CATEGORY_MAP[self.category] self.category = CATEGORY_MAP[self.category]
self.parse_datetime = dt.parse
self.parse_datetime_iso = dt.parse_iso
self.parse_timestamp = dt.parse_ts
self._cfgpath = ("extractor", self.category, self.subcategory) self._cfgpath = ("extractor", self.category, self.subcategory)
self._parentdir = "" self._parentdir = ""
@@ -313,9 +316,9 @@ class Extractor():
seconds = float(seconds) seconds = float(seconds)
until = now + seconds until = now + seconds
elif until: elif until:
if isinstance(until, datetime): if isinstance(until, dt.datetime):
# convert to UTC timestamp # convert to UTC timestamp
until = util.datetime_to_timestamp(until) until = dt.to_ts(until)
else: else:
until = float(until) until = float(until)
seconds = until - now seconds = until - now
@@ -327,7 +330,7 @@ class Extractor():
return return
if reason: if reason:
t = datetime.fromtimestamp(until).time() t = dt.datetime.fromtimestamp(until).time()
isotime = f"{t.hour:02}:{t.minute:02}:{t.second:02}" isotime = f"{t.hour:02}:{t.minute:02}:{t.second:02}"
self.log.info("Waiting until %s (%s)", isotime, reason) self.log.info("Waiting until %s (%s)", isotime, reason)
time.sleep(seconds) time.sleep(seconds)
@@ -652,7 +655,7 @@ class Extractor():
self.log.warning( self.log.warning(
"cookies: %s/%s expired at %s", "cookies: %s/%s expired at %s",
cookie.domain.lstrip("."), cookie.name, cookie.domain.lstrip("."), cookie.name,
datetime.fromtimestamp(cookie.expires)) dt.datetime.fromtimestamp(cookie.expires))
continue continue
elif diff <= 86400: elif diff <= 86400:
@@ -694,7 +697,7 @@ class Extractor():
ts = self.config(key, default) ts = self.config(key, default)
if isinstance(ts, str): if isinstance(ts, str):
try: try:
ts = int(datetime.strptime(ts, fmt).timestamp()) ts = int(dt.parse(ts, fmt).timestamp())
except ValueError as exc: except ValueError as exc:
self.log.warning("Unable to parse '%s': %s", key, exc) self.log.warning("Unable to parse '%s': %s", key, exc)
ts = default ts = default

View File

@@ -47,7 +47,7 @@ class CyberdropAlbumExtractor(lolisafe.LolisafeAlbumExtractor):
"album_name" : text.unescape(extr('title="', '"')), "album_name" : text.unescape(extr('title="', '"')),
"album_size" : text.parse_bytes(extr( "album_size" : text.parse_bytes(extr(
'<p class="title">', "B")), '<p class="title">', "B")),
"date" : text.parse_datetime(extr( "date" : self.parse_datetime(extr(
'<p class="title">', '<'), "%d.%m.%Y"), '<p class="title">', '<'), "%d.%m.%Y"),
"description": text.unescape(text.unescape( # double "description": text.unescape(text.unescape( # double
desc.rpartition(" [R")[0])), desc.rpartition(" [R")[0])),

View File

@@ -113,7 +113,7 @@ class CyberfileFileExtractor(CyberfileExtractor):
"Filesize:", "</tr>"))[:-1]), "Filesize:", "</tr>"))[:-1]),
"tags" : text.split_html(extr( "tags" : text.split_html(extr(
"Keywords:", "</tr>")), "Keywords:", "</tr>")),
"date" : text.parse_datetime(text.remove_html(extr( "date" : self.parse_datetime(text.remove_html(extr(
"Uploaded:", "</tr>")), "%d/%m/%Y %H:%M:%S"), "Uploaded:", "</tr>")), "%d/%m/%Y %H:%M:%S"),
"permissions": text.remove_html(extr( "permissions": text.remove_html(extr(
"Permissions:", "</tr>")).split(" &amp; "), "Permissions:", "</tr>")).split(" &amp; "),

View File

@@ -9,8 +9,7 @@
"""Extractors for https://danbooru.donmai.us/ and other Danbooru instances""" """Extractors for https://danbooru.donmai.us/ and other Danbooru instances"""
from .common import BaseExtractor, Message from .common import BaseExtractor, Message
from .. import text, util from .. import text, util, dt
import datetime
class DanbooruExtractor(BaseExtractor): class DanbooruExtractor(BaseExtractor):
@@ -69,8 +68,7 @@ class DanbooruExtractor(BaseExtractor):
continue continue
text.nameext_from_url(url, post) text.nameext_from_url(url, post)
post["date"] = text.parse_datetime( post["date"] = dt.parse_iso(post["created_at"])
post["created_at"], "%Y-%m-%dT%H:%M:%S.%f%z")
post["tags"] = ( post["tags"] = (
post["tag_string"].split(" ") post["tag_string"].split(" ")
@@ -357,11 +355,11 @@ class DanbooruPopularExtractor(DanbooruExtractor):
def metadata(self): def metadata(self):
self.params = params = text.parse_query(self.groups[-1]) self.params = params = text.parse_query(self.groups[-1])
scale = params.get("scale", "day") scale = params.get("scale", "day")
date = params.get("date") or datetime.date.today().isoformat() date = params.get("date") or dt.date.today().isoformat()
if scale == "week": if scale == "week":
date = datetime.date.fromisoformat(date) date = dt.date.fromisoformat(date)
date = (date - datetime.timedelta(days=date.weekday())).isoformat() date = (date - dt.timedelta(days=date.weekday())).isoformat()
elif scale == "month": elif scale == "month":
date = date[:-3] date = date[:-3]

View File

@@ -68,7 +68,7 @@ class DankefuerslesenChapterExtractor(DankefuerslesenBase, ChapterExtractor):
"chapter_minor": minor, "chapter_minor": minor,
"group" : manga["groups"][group_id].split(" & "), "group" : manga["groups"][group_id].split(" & "),
"group_id" : text.parse_int(group_id), "group_id" : text.parse_int(group_id),
"date" : text.parse_timestamp(data["release_date"][group_id]), "date" : self.parse_timestamp(data["release_date"][group_id]),
"lang" : util.NONE, "lang" : util.NONE,
"language" : util.NONE, "language" : util.NONE,
} }

View File

@@ -9,7 +9,7 @@
"""Extractors for https://www.deviantart.com/""" """Extractors for https://www.deviantart.com/"""
from .common import Extractor, Message, Dispatch from .common import Extractor, Message, Dispatch
from .. import text, util, exception from .. import text, util, dt, exception
from ..cache import cache, memcache from ..cache import cache, memcache
import collections import collections
import mimetypes import mimetypes
@@ -259,7 +259,7 @@ class DeviantartExtractor(Extractor):
deviation["published_time"] = text.parse_int( deviation["published_time"] = text.parse_int(
deviation["published_time"]) deviation["published_time"])
deviation["date"] = text.parse_timestamp( deviation["date"] = self.parse_timestamp(
deviation["published_time"]) deviation["published_time"])
if self.comments: if self.comments:
@@ -1187,8 +1187,8 @@ class DeviantartStatusExtractor(DeviantartExtractor):
deviation["username"] = deviation["author"]["username"] deviation["username"] = deviation["author"]["username"]
deviation["_username"] = deviation["username"].lower() deviation["_username"] = deviation["username"].lower()
deviation["date"] = dt = text.parse_datetime(deviation["ts"]) deviation["date"] = d = self.parse_datetime_iso(deviation["ts"])
deviation["published_time"] = int(util.datetime_to_timestamp(dt)) deviation["published_time"] = int(dt.to_ts(d))
deviation["da_category"] = "Status" deviation["da_category"] = "Status"
deviation["category_path"] = "status" deviation["category_path"] = "status"

View File

@@ -72,9 +72,7 @@ class DiscordExtractor(Extractor):
"author_files": [], "author_files": [],
"message": self.extract_message_text(message), "message": self.extract_message_text(message),
"message_id": message["id"], "message_id": message["id"],
"date": text.parse_datetime( "date": self.parse_datetime_iso(message["timestamp"]),
message["timestamp"], "%Y-%m-%dT%H:%M:%S.%f%z"
),
"files": [] "files": []
}) })

View File

@@ -62,7 +62,7 @@ class DynastyscansChapterExtractor(DynastyscansBase, ChapterExtractor):
"author" : text.remove_html(author), "author" : text.remove_html(author),
"group" : (text.remove_html(group) or "group" : (text.remove_html(group) or
text.extr(group, ' alt="', '"')), text.extr(group, ' alt="', '"')),
"date" : text.parse_datetime(extr( "date" : self.parse_datetime(extr(
'"icon-calendar"></i> ', '<'), "%b %d, %Y"), '"icon-calendar"></i> ', '<'), "%b %d, %Y"),
"tags" : text.split_html(extr( "tags" : text.split_html(extr(
"class='tags'>", "<div id='chapter-actions'")), "class='tags'>", "<div id='chapter-actions'")),
@@ -166,8 +166,6 @@ class DynastyscansAnthologyExtractor(DynastyscansSearchExtractor):
data["scanlator"] = content[1].text[11:] data["scanlator"] = content[1].text[11:]
data["tags"] = content[2].text[6:].lower().split(", ") data["tags"] = content[2].text[6:].lower().split(", ")
data["title"] = element[5].text data["title"] = element[5].text
data["date"] = text.parse_datetime( data["date"] = self.parse_datetime_iso(element[1].text)
element[1].text, "%Y-%m-%dT%H:%M:%S%z") data["date_updated"] = self.parse_datetime_iso(element[2].text)
data["date_updated"] = text.parse_datetime(
element[2].text, "%Y-%m-%dT%H:%M:%S%z")
yield Message.Queue, element[4].text, data yield Message.Queue, element[4].text, data

View File

@@ -51,8 +51,7 @@ class E621Extractor(danbooru.DanbooruExtractor):
post["filename"] = file["md5"] post["filename"] = file["md5"]
post["extension"] = file["ext"] post["extension"] = file["ext"]
post["date"] = text.parse_datetime( post["date"] = self.parse_datetime_iso(post["created_at"])
post["created_at"], "%Y-%m-%dT%H:%M:%S.%f%z")
post.update(data) post.update(data)
yield Message.Directory, post yield Message.Directory, post

View File

@@ -96,7 +96,7 @@ class EromeAlbumExtractor(EromeExtractor):
if not date: if not date:
ts = text.extr(group, '?v=', '"') ts = text.extr(group, '?v=', '"')
if len(ts) > 1: if len(ts) > 1:
date = text.parse_timestamp(ts) date = self.parse_timestamp(ts)
data = { data = {
"album_id": album_id, "album_id": album_id,

View File

@@ -216,7 +216,7 @@ class ExhentaiGalleryExtractor(ExhentaiExtractor):
def _items_hitomi(self): def _items_hitomi(self):
if self.config("metadata", False): if self.config("metadata", False):
data = self.metadata_from_api() data = self.metadata_from_api()
data["date"] = text.parse_timestamp(data["posted"]) data["date"] = self.parse_timestamp(data["posted"])
else: else:
data = {} data = {}
@@ -233,7 +233,7 @@ class ExhentaiGalleryExtractor(ExhentaiExtractor):
data = self.metadata_from_page(page) data = self.metadata_from_page(page)
if self.config("metadata", False): if self.config("metadata", False):
data.update(self.metadata_from_api()) data.update(self.metadata_from_api())
data["date"] = text.parse_timestamp(data["posted"]) data["date"] = self.parse_timestamp(data["posted"])
if self.config("tags", False): if self.config("tags", False):
tags = collections.defaultdict(list) tags = collections.defaultdict(list)
for tag in data["tags"]: for tag in data["tags"]:
@@ -258,8 +258,8 @@ class ExhentaiGalleryExtractor(ExhentaiExtractor):
"_" : extr('<div id="gdc"><div class="cs ct', '"'), "_" : extr('<div id="gdc"><div class="cs ct', '"'),
"eh_category" : extr('>', '<'), "eh_category" : extr('>', '<'),
"uploader" : extr('<div id="gdn">', '</div>'), "uploader" : extr('<div id="gdn">', '</div>'),
"date" : text.parse_datetime(extr( "date" : self.parse_datetime_iso(extr(
'>Posted:</td><td class="gdt2">', '</td>'), "%Y-%m-%d %H:%M"), '>Posted:</td><td class="gdt2">', '</td>')),
"parent" : extr( "parent" : extr(
'>Parent:</td><td class="gdt2"><a href="', '"'), '>Parent:</td><td class="gdt2"><a href="', '"'),
"expunged" : "Yes" != extr( "expunged" : "Yes" != extr(

View File

@@ -108,7 +108,7 @@ class FacebookExtractor(Extractor):
'"message":{"delight_ranges"', '"message":{"delight_ranges"',
'"},"message_preferred_body"' '"},"message_preferred_body"'
).rsplit('],"text":"', 1)[-1]), ).rsplit('],"text":"', 1)[-1]),
"date": text.parse_timestamp( "date": self.parse_timestamp(
text.extr(photo_page, '\\"publish_time\\":', ',') or text.extr(photo_page, '\\"publish_time\\":', ',') or
text.extr(photo_page, '"created_time":', ',') text.extr(photo_page, '"created_time":', ',')
), ),
@@ -172,7 +172,7 @@ class FacebookExtractor(Extractor):
"user_id": text.extr( "user_id": text.extr(
video_page, '"owner":{"__typename":"User","id":"', '"' video_page, '"owner":{"__typename":"User","id":"', '"'
), ),
"date": text.parse_timestamp(text.extr( "date": self.parse_timestamp(text.extr(
video_page, '\\"publish_time\\":', ',' video_page, '\\"publish_time\\":', ','
)), )),
"type": "video" "type": "video"

View File

@@ -128,7 +128,7 @@ class FanboxExtractor(Extractor):
if file.get("extension", "").lower() in exts if file.get("extension", "").lower() in exts
] ]
post["date"] = text.parse_datetime(post["publishedDatetime"]) post["date"] = self.parse_datetime_iso(post["publishedDatetime"])
post["text"] = content_body.get("text") if content_body else None post["text"] = content_body.get("text") if content_body else None
post["isCoverImage"] = False post["isCoverImage"] = False

View File

@@ -35,7 +35,7 @@ class FanslyExtractor(Extractor):
for post in self.posts(): for post in self.posts():
files = self._extract_files(post) files = self._extract_files(post)
post["count"] = len(files) post["count"] = len(files)
post["date"] = text.parse_timestamp(post["createdAt"]) post["date"] = self.parse_timestamp(post["createdAt"])
yield Message.Directory, post yield Message.Directory, post
for post["num"], file in enumerate(files, 1): for post["num"], file in enumerate(files, 1):
@@ -117,8 +117,8 @@ class FanslyExtractor(Extractor):
file = { file = {
**variant, **variant,
"format": variant["type"], "format": variant["type"],
"date": text.parse_timestamp(media["createdAt"]), "date": self.parse_timestamp(media["createdAt"]),
"date_updated": text.parse_timestamp(media["updatedAt"]), "date_updated": self.parse_timestamp(media["updatedAt"]),
} }
if "metadata" in location: if "metadata" in location:

View File

@@ -101,7 +101,7 @@ class FantiaExtractor(Extractor):
"comment": resp["comment"], "comment": resp["comment"],
"rating": resp["rating"], "rating": resp["rating"],
"posted_at": resp["posted_at"], "posted_at": resp["posted_at"],
"date": text.parse_datetime( "date": self.parse_datetime(
resp["posted_at"], "%a, %d %b %Y %H:%M:%S %z"), resp["posted_at"], "%a, %d %b %Y %H:%M:%S %z"),
"fanclub_id": resp["fanclub"]["id"], "fanclub_id": resp["fanclub"]["id"],
"fanclub_user_id": resp["fanclub"]["user"]["id"], "fanclub_user_id": resp["fanclub"]["user"]["id"],

View File

@@ -98,7 +98,7 @@ class FlickrImageExtractor(FlickrExtractor):
photo["comments"] = text.parse_int(photo["comments"]["_content"]) photo["comments"] = text.parse_int(photo["comments"]["_content"])
photo["description"] = photo["description"]["_content"] photo["description"] = photo["description"]["_content"]
photo["tags"] = [t["raw"] for t in photo["tags"]["tag"]] photo["tags"] = [t["raw"] for t in photo["tags"]["tag"]]
photo["date"] = text.parse_timestamp(photo["dateuploaded"]) photo["date"] = self.parse_timestamp(photo["dateuploaded"])
photo["views"] = text.parse_int(photo["views"]) photo["views"] = text.parse_int(photo["views"])
photo["id"] = text.parse_int(photo["id"]) photo["id"] = text.parse_int(photo["id"])
@@ -489,7 +489,7 @@ class FlickrAPI(oauth.OAuth1API):
def _extract_format(self, photo): def _extract_format(self, photo):
photo["description"] = photo["description"]["_content"].strip() photo["description"] = photo["description"]["_content"].strip()
photo["views"] = text.parse_int(photo["views"]) photo["views"] = text.parse_int(photo["views"])
photo["date"] = text.parse_timestamp(photo["dateupload"]) photo["date"] = self.parse_timestamp(photo["dateupload"])
photo["tags"] = photo["tags"].split() photo["tags"] = photo["tags"].split()
self._extract_metadata(photo) self._extract_metadata(photo)

View File

@@ -143,7 +143,7 @@ class FuraffinityExtractor(Extractor):
data["folders"] = () # folders not present in old layout data["folders"] = () # folders not present in old layout
data["user"] = self.user or data["artist_url"] data["user"] = self.user or data["artist_url"]
data["date"] = text.parse_timestamp(data["filename"].partition(".")[0]) data["date"] = self.parse_timestamp(data["filename"].partition(".")[0])
data["description"] = self._process_description(data["_description"]) data["description"] = self._process_description(data["_description"])
data["thumbnail"] = (f"https://t.furaffinity.net/{post_id}@600-" data["thumbnail"] = (f"https://t.furaffinity.net/{post_id}@600-"
f"{path.rsplit('/', 2)[1]}.jpg") f"{path.rsplit('/', 2)[1]}.jpg")

View File

@@ -55,8 +55,7 @@ class Furry34Extractor(BooruExtractor):
def _prepare(self, post): def _prepare(self, post):
post.pop("files", None) post.pop("files", None)
post["date"] = text.parse_datetime( post["date"] = self.parse_datetime_iso(post["created"])
post["created"], "%Y-%m-%dT%H:%M:%S.%fZ")
post["filename"], _, post["format"] = post["filename"].rpartition(".") post["filename"], _, post["format"] = post["filename"].rpartition(".")
if "tags" in post: if "tags" in post:
post["tags"] = [t["value"] for t in post["tags"]] post["tags"] = [t["value"] for t in post["tags"]]

View File

@@ -246,7 +246,7 @@ class GelbooruFavoriteExtractor(GelbooruBase,
for fav in favs: for fav in favs:
for post in self._api_request({"id": fav["favorite"]}): for post in self._api_request({"id": fav["favorite"]}):
post["date_favorited"] = text.parse_timestamp(fav["added"]) post["date_favorited"] = self.parse_timestamp(fav["added"])
yield post yield post
params["pid"] += 1 params["pid"] += 1
@@ -273,7 +273,7 @@ class GelbooruFavoriteExtractor(GelbooruBase,
for fav in favs: for fav in favs:
for post in self._api_request({"id": fav["favorite"]}): for post in self._api_request({"id": fav["favorite"]}):
post["date_favorited"] = text.parse_timestamp(fav["added"]) post["date_favorited"] = self.parse_timestamp(fav["added"])
yield post yield post
params["pid"] -= 1 params["pid"] -= 1

View File

@@ -35,8 +35,7 @@ class GelbooruV01Extractor(booru.BooruExtractor):
} }
post["md5"] = post["file_url"].rpartition("/")[2].partition(".")[0] post["md5"] = post["file_url"].rpartition("/")[2].partition(".")[0]
post["date"] = text.parse_datetime( post["date"] = self.parse_datetime_iso(post["created_at"])
post["created_at"], "%Y-%m-%d %H:%M:%S")
return post return post

View File

@@ -122,7 +122,7 @@ class GelbooruV02Extractor(booru.BooruExtractor):
def _prepare(self, post): def _prepare(self, post):
post["tags"] = post["tags"].strip() post["tags"] = post["tags"].strip()
post["date"] = text.parse_datetime( post["date"] = self.parse_datetime(
post["created_at"], "%a %b %d %H:%M:%S %z %Y") post["created_at"], "%a %b %d %H:%M:%S %z %Y")
def _html(self, post): def _html(self, post):

View File

@@ -52,7 +52,7 @@ class GirlsreleasedSetExtractor(GirlsreleasedExtractor):
"id": json["id"], "id": json["id"],
"site": json["site"], "site": json["site"],
"model": [model for _, model in json["models"]], "model": [model for _, model in json["models"]],
"date": text.parse_timestamp(json["date"]), "date": self.parse_timestamp(json["date"]),
"count": len(json["images"]), "count": len(json["images"]),
"url": "https://girlsreleased.com/set/" + json["id"], "url": "https://girlsreleased.com/set/" + json["id"],
} }

View File

@@ -101,9 +101,8 @@ class GirlswithmusclePostExtractor(GirlswithmuscleExtractor):
"model": model, "model": model,
"model_list": self._parse_model_list(model), "model_list": self._parse_model_list(model),
"tags": text.split_html(tags)[1::2], "tags": text.split_html(tags)[1::2],
"date": text.parse_datetime( "date": self.parse_datetime_iso(text.extr(
text.extr(page, 'class="hover-time" title="', '"')[:19], page, 'class="hover-time" title="', '"')[:19]),
"%Y-%m-%d %H:%M:%S"),
"is_favorite": self._parse_is_favorite(page), "is_favorite": self._parse_is_favorite(page),
"source_filename": source, "source_filename": source,
"uploader": uploader, "uploader": uploader,

View File

@@ -34,7 +34,7 @@ class HatenablogExtractor(Extractor):
def _handle_article(self, article: str): def _handle_article(self, article: str):
extr = text.extract_from(article) extr = text.extract_from(article)
date = text.parse_datetime(extr('<time datetime="', '"')) date = self.parse_datetime_iso(extr('<time datetime="', '"'))
entry_link = text.unescape(extr('<a href="', '"')) entry_link = text.unescape(extr('<a href="', '"'))
entry = entry_link.partition("/entry/")[2] entry = entry_link.partition("/entry/")[2]
title = text.unescape(extr('>', '<')) title = text.unescape(extr('>', '<'))

View File

@@ -86,7 +86,7 @@ class HentaifoundryExtractor(Extractor):
.replace("\r\n", "\n")), .replace("\r\n", "\n")),
"ratings" : [text.unescape(r) for r in text.extract_iter(extr( "ratings" : [text.unescape(r) for r in text.extract_iter(extr(
"class='ratings_box'", "</div>"), "title='", "'")], "class='ratings_box'", "</div>"), "title='", "'")],
"date" : text.parse_datetime(extr("datetime='", "'")), "date" : self.parse_datetime_iso(extr("datetime='", "'")),
"views" : text.parse_int(extr(">Views</span>", "<")), "views" : text.parse_int(extr(">Views</span>", "<")),
"score" : text.parse_int(extr(">Vote Score</span>", "<")), "score" : text.parse_int(extr(">Vote Score</span>", "<")),
"media" : text.unescape(extr(">Media</span>", "<").strip()), "media" : text.unescape(extr(">Media</span>", "<").strip()),
@@ -126,7 +126,7 @@ class HentaifoundryExtractor(Extractor):
"title" : text.unescape(extr( "title" : text.unescape(extr(
"<div class='titlebar'>", "</a>").rpartition(">")[2]), "<div class='titlebar'>", "</a>").rpartition(">")[2]),
"author" : text.unescape(extr('alt="', '"')), "author" : text.unescape(extr('alt="', '"')),
"date" : text.parse_datetime(extr( "date" : self.parse_datetime(extr(
">Updated<", "</span>").rpartition(">")[2], "%B %d, %Y"), ">Updated<", "</span>").rpartition(">")[2], "%B %d, %Y"),
"status" : extr("class='indent'>", "<"), "status" : extr("class='indent'>", "<"),
} }

View File

@@ -35,8 +35,7 @@ class HentaihandGalleryExtractor(GalleryExtractor):
"language" : info["language"]["name"], "language" : info["language"]["name"],
"lang" : util.language_to_code(info["language"]["name"]), "lang" : util.language_to_code(info["language"]["name"]),
"tags" : [t["slug"] for t in info["tags"]], "tags" : [t["slug"] for t in info["tags"]],
"date" : text.parse_datetime( "date" : self.parse_datetime_iso(info["uploaded_at"]),
info["uploaded_at"], "%Y-%m-%d"),
} }
for key in ("artists", "authors", "groups", "characters", for key in ("artists", "authors", "groups", "characters",
"relationships", "parodies"): "relationships", "parodies"):

View File

@@ -84,7 +84,7 @@ class HitomiGalleryExtractor(HitomiExtractor, GalleryExtractor):
"type" : info["type"].capitalize(), "type" : info["type"].capitalize(),
"language" : language, "language" : language,
"lang" : util.language_to_code(language), "lang" : util.language_to_code(language),
"date" : text.parse_datetime(date, "%Y-%m-%d %H:%M:%S%z"), "date" : self.parse_datetime_iso(date),
"tags" : tags, "tags" : tags,
"artist" : [o["artist"] for o in iget("artists") or ()], "artist" : [o["artist"] for o in iget("artists") or ()],
"group" : [o["group"] for o in iget("groups") or ()], "group" : [o["group"] for o in iget("groups") or ()],

View File

@@ -53,11 +53,9 @@ class ImagechestGalleryExtractor(GalleryExtractor):
def _metadata_api(self, page): def _metadata_api(self, page):
post = self.api.post(self.gallery_id) post = self.api.post(self.gallery_id)
post["date"] = text.parse_datetime( post["date"] = self.parse_datetime_iso(post["created"])
post["created"], "%Y-%m-%dT%H:%M:%S.%fZ")
for img in post["images"]: for img in post["images"]:
img["date"] = text.parse_datetime( img["date"] = self.parse_datetime_iso(img["created"])
img["created"], "%Y-%m-%dT%H:%M:%S.%fZ")
post["gallery_id"] = self.gallery_id post["gallery_id"] = self.gallery_id
post.pop("image_count", None) post.pop("image_count", None)

View File

@@ -159,8 +159,7 @@ class ImgbbImageExtractor(ImgbbExtractor):
"width" : text.parse_int(extr('"og:image:width" content="', '"')), "width" : text.parse_int(extr('"og:image:width" content="', '"')),
"height": text.parse_int(extr('"og:image:height" content="', '"')), "height": text.parse_int(extr('"og:image:height" content="', '"')),
"album" : extr("Added to <a", "</a>"), "album" : extr("Added to <a", "</a>"),
"date" : text.parse_datetime(extr( "date" : self.parse_datetime_iso(extr('<span title="', '"')),
'<span title="', '"'), "%Y-%m-%d %H:%M:%S"),
"user" : util.json_loads(extr( "user" : util.json_loads(extr(
"CHV.obj.resource=", "};") + "}").get("user"), "CHV.obj.resource=", "};") + "}").get("user"),
} }

View File

@@ -31,7 +31,7 @@ class ImgthGalleryExtractor(GalleryExtractor):
"title": text.unescape(extr("<h1>", "</h1>")), "title": text.unescape(extr("<h1>", "</h1>")),
"count": text.parse_int(extr( "count": text.parse_int(extr(
"total of images in this gallery: ", " ")), "total of images in this gallery: ", " ")),
"date" : text.parse_datetime( "date" : self.parse_datetime(
extr("created on ", " by <") extr("created on ", " by <")
.replace("th, ", " ", 1).replace("nd, ", " ", 1) .replace("th, ", " ", 1).replace("nd, ", " ", 1)
.replace("st, ", " ", 1), "%B %d %Y at %H:%M"), .replace("st, ", " ", 1), "%B %d %Y at %H:%M"),

View File

@@ -38,7 +38,7 @@ class ImgurExtractor(Extractor):
image["url"] = url = \ image["url"] = url = \
f"https://i.imgur.com/{image['id']}.{image['ext']}" f"https://i.imgur.com/{image['id']}.{image['ext']}"
image["date"] = text.parse_datetime(image["created_at"]) image["date"] = self.parse_datetime_iso(image["created_at"])
image["_http_validate"] = self._validate image["_http_validate"] = self._validate
text.nameext_from_url(url, image) text.nameext_from_url(url, image)
@@ -106,7 +106,7 @@ class ImgurAlbumExtractor(ImgurExtractor):
del album["media"] del album["media"]
count = len(images) count = len(images)
album["date"] = text.parse_datetime(album["created_at"]) album["date"] = self.parse_datetime_iso(album["created_at"])
try: try:
del album["ad_url"] del album["ad_url"]

View File

@@ -35,8 +35,8 @@ class InkbunnyExtractor(Extractor):
for post in self.posts(): for post in self.posts():
post.update(metadata) post.update(metadata)
post["date"] = text.parse_datetime( post["date"] = self.parse_datetime_iso(
post["create_datetime"] + "00", "%Y-%m-%d %H:%M:%S.%f%z") post["create_datetime"][:19])
post["tags"] = [kw["keyword_name"] for kw in post["keywords"]] post["tags"] = [kw["keyword_name"] for kw in post["keywords"]]
post["ratings"] = [r["name"] for r in post["ratings"]] post["ratings"] = [r["name"] for r in post["ratings"]]
files = post["files"] files = post["files"]
@@ -52,8 +52,8 @@ class InkbunnyExtractor(Extractor):
for post["num"], file in enumerate(files, 1): for post["num"], file in enumerate(files, 1):
post.update(file) post.update(file)
post["deleted"] = (file["deleted"] == "t") post["deleted"] = (file["deleted"] == "t")
post["date"] = text.parse_datetime( post["date"] = self.parse_datetime_iso(
file["create_datetime"] + "00", "%Y-%m-%d %H:%M:%S.%f%z") file["create_datetime"][:19])
text.nameext_from_url(file["file_name"], post) text.nameext_from_url(file["file_name"], post)
url = file["file_url_full"] url = file["file_url_full"]

View File

@@ -173,7 +173,7 @@ class InstagramExtractor(Extractor):
post_url = f"{self.root}/stories/highlights/{reel_id}/" post_url = f"{self.root}/stories/highlights/{reel_id}/"
data = { data = {
"user" : post.get("user"), "user" : post.get("user"),
"expires": text.parse_timestamp(expires), "expires": self.parse_timestamp(expires),
"post_id": reel_id, "post_id": reel_id,
"post_shortcode": shortcode_from_id(reel_id), "post_shortcode": shortcode_from_id(reel_id),
"post_url": post_url, "post_url": post_url,
@@ -224,7 +224,7 @@ class InstagramExtractor(Extractor):
data["owner_id"] = owner["pk"] data["owner_id"] = owner["pk"]
data["username"] = owner.get("username") data["username"] = owner.get("username")
data["fullname"] = owner.get("full_name") data["fullname"] = owner.get("full_name")
data["post_date"] = data["date"] = text.parse_timestamp( data["post_date"] = data["date"] = self.parse_timestamp(
post.get("taken_at") or post.get("created_at") or post.get("seen")) post.get("taken_at") or post.get("created_at") or post.get("seen"))
data["_files"] = files = [] data["_files"] = files = []
for num, item in enumerate(items, 1): for num, item in enumerate(items, 1):
@@ -278,7 +278,7 @@ class InstagramExtractor(Extractor):
media = { media = {
"num" : num, "num" : num,
"date" : text.parse_timestamp(item.get("taken_at") or "date" : self.parse_timestamp(item.get("taken_at") or
media.get("taken_at") or media.get("taken_at") or
post.get("taken_at")), post.get("taken_at")),
"media_id" : item["pk"], "media_id" : item["pk"],
@@ -299,7 +299,7 @@ class InstagramExtractor(Extractor):
if "reshared_story_media_author" in item: if "reshared_story_media_author" in item:
media["author"] = item["reshared_story_media_author"] media["author"] = item["reshared_story_media_author"]
if "expiring_at" in item: if "expiring_at" in item:
media["expires"] = text.parse_timestamp(post["expiring_at"]) media["expires"] = self.parse_timestamp(post["expiring_at"])
self._extract_tagged_users(item, media) self._extract_tagged_users(item, media)
files.append(media) files.append(media)
@@ -342,7 +342,7 @@ class InstagramExtractor(Extractor):
"post_id" : post["id"], "post_id" : post["id"],
"post_shortcode": post["shortcode"], "post_shortcode": post["shortcode"],
"post_url" : f"{self.root}/p/{post['shortcode']}/", "post_url" : f"{self.root}/p/{post['shortcode']}/",
"post_date" : text.parse_timestamp(post["taken_at_timestamp"]), "post_date" : self.parse_timestamp(post["taken_at_timestamp"]),
"description": text.parse_unicode_escapes("\n".join( "description": text.parse_unicode_escapes("\n".join(
edge["node"]["text"] edge["node"]["text"]
for edge in post["edge_media_to_caption"]["edges"] for edge in post["edge_media_to_caption"]["edges"]
@@ -634,7 +634,7 @@ class InstagramStoriesTrayExtractor(InstagramExtractor):
def items(self): def items(self):
base = f"{self.root}/stories/id:" base = f"{self.root}/stories/id:"
for story in self.api.reels_tray(): for story in self.api.reels_tray():
story["date"] = text.parse_timestamp(story["latest_reel_media"]) story["date"] = self.parse_timestamp(story["latest_reel_media"])
story["_extractor"] = InstagramStoriesExtractor story["_extractor"] = InstagramStoriesExtractor
yield Message.Queue, f"{base}{story['id']}/", story yield Message.Queue, f"{base}{story['id']}/", story

View File

@@ -36,8 +36,8 @@ class IssuuPublicationExtractor(IssuuBase, GalleryExtractor):
'{"":' + data.replace('\\"', '"'))) '{"":' + data.replace('\\"', '"')))
doc = data["initialDocumentData"]["document"] doc = data["initialDocumentData"]["document"]
doc["date"] = text.parse_datetime( doc["date"] = self.parse_datetime_iso(
doc["originalPublishDateInISOString"], "%Y-%m-%dT%H:%M:%S.%fZ") doc["originalPublishDateInISOString"])
self.count = text.parse_int(doc["pageCount"]) self.count = text.parse_int(doc["pageCount"])
self.base = (f"https://image.isu.pub/{doc['revisionId']}-" self.base = (f"https://image.isu.pub/{doc['revisionId']}-"

View File

@@ -32,8 +32,7 @@ class ItakuExtractor(Extractor):
def items(self): def items(self):
if images := self.images(): if images := self.images():
for image in images: for image in images:
image["date"] = text.parse_datetime( image["date"] = self.parse_datetime_iso(image["date_added"])
image["date_added"], "%Y-%m-%dT%H:%M:%S.%fZ")
for category, tags in image.pop("categorized_tags").items(): for category, tags in image.pop("categorized_tags").items():
image[f"tags_{category.lower()}"] = [ image[f"tags_{category.lower()}"] = [
t["name"] for t in tags] t["name"] for t in tags]
@@ -60,15 +59,14 @@ class ItakuExtractor(Extractor):
for post in posts: for post in posts:
images = post.pop("gallery_images") or () images = post.pop("gallery_images") or ()
post["count"] = len(images) post["count"] = len(images)
post["date"] = text.parse_datetime( post["date"] = self.parse_datetime_iso(post["date_added"])
post["date_added"], "%Y-%m-%dT%H:%M:%S.%fZ")
post["tags"] = [t["name"] for t in post["tags"]] post["tags"] = [t["name"] for t in post["tags"]]
yield Message.Directory, post yield Message.Directory, post
for post["num"], image in enumerate(images, 1): for post["num"], image in enumerate(images, 1):
post["file"] = image post["file"] = image
image["date"] = text.parse_datetime( image["date"] = self.parse_datetime_iso(
image["date_added"], "%Y-%m-%dT%H:%M:%S.%fZ") image["date_added"])
url = image["image"] url = image["image"]
yield Message.Url, url, text.nameext_from_url(url, post) yield Message.Url, url, text.nameext_from_url(url, post)

View File

@@ -122,10 +122,10 @@ class IwaraExtractor(Extractor):
info["file_id"] = file_info.get("id") info["file_id"] = file_info.get("id")
info["filename"] = filename info["filename"] = filename
info["extension"] = extension info["extension"] = extension
info["date"] = text.parse_datetime( info["date"] = self.parse_datetime_iso(
file_info.get("createdAt"), "%Y-%m-%dT%H:%M:%S.%fZ") file_info.get("createdAt"))
info["date_updated"] = text.parse_datetime( info["date_updated"] = self.parse_datetime_iso(
file_info.get("updatedAt"), "%Y-%m-%dT%H:%M:%S.%fZ") file_info.get("updatedAt"))
info["mime"] = file_info.get("mime") info["mime"] = file_info.get("mime")
info["size"] = file_info.get("size") info["size"] = file_info.get("size")
info["width"] = file_info.get("width") info["width"] = file_info.get("width")
@@ -144,8 +144,7 @@ class IwaraExtractor(Extractor):
"status" : user.get("status"), "status" : user.get("status"),
"role" : user.get("role"), "role" : user.get("role"),
"premium": user.get("premium"), "premium": user.get("premium"),
"date" : text.parse_datetime( "date" : self.parse_datetime_iso(user.get("createdAt")),
user.get("createdAt"), "%Y-%m-%dT%H:%M:%S.000Z"),
"description": profile.get("body"), "description": profile.get("body"),
} }

View File

@@ -32,8 +32,7 @@ class KabeuchiUserExtractor(Extractor):
if post.get("is_ad") or not post["image1"]: if post.get("is_ad") or not post["image1"]:
continue continue
post["date"] = text.parse_datetime( post["date"] = self.parse_datetime_iso(post["created_at"])
post["created_at"], "%Y-%m-%d %H:%M:%S")
yield Message.Directory, post yield Message.Directory, post
for key in keys: for key in keys:

View File

@@ -244,7 +244,7 @@ class KemonoExtractor(Extractor):
def _parse_datetime(self, date_string): def _parse_datetime(self, date_string):
if len(date_string) > 19: if len(date_string) > 19:
date_string = date_string[:19] date_string = date_string[:19]
return text.parse_datetime(date_string, "%Y-%m-%dT%H:%M:%S") return self.parse_datetime_iso(date_string)
def _revisions(self, posts): def _revisions(self, posts):
return itertools.chain.from_iterable( return itertools.chain.from_iterable(

View File

@@ -119,8 +119,7 @@ class LensdumpImageExtractor(LensdumpBase, Extractor):
'property="image:width" content="', '"')), 'property="image:width" content="', '"')),
"height": text.parse_int(extr( "height": text.parse_int(extr(
'property="image:height" content="', '"')), 'property="image:height" content="', '"')),
"date" : text.parse_datetime(extr( "date" : self.parse_datetime_iso(extr('<span title="', '"')),
'<span title="', '"'), "%Y-%m-%d %H:%M:%S"),
} }
text.nameext_from_url(data["url"], data) text.nameext_from_url(data["url"], data)

View File

@@ -45,7 +45,7 @@ class LivedoorExtractor(Extractor):
"title" : text.unescape(extr('dc:title="', '"')), "title" : text.unescape(extr('dc:title="', '"')),
"categories" : extr('dc:subject="', '"').partition(",")[::2], "categories" : extr('dc:subject="', '"').partition(",")[::2],
"description": extr('dc:description="', '"'), "description": extr('dc:description="', '"'),
"date" : text.parse_datetime(extr('dc:date="', '"')), "date" : self.parse_datetime_iso(extr('dc:date="', '"')),
"tags" : text.split_html(tags)[1:] if tags else [], "tags" : text.split_html(tags)[1:] if tags else [],
"user" : self.user, "user" : self.user,
"body" : body, "body" : body,

View File

@@ -29,7 +29,7 @@ class LofterExtractor(Extractor):
post = post["post"] post = post["post"]
post["blog_name"] = post["blogInfo"]["blogName"] post["blog_name"] = post["blogInfo"]["blogName"]
post["date"] = text.parse_timestamp(post["publishTime"] // 1000) post["date"] = self.parse_timestamp(post["publishTime"] // 1000)
post_type = post["type"] post_type = post["type"]
# Article # Article

View File

@@ -69,7 +69,7 @@ class LusciousAlbumExtractor(LusciousExtractor):
image["thumbnail"] = "" image["thumbnail"] = ""
image["tags"] = [item["text"] for item in image["tags"]] image["tags"] = [item["text"] for item in image["tags"]]
image["date"] = text.parse_timestamp(image["created"]) image["date"] = self.parse_timestamp(image["created"])
image["id"] = text.parse_int(image["id"]) image["id"] = text.parse_int(image["id"])
url = (image["url_to_original"] or image["url_to_video"] url = (image["url_to_original"] or image["url_to_video"]
@@ -188,7 +188,7 @@ fragment AlbumStandard on Album {
album["created_by"] = album["created_by"]["display_name"] album["created_by"] = album["created_by"]["display_name"]
album["id"] = text.parse_int(album["id"]) album["id"] = text.parse_int(album["id"])
album["date"] = text.parse_timestamp(album["created"]) album["date"] = self.parse_timestamp(album["created"])
return album return album

View File

@@ -47,8 +47,7 @@ class MadokamiMangaExtractor(MadokamiExtractor):
"path": text.unescape(extr('href="', '"')), "path": text.unescape(extr('href="', '"')),
"chapter_string": text.unescape(extr(">", "<")), "chapter_string": text.unescape(extr(">", "<")),
"size": text.parse_bytes(extr("<td>", "</td>")), "size": text.parse_bytes(extr("<td>", "</td>")),
"date": text.parse_datetime( "date": self.parse_datetime_iso(extr("<td>", "</td>").strip()),
extr("<td>", "</td>").strip(), "%Y-%m-%d %H:%M"),
}) })
if self.config("chapter-reverse"): if self.config("chapter-reverse"):

View File

@@ -68,7 +68,7 @@ class MangadexExtractor(Extractor):
"chapter" : text.parse_int(chnum), "chapter" : text.parse_int(chnum),
"chapter_minor": f"{sep}{minor}", "chapter_minor": f"{sep}{minor}",
"chapter_id": chapter["id"], "chapter_id": chapter["id"],
"date" : text.parse_datetime(cattributes["publishAt"]), "date" : self.parse_datetime_iso(cattributes["publishAt"]),
"group" : [group["attributes"]["name"] "group" : [group["attributes"]["name"]
for group in relationships["scanlation_group"]], for group in relationships["scanlation_group"]],
"lang" : lang, "lang" : lang,
@@ -109,8 +109,8 @@ class MangadexCoversExtractor(MangadexExtractor):
"cover" : cattributes["fileName"], "cover" : cattributes["fileName"],
"lang" : cattributes.get("locale"), "lang" : cattributes.get("locale"),
"volume" : text.parse_int(cattributes["volume"]), "volume" : text.parse_int(cattributes["volume"]),
"date" : text.parse_datetime(cattributes["createdAt"]), "date" : self.parse_datetime_iso(cattributes["createdAt"]),
"date_updated": text.parse_datetime(cattributes["updatedAt"]), "date_updated": self.parse_datetime_iso(cattributes["updatedAt"]),
} }
@@ -454,7 +454,7 @@ def _manga_info(self, uuid):
"manga_id": manga["id"], "manga_id": manga["id"],
"manga_titles": [t.popitem()[1] "manga_titles": [t.popitem()[1]
for t in mattr.get("altTitles") or ()], for t in mattr.get("altTitles") or ()],
"manga_date" : text.parse_datetime(mattr.get("createdAt")), "manga_date" : self.parse_datetime_iso(mattr.get("createdAt")),
"description" : (mattr["description"].get("en") or "description" : (mattr["description"].get("en") or
next(iter(mattr["description"].values()), "")), next(iter(mattr["description"].values()), "")),
"demographic": mattr.get("publicationDemographic"), "demographic": mattr.get("publicationDemographic"),

View File

@@ -99,7 +99,7 @@ class MangafoxMangaExtractor(MangaExtractor):
"chapter" : text.parse_int(chapter), "chapter" : text.parse_int(chapter),
"chapter_minor" : minor or "", "chapter_minor" : minor or "",
"chapter_string": cstr, "chapter_string": cstr,
"date" : text.parse_datetime( "date" : self.parse_datetime(
extr('right">', '</span>'), "%b %d, %Y"), extr('right">', '</span>'), "%b %d, %Y"),
} }
chapter.update(data) chapter.update(data)

View File

@@ -50,10 +50,10 @@ class ManganeloChapterExtractor(ManganeloExtractor, ChapterExtractor):
extr = text.extract_from(page) extr = text.extract_from(page)
data = { data = {
"date" : text.parse_datetime(extr( "date" : self.parse_datetime_iso(extr(
'"datePublished": "', '"')[:19], "%Y-%m-%dT%H:%M:%S"), '"datePublished": "', '"')[:19]),
"date_updated": text.parse_datetime(extr( "date_updated": self.parse_datetime_iso(extr(
'"dateModified": "', '"')[:19], "%Y-%m-%dT%H:%M:%S"), '"dateModified": "', '"')[:19]),
"manga_id" : text.parse_int(extr("comic_id =", ";")), "manga_id" : text.parse_int(extr("comic_id =", ";")),
"chapter_id" : text.parse_int(extr("chapter_id =", ";")), "chapter_id" : text.parse_int(extr("chapter_id =", ";")),
"manga" : extr("comic_name =", ";").strip('" '), "manga" : extr("comic_name =", ";").strip('" '),
@@ -99,7 +99,7 @@ class ManganeloMangaExtractor(ManganeloExtractor, MangaExtractor):
manga = text.unescape(extr("<h1>", "<")) manga = text.unescape(extr("<h1>", "<"))
author = text.remove_html(extr("<li>Author(s) :", "</a>")) author = text.remove_html(extr("<li>Author(s) :", "</a>"))
status = extr("<li>Status :", "<").strip() status = extr("<li>Status :", "<").strip()
update = text.parse_datetime(extr( update = self.parse_datetime(extr(
"<li>Last updated :", "<").strip(), "%b-%d-%Y %I:%M:%S %p") "<li>Last updated :", "<").strip(), "%b-%d-%Y %I:%M:%S %p")
tags = text.split_html(extr(">Genres :", "</li>"))[::2] tags = text.split_html(extr(">Genres :", "</li>"))[::2]
@@ -121,7 +121,7 @@ class ManganeloMangaExtractor(ManganeloExtractor, MangaExtractor):
"chapter" : text.parse_int(chapter), "chapter" : text.parse_int(chapter),
"chapter_minor": (sep and ".") + minor, "chapter_minor": (sep and ".") + minor,
"title" : title.partition(": ")[2], "title" : title.partition(": ")[2],
"date" : text.parse_datetime(date, "%b-%d-%Y %H:%M"), "date" : self.parse_datetime(date, "%b-%d-%Y %H:%M"),
"lang" : "en", "lang" : "en",
"language": "English", "language": "English",
})) }))

View File

@@ -101,7 +101,7 @@ class MangaparkChapterExtractor(MangaparkBase, ChapterExtractor):
"language" : util.code_to_language(lang), "language" : util.code_to_language(lang),
"source" : chapter["srcTitle"], "source" : chapter["srcTitle"],
"source_id" : chapter["sourceId"], "source_id" : chapter["sourceId"],
"date" : text.parse_timestamp(chapter["dateCreate"] // 1000), "date" : self.parse_timestamp(chapter["dateCreate"] // 1000),
} }
def images(self, _): def images(self, _):
@@ -138,7 +138,7 @@ class MangaparkMangaExtractor(MangaparkBase, Extractor):
"language" : util.code_to_language(lang), "language" : util.code_to_language(lang),
"source" : chapter["srcTitle"], "source" : chapter["srcTitle"],
"source_id" : chapter["sourceId"], "source_id" : chapter["sourceId"],
"date" : text.parse_timestamp( "date" : self.parse_timestamp(
chapter["dateCreate"] // 1000), chapter["dateCreate"] // 1000),
"_extractor": MangaparkChapterExtractor, "_extractor": MangaparkChapterExtractor,
} }

View File

@@ -40,10 +40,8 @@ class MangataroChapterExtractor(MangataroBase, ChapterExtractor):
"chapter_minor": str(round(minor, 5))[1:] if minor else "", "chapter_minor": str(round(minor, 5))[1:] if minor else "",
"chapter_id" : text.parse_int(chapter_id), "chapter_id" : text.parse_int(chapter_id),
"chapter_url" : comic["url"], "chapter_url" : comic["url"],
"date" : text.parse_datetime( "date" : self.parse_datetime_iso(comic["datePublished"]),
comic["datePublished"], "%Y-%m-%dT%H:%M:%S%z"), "date_updated" : self.parse_datetime_iso(comic["dateModified"]),
"date_updated" : text.parse_datetime(
comic["dateModified"], "%Y-%m-%dT%H:%M:%S%z"),
} }
def images(self, page): def images(self, page):

View File

@@ -119,7 +119,7 @@ class MangoxoAlbumExtractor(MangoxoExtractor):
"album": { "album": {
"id": self.album_id, "id": self.album_id,
"name": text.unescape(title), "name": text.unescape(title),
"date": text.parse_datetime(date.strip(), "%Y.%m.%d %H:%M"), "date": self.parse_datetime(date.strip(), "%Y.%m.%d %H:%M"),
"description": text.unescape(descr), "description": text.unescape(descr),
}, },
"count": text.parse_int(count), "count": text.parse_int(count),

View File

@@ -64,8 +64,7 @@ class MastodonExtractor(BaseExtractor):
status["count"] = len(attachments) status["count"] = len(attachments)
status["tags"] = [tag["name"] for tag in status["tags"]] status["tags"] = [tag["name"] for tag in status["tags"]]
status["date"] = text.parse_datetime( status["date"] = self.parse_datetime_iso(status["created_at"][:19])
status["created_at"][:19], "%Y-%m-%dT%H:%M:%S")
yield Message.Directory, status yield Message.Directory, status
for status["num"], media in enumerate(attachments, 1): for status["num"], media in enumerate(attachments, 1):
@@ -319,10 +318,8 @@ class MastodonAPI():
if code == 404: if code == 404:
raise exception.NotFoundError() raise exception.NotFoundError()
if code == 429: if code == 429:
self.extractor.wait(until=text.parse_datetime( self.extractor.wait(until=self.parse_datetime_iso(
response.headers["x-ratelimit-reset"], response.headers["x-ratelimit-reset"]))
"%Y-%m-%dT%H:%M:%S.%fZ",
))
continue continue
raise exception.AbortExtraction(response.json().get("error")) raise exception.AbortExtraction(response.json().get("error"))

View File

@@ -48,13 +48,11 @@ class MisskeyExtractor(BaseExtractor):
note["instance"] = self.instance note["instance"] = self.instance
note["instance_remote"] = note["user"]["host"] note["instance_remote"] = note["user"]["host"]
note["count"] = len(files) note["count"] = len(files)
note["date"] = text.parse_datetime( note["date"] = self.parse_datetime_iso(note["createdAt"])
note["createdAt"], "%Y-%m-%dT%H:%M:%S.%f%z")
yield Message.Directory, note yield Message.Directory, note
for note["num"], file in enumerate(files, 1): for note["num"], file in enumerate(files, 1):
file["date"] = text.parse_datetime( file["date"] = self.parse_datetime_iso(file["createdAt"])
file["createdAt"], "%Y-%m-%dT%H:%M:%S.%f%z")
note["file"] = file note["file"] = file
url = file["url"] url = file["url"]
yield Message.Url, url, text.nameext_from_url(url, note) yield Message.Url, url, text.nameext_from_url(url, note)

View File

@@ -9,9 +9,8 @@
"""Extractors for Moebooru based sites""" """Extractors for Moebooru based sites"""
from .booru import BooruExtractor from .booru import BooruExtractor
from .. import text, util from .. import text, util, dt
import collections import collections
import datetime
class MoebooruExtractor(BooruExtractor): class MoebooruExtractor(BooruExtractor):
@@ -21,7 +20,7 @@ class MoebooruExtractor(BooruExtractor):
page_start = 1 page_start = 1
def _prepare(self, post): def _prepare(self, post):
post["date"] = text.parse_timestamp(post["created_at"]) post["date"] = dt.parse_ts(post["created_at"])
def _html(self, post): def _html(self, post):
url = f"{self.root}/post/show/{post['id']}" url = f"{self.root}/post/show/{post['id']}"
@@ -164,14 +163,14 @@ class MoebooruPopularExtractor(MoebooruExtractor):
date = (f"{params['year']:>04}-{params.get('month', '01'):>02}-" date = (f"{params['year']:>04}-{params.get('month', '01'):>02}-"
f"{params.get('day', '01'):>02}") f"{params.get('day', '01'):>02}")
else: else:
date = datetime.date.today().isoformat() date = dt.date.today().isoformat()
scale = self.scale scale = self.scale
if scale.startswith("by_"): if scale.startswith("by_"):
scale = scale[3:] scale = scale[3:]
if scale == "week": if scale == "week":
date = datetime.date.fromisoformat(date) date = dt.date.fromisoformat(date)
date = (date - datetime.timedelta(days=date.weekday())).isoformat() date = (date - dt.timedelta(days=date.weekday())).isoformat()
elif scale == "month": elif scale == "month":
date = date[:-3] date = date[:-3]

View File

@@ -9,9 +9,8 @@
"""Extractors for https://motherless.com/""" """Extractors for https://motherless.com/"""
from .common import Extractor, Message from .common import Extractor, Message
from .. import text, util, exception from .. import text, dt, exception
from ..cache import memcache from ..cache import memcache
from datetime import timedelta
BASE_PATTERN = r"(?:https?://)?motherless\.com" BASE_PATTERN = r"(?:https?://)?motherless\.com"
@@ -115,14 +114,14 @@ class MotherlessExtractor(Extractor):
return data return data
def _parse_datetime(self, dt): def _parse_datetime(self, dt_string):
if " ago" not in dt: if " ago" not in dt_string:
return text.parse_datetime(dt, "%d %b %Y") return dt.parse(dt_string, "%d %b %Y")
value = text.parse_int(dt[:-5]) value = text.parse_int(dt_string[:-5])
delta = timedelta(0, value*3600) if dt[-5] == "h" else timedelta(value) delta = (dt.timedelta(0, value*3600) if dt_string[-5] == "h" else
return (util.datetime_utcnow() - delta).replace( dt.timedelta(value))
hour=0, minute=0, second=0) return (dt.now() - delta).replace(hour=0, minute=0, second=0)
@memcache(keyarg=2) @memcache(keyarg=2)
def _extract_gallery_title(self, page, gallery_id): def _extract_gallery_title(self, page, gallery_id):

View File

@@ -9,8 +9,7 @@
"""Extractors for https://blog.naver.com/""" """Extractors for https://blog.naver.com/"""
from .common import GalleryExtractor, Extractor, Message from .common import GalleryExtractor, Extractor, Message
from .. import text, util from .. import text, util, dt
import datetime
import time import time
@@ -67,11 +66,11 @@ class NaverBlogPostExtractor(NaverBlogBase, GalleryExtractor):
return data return data
def _parse_datetime(self, date_string): def _parse_datetime(self, dt_string):
if "" in date_string: if "" in dt_string:
ts = time.gmtime() ts = time.gmtime()
return datetime.datetime(ts.tm_year, ts.tm_mon, ts.tm_mday) return dt.datetime(ts.tm_year, ts.tm_mon, ts.tm_mday)
return text.parse_datetime(date_string, "%Y. %m. %d. %H:%M") return dt.parse(dt_string, "%Y. %m. %d. %H:%M")
def images(self, page): def images(self, page):
files = [] files = []

View File

@@ -31,17 +31,17 @@ class NaverChzzkExtractor(Extractor):
data["uid"] = data["objectId"] data["uid"] = data["objectId"]
data["user"] = comment["user"] data["user"] = comment["user"]
data["count"] = len(files) data["count"] = len(files)
data["date"] = text.parse_datetime( data["date"] = self.parse_datetime(
data["createdDate"], "%Y%m%d%H%M%S") data["createdDate"], "%Y%m%d%H%M%S")
yield Message.Directory, data yield Message.Directory, data
for data["num"], file in enumerate(files, 1): for data["num"], file in enumerate(files, 1):
if extra := file.get("extraJson"): if extra := file.get("extraJson"):
file.update(util.json_loads(extra)) file.update(util.json_loads(extra))
file["date"] = text.parse_datetime( file["date"] = self.parse_datetime_iso(
file["createdDate"], "%Y-%m-%dT%H:%M:%S.%f%z") file["createdDate"])
file["date_updated"] = text.parse_datetime( file["date_updated"] = self.parse_datetime_iso(
file["updatedDate"], "%Y-%m-%dT%H:%M:%S.%f%z") file["updatedDate"])
data["file"] = file data["file"] = file
url = file["attachValue"] url = file["attachValue"]
yield Message.Url, url, text.nameext_from_url(url, data) yield Message.Url, url, text.nameext_from_url(url, data)

View File

@@ -59,8 +59,8 @@ class NekohousePostExtractor(NekohouseExtractor):
'class="scrape__user-name', '</').rpartition(">")[2].strip()), 'class="scrape__user-name', '</').rpartition(">")[2].strip()),
"title" : text.unescape(extr( "title" : text.unescape(extr(
'class="scrape__title', '</').rpartition(">")[2]), 'class="scrape__title', '</').rpartition(">")[2]),
"date" : text.parse_datetime(extr( "date" : self.parse_datetime_iso(extr(
'datetime="', '"')[:19], "%Y-%m-%d %H:%M:%S"), 'datetime="', '"')[:19]),
"content": text.unescape(extr( "content": text.unescape(extr(
'class="scrape__content">', "</div>").strip()), 'class="scrape__content">', "</div>").strip()),
} }

View File

@@ -9,7 +9,7 @@
"""Extractors for https://www.newgrounds.com/""" """Extractors for https://www.newgrounds.com/"""
from .common import Extractor, Message, Dispatch from .common import Extractor, Message, Dispatch
from .. import text, util, exception from .. import text, util, dt, exception
from ..cache import cache from ..cache import cache
import itertools import itertools
@@ -218,7 +218,7 @@ class NewgroundsExtractor(Extractor):
"description": text.unescape(extr(':description" content="', '"')), "description": text.unescape(extr(':description" content="', '"')),
"type" : "art", "type" : "art",
"_type" : "i", "_type" : "i",
"date" : text.parse_datetime(extr( "date" : dt.parse_iso(extr(
'itemprop="datePublished" content="', '"')), 'itemprop="datePublished" content="', '"')),
"rating" : extr('class="rated-', '"'), "rating" : extr('class="rated-', '"'),
"url" : full('src="', '"'), "url" : full('src="', '"'),
@@ -268,7 +268,7 @@ class NewgroundsExtractor(Extractor):
"description": text.unescape(extr(':description" content="', '"')), "description": text.unescape(extr(':description" content="', '"')),
"type" : "audio", "type" : "audio",
"_type" : "a", "_type" : "a",
"date" : text.parse_datetime(extr( "date" : dt.parse_iso(extr(
'itemprop="datePublished" content="', '"')), 'itemprop="datePublished" content="', '"')),
"url" : extr('{"url":"', '"').replace("\\/", "/"), "url" : extr('{"url":"', '"').replace("\\/", "/"),
"index" : text.parse_int(index), "index" : text.parse_int(index),
@@ -287,7 +287,7 @@ class NewgroundsExtractor(Extractor):
src = src.replace("\\/", "/") src = src.replace("\\/", "/")
formats = () formats = ()
type = extr(',"description":"', '"') type = extr(',"description":"', '"')
date = text.parse_datetime(extr( date = dt.parse_iso(extr(
'itemprop="datePublished" content="', '"')) 'itemprop="datePublished" content="', '"'))
if type: if type:
type = type.rpartition(" ")[2].lower() type = type.rpartition(" ")[2].lower()
@@ -302,7 +302,7 @@ class NewgroundsExtractor(Extractor):
sources = self.request_json(url, headers=headers)["sources"] sources = self.request_json(url, headers=headers)["sources"]
formats = self._video_formats(sources) formats = self._video_formats(sources)
src = next(formats, "") src = next(formats, "")
date = text.parse_timestamp(src.rpartition("?")[2]) date = self.parse_timestamp(src.rpartition("?")[2])
type = "movie" type = "movie"
return { return {

View File

@@ -9,7 +9,7 @@
"""Extractors for nijie instances""" """Extractors for nijie instances"""
from .common import BaseExtractor, Message, Dispatch, AsynchronousMixin from .common import BaseExtractor, Message, Dispatch, AsynchronousMixin
from .. import text, exception from .. import text, dt, exception
from ..cache import cache from ..cache import cache
@@ -82,8 +82,9 @@ class NijieExtractor(AsynchronousMixin, BaseExtractor):
"title" : keywords[0].strip(), "title" : keywords[0].strip(),
"description": text.unescape(extr( "description": text.unescape(extr(
'"description": "', '"').replace("&amp;", "&")), '"description": "', '"').replace("&amp;", "&")),
"date" : text.parse_datetime(extr( "date" : dt.parse(extr(
'"datePublished": "', '"'), "%a %b %d %H:%M:%S %Y", 9), '"datePublished": "', '"'), "%a %b %d %H:%M:%S %Y"
) - dt.timedelta(hours=9),
"artist_id" : text.parse_int(extr('/members.php?id=', '"')), "artist_id" : text.parse_int(extr('/members.php?id=', '"')),
"artist_name": keywords[1], "artist_name": keywords[1],
"tags" : keywords[2:-1], "tags" : keywords[2:-1],
@@ -101,9 +102,9 @@ class NijieExtractor(AsynchronousMixin, BaseExtractor):
"artist_id" : text.parse_int(extr('members.php?id=', '"')), "artist_id" : text.parse_int(extr('members.php?id=', '"')),
"artist_name": keywords[1], "artist_name": keywords[1],
"tags" : keywords[2:-1], "tags" : keywords[2:-1],
"date" : text.parse_datetime(extr( "date" : dt.parse_iso(extr(
"itemprop='datePublished' content=", "<").rpartition(">")[2], "itemprop='datePublished' content=", "<").rpartition(">")[2]
"%Y-%m-%d %H:%M:%S", 9), ) - dt.timedelta(hours=9),
} }
def _extract_images(self, image_id, page): def _extract_images(self, image_id, page):

View File

@@ -114,7 +114,7 @@ class NitterExtractor(BaseExtractor):
return { return {
"author" : author, "author" : author,
"user" : self.user_obj or author, "user" : self.user_obj or author,
"date" : text.parse_datetime( "date" : self.parse_datetime(
extr('title="', '"'), "%b %d, %Y · %I:%M %p %Z"), extr('title="', '"'), "%b %d, %Y · %I:%M %p %Z"),
"tweet_id": link.rpartition("/")[2].partition("#")[0], "tweet_id": link.rpartition("/")[2].partition("#")[0],
"content": extr('class="tweet-content', "</div").partition(">")[2], "content": extr('class="tweet-content', "</div").partition(">")[2],
@@ -142,7 +142,7 @@ class NitterExtractor(BaseExtractor):
return { return {
"author" : author, "author" : author,
"user" : self.user_obj or author, "user" : self.user_obj or author,
"date" : text.parse_datetime( "date" : self.parse_datetime(
extr('title="', '"'), "%b %d, %Y · %I:%M %p %Z"), extr('title="', '"'), "%b %d, %Y · %I:%M %p %Z"),
"tweet_id": link.rpartition("/")[2].partition("#")[0], "tweet_id": link.rpartition("/")[2].partition("#")[0],
"content" : extr('class="quote-text', "</div").partition(">")[2], "content" : extr('class="quote-text', "</div").partition(">")[2],
@@ -173,7 +173,7 @@ class NitterExtractor(BaseExtractor):
"nick" : extr('title="', '"'), "nick" : extr('title="', '"'),
"name" : extr('title="@', '"'), "name" : extr('title="@', '"'),
"description" : extr('<p dir="auto">', '<'), "description" : extr('<p dir="auto">', '<'),
"date" : text.parse_datetime( "date" : self.parse_datetime(
extr('class="profile-joindate"><span title="', '"'), extr('class="profile-joindate"><span title="', '"'),
"%I:%M %p - %d %b %Y"), "%I:%M %p - %d %b %Y"),
"statuses_count" : text.parse_int(extr( "statuses_count" : text.parse_int(extr(

View File

@@ -9,7 +9,7 @@
"""Extractors for https://nozomi.la/""" """Extractors for https://nozomi.la/"""
from .common import Extractor, Message from .common import Extractor, Message
from .. import text from .. import text, dt
def decode_nozomi(n): def decode_nozomi(n):
@@ -49,10 +49,9 @@ class NozomiExtractor(Extractor):
post["character"] = self._list(post.get("character")) post["character"] = self._list(post.get("character"))
try: try:
post["date"] = text.parse_datetime( post["date"] = dt.parse_iso(post["date"] + ":00")
post["date"] + ":00", "%Y-%m-%d %H:%M:%S%z")
except Exception: except Exception:
post["date"] = None post["date"] = dt.NONE
post.update(data) post.update(data)

View File

@@ -53,8 +53,7 @@ class PahealExtractor(Extractor):
extr("<source src='", "'")), extr("<source src='", "'")),
"uploader": text.unquote(extr( "uploader": text.unquote(extr(
"class='username' href='/user/", "'")), "class='username' href='/user/", "'")),
"date" : text.parse_datetime( "date" : self.parse_datetime_iso(extr("datetime='", "'")),
extr("datetime='", "'"), "%Y-%m-%dT%H:%M:%S%z"),
"source" : text.unescape(text.extr( "source" : text.unescape(text.extr(
extr(">Source Link<", "</td>"), "href='", "'")), extr(">Source Link<", "</td>"), "href='", "'")),
} }
@@ -133,7 +132,7 @@ class PahealTagExtractor(PahealExtractor):
"duration" : text.parse_float(duration[:-1]), "duration" : text.parse_float(duration[:-1]),
"tags" : text.unescape(tags), "tags" : text.unescape(tags),
"size" : text.parse_bytes(size[:-1]), "size" : text.parse_bytes(size[:-1]),
"date" : text.parse_datetime(date, "%B %d, %Y; %H:%M"), "date" : self.parse_datetime(date, "%B %d, %Y; %H:%M"),
"filename" : f"{pid} - {tags}", "filename" : f"{pid} - {tags}",
"extension": ext, "extension": ext,
} }

View File

@@ -9,7 +9,7 @@
"""Extractors for https://www.patreon.com/""" """Extractors for https://www.patreon.com/"""
from .common import Extractor, Message from .common import Extractor, Message
from .. import text, util, exception from .. import text, util, dt, exception
from ..cache import memcache from ..cache import memcache
import collections import collections
import itertools import itertools
@@ -177,8 +177,7 @@ class PatreonExtractor(Extractor):
post, included, "attachments") post, included, "attachments")
attr["attachments_media"] = self._files( attr["attachments_media"] = self._files(
post, included, "attachments_media") post, included, "attachments_media")
attr["date"] = text.parse_datetime( attr["date"] = self.parse_datetime_iso(attr["published_at"])
attr["published_at"], "%Y-%m-%dT%H:%M:%S.%f%z")
try: try:
attr["campaign"] = (included["campaign"][ attr["campaign"] = (included["campaign"][
@@ -226,8 +225,7 @@ class PatreonExtractor(Extractor):
user = response.json()["data"] user = response.json()["data"]
attr = user["attributes"] attr = user["attributes"]
attr["id"] = user["id"] attr["id"] = user["id"]
attr["date"] = text.parse_datetime( attr["date"] = self.parse_datetime_iso(attr["created"])
attr["created"], "%Y-%m-%dT%H:%M:%S.%f%z")
return attr return attr
def _collection(self, collection_id): def _collection(self, collection_id):
@@ -236,8 +234,7 @@ class PatreonExtractor(Extractor):
coll = data["data"] coll = data["data"]
attr = coll["attributes"] attr = coll["attributes"]
attr["id"] = coll["id"] attr["id"] = coll["id"]
attr["date"] = text.parse_datetime( attr["date"] = self.parse_datetime_iso(attr["created_at"])
attr["created_at"], "%Y-%m-%dT%H:%M:%S.%f%z")
return attr return attr
def _filename(self, url): def _filename(self, url):
@@ -445,8 +442,7 @@ class PatreonUserExtractor(PatreonExtractor):
def posts(self): def posts(self):
if date_max := self._get_date_min_max(None, None)[1]: if date_max := self._get_date_min_max(None, None)[1]:
self._cursor = cursor = \ self._cursor = cursor = dt.from_ts(date_max).isoformat()
util.datetime_from_timestamp(date_max).isoformat()
self._init_cursor = lambda: cursor self._init_cursor = lambda: cursor
url = self._build_url("stream", ( url = self._build_url("stream", (

View File

@@ -35,8 +35,7 @@ class PexelsExtractor(Extractor):
post["type"] = attr["type"] post["type"] = attr["type"]
post.update(metadata) post.update(metadata)
post["date"] = text.parse_datetime( post["date"] = self.parse_datetime_iso(post["created_at"][:-5])
post["created_at"][:-5], "%Y-%m-%dT%H:%M:%S")
if "image" in post: if "image" in post:
url, _, query = post["image"]["download_link"].partition("?") url, _, query = post["image"]["download_link"].partition("?")

View File

@@ -36,8 +36,7 @@ class PhilomenaExtractor(BooruExtractor):
return url return url
def _prepare(self, post): def _prepare(self, post):
post["date"] = text.parse_datetime( post["date"] = self.parse_datetime_iso(post["created_at"][:19])
post["created_at"][:19], "%Y-%m-%dT%H:%M:%S")
BASE_PATTERN = PhilomenaExtractor.update({ BASE_PATTERN = PhilomenaExtractor.update({

View File

@@ -29,8 +29,7 @@ class PhotovogueUserExtractor(Extractor):
for photo in self.photos(): for photo in self.photos():
url = photo["gallery_image"] url = photo["gallery_image"]
photo["title"] = photo["title"].strip() photo["title"] = photo["title"].strip()
photo["date"] = text.parse_datetime( photo["date"] = self.parse_datetime_iso(photo["date"])
photo["date"], "%Y-%m-%dT%H:%M:%S.%f%z")
yield Message.Directory, photo yield Message.Directory, photo
yield Message.Url, url, text.nameext_from_url(url, photo) yield Message.Url, url, text.nameext_from_url(url, photo)

View File

@@ -29,8 +29,7 @@ class PicartoGalleryExtractor(Extractor):
def items(self): def items(self):
for post in self.posts(): for post in self.posts():
post["date"] = text.parse_datetime( post["date"] = self.parse_datetime_iso(post["created_at"])
post["created_at"], "%Y-%m-%d %H:%M:%S")
variations = post.pop("variations", ()) variations = post.pop("variations", ())
yield Message.Directory, post yield Message.Directory, post

View File

@@ -26,8 +26,7 @@ class PiczelExtractor(Extractor):
def items(self): def items(self):
for post in self.posts(): for post in self.posts():
post["tags"] = [t["title"] for t in post["tags"] if t["title"]] post["tags"] = [t["title"] for t in post["tags"] if t["title"]]
post["date"] = text.parse_datetime( post["date"] = self.parse_datetime_iso(post["created_at"])
post["created_at"], "%Y-%m-%dT%H:%M:%S.%f%z")
if post["multi"]: if post["multi"]:
images = post["images"] images = post["images"]

View File

@@ -48,8 +48,7 @@ class PillowfortExtractor(Extractor):
for url in inline(post["content"]): for url in inline(post["content"]):
files.append({"url": url}) files.append({"url": url})
post["date"] = text.parse_datetime( post["date"] = self.parse_datetime_iso(post["created_at"])
post["created_at"], "%Y-%m-%dT%H:%M:%S.%f%z")
post["post_id"] = post.pop("id") post["post_id"] = post.pop("id")
post["count"] = len(files) post["count"] = len(files)
yield Message.Directory, post yield Message.Directory, post
@@ -76,8 +75,7 @@ class PillowfortExtractor(Extractor):
if "id" not in file: if "id" not in file:
post["id"] = post["hash"] post["id"] = post["hash"]
if "created_at" in file: if "created_at" in file:
post["date"] = text.parse_datetime( post["date"] = self.parse_datetime_iso(file["created_at"])
file["created_at"], "%Y-%m-%dT%H:%M:%S.%f%z")
yield msgtype, url, post yield msgtype, url, post

View File

@@ -24,10 +24,6 @@ class PixeldrainExtractor(Extractor):
if api_key := self.config("api-key"): if api_key := self.config("api-key"):
self.session.auth = util.HTTPBasicAuth("", api_key) self.session.auth = util.HTTPBasicAuth("", api_key)
def parse_datetime(self, date_string):
return text.parse_datetime(
date_string, "%Y-%m-%dT%H:%M:%S.%fZ")
class PixeldrainFileExtractor(PixeldrainExtractor): class PixeldrainFileExtractor(PixeldrainExtractor):
"""Extractor for pixeldrain files""" """Extractor for pixeldrain files"""
@@ -45,7 +41,7 @@ class PixeldrainFileExtractor(PixeldrainExtractor):
file = self.request_json(url + "/info") file = self.request_json(url + "/info")
file["url"] = url + "?download" file["url"] = url + "?download"
file["date"] = self.parse_datetime(file["date_upload"]) file["date"] = self.parse_datetime_iso(file["date_upload"])
text.nameext_from_url(file["name"], file) text.nameext_from_url(file["name"], file)
yield Message.Directory, file yield Message.Directory, file
@@ -72,7 +68,7 @@ class PixeldrainAlbumExtractor(PixeldrainExtractor):
files = album["files"] files = album["files"]
album["count"] = album["file_count"] album["count"] = album["file_count"]
album["date"] = self.parse_datetime(album["date_created"]) album["date"] = self.parse_datetime_iso(album["date_created"])
if self.file_index: if self.file_index:
idx = text.parse_int(self.file_index) idx = text.parse_int(self.file_index)
@@ -91,7 +87,7 @@ class PixeldrainAlbumExtractor(PixeldrainExtractor):
file["album"] = album file["album"] = album
file["num"] = num file["num"] = num
file["url"] = url = f"{self.root}/api/file/{file['id']}?download" file["url"] = url = f"{self.root}/api/file/{file['id']}?download"
file["date"] = self.parse_datetime(file["date_upload"]) file["date"] = self.parse_datetime_iso(file["date_upload"])
text.nameext_from_url(file["name"], file) text.nameext_from_url(file["name"], file)
yield Message.Url, url, file yield Message.Url, url, file
@@ -112,7 +108,7 @@ class PixeldrainFolderExtractor(PixeldrainExtractor):
"mime_type" : data["file_type"], "mime_type" : data["file_type"],
"size" : data["file_size"], "size" : data["file_size"],
"hash_sha256": data["sha256_sum"], "hash_sha256": data["sha256_sum"],
"date" : self.parse_datetime(data["created"]), "date" : self.parse_datetime_iso(data["created"]),
} }
def items(self): def items(self):

View File

@@ -9,9 +9,8 @@
"""Extractors for https://www.pixiv.net/""" """Extractors for https://www.pixiv.net/"""
from .common import Extractor, Message, Dispatch from .common import Extractor, Message, Dispatch
from .. import text, util, exception from .. import text, util, dt, exception
from ..cache import cache, memcache from ..cache import cache, memcache
from datetime import datetime, timedelta
import itertools import itertools
import hashlib import hashlib
@@ -96,7 +95,7 @@ class PixivExtractor(Extractor):
if transform_tags: if transform_tags:
transform_tags(work) transform_tags(work)
work["num"] = 0 work["num"] = 0
work["date"] = text.parse_datetime(work["create_date"]) work["date"] = dt.parse_iso(work["create_date"])
work["rating"] = ratings.get(work["x_restrict"]) work["rating"] = ratings.get(work["x_restrict"])
work["suffix"] = "" work["suffix"] = ""
work.update(metadata) work.update(metadata)
@@ -353,10 +352,10 @@ class PixivExtractor(Extractor):
if fmt in urls: if fmt in urls:
yield urls[fmt] yield urls[fmt]
def _date_from_url(self, url, offset=timedelta(hours=9)): def _date_from_url(self, url, offset=dt.timedelta(hours=9)):
try: try:
_, _, _, _, _, y, m, d, H, M, S, _ = url.split("/") _, _, _, _, _, y, m, d, H, M, S, _ = url.split("/")
return datetime( return dt.datetime(
int(y), int(m), int(d), int(H), int(M), int(S)) - offset int(y), int(m), int(d), int(H), int(M), int(S)) - offset
except Exception: except Exception:
return None return None
@@ -715,8 +714,7 @@ class PixivRankingExtractor(PixivExtractor):
self.log.warning("invalid date '%s'", date) self.log.warning("invalid date '%s'", date)
date = None date = None
if not date: if not date:
now = util.datetime_utcnow() date = (dt.now() - dt.timedelta(days=1)).strftime("%Y-%m-%d")
date = (now - timedelta(days=1)).strftime("%Y-%m-%d")
self.date = date self.date = date
self.type = type = query.get("content") self.type = type = query.get("content")
@@ -891,8 +889,7 @@ class PixivSketchExtractor(Extractor):
for post in self.posts(): for post in self.posts():
media = post["media"] media = post["media"]
post["post_id"] = post["id"] post["post_id"] = post["id"]
post["date"] = text.parse_datetime( post["date"] = dt.parse_iso(post["created_at"])
post["created_at"], "%Y-%m-%dT%H:%M:%S.%f%z")
util.delete_items(post, ("id", "media", "_links")) util.delete_items(post, ("id", "media", "_links"))
yield Message.Directory, post yield Message.Directory, post
@@ -972,7 +969,7 @@ class PixivNovelExtractor(PixivExtractor):
if transform_tags: if transform_tags:
transform_tags(novel) transform_tags(novel)
novel["num"] = 0 novel["num"] = 0
novel["date"] = text.parse_datetime(novel["create_date"]) novel["date"] = dt.parse_iso(novel["create_date"])
novel["rating"] = ratings.get(novel["x_restrict"]) novel["rating"] = ratings.get(novel["x_restrict"])
novel["suffix"] = "" novel["suffix"] = ""
@@ -1154,7 +1151,7 @@ class PixivAppAPI():
"get_secure_url": "1", "get_secure_url": "1",
} }
time = util.datetime_utcnow().strftime("%Y-%m-%dT%H:%M:%S+00:00") time = dt.now().strftime("%Y-%m-%dT%H:%M:%S+00:00")
headers = { headers = {
"X-Client-Time": time, "X-Client-Time": time,
"X-Client-Hash": hashlib.md5( "X-Client-Hash": hashlib.md5(
@@ -1329,11 +1326,11 @@ class PixivAppAPI():
sort = params["sort"] sort = params["sort"]
if sort == "date_desc": if sort == "date_desc":
date_key = "end_date" date_key = "end_date"
date_off = timedelta(days=1) date_off = dt.timedelta(days=1)
date_cmp = lambda lhs, rhs: lhs >= rhs # noqa E731 date_cmp = lambda lhs, rhs: lhs >= rhs # noqa E731
elif sort == "date_asc": elif sort == "date_asc":
date_key = "start_date" date_key = "start_date"
date_off = timedelta(days=-1) date_off = dt.timedelta(days=-1)
date_cmp = lambda lhs, rhs: lhs <= rhs # noqa E731 date_cmp = lambda lhs, rhs: lhs <= rhs # noqa E731
else: else:
date_key = None date_key = None
@@ -1360,8 +1357,8 @@ class PixivAppAPI():
if date_key and text.parse_int(params.get("offset")) >= 5000: if date_key and text.parse_int(params.get("offset")) >= 5000:
date_last = data["illusts"][-1]["create_date"] date_last = data["illusts"][-1]["create_date"]
date_val = (text.parse_datetime( date_val = (dt.parse_iso(date_last) + date_off).strftime(
date_last) + date_off).strftime("%Y-%m-%d") "%Y-%m-%d")
self.log.info("Reached 'offset' >= 5000; " self.log.info("Reached 'offset' >= 5000; "
"Updating '%s' to '%s'", date_key, date_val) "Updating '%s' to '%s'", date_key, date_val)
params[date_key] = date_val params[date_key] = date_val

View File

@@ -9,8 +9,7 @@
"""Extractors for https://www.plurk.com/""" """Extractors for https://www.plurk.com/"""
from .common import Extractor, Message from .common import Extractor, Message
from .. import text, util, exception from .. import text, util, dt, exception
import datetime
class PlurkExtractor(Extractor): class PlurkExtractor(Extractor):
@@ -88,12 +87,10 @@ class PlurkTimelineExtractor(PlurkExtractor):
while plurks: while plurks:
yield from plurks yield from plurks
offset = datetime.datetime.strptime( offset = dt.parse(plurks[-1]["posted"], "%a, %d %b %Y %H:%M:%S %Z")
plurks[-1]["posted"], "%a, %d %b %Y %H:%M:%S %Z")
data["offset"] = offset.strftime("%Y-%m-%dT%H:%M:%S.000Z") data["offset"] = offset.strftime("%Y-%m-%dT%H:%M:%S.000Z")
response = self.request( plurks = self.request_json(
url, method="POST", headers=headers, data=data) url, method="POST", headers=headers, data=data)["plurks"]
plurks = response.json()["plurks"]
class PlurkPostExtractor(PlurkExtractor): class PlurkPostExtractor(PlurkExtractor):

View File

@@ -150,8 +150,7 @@ class PornhubGifExtractor(PornhubExtractor):
"tags" : extr("data-context-tag='", "'").split(","), "tags" : extr("data-context-tag='", "'").split(","),
"title": extr('"name": "', '"'), "title": extr('"name": "', '"'),
"url" : extr('"contentUrl": "', '"'), "url" : extr('"contentUrl": "', '"'),
"date" : text.parse_datetime( "date" : self.parse_datetime_iso(extr('"uploadDate": "', '"')),
extr('"uploadDate": "', '"'), "%Y-%m-%d"),
"viewkey" : extr('From this video: ' "viewkey" : extr('From this video: '
'<a href="/view_video.php?viewkey=', '"'), '<a href="/view_video.php?viewkey=', '"'),
"timestamp": extr('lass="directLink tstamp" rel="nofollow">', '<'), "timestamp": extr('lass="directLink tstamp" rel="nofollow">', '<'),

View File

@@ -31,7 +31,7 @@ class PostmillExtractor(BaseExtractor):
title = text.unescape(extr( title = text.unescape(extr(
'<meta property="og:title" content="', '">')) '<meta property="og:title" content="', '">'))
date = text.parse_datetime(extr( date = self.parse_datetime_iso(extr(
'<meta property="og:article:published_time" content="', '">')) '<meta property="og:article:published_time" content="', '">'))
username = extr( username = extr(
'<meta property="og:article:author" content="', '">') '<meta property="og:article:author" content="', '">')

View File

@@ -42,7 +42,7 @@ class RawkumaChapterExtractor(RawkumaBase, ChapterExtractor):
"chapter_minor": sep + minor, "chapter_minor": sep + minor,
"chapter_id" : text.parse_int(item["cid"]), "chapter_id" : text.parse_int(item["cid"]),
"title" : text.unescape(title), "title" : text.unescape(title),
"date" : text.parse_datetime( "date" : self.parse_datetime(
date, "%Y-%m-%dWIB%H:%M:%S%z"), date, "%Y-%m-%dWIB%H:%M:%S%z"),
"thumbnail" : item.get("t"), "thumbnail" : item.get("t"),
"lang" : "ja", "lang" : "ja",

Some files were not shown because too many files have changed in this diff Show More