merge branch 'dt': move datetime utils into separate module

- use 'datetime.fromisoformat()' when possible (#7671)
- return a datetime-compatible object for invalid datetimes
  (instead of a 'str' value)
This commit is contained in:
Mike Fährmann
2025-10-20 09:30:05 +02:00
177 changed files with 652 additions and 708 deletions

115
gallery_dl/dt.py Normal file
View File

@@ -0,0 +1,115 @@
# -*- coding: utf-8 -*-
# Copyright 2025 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
"""Date/Time utilities"""
import sys
import time
from datetime import datetime, date, timedelta, timezone # noqa F401
class NullDatetime(datetime):
    """Falsy datetime subclass used as a sentinel for invalid date/time values"""

    def __bool__(self):
        # make the sentinel falsy, so 'if dt:' checks reject it
        return False

    def __str__(self):
        return "[Invalid DateTime]"

    def __format__(self, format_spec):
        # ignore 'format_spec' entirely; always render the invalid marker
        return str(self)
NONE = NullDatetime(1, 1, 1)  # falsy sentinel returned for invalid datetimes
EPOCH = datetime(1970, 1, 1)  # Unix epoch as naive UTC datetime
SECOND = timedelta(0, 1)      # 1-second timedelta used for timestamp arithmetic
def normalize(dt):
    """Normalize 'dt' to a naive UTC datetime without microseconds

    Timezone-aware values are converted to UTC and stripped of their
    tzinfo; the microsecond component is always dropped.
    Naive values without microseconds are returned unchanged.
    """
    if dt.tzinfo is not None:
        # aware datetime: convert to UTC, drop tzinfo and microseconds
        return dt.astimezone(timezone.utc).replace(tzinfo=None, microsecond=0)
    if dt.microsecond:
        return dt.replace(microsecond=0)
    return dt
def convert(value):
    """Convert 'value' to a naive UTC datetime object

    Accepts datetime objects, ISO 8601 strings, and Unix timestamps;
    falsy input yields the NONE sentinel.
    """
    if not value:
        return NONE
    if isinstance(value, datetime):
        return normalize(value)
    if isinstance(value, str):
        result = parse_iso(value)
        if result is not NONE:
            return result
    return parse_ts(value)
def parse(dt_string, format):
    """Parse 'dt_string' according to 'format'

    Returns the NONE sentinel when parsing fails.
    """
    try:
        parsed = datetime.strptime(dt_string, format)
        return normalize(parsed)
    except Exception:
        return NONE
if sys.hexversion < 0x30c0000:
    # Python <= 3.11 (0x30c0000 corresponds to version 3.12.0)
    def parse_iso(dt_string):
        """Parse 'dt_string' as ISO 8601 value

        Returns the NONE sentinel when parsing fails.
        """
        try:
            if dt_string[-1] == "Z":
                # compat for Python < 3.11:
                # strip trailing 'Z' (UTC designator) before fromisoformat()
                dt_string = dt_string[:-1]
            elif dt_string[-5] in "+-":
                # compat for Python < 3.11:
                # insert ':' into a '+HHMM'/'-HHMM' offset without colon
                dt_string = f"{dt_string[:-2]}:{dt_string[-2:]}"
            return normalize(datetime.fromisoformat(dt_string))
        except Exception:
            return NONE
    from_ts = datetime.utcfromtimestamp
    now = datetime.utcnow
else:
    # Python >= 3.12
    def parse_iso(dt_string):
        """Parse 'dt_string' as ISO 8601 value

        Returns the NONE sentinel when parsing fails.
        """
        try:
            return normalize(datetime.fromisoformat(dt_string))
        except Exception:
            return NONE
    def from_ts(ts=None):
        """Convert Unix timestamp to naive UTC datetime

        NOTE(review): replaces datetime.utcfromtimestamp(),
        presumably because it is deprecated since Python 3.12 — confirm.
        """
        # time.gmtime(None) uses the current time, so from_ts() == "now"
        Y, m, d, H, M, S, _, _, _ = time.gmtime(ts)
        return datetime(Y, m, d, H, M, S)
    now = from_ts
def parse_ts(ts, default=NONE):
    """Create a datetime object from a Unix timestamp

    Returns 'default' when 'ts' cannot be converted.
    """
    try:
        result = from_ts(int(ts))
    except Exception:
        result = default
    return result
def to_ts(dt):
    """Convert naive UTC datetime to Unix timestamp (as float)"""
    delta = dt - EPOCH
    return delta / SECOND
def to_ts_string(dt):
    """Convert naive UTC datetime to Unix timestamp string

    Returns an empty string on error.
    """
    try:
        seconds = (dt - EPOCH) // SECOND
    except Exception:
        return ""
    return str(seconds)

View File

@@ -46,7 +46,7 @@ class _2chThreadExtractor(Extractor):
for post in posts:
if files := post.get("files"):
post["post_name"] = post["name"]
post["date"] = text.parse_timestamp(post["timestamp"])
post["date"] = self.parse_timestamp(post["timestamp"])
del post["files"]
del post["name"]

View File

@@ -65,7 +65,7 @@ class _2chenThreadExtractor(Extractor):
extr = text.extract_from(post)
return {
"name" : text.unescape(extr("<span>", "</span>")),
"date" : text.parse_datetime(
"date" : self.parse_datetime(
extr("<time", "<").partition(">")[2],
"%d %b %Y (%a) %H:%M:%S"
),

View File

@@ -7,7 +7,7 @@
"""Extractors for https://4archive.org/"""
from .common import Extractor, Message
from .. import text, util
from .. import text, dt
class _4archiveThreadExtractor(Extractor):
@@ -37,7 +37,7 @@ class _4archiveThreadExtractor(Extractor):
for post in posts:
post.update(data)
post["time"] = int(util.datetime_to_timestamp(post["date"]))
post["time"] = int(dt.to_ts(post["date"]))
yield Message.Directory, post
if "url" in post:
yield Message.Url, post["url"], text.nameext_from_url(
@@ -61,10 +61,9 @@ class _4archiveThreadExtractor(Extractor):
extr = text.extract_from(post)
data = {
"name": extr('class="name">', "</span>"),
"date": text.parse_datetime(
"date": self.parse_datetime_iso(
(extr('class="dateTime">', "<") or
extr('class="dateTime postNum" >', "<")).strip(),
"%Y-%m-%d %H:%M:%S"),
extr('class="dateTime postNum" >', "<")).strip()),
"no" : text.parse_int(extr(">Post No.", "<")),
}
if 'class="file"' in post:

View File

@@ -9,9 +9,8 @@
"""Extractors for https://8chan.moe/"""
from .common import Extractor, Message
from .. import text, util
from .. import text, dt
from ..cache import memcache
from datetime import timedelta
import itertools
BASE_PATTERN = r"(?:https?://)?8chan\.(moe|se|cc)"
@@ -44,7 +43,7 @@ class _8chanExtractor(Extractor):
def cookies_prepare(self):
# fetch captcha cookies
# (necessary to download without getting interrupted)
now = util.datetime_utcnow()
now = dt.now()
url = self.root + "/captcha.js"
params = {"d": now.strftime("%a %b %d %Y %H:%M:%S GMT+0000 (UTC)")}
self.request(url, params=params).content
@@ -57,7 +56,7 @@ class _8chanExtractor(Extractor):
if cookie.domain.endswith(domain):
cookie.expires = None
if cookie.name == "captchaexpiration":
cookie.value = (now + timedelta(30, 300)).strftime(
cookie.value = (now + dt.timedelta(30, 300)).strftime(
"%a, %d %b %Y %H:%M:%S GMT")
return self.cookies

View File

@@ -85,8 +85,7 @@ class _8musesAlbumExtractor(Extractor):
"parent" : text.parse_int(album["parentId"]),
"views" : text.parse_int(album["numberViews"]),
"likes" : text.parse_int(album["numberLikes"]),
"date" : text.parse_datetime(
album["updatedAt"], "%Y-%m-%dT%H:%M:%S.%fZ"),
"date" : self.parse_datetime_iso(album["updatedAt"]),
}
def _unobfuscate(self, data):

View File

@@ -33,7 +33,7 @@ class AdultempireGalleryExtractor(GalleryExtractor):
"gallery_id": text.parse_int(self.gallery_id),
"title" : text.unescape(extr('title="', '"')),
"studio" : extr(">studio</small>", "<").strip(),
"date" : text.parse_datetime(extr(
"date" : self.parse_datetime(extr(
">released</small>", "<").strip(), "%m/%d/%Y"),
"actors" : sorted(text.split_html(extr(
'<ul class="item-details item-cast-list ', '</ul>'))[1:]),

View File

@@ -33,7 +33,7 @@ class AgnphExtractor(booru.BooruExtractor):
self.cookies.set("confirmed_age", "true", domain="agn.ph")
def _prepare(self, post):
post["date"] = text.parse_timestamp(post["created_at"])
post["date"] = self.parse_timestamp(post["created_at"])
post["status"] = post["status"].strip()
post["has_children"] = ("true" in post["has_children"])

View File

@@ -182,11 +182,11 @@ class Ao3WorkExtractor(Ao3Extractor):
extr('<dd class="freeform tags">', "</dd>")),
"lang" : extr('<dd class="language" lang="', '"'),
"series" : extr('<dd class="series">', "</dd>"),
"date" : text.parse_datetime(
extr('<dd class="published">', "<"), "%Y-%m-%d"),
"date_completed": text.parse_datetime(
extr('>Completed:</dt><dd class="status">', "<"), "%Y-%m-%d"),
"date_updated" : text.parse_timestamp(
"date" : self.parse_datetime_iso(extr(
'<dd class="published">', "<")),
"date_completed": self.parse_datetime_iso(extr(
'>Completed:</dt><dd class="status">', "<")),
"date_updated" : self.parse_timestamp(
path.rpartition("updated_at=")[2]),
"words" : text.parse_int(
extr('<dd class="words">', "<").replace(",", "")),

View File

@@ -49,8 +49,7 @@ class ArcalivePostExtractor(ArcaliveExtractor):
files = self._extract_files(post)
post["count"] = len(files)
post["date"] = text.parse_datetime(
post["createdAt"][:19], "%Y-%m-%dT%H:%M:%S")
post["date"] = self.parse_datetime_iso(post["createdAt"][:19])
post["post_url"] = post_url = \
f"{self.root}/b/{post['boardSlug']}/{post['id']}"
post["_http_headers"] = {"Referer": post_url + "?p=1"}

View File

@@ -126,8 +126,7 @@ class ArtstationExtractor(Extractor):
data["title"] = text.unescape(data["title"])
data["description"] = text.unescape(text.remove_html(
data["description"]))
data["date"] = text.parse_datetime(
data["created_at"], "%Y-%m-%dT%H:%M:%S.%f%z")
data["date"] = self.parse_datetime_iso(data["created_at"])
assets = data["assets"]
del data["assets"]

View File

@@ -9,10 +9,9 @@
"""Extractors for https://aryion.com/"""
from .common import Extractor, Message
from .. import text, util, exception
from .. import text, util, dt, exception
from ..cache import cache
from email.utils import parsedate_tz
from datetime import datetime
BASE_PATTERN = r"(?:https?://)?(?:www\.)?aryion\.com/g4"
@@ -156,7 +155,7 @@ class AryionExtractor(Extractor):
"artist": artist,
"path" : text.split_html(extr(
"cookiecrumb'>", '</span'))[4:-1:2],
"date" : datetime(*parsedate_tz(lmod)[:6]),
"date" : dt.datetime(*parsedate_tz(lmod)[:6]),
"size" : text.parse_int(clen),
"views" : text.parse_int(extr("Views</b>:", "<").replace(",", "")),
"width" : text.parse_int(extr("Resolution</b>:", "x")),

View File

@@ -123,7 +123,7 @@ class BatotoChapterExtractor(BatotoBase, ChapterExtractor):
"chapter_minor" : minor,
"chapter_string": info,
"chapter_id" : text.parse_int(self.chapter_id),
"date" : text.parse_timestamp(extr(' time="', '"')[:-3]),
"date" : self.parse_timestamp(extr(' time="', '"')[:-3]),
}
def images(self, page):
@@ -167,8 +167,7 @@ class BatotoMangaExtractor(BatotoBase, MangaExtractor):
data["chapter"] = text.parse_int(chapter)
data["chapter_minor"] = sep + minor
data["date"] = text.parse_datetime(
extr('time="', '"'), "%Y-%m-%dT%H:%M:%S.%fZ")
data["date"] = self.parse_datetime_iso(extr('time="', '"'))
url = f"{self.root}/title/{href}"
results.append((url, data.copy()))
@@ -188,9 +187,9 @@ def _manga_info(self, manga_id, page=None):
"manga" : data["name"][1],
"manga_id" : text.parse_int(manga_id),
"manga_slug" : data["slug"][1],
"manga_date" : text.parse_timestamp(
"manga_date" : self.parse_timestamp(
data["dateCreate"][1] // 1000),
"manga_date_updated": text.parse_timestamp(
"manga_date_updated": self.parse_timestamp(
data["dateUpdate"][1] / 1000),
"author" : json_list(data["authors"]),
"artist" : json_list(data["artists"]),

View File

@@ -67,7 +67,7 @@ class BehanceExtractor(Extractor):
tags = [tag["title"] for tag in tags]
data["tags"] = tags
data["date"] = text.parse_timestamp(
data["date"] = self.parse_timestamp(
data.get("publishedOn") or data.get("conceived_on") or 0)
if creator := data.get("creator"):

View File

@@ -144,8 +144,8 @@ class BellazonExtractor(Extractor):
"title": schema["headline"],
"views": stats[0]["userInteractionCount"],
"posts": stats[1]["userInteractionCount"],
"date" : text.parse_datetime(schema["datePublished"]),
"date_updated": text.parse_datetime(schema["dateModified"]),
"date" : self.parse_datetime_iso(schema["datePublished"]),
"date_updated": self.parse_datetime_iso(schema["dateModified"]),
"description" : text.unescape(schema["text"]).strip(),
"section" : path[-2],
"author" : author["name"],
@@ -169,7 +169,7 @@ class BellazonExtractor(Extractor):
post = {
"id": extr('id="elComment_', '"'),
"author_url": extr(" href='", "'"),
"date": text.parse_datetime(extr("datetime='", "'")),
"date": self.parse_datetime_iso(extr("datetime='", "'")),
"content": extr("<!-- Post content -->", "\n\t\t</div>"),
}

View File

@@ -40,7 +40,7 @@ class BloggerExtractor(BaseExtractor):
blog = self.api.blog_by_url("http://" + self.blog)
blog["pages"] = blog["pages"]["totalItems"]
blog["posts"] = blog["posts"]["totalItems"]
blog["date"] = text.parse_datetime(blog["published"])
blog["date"] = self.parse_datetime_iso(blog["published"])
del blog["selfLink"]
findall_image = util.re(
@@ -65,7 +65,7 @@ class BloggerExtractor(BaseExtractor):
post["author"] = post["author"]["displayName"]
post["replies"] = post["replies"]["totalItems"]
post["content"] = text.remove_html(content)
post["date"] = text.parse_datetime(post["published"])
post["date"] = self.parse_datetime_iso(post["published"])
del post["selfLink"]
del post["blog"]

View File

@@ -135,8 +135,7 @@ class BlueskyExtractor(Extractor):
post["instance"] = self.instance
post["post_id"] = self._pid(post)
post["date"] = text.parse_datetime(
post["createdAt"][:19], "%Y-%m-%dT%H:%M:%S")
post["date"] = self.parse_datetime_iso(post["createdAt"][:19])
def _extract_files(self, post):
if "embed" not in post:

View File

@@ -78,7 +78,7 @@ class BoostyExtractor(Extractor):
post["links"] = links = []
if "createdAt" in post:
post["date"] = text.parse_timestamp(post["createdAt"])
post["date"] = self.parse_timestamp(post["createdAt"])
for block in post["data"]:
try:

View File

@@ -70,8 +70,7 @@ class BoothItemExtractor(BoothExtractor):
url + ".json", headers=headers, interval=False)
item["booth_category"] = item.pop("category", None)
item["date"] = text.parse_datetime(
item["published_at"], "%Y-%m-%dT%H:%M:%S.%f%z")
item["date"] = self.parse_datetime_iso(item["published_at"])
item["tags"] = [t["name"] for t in item["tags"]]
shop = item["shop"]

View File

@@ -168,7 +168,7 @@ class BunkrAlbumExtractor(LolisafeAlbumExtractor):
item, 'name: "', ".")
file["size"] = text.parse_int(text.extr(
item, "size: ", " ,\n"))
file["date"] = text.parse_datetime(text.extr(
file["date"] = self.parse_datetime(text.extr(
item, 'timestamp: "', '"'), "%H:%M:%S %d/%m/%Y")
yield file

View File

@@ -28,7 +28,7 @@ class CatboxAlbumExtractor(GalleryExtractor):
return {
"album_id" : self.page_url.rpartition("/")[2],
"album_name" : text.unescape(extr("<h1>", "<")),
"date" : text.parse_datetime(extr(
"date" : self.parse_datetime(extr(
"<p>Created ", "<"), "%B %d %Y"),
"description": text.unescape(extr("<p>", "<")),
}

View File

@@ -79,8 +79,7 @@ class CheveretoImageExtractor(CheveretoExtractor):
"url" : url,
"album": text.remove_html(extr(
"Added to <a", "</a>").rpartition(">")[2]),
"date" : text.parse_datetime(extr(
'<span title="', '"'), "%Y-%m-%d %H:%M:%S"),
"date" : self.parse_datetime_iso(extr('<span title="', '"')),
"user" : extr('username: "', '"'),
}
@@ -116,8 +115,7 @@ class CheveretoVideoExtractor(CheveretoExtractor):
'class="far fa-clock"></i>', ""),
"album": text.remove_html(extr(
"Added to <a", "</a>").rpartition(">")[2]),
"date" : text.parse_datetime(extr(
'<span title="', '"'), "%Y-%m-%d %H:%M:%S"),
"date" : self.parse_datetime_iso(extr('<span title="', '"')),
"user" : extr('username: "', '"'),
}

View File

@@ -61,7 +61,7 @@ class CienArticleExtractor(CienExtractor):
post["post_url"] = url
post["post_id"] = text.parse_int(post_id)
post["count"] = len(files)
post["date"] = text.parse_datetime(post["datePublished"])
post["date"] = self.parse_datetime_iso(post["datePublished"])
try:
post["author"]["id"] = text.parse_int(author_id)

View File

@@ -86,8 +86,7 @@ class CivitaiExtractor(Extractor):
images = self.api.images_post(post["id"])
post = self.api.post(post["id"])
post["date"] = text.parse_datetime(
post["publishedAt"], "%Y-%m-%dT%H:%M:%S.%fZ")
post["date"] = self.parse_datetime_iso(post["publishedAt"])
data = {
"post": post,
"user": post.pop("user"),
@@ -122,8 +121,7 @@ class CivitaiExtractor(Extractor):
data["post"] = post = self._extract_meta_post(file)
if post:
post.pop("user", None)
file["date"] = text.parse_datetime(
file["createdAt"], "%Y-%m-%dT%H:%M:%S.%fZ")
file["date"] = self.parse_datetime_iso(file["createdAt"])
data["url"] = url = self._url(file)
text.nameext_from_url(url, data)
@@ -180,8 +178,7 @@ class CivitaiExtractor(Extractor):
if "id" not in file and data["filename"].isdecimal():
file["id"] = text.parse_int(data["filename"])
if "date" not in file:
file["date"] = text.parse_datetime(
file["createdAt"], "%Y-%m-%dT%H:%M:%S.%fZ")
file["date"] = self.parse_datetime_iso(file["createdAt"])
if self._meta_generation:
file["generation"] = self._extract_meta_generation(file)
yield data
@@ -216,8 +213,7 @@ class CivitaiExtractor(Extractor):
def _extract_meta_post(self, image):
try:
post = self.api.post(image["postId"])
post["date"] = text.parse_datetime(
post["publishedAt"], "%Y-%m-%dT%H:%M:%S.%fZ")
post["date"] = self.parse_datetime_iso(post["publishedAt"])
return post
except Exception as exc:
return self.log.traceback(exc)
@@ -278,8 +274,7 @@ class CivitaiModelExtractor(CivitaiExtractor):
versions = (version,)
for version in versions:
version["date"] = text.parse_datetime(
version["createdAt"], "%Y-%m-%dT%H:%M:%S.%fZ")
version["date"] = self.parse_datetime_iso(version["createdAt"])
data = {
"model" : model,
@@ -593,8 +588,7 @@ class CivitaiGeneratedExtractor(CivitaiExtractor):
self._require_auth()
for gen in self.api.orchestrator_queryGeneratedImages():
gen["date"] = text.parse_datetime(
gen["createdAt"], "%Y-%m-%dT%H:%M:%S.%fZ")
gen["date"] = self.parse_datetime_iso(gen["createdAt"])
yield Message.Directory, gen
for step in gen.pop("steps", ()):
for image in step.pop("images", ()):

View File

@@ -114,10 +114,8 @@ class ComickChapterExtractor(ComickBase, ChapterExtractor):
"chapter_hid" : ch["hid"],
"chapter_string": chstr,
"group" : ch["group_name"],
"date" : text.parse_datetime(
ch["created_at"][:19], "%Y-%m-%dT%H:%M:%S"),
"date_updated" : text.parse_datetime(
ch["updated_at"][:19], "%Y-%m-%dT%H:%M:%S"),
"date" : self.parse_datetime_iso(ch["created_at"][:19]),
"date_updated" : self.parse_datetime_iso(ch["updated_at"][:19]),
"lang" : ch["lang"],
}

View File

@@ -60,6 +60,6 @@ class ComicvineTagExtractor(BooruExtractor):
_file_url = operator.itemgetter("original")
def _prepare(self, post):
post["date"] = text.parse_datetime(
post["date"] = self.parse_datetime(
post["dateCreated"], "%a, %b %d %Y")
post["tags"] = [tag["name"] for tag in post["tags"] if tag["name"]]

View File

@@ -19,11 +19,10 @@ import getpass
import logging
import requests
import threading
from datetime import datetime
from xml.etree import ElementTree
from requests.adapters import HTTPAdapter
from .message import Message
from .. import config, output, text, util, cache, exception
from .. import config, output, text, util, dt, cache, exception
urllib3 = requests.packages.urllib3
@@ -64,6 +63,10 @@ class Extractor():
else:
self.category = CATEGORY_MAP[self.category]
self.parse_datetime = dt.parse
self.parse_datetime_iso = dt.parse_iso
self.parse_timestamp = dt.parse_ts
self._cfgpath = ("extractor", self.category, self.subcategory)
self._parentdir = ""
@@ -313,9 +316,9 @@ class Extractor():
seconds = float(seconds)
until = now + seconds
elif until:
if isinstance(until, datetime):
if isinstance(until, dt.datetime):
# convert to UTC timestamp
until = util.datetime_to_timestamp(until)
until = dt.to_ts(until)
else:
until = float(until)
seconds = until - now
@@ -327,7 +330,7 @@ class Extractor():
return
if reason:
t = datetime.fromtimestamp(until).time()
t = dt.datetime.fromtimestamp(until).time()
isotime = f"{t.hour:02}:{t.minute:02}:{t.second:02}"
self.log.info("Waiting until %s (%s)", isotime, reason)
time.sleep(seconds)
@@ -652,7 +655,7 @@ class Extractor():
self.log.warning(
"cookies: %s/%s expired at %s",
cookie.domain.lstrip("."), cookie.name,
datetime.fromtimestamp(cookie.expires))
dt.datetime.fromtimestamp(cookie.expires))
continue
elif diff <= 86400:
@@ -694,7 +697,7 @@ class Extractor():
ts = self.config(key, default)
if isinstance(ts, str):
try:
ts = int(datetime.strptime(ts, fmt).timestamp())
ts = int(dt.parse(ts, fmt).timestamp())
except ValueError as exc:
self.log.warning("Unable to parse '%s': %s", key, exc)
ts = default

View File

@@ -47,7 +47,7 @@ class CyberdropAlbumExtractor(lolisafe.LolisafeAlbumExtractor):
"album_name" : text.unescape(extr('title="', '"')),
"album_size" : text.parse_bytes(extr(
'<p class="title">', "B")),
"date" : text.parse_datetime(extr(
"date" : self.parse_datetime(extr(
'<p class="title">', '<'), "%d.%m.%Y"),
"description": text.unescape(text.unescape( # double
desc.rpartition(" [R")[0])),

View File

@@ -113,7 +113,7 @@ class CyberfileFileExtractor(CyberfileExtractor):
"Filesize:", "</tr>"))[:-1]),
"tags" : text.split_html(extr(
"Keywords:", "</tr>")),
"date" : text.parse_datetime(text.remove_html(extr(
"date" : self.parse_datetime(text.remove_html(extr(
"Uploaded:", "</tr>")), "%d/%m/%Y %H:%M:%S"),
"permissions": text.remove_html(extr(
"Permissions:", "</tr>")).split(" &amp; "),

View File

@@ -9,8 +9,7 @@
"""Extractors for https://danbooru.donmai.us/ and other Danbooru instances"""
from .common import BaseExtractor, Message
from .. import text, util
import datetime
from .. import text, util, dt
class DanbooruExtractor(BaseExtractor):
@@ -69,8 +68,7 @@ class DanbooruExtractor(BaseExtractor):
continue
text.nameext_from_url(url, post)
post["date"] = text.parse_datetime(
post["created_at"], "%Y-%m-%dT%H:%M:%S.%f%z")
post["date"] = dt.parse_iso(post["created_at"])
post["tags"] = (
post["tag_string"].split(" ")
@@ -357,11 +355,11 @@ class DanbooruPopularExtractor(DanbooruExtractor):
def metadata(self):
self.params = params = text.parse_query(self.groups[-1])
scale = params.get("scale", "day")
date = params.get("date") or datetime.date.today().isoformat()
date = params.get("date") or dt.date.today().isoformat()
if scale == "week":
date = datetime.date.fromisoformat(date)
date = (date - datetime.timedelta(days=date.weekday())).isoformat()
date = dt.date.fromisoformat(date)
date = (date - dt.timedelta(days=date.weekday())).isoformat()
elif scale == "month":
date = date[:-3]

View File

@@ -68,7 +68,7 @@ class DankefuerslesenChapterExtractor(DankefuerslesenBase, ChapterExtractor):
"chapter_minor": minor,
"group" : manga["groups"][group_id].split(" & "),
"group_id" : text.parse_int(group_id),
"date" : text.parse_timestamp(data["release_date"][group_id]),
"date" : self.parse_timestamp(data["release_date"][group_id]),
"lang" : util.NONE,
"language" : util.NONE,
}

View File

@@ -9,7 +9,7 @@
"""Extractors for https://www.deviantart.com/"""
from .common import Extractor, Message, Dispatch
from .. import text, util, exception
from .. import text, util, dt, exception
from ..cache import cache, memcache
import collections
import mimetypes
@@ -259,7 +259,7 @@ class DeviantartExtractor(Extractor):
deviation["published_time"] = text.parse_int(
deviation["published_time"])
deviation["date"] = text.parse_timestamp(
deviation["date"] = self.parse_timestamp(
deviation["published_time"])
if self.comments:
@@ -1187,8 +1187,8 @@ class DeviantartStatusExtractor(DeviantartExtractor):
deviation["username"] = deviation["author"]["username"]
deviation["_username"] = deviation["username"].lower()
deviation["date"] = dt = text.parse_datetime(deviation["ts"])
deviation["published_time"] = int(util.datetime_to_timestamp(dt))
deviation["date"] = d = self.parse_datetime_iso(deviation["ts"])
deviation["published_time"] = int(dt.to_ts(d))
deviation["da_category"] = "Status"
deviation["category_path"] = "status"

View File

@@ -72,9 +72,7 @@ class DiscordExtractor(Extractor):
"author_files": [],
"message": self.extract_message_text(message),
"message_id": message["id"],
"date": text.parse_datetime(
message["timestamp"], "%Y-%m-%dT%H:%M:%S.%f%z"
),
"date": self.parse_datetime_iso(message["timestamp"]),
"files": []
})

View File

@@ -62,7 +62,7 @@ class DynastyscansChapterExtractor(DynastyscansBase, ChapterExtractor):
"author" : text.remove_html(author),
"group" : (text.remove_html(group) or
text.extr(group, ' alt="', '"')),
"date" : text.parse_datetime(extr(
"date" : self.parse_datetime(extr(
'"icon-calendar"></i> ', '<'), "%b %d, %Y"),
"tags" : text.split_html(extr(
"class='tags'>", "<div id='chapter-actions'")),
@@ -166,8 +166,6 @@ class DynastyscansAnthologyExtractor(DynastyscansSearchExtractor):
data["scanlator"] = content[1].text[11:]
data["tags"] = content[2].text[6:].lower().split(", ")
data["title"] = element[5].text
data["date"] = text.parse_datetime(
element[1].text, "%Y-%m-%dT%H:%M:%S%z")
data["date_updated"] = text.parse_datetime(
element[2].text, "%Y-%m-%dT%H:%M:%S%z")
data["date"] = self.parse_datetime_iso(element[1].text)
data["date_updated"] = self.parse_datetime_iso(element[2].text)
yield Message.Queue, element[4].text, data

View File

@@ -51,8 +51,7 @@ class E621Extractor(danbooru.DanbooruExtractor):
post["filename"] = file["md5"]
post["extension"] = file["ext"]
post["date"] = text.parse_datetime(
post["created_at"], "%Y-%m-%dT%H:%M:%S.%f%z")
post["date"] = self.parse_datetime_iso(post["created_at"])
post.update(data)
yield Message.Directory, post

View File

@@ -96,7 +96,7 @@ class EromeAlbumExtractor(EromeExtractor):
if not date:
ts = text.extr(group, '?v=', '"')
if len(ts) > 1:
date = text.parse_timestamp(ts)
date = self.parse_timestamp(ts)
data = {
"album_id": album_id,

View File

@@ -216,7 +216,7 @@ class ExhentaiGalleryExtractor(ExhentaiExtractor):
def _items_hitomi(self):
if self.config("metadata", False):
data = self.metadata_from_api()
data["date"] = text.parse_timestamp(data["posted"])
data["date"] = self.parse_timestamp(data["posted"])
else:
data = {}
@@ -233,7 +233,7 @@ class ExhentaiGalleryExtractor(ExhentaiExtractor):
data = self.metadata_from_page(page)
if self.config("metadata", False):
data.update(self.metadata_from_api())
data["date"] = text.parse_timestamp(data["posted"])
data["date"] = self.parse_timestamp(data["posted"])
if self.config("tags", False):
tags = collections.defaultdict(list)
for tag in data["tags"]:
@@ -258,8 +258,8 @@ class ExhentaiGalleryExtractor(ExhentaiExtractor):
"_" : extr('<div id="gdc"><div class="cs ct', '"'),
"eh_category" : extr('>', '<'),
"uploader" : extr('<div id="gdn">', '</div>'),
"date" : text.parse_datetime(extr(
'>Posted:</td><td class="gdt2">', '</td>'), "%Y-%m-%d %H:%M"),
"date" : self.parse_datetime_iso(extr(
'>Posted:</td><td class="gdt2">', '</td>')),
"parent" : extr(
'>Parent:</td><td class="gdt2"><a href="', '"'),
"expunged" : "Yes" != extr(

View File

@@ -108,7 +108,7 @@ class FacebookExtractor(Extractor):
'"message":{"delight_ranges"',
'"},"message_preferred_body"'
).rsplit('],"text":"', 1)[-1]),
"date": text.parse_timestamp(
"date": self.parse_timestamp(
text.extr(photo_page, '\\"publish_time\\":', ',') or
text.extr(photo_page, '"created_time":', ',')
),
@@ -172,7 +172,7 @@ class FacebookExtractor(Extractor):
"user_id": text.extr(
video_page, '"owner":{"__typename":"User","id":"', '"'
),
"date": text.parse_timestamp(text.extr(
"date": self.parse_timestamp(text.extr(
video_page, '\\"publish_time\\":', ','
)),
"type": "video"

View File

@@ -128,7 +128,7 @@ class FanboxExtractor(Extractor):
if file.get("extension", "").lower() in exts
]
post["date"] = text.parse_datetime(post["publishedDatetime"])
post["date"] = self.parse_datetime_iso(post["publishedDatetime"])
post["text"] = content_body.get("text") if content_body else None
post["isCoverImage"] = False

View File

@@ -35,7 +35,7 @@ class FanslyExtractor(Extractor):
for post in self.posts():
files = self._extract_files(post)
post["count"] = len(files)
post["date"] = text.parse_timestamp(post["createdAt"])
post["date"] = self.parse_timestamp(post["createdAt"])
yield Message.Directory, post
for post["num"], file in enumerate(files, 1):
@@ -117,8 +117,8 @@ class FanslyExtractor(Extractor):
file = {
**variant,
"format": variant["type"],
"date": text.parse_timestamp(media["createdAt"]),
"date_updated": text.parse_timestamp(media["updatedAt"]),
"date": self.parse_timestamp(media["createdAt"]),
"date_updated": self.parse_timestamp(media["updatedAt"]),
}
if "metadata" in location:

View File

@@ -101,7 +101,7 @@ class FantiaExtractor(Extractor):
"comment": resp["comment"],
"rating": resp["rating"],
"posted_at": resp["posted_at"],
"date": text.parse_datetime(
"date": self.parse_datetime(
resp["posted_at"], "%a, %d %b %Y %H:%M:%S %z"),
"fanclub_id": resp["fanclub"]["id"],
"fanclub_user_id": resp["fanclub"]["user"]["id"],

View File

@@ -98,7 +98,7 @@ class FlickrImageExtractor(FlickrExtractor):
photo["comments"] = text.parse_int(photo["comments"]["_content"])
photo["description"] = photo["description"]["_content"]
photo["tags"] = [t["raw"] for t in photo["tags"]["tag"]]
photo["date"] = text.parse_timestamp(photo["dateuploaded"])
photo["date"] = self.parse_timestamp(photo["dateuploaded"])
photo["views"] = text.parse_int(photo["views"])
photo["id"] = text.parse_int(photo["id"])
@@ -489,7 +489,7 @@ class FlickrAPI(oauth.OAuth1API):
def _extract_format(self, photo):
photo["description"] = photo["description"]["_content"].strip()
photo["views"] = text.parse_int(photo["views"])
photo["date"] = text.parse_timestamp(photo["dateupload"])
photo["date"] = self.parse_timestamp(photo["dateupload"])
photo["tags"] = photo["tags"].split()
self._extract_metadata(photo)

View File

@@ -143,7 +143,7 @@ class FuraffinityExtractor(Extractor):
data["folders"] = () # folders not present in old layout
data["user"] = self.user or data["artist_url"]
data["date"] = text.parse_timestamp(data["filename"].partition(".")[0])
data["date"] = self.parse_timestamp(data["filename"].partition(".")[0])
data["description"] = self._process_description(data["_description"])
data["thumbnail"] = (f"https://t.furaffinity.net/{post_id}@600-"
f"{path.rsplit('/', 2)[1]}.jpg")

View File

@@ -55,8 +55,7 @@ class Furry34Extractor(BooruExtractor):
def _prepare(self, post):
post.pop("files", None)
post["date"] = text.parse_datetime(
post["created"], "%Y-%m-%dT%H:%M:%S.%fZ")
post["date"] = self.parse_datetime_iso(post["created"])
post["filename"], _, post["format"] = post["filename"].rpartition(".")
if "tags" in post:
post["tags"] = [t["value"] for t in post["tags"]]

View File

@@ -246,7 +246,7 @@ class GelbooruFavoriteExtractor(GelbooruBase,
for fav in favs:
for post in self._api_request({"id": fav["favorite"]}):
post["date_favorited"] = text.parse_timestamp(fav["added"])
post["date_favorited"] = self.parse_timestamp(fav["added"])
yield post
params["pid"] += 1
@@ -273,7 +273,7 @@ class GelbooruFavoriteExtractor(GelbooruBase,
for fav in favs:
for post in self._api_request({"id": fav["favorite"]}):
post["date_favorited"] = text.parse_timestamp(fav["added"])
post["date_favorited"] = self.parse_timestamp(fav["added"])
yield post
params["pid"] -= 1

View File

@@ -35,8 +35,7 @@ class GelbooruV01Extractor(booru.BooruExtractor):
}
post["md5"] = post["file_url"].rpartition("/")[2].partition(".")[0]
post["date"] = text.parse_datetime(
post["created_at"], "%Y-%m-%d %H:%M:%S")
post["date"] = self.parse_datetime_iso(post["created_at"])
return post

View File

@@ -122,7 +122,7 @@ class GelbooruV02Extractor(booru.BooruExtractor):
def _prepare(self, post):
post["tags"] = post["tags"].strip()
post["date"] = text.parse_datetime(
post["date"] = self.parse_datetime(
post["created_at"], "%a %b %d %H:%M:%S %z %Y")
def _html(self, post):

View File

@@ -52,7 +52,7 @@ class GirlsreleasedSetExtractor(GirlsreleasedExtractor):
"id": json["id"],
"site": json["site"],
"model": [model for _, model in json["models"]],
"date": text.parse_timestamp(json["date"]),
"date": self.parse_timestamp(json["date"]),
"count": len(json["images"]),
"url": "https://girlsreleased.com/set/" + json["id"],
}

View File

@@ -101,9 +101,8 @@ class GirlswithmusclePostExtractor(GirlswithmuscleExtractor):
"model": model,
"model_list": self._parse_model_list(model),
"tags": text.split_html(tags)[1::2],
"date": text.parse_datetime(
text.extr(page, 'class="hover-time" title="', '"')[:19],
"%Y-%m-%d %H:%M:%S"),
"date": self.parse_datetime_iso(text.extr(
page, 'class="hover-time" title="', '"')[:19]),
"is_favorite": self._parse_is_favorite(page),
"source_filename": source,
"uploader": uploader,

View File

@@ -34,7 +34,7 @@ class HatenablogExtractor(Extractor):
def _handle_article(self, article: str):
extr = text.extract_from(article)
date = text.parse_datetime(extr('<time datetime="', '"'))
date = self.parse_datetime_iso(extr('<time datetime="', '"'))
entry_link = text.unescape(extr('<a href="', '"'))
entry = entry_link.partition("/entry/")[2]
title = text.unescape(extr('>', '<'))

View File

@@ -86,7 +86,7 @@ class HentaifoundryExtractor(Extractor):
.replace("\r\n", "\n")),
"ratings" : [text.unescape(r) for r in text.extract_iter(extr(
"class='ratings_box'", "</div>"), "title='", "'")],
"date" : text.parse_datetime(extr("datetime='", "'")),
"date" : self.parse_datetime_iso(extr("datetime='", "'")),
"views" : text.parse_int(extr(">Views</span>", "<")),
"score" : text.parse_int(extr(">Vote Score</span>", "<")),
"media" : text.unescape(extr(">Media</span>", "<").strip()),
@@ -126,7 +126,7 @@ class HentaifoundryExtractor(Extractor):
"title" : text.unescape(extr(
"<div class='titlebar'>", "</a>").rpartition(">")[2]),
"author" : text.unescape(extr('alt="', '"')),
"date" : text.parse_datetime(extr(
"date" : self.parse_datetime(extr(
">Updated<", "</span>").rpartition(">")[2], "%B %d, %Y"),
"status" : extr("class='indent'>", "<"),
}

View File

@@ -35,8 +35,7 @@ class HentaihandGalleryExtractor(GalleryExtractor):
"language" : info["language"]["name"],
"lang" : util.language_to_code(info["language"]["name"]),
"tags" : [t["slug"] for t in info["tags"]],
"date" : text.parse_datetime(
info["uploaded_at"], "%Y-%m-%d"),
"date" : self.parse_datetime_iso(info["uploaded_at"]),
}
for key in ("artists", "authors", "groups", "characters",
"relationships", "parodies"):

View File

@@ -84,7 +84,7 @@ class HitomiGalleryExtractor(HitomiExtractor, GalleryExtractor):
"type" : info["type"].capitalize(),
"language" : language,
"lang" : util.language_to_code(language),
"date" : text.parse_datetime(date, "%Y-%m-%d %H:%M:%S%z"),
"date" : self.parse_datetime_iso(date),
"tags" : tags,
"artist" : [o["artist"] for o in iget("artists") or ()],
"group" : [o["group"] for o in iget("groups") or ()],

View File

@@ -53,11 +53,9 @@ class ImagechestGalleryExtractor(GalleryExtractor):
def _metadata_api(self, page):
post = self.api.post(self.gallery_id)
post["date"] = text.parse_datetime(
post["created"], "%Y-%m-%dT%H:%M:%S.%fZ")
post["date"] = self.parse_datetime_iso(post["created"])
for img in post["images"]:
img["date"] = text.parse_datetime(
img["created"], "%Y-%m-%dT%H:%M:%S.%fZ")
img["date"] = self.parse_datetime_iso(img["created"])
post["gallery_id"] = self.gallery_id
post.pop("image_count", None)

View File

@@ -159,8 +159,7 @@ class ImgbbImageExtractor(ImgbbExtractor):
"width" : text.parse_int(extr('"og:image:width" content="', '"')),
"height": text.parse_int(extr('"og:image:height" content="', '"')),
"album" : extr("Added to <a", "</a>"),
"date" : text.parse_datetime(extr(
'<span title="', '"'), "%Y-%m-%d %H:%M:%S"),
"date" : self.parse_datetime_iso(extr('<span title="', '"')),
"user" : util.json_loads(extr(
"CHV.obj.resource=", "};") + "}").get("user"),
}

View File

@@ -31,7 +31,7 @@ class ImgthGalleryExtractor(GalleryExtractor):
"title": text.unescape(extr("<h1>", "</h1>")),
"count": text.parse_int(extr(
"total of images in this gallery: ", " ")),
"date" : text.parse_datetime(
"date" : self.parse_datetime(
extr("created on ", " by <")
.replace("th, ", " ", 1).replace("nd, ", " ", 1)
.replace("st, ", " ", 1), "%B %d %Y at %H:%M"),

View File

@@ -38,7 +38,7 @@ class ImgurExtractor(Extractor):
image["url"] = url = \
f"https://i.imgur.com/{image['id']}.{image['ext']}"
image["date"] = text.parse_datetime(image["created_at"])
image["date"] = self.parse_datetime_iso(image["created_at"])
image["_http_validate"] = self._validate
text.nameext_from_url(url, image)
@@ -106,7 +106,7 @@ class ImgurAlbumExtractor(ImgurExtractor):
del album["media"]
count = len(images)
album["date"] = text.parse_datetime(album["created_at"])
album["date"] = self.parse_datetime_iso(album["created_at"])
try:
del album["ad_url"]

View File

@@ -35,8 +35,8 @@ class InkbunnyExtractor(Extractor):
for post in self.posts():
post.update(metadata)
post["date"] = text.parse_datetime(
post["create_datetime"] + "00", "%Y-%m-%d %H:%M:%S.%f%z")
post["date"] = self.parse_datetime_iso(
post["create_datetime"][:19])
post["tags"] = [kw["keyword_name"] for kw in post["keywords"]]
post["ratings"] = [r["name"] for r in post["ratings"]]
files = post["files"]
@@ -52,8 +52,8 @@ class InkbunnyExtractor(Extractor):
for post["num"], file in enumerate(files, 1):
post.update(file)
post["deleted"] = (file["deleted"] == "t")
post["date"] = text.parse_datetime(
file["create_datetime"] + "00", "%Y-%m-%d %H:%M:%S.%f%z")
post["date"] = self.parse_datetime_iso(
file["create_datetime"][:19])
text.nameext_from_url(file["file_name"], post)
url = file["file_url_full"]

View File

@@ -173,7 +173,7 @@ class InstagramExtractor(Extractor):
post_url = f"{self.root}/stories/highlights/{reel_id}/"
data = {
"user" : post.get("user"),
"expires": text.parse_timestamp(expires),
"expires": self.parse_timestamp(expires),
"post_id": reel_id,
"post_shortcode": shortcode_from_id(reel_id),
"post_url": post_url,
@@ -224,7 +224,7 @@ class InstagramExtractor(Extractor):
data["owner_id"] = owner["pk"]
data["username"] = owner.get("username")
data["fullname"] = owner.get("full_name")
data["post_date"] = data["date"] = text.parse_timestamp(
data["post_date"] = data["date"] = self.parse_timestamp(
post.get("taken_at") or post.get("created_at") or post.get("seen"))
data["_files"] = files = []
for num, item in enumerate(items, 1):
@@ -278,7 +278,7 @@ class InstagramExtractor(Extractor):
media = {
"num" : num,
"date" : text.parse_timestamp(item.get("taken_at") or
"date" : self.parse_timestamp(item.get("taken_at") or
media.get("taken_at") or
post.get("taken_at")),
"media_id" : item["pk"],
@@ -299,7 +299,7 @@ class InstagramExtractor(Extractor):
if "reshared_story_media_author" in item:
media["author"] = item["reshared_story_media_author"]
if "expiring_at" in item:
media["expires"] = text.parse_timestamp(post["expiring_at"])
media["expires"] = self.parse_timestamp(post["expiring_at"])
self._extract_tagged_users(item, media)
files.append(media)
@@ -342,7 +342,7 @@ class InstagramExtractor(Extractor):
"post_id" : post["id"],
"post_shortcode": post["shortcode"],
"post_url" : f"{self.root}/p/{post['shortcode']}/",
"post_date" : text.parse_timestamp(post["taken_at_timestamp"]),
"post_date" : self.parse_timestamp(post["taken_at_timestamp"]),
"description": text.parse_unicode_escapes("\n".join(
edge["node"]["text"]
for edge in post["edge_media_to_caption"]["edges"]
@@ -634,7 +634,7 @@ class InstagramStoriesTrayExtractor(InstagramExtractor):
def items(self):
base = f"{self.root}/stories/id:"
for story in self.api.reels_tray():
story["date"] = text.parse_timestamp(story["latest_reel_media"])
story["date"] = self.parse_timestamp(story["latest_reel_media"])
story["_extractor"] = InstagramStoriesExtractor
yield Message.Queue, f"{base}{story['id']}/", story

View File

@@ -36,8 +36,8 @@ class IssuuPublicationExtractor(IssuuBase, GalleryExtractor):
'{"":' + data.replace('\\"', '"')))
doc = data["initialDocumentData"]["document"]
doc["date"] = text.parse_datetime(
doc["originalPublishDateInISOString"], "%Y-%m-%dT%H:%M:%S.%fZ")
doc["date"] = self.parse_datetime_iso(
doc["originalPublishDateInISOString"])
self.count = text.parse_int(doc["pageCount"])
self.base = (f"https://image.isu.pub/{doc['revisionId']}-"

View File

@@ -32,8 +32,7 @@ class ItakuExtractor(Extractor):
def items(self):
if images := self.images():
for image in images:
image["date"] = text.parse_datetime(
image["date_added"], "%Y-%m-%dT%H:%M:%S.%fZ")
image["date"] = self.parse_datetime_iso(image["date_added"])
for category, tags in image.pop("categorized_tags").items():
image[f"tags_{category.lower()}"] = [
t["name"] for t in tags]
@@ -60,15 +59,14 @@ class ItakuExtractor(Extractor):
for post in posts:
images = post.pop("gallery_images") or ()
post["count"] = len(images)
post["date"] = text.parse_datetime(
post["date_added"], "%Y-%m-%dT%H:%M:%S.%fZ")
post["date"] = self.parse_datetime_iso(post["date_added"])
post["tags"] = [t["name"] for t in post["tags"]]
yield Message.Directory, post
for post["num"], image in enumerate(images, 1):
post["file"] = image
image["date"] = text.parse_datetime(
image["date_added"], "%Y-%m-%dT%H:%M:%S.%fZ")
image["date"] = self.parse_datetime_iso(
image["date_added"])
url = image["image"]
yield Message.Url, url, text.nameext_from_url(url, post)

View File

@@ -122,10 +122,10 @@ class IwaraExtractor(Extractor):
info["file_id"] = file_info.get("id")
info["filename"] = filename
info["extension"] = extension
info["date"] = text.parse_datetime(
file_info.get("createdAt"), "%Y-%m-%dT%H:%M:%S.%fZ")
info["date_updated"] = text.parse_datetime(
file_info.get("updatedAt"), "%Y-%m-%dT%H:%M:%S.%fZ")
info["date"] = self.parse_datetime_iso(
file_info.get("createdAt"))
info["date_updated"] = self.parse_datetime_iso(
file_info.get("updatedAt"))
info["mime"] = file_info.get("mime")
info["size"] = file_info.get("size")
info["width"] = file_info.get("width")
@@ -144,8 +144,7 @@ class IwaraExtractor(Extractor):
"status" : user.get("status"),
"role" : user.get("role"),
"premium": user.get("premium"),
"date" : text.parse_datetime(
user.get("createdAt"), "%Y-%m-%dT%H:%M:%S.000Z"),
"date" : self.parse_datetime_iso(user.get("createdAt")),
"description": profile.get("body"),
}

View File

@@ -32,8 +32,7 @@ class KabeuchiUserExtractor(Extractor):
if post.get("is_ad") or not post["image1"]:
continue
post["date"] = text.parse_datetime(
post["created_at"], "%Y-%m-%d %H:%M:%S")
post["date"] = self.parse_datetime_iso(post["created_at"])
yield Message.Directory, post
for key in keys:

View File

@@ -244,7 +244,7 @@ class KemonoExtractor(Extractor):
def _parse_datetime(self, date_string):
if len(date_string) > 19:
date_string = date_string[:19]
return text.parse_datetime(date_string, "%Y-%m-%dT%H:%M:%S")
return self.parse_datetime_iso(date_string)
def _revisions(self, posts):
return itertools.chain.from_iterable(

View File

@@ -119,8 +119,7 @@ class LensdumpImageExtractor(LensdumpBase, Extractor):
'property="image:width" content="', '"')),
"height": text.parse_int(extr(
'property="image:height" content="', '"')),
"date" : text.parse_datetime(extr(
'<span title="', '"'), "%Y-%m-%d %H:%M:%S"),
"date" : self.parse_datetime_iso(extr('<span title="', '"')),
}
text.nameext_from_url(data["url"], data)

View File

@@ -45,7 +45,7 @@ class LivedoorExtractor(Extractor):
"title" : text.unescape(extr('dc:title="', '"')),
"categories" : extr('dc:subject="', '"').partition(",")[::2],
"description": extr('dc:description="', '"'),
"date" : text.parse_datetime(extr('dc:date="', '"')),
"date" : self.parse_datetime_iso(extr('dc:date="', '"')),
"tags" : text.split_html(tags)[1:] if tags else [],
"user" : self.user,
"body" : body,

View File

@@ -29,7 +29,7 @@ class LofterExtractor(Extractor):
post = post["post"]
post["blog_name"] = post["blogInfo"]["blogName"]
post["date"] = text.parse_timestamp(post["publishTime"] // 1000)
post["date"] = self.parse_timestamp(post["publishTime"] // 1000)
post_type = post["type"]
# Article

View File

@@ -69,7 +69,7 @@ class LusciousAlbumExtractor(LusciousExtractor):
image["thumbnail"] = ""
image["tags"] = [item["text"] for item in image["tags"]]
image["date"] = text.parse_timestamp(image["created"])
image["date"] = self.parse_timestamp(image["created"])
image["id"] = text.parse_int(image["id"])
url = (image["url_to_original"] or image["url_to_video"]
@@ -188,7 +188,7 @@ fragment AlbumStandard on Album {
album["created_by"] = album["created_by"]["display_name"]
album["id"] = text.parse_int(album["id"])
album["date"] = text.parse_timestamp(album["created"])
album["date"] = self.parse_timestamp(album["created"])
return album

View File

@@ -47,8 +47,7 @@ class MadokamiMangaExtractor(MadokamiExtractor):
"path": text.unescape(extr('href="', '"')),
"chapter_string": text.unescape(extr(">", "<")),
"size": text.parse_bytes(extr("<td>", "</td>")),
"date": text.parse_datetime(
extr("<td>", "</td>").strip(), "%Y-%m-%d %H:%M"),
"date": self.parse_datetime_iso(extr("<td>", "</td>").strip()),
})
if self.config("chapter-reverse"):

View File

@@ -68,7 +68,7 @@ class MangadexExtractor(Extractor):
"chapter" : text.parse_int(chnum),
"chapter_minor": f"{sep}{minor}",
"chapter_id": chapter["id"],
"date" : text.parse_datetime(cattributes["publishAt"]),
"date" : self.parse_datetime_iso(cattributes["publishAt"]),
"group" : [group["attributes"]["name"]
for group in relationships["scanlation_group"]],
"lang" : lang,
@@ -109,8 +109,8 @@ class MangadexCoversExtractor(MangadexExtractor):
"cover" : cattributes["fileName"],
"lang" : cattributes.get("locale"),
"volume" : text.parse_int(cattributes["volume"]),
"date" : text.parse_datetime(cattributes["createdAt"]),
"date_updated": text.parse_datetime(cattributes["updatedAt"]),
"date" : self.parse_datetime_iso(cattributes["createdAt"]),
"date_updated": self.parse_datetime_iso(cattributes["updatedAt"]),
}
@@ -454,7 +454,7 @@ def _manga_info(self, uuid):
"manga_id": manga["id"],
"manga_titles": [t.popitem()[1]
for t in mattr.get("altTitles") or ()],
"manga_date" : text.parse_datetime(mattr.get("createdAt")),
"manga_date" : self.parse_datetime_iso(mattr.get("createdAt")),
"description" : (mattr["description"].get("en") or
next(iter(mattr["description"].values()), "")),
"demographic": mattr.get("publicationDemographic"),

View File

@@ -99,7 +99,7 @@ class MangafoxMangaExtractor(MangaExtractor):
"chapter" : text.parse_int(chapter),
"chapter_minor" : minor or "",
"chapter_string": cstr,
"date" : text.parse_datetime(
"date" : self.parse_datetime(
extr('right">', '</span>'), "%b %d, %Y"),
}
chapter.update(data)

View File

@@ -50,10 +50,10 @@ class ManganeloChapterExtractor(ManganeloExtractor, ChapterExtractor):
extr = text.extract_from(page)
data = {
"date" : text.parse_datetime(extr(
'"datePublished": "', '"')[:19], "%Y-%m-%dT%H:%M:%S"),
"date_updated": text.parse_datetime(extr(
'"dateModified": "', '"')[:19], "%Y-%m-%dT%H:%M:%S"),
"date" : self.parse_datetime_iso(extr(
'"datePublished": "', '"')[:19]),
"date_updated": self.parse_datetime_iso(extr(
'"dateModified": "', '"')[:19]),
"manga_id" : text.parse_int(extr("comic_id =", ";")),
"chapter_id" : text.parse_int(extr("chapter_id =", ";")),
"manga" : extr("comic_name =", ";").strip('" '),
@@ -99,7 +99,7 @@ class ManganeloMangaExtractor(ManganeloExtractor, MangaExtractor):
manga = text.unescape(extr("<h1>", "<"))
author = text.remove_html(extr("<li>Author(s) :", "</a>"))
status = extr("<li>Status :", "<").strip()
update = text.parse_datetime(extr(
update = self.parse_datetime(extr(
"<li>Last updated :", "<").strip(), "%b-%d-%Y %I:%M:%S %p")
tags = text.split_html(extr(">Genres :", "</li>"))[::2]
@@ -121,7 +121,7 @@ class ManganeloMangaExtractor(ManganeloExtractor, MangaExtractor):
"chapter" : text.parse_int(chapter),
"chapter_minor": (sep and ".") + minor,
"title" : title.partition(": ")[2],
"date" : text.parse_datetime(date, "%b-%d-%Y %H:%M"),
"date" : self.parse_datetime(date, "%b-%d-%Y %H:%M"),
"lang" : "en",
"language": "English",
}))

View File

@@ -101,7 +101,7 @@ class MangaparkChapterExtractor(MangaparkBase, ChapterExtractor):
"language" : util.code_to_language(lang),
"source" : chapter["srcTitle"],
"source_id" : chapter["sourceId"],
"date" : text.parse_timestamp(chapter["dateCreate"] // 1000),
"date" : self.parse_timestamp(chapter["dateCreate"] // 1000),
}
def images(self, _):
@@ -138,7 +138,7 @@ class MangaparkMangaExtractor(MangaparkBase, Extractor):
"language" : util.code_to_language(lang),
"source" : chapter["srcTitle"],
"source_id" : chapter["sourceId"],
"date" : text.parse_timestamp(
"date" : self.parse_timestamp(
chapter["dateCreate"] // 1000),
"_extractor": MangaparkChapterExtractor,
}

View File

@@ -40,10 +40,8 @@ class MangataroChapterExtractor(MangataroBase, ChapterExtractor):
"chapter_minor": str(round(minor, 5))[1:] if minor else "",
"chapter_id" : text.parse_int(chapter_id),
"chapter_url" : comic["url"],
"date" : text.parse_datetime(
comic["datePublished"], "%Y-%m-%dT%H:%M:%S%z"),
"date_updated" : text.parse_datetime(
comic["dateModified"], "%Y-%m-%dT%H:%M:%S%z"),
"date" : self.parse_datetime_iso(comic["datePublished"]),
"date_updated" : self.parse_datetime_iso(comic["dateModified"]),
}
def images(self, page):

View File

@@ -119,7 +119,7 @@ class MangoxoAlbumExtractor(MangoxoExtractor):
"album": {
"id": self.album_id,
"name": text.unescape(title),
"date": text.parse_datetime(date.strip(), "%Y.%m.%d %H:%M"),
"date": self.parse_datetime(date.strip(), "%Y.%m.%d %H:%M"),
"description": text.unescape(descr),
},
"count": text.parse_int(count),

View File

@@ -64,8 +64,7 @@ class MastodonExtractor(BaseExtractor):
status["count"] = len(attachments)
status["tags"] = [tag["name"] for tag in status["tags"]]
status["date"] = text.parse_datetime(
status["created_at"][:19], "%Y-%m-%dT%H:%M:%S")
status["date"] = self.parse_datetime_iso(status["created_at"][:19])
yield Message.Directory, status
for status["num"], media in enumerate(attachments, 1):
@@ -319,10 +318,8 @@ class MastodonAPI():
if code == 404:
raise exception.NotFoundError()
if code == 429:
self.extractor.wait(until=text.parse_datetime(
response.headers["x-ratelimit-reset"],
"%Y-%m-%dT%H:%M:%S.%fZ",
))
self.extractor.wait(until=self.parse_datetime_iso(
response.headers["x-ratelimit-reset"]))
continue
raise exception.AbortExtraction(response.json().get("error"))

View File

@@ -48,13 +48,11 @@ class MisskeyExtractor(BaseExtractor):
note["instance"] = self.instance
note["instance_remote"] = note["user"]["host"]
note["count"] = len(files)
note["date"] = text.parse_datetime(
note["createdAt"], "%Y-%m-%dT%H:%M:%S.%f%z")
note["date"] = self.parse_datetime_iso(note["createdAt"])
yield Message.Directory, note
for note["num"], file in enumerate(files, 1):
file["date"] = text.parse_datetime(
file["createdAt"], "%Y-%m-%dT%H:%M:%S.%f%z")
file["date"] = self.parse_datetime_iso(file["createdAt"])
note["file"] = file
url = file["url"]
yield Message.Url, url, text.nameext_from_url(url, note)

View File

@@ -9,9 +9,8 @@
"""Extractors for Moebooru based sites"""
from .booru import BooruExtractor
from .. import text, util
from .. import text, util, dt
import collections
import datetime
class MoebooruExtractor(BooruExtractor):
@@ -21,7 +20,7 @@ class MoebooruExtractor(BooruExtractor):
page_start = 1
def _prepare(self, post):
post["date"] = text.parse_timestamp(post["created_at"])
post["date"] = dt.parse_ts(post["created_at"])
def _html(self, post):
url = f"{self.root}/post/show/{post['id']}"
@@ -164,14 +163,14 @@ class MoebooruPopularExtractor(MoebooruExtractor):
date = (f"{params['year']:>04}-{params.get('month', '01'):>02}-"
f"{params.get('day', '01'):>02}")
else:
date = datetime.date.today().isoformat()
date = dt.date.today().isoformat()
scale = self.scale
if scale.startswith("by_"):
scale = scale[3:]
if scale == "week":
date = datetime.date.fromisoformat(date)
date = (date - datetime.timedelta(days=date.weekday())).isoformat()
date = dt.date.fromisoformat(date)
date = (date - dt.timedelta(days=date.weekday())).isoformat()
elif scale == "month":
date = date[:-3]

View File

@@ -9,9 +9,8 @@
"""Extractors for https://motherless.com/"""
from .common import Extractor, Message
from .. import text, util, exception
from .. import text, dt, exception
from ..cache import memcache
from datetime import timedelta
BASE_PATTERN = r"(?:https?://)?motherless\.com"
@@ -115,14 +114,14 @@ class MotherlessExtractor(Extractor):
return data
def _parse_datetime(self, dt):
if " ago" not in dt:
return text.parse_datetime(dt, "%d %b %Y")
def _parse_datetime(self, dt_string):
if " ago" not in dt_string:
return dt.parse(dt_string, "%d %b %Y")
value = text.parse_int(dt[:-5])
delta = timedelta(0, value*3600) if dt[-5] == "h" else timedelta(value)
return (util.datetime_utcnow() - delta).replace(
hour=0, minute=0, second=0)
value = text.parse_int(dt_string[:-5])
delta = (dt.timedelta(0, value*3600) if dt_string[-5] == "h" else
dt.timedelta(value))
return (dt.now() - delta).replace(hour=0, minute=0, second=0)
@memcache(keyarg=2)
def _extract_gallery_title(self, page, gallery_id):

View File

@@ -9,8 +9,7 @@
"""Extractors for https://blog.naver.com/"""
from .common import GalleryExtractor, Extractor, Message
from .. import text, util
import datetime
from .. import text, util, dt
import time
@@ -67,11 +66,11 @@ class NaverBlogPostExtractor(NaverBlogBase, GalleryExtractor):
return data
def _parse_datetime(self, date_string):
if "" in date_string:
def _parse_datetime(self, dt_string):
if "" in dt_string:
ts = time.gmtime()
return datetime.datetime(ts.tm_year, ts.tm_mon, ts.tm_mday)
return text.parse_datetime(date_string, "%Y. %m. %d. %H:%M")
return dt.datetime(ts.tm_year, ts.tm_mon, ts.tm_mday)
return dt.parse(dt_string, "%Y. %m. %d. %H:%M")
def images(self, page):
files = []

View File

@@ -31,17 +31,17 @@ class NaverChzzkExtractor(Extractor):
data["uid"] = data["objectId"]
data["user"] = comment["user"]
data["count"] = len(files)
data["date"] = text.parse_datetime(
data["date"] = self.parse_datetime(
data["createdDate"], "%Y%m%d%H%M%S")
yield Message.Directory, data
for data["num"], file in enumerate(files, 1):
if extra := file.get("extraJson"):
file.update(util.json_loads(extra))
file["date"] = text.parse_datetime(
file["createdDate"], "%Y-%m-%dT%H:%M:%S.%f%z")
file["date_updated"] = text.parse_datetime(
file["updatedDate"], "%Y-%m-%dT%H:%M:%S.%f%z")
file["date"] = self.parse_datetime_iso(
file["createdDate"])
file["date_updated"] = self.parse_datetime_iso(
file["updatedDate"])
data["file"] = file
url = file["attachValue"]
yield Message.Url, url, text.nameext_from_url(url, data)

View File

@@ -59,8 +59,8 @@ class NekohousePostExtractor(NekohouseExtractor):
'class="scrape__user-name', '</').rpartition(">")[2].strip()),
"title" : text.unescape(extr(
'class="scrape__title', '</').rpartition(">")[2]),
"date" : text.parse_datetime(extr(
'datetime="', '"')[:19], "%Y-%m-%d %H:%M:%S"),
"date" : self.parse_datetime_iso(extr(
'datetime="', '"')[:19]),
"content": text.unescape(extr(
'class="scrape__content">', "</div>").strip()),
}

View File

@@ -9,7 +9,7 @@
"""Extractors for https://www.newgrounds.com/"""
from .common import Extractor, Message, Dispatch
from .. import text, util, exception
from .. import text, util, dt, exception
from ..cache import cache
import itertools
@@ -218,7 +218,7 @@ class NewgroundsExtractor(Extractor):
"description": text.unescape(extr(':description" content="', '"')),
"type" : "art",
"_type" : "i",
"date" : text.parse_datetime(extr(
"date" : dt.parse_iso(extr(
'itemprop="datePublished" content="', '"')),
"rating" : extr('class="rated-', '"'),
"url" : full('src="', '"'),
@@ -268,7 +268,7 @@ class NewgroundsExtractor(Extractor):
"description": text.unescape(extr(':description" content="', '"')),
"type" : "audio",
"_type" : "a",
"date" : text.parse_datetime(extr(
"date" : dt.parse_iso(extr(
'itemprop="datePublished" content="', '"')),
"url" : extr('{"url":"', '"').replace("\\/", "/"),
"index" : text.parse_int(index),
@@ -287,7 +287,7 @@ class NewgroundsExtractor(Extractor):
src = src.replace("\\/", "/")
formats = ()
type = extr(',"description":"', '"')
date = text.parse_datetime(extr(
date = dt.parse_iso(extr(
'itemprop="datePublished" content="', '"'))
if type:
type = type.rpartition(" ")[2].lower()
@@ -302,7 +302,7 @@ class NewgroundsExtractor(Extractor):
sources = self.request_json(url, headers=headers)["sources"]
formats = self._video_formats(sources)
src = next(formats, "")
date = text.parse_timestamp(src.rpartition("?")[2])
date = self.parse_timestamp(src.rpartition("?")[2])
type = "movie"
return {

View File

@@ -9,7 +9,7 @@
"""Extractors for nijie instances"""
from .common import BaseExtractor, Message, Dispatch, AsynchronousMixin
from .. import text, exception
from .. import text, dt, exception
from ..cache import cache
@@ -82,8 +82,9 @@ class NijieExtractor(AsynchronousMixin, BaseExtractor):
"title" : keywords[0].strip(),
"description": text.unescape(extr(
'"description": "', '"').replace("&amp;", "&")),
"date" : text.parse_datetime(extr(
'"datePublished": "', '"'), "%a %b %d %H:%M:%S %Y", 9),
"date" : dt.parse(extr(
'"datePublished": "', '"'), "%a %b %d %H:%M:%S %Y"
) - dt.timedelta(hours=9),
"artist_id" : text.parse_int(extr('/members.php?id=', '"')),
"artist_name": keywords[1],
"tags" : keywords[2:-1],
@@ -101,9 +102,9 @@ class NijieExtractor(AsynchronousMixin, BaseExtractor):
"artist_id" : text.parse_int(extr('members.php?id=', '"')),
"artist_name": keywords[1],
"tags" : keywords[2:-1],
"date" : text.parse_datetime(extr(
"itemprop='datePublished' content=", "<").rpartition(">")[2],
"%Y-%m-%d %H:%M:%S", 9),
"date" : dt.parse_iso(extr(
"itemprop='datePublished' content=", "<").rpartition(">")[2]
) - dt.timedelta(hours=9),
}
def _extract_images(self, image_id, page):

View File

@@ -114,7 +114,7 @@ class NitterExtractor(BaseExtractor):
return {
"author" : author,
"user" : self.user_obj or author,
"date" : text.parse_datetime(
"date" : self.parse_datetime(
extr('title="', '"'), "%b %d, %Y · %I:%M %p %Z"),
"tweet_id": link.rpartition("/")[2].partition("#")[0],
"content": extr('class="tweet-content', "</div").partition(">")[2],
@@ -142,7 +142,7 @@ class NitterExtractor(BaseExtractor):
return {
"author" : author,
"user" : self.user_obj or author,
"date" : text.parse_datetime(
"date" : self.parse_datetime(
extr('title="', '"'), "%b %d, %Y · %I:%M %p %Z"),
"tweet_id": link.rpartition("/")[2].partition("#")[0],
"content" : extr('class="quote-text', "</div").partition(">")[2],
@@ -173,7 +173,7 @@ class NitterExtractor(BaseExtractor):
"nick" : extr('title="', '"'),
"name" : extr('title="@', '"'),
"description" : extr('<p dir="auto">', '<'),
"date" : text.parse_datetime(
"date" : self.parse_datetime(
extr('class="profile-joindate"><span title="', '"'),
"%I:%M %p - %d %b %Y"),
"statuses_count" : text.parse_int(extr(

View File

@@ -9,7 +9,7 @@
"""Extractors for https://nozomi.la/"""
from .common import Extractor, Message
from .. import text
from .. import text, dt
def decode_nozomi(n):
@@ -49,10 +49,9 @@ class NozomiExtractor(Extractor):
post["character"] = self._list(post.get("character"))
try:
post["date"] = text.parse_datetime(
post["date"] + ":00", "%Y-%m-%d %H:%M:%S%z")
post["date"] = dt.parse_iso(post["date"] + ":00")
except Exception:
post["date"] = None
post["date"] = dt.NONE
post.update(data)

View File

@@ -53,8 +53,7 @@ class PahealExtractor(Extractor):
extr("<source src='", "'")),
"uploader": text.unquote(extr(
"class='username' href='/user/", "'")),
"date" : text.parse_datetime(
extr("datetime='", "'"), "%Y-%m-%dT%H:%M:%S%z"),
"date" : self.parse_datetime_iso(extr("datetime='", "'")),
"source" : text.unescape(text.extr(
extr(">Source Link<", "</td>"), "href='", "'")),
}
@@ -133,7 +132,7 @@ class PahealTagExtractor(PahealExtractor):
"duration" : text.parse_float(duration[:-1]),
"tags" : text.unescape(tags),
"size" : text.parse_bytes(size[:-1]),
"date" : text.parse_datetime(date, "%B %d, %Y; %H:%M"),
"date" : self.parse_datetime(date, "%B %d, %Y; %H:%M"),
"filename" : f"{pid} - {tags}",
"extension": ext,
}

View File

@@ -9,7 +9,7 @@
"""Extractors for https://www.patreon.com/"""
from .common import Extractor, Message
from .. import text, util, exception
from .. import text, util, dt, exception
from ..cache import memcache
import collections
import itertools
@@ -177,8 +177,7 @@ class PatreonExtractor(Extractor):
post, included, "attachments")
attr["attachments_media"] = self._files(
post, included, "attachments_media")
attr["date"] = text.parse_datetime(
attr["published_at"], "%Y-%m-%dT%H:%M:%S.%f%z")
attr["date"] = self.parse_datetime_iso(attr["published_at"])
try:
attr["campaign"] = (included["campaign"][
@@ -226,8 +225,7 @@ class PatreonExtractor(Extractor):
user = response.json()["data"]
attr = user["attributes"]
attr["id"] = user["id"]
attr["date"] = text.parse_datetime(
attr["created"], "%Y-%m-%dT%H:%M:%S.%f%z")
attr["date"] = self.parse_datetime_iso(attr["created"])
return attr
def _collection(self, collection_id):
@@ -236,8 +234,7 @@ class PatreonExtractor(Extractor):
coll = data["data"]
attr = coll["attributes"]
attr["id"] = coll["id"]
attr["date"] = text.parse_datetime(
attr["created_at"], "%Y-%m-%dT%H:%M:%S.%f%z")
attr["date"] = self.parse_datetime_iso(attr["created_at"])
return attr
def _filename(self, url):
@@ -445,8 +442,7 @@ class PatreonUserExtractor(PatreonExtractor):
def posts(self):
if date_max := self._get_date_min_max(None, None)[1]:
self._cursor = cursor = \
util.datetime_from_timestamp(date_max).isoformat()
self._cursor = cursor = dt.from_ts(date_max).isoformat()
self._init_cursor = lambda: cursor
url = self._build_url("stream", (

View File

@@ -35,8 +35,7 @@ class PexelsExtractor(Extractor):
post["type"] = attr["type"]
post.update(metadata)
post["date"] = text.parse_datetime(
post["created_at"][:-5], "%Y-%m-%dT%H:%M:%S")
post["date"] = self.parse_datetime_iso(post["created_at"][:-5])
if "image" in post:
url, _, query = post["image"]["download_link"].partition("?")

View File

@@ -36,8 +36,7 @@ class PhilomenaExtractor(BooruExtractor):
return url
def _prepare(self, post):
post["date"] = text.parse_datetime(
post["created_at"][:19], "%Y-%m-%dT%H:%M:%S")
post["date"] = self.parse_datetime_iso(post["created_at"][:19])
BASE_PATTERN = PhilomenaExtractor.update({

View File

@@ -29,8 +29,7 @@ class PhotovogueUserExtractor(Extractor):
for photo in self.photos():
url = photo["gallery_image"]
photo["title"] = photo["title"].strip()
photo["date"] = text.parse_datetime(
photo["date"], "%Y-%m-%dT%H:%M:%S.%f%z")
photo["date"] = self.parse_datetime_iso(photo["date"])
yield Message.Directory, photo
yield Message.Url, url, text.nameext_from_url(url, photo)

View File

@@ -29,8 +29,7 @@ class PicartoGalleryExtractor(Extractor):
def items(self):
for post in self.posts():
post["date"] = text.parse_datetime(
post["created_at"], "%Y-%m-%d %H:%M:%S")
post["date"] = self.parse_datetime_iso(post["created_at"])
variations = post.pop("variations", ())
yield Message.Directory, post

View File

@@ -26,8 +26,7 @@ class PiczelExtractor(Extractor):
def items(self):
for post in self.posts():
post["tags"] = [t["title"] for t in post["tags"] if t["title"]]
post["date"] = text.parse_datetime(
post["created_at"], "%Y-%m-%dT%H:%M:%S.%f%z")
post["date"] = self.parse_datetime_iso(post["created_at"])
if post["multi"]:
images = post["images"]

View File

@@ -48,8 +48,7 @@ class PillowfortExtractor(Extractor):
for url in inline(post["content"]):
files.append({"url": url})
post["date"] = text.parse_datetime(
post["created_at"], "%Y-%m-%dT%H:%M:%S.%f%z")
post["date"] = self.parse_datetime_iso(post["created_at"])
post["post_id"] = post.pop("id")
post["count"] = len(files)
yield Message.Directory, post
@@ -76,8 +75,7 @@ class PillowfortExtractor(Extractor):
if "id" not in file:
post["id"] = post["hash"]
if "created_at" in file:
post["date"] = text.parse_datetime(
file["created_at"], "%Y-%m-%dT%H:%M:%S.%f%z")
post["date"] = self.parse_datetime_iso(file["created_at"])
yield msgtype, url, post

View File

@@ -24,10 +24,6 @@ class PixeldrainExtractor(Extractor):
if api_key := self.config("api-key"):
self.session.auth = util.HTTPBasicAuth("", api_key)
def parse_datetime(self, date_string):
return text.parse_datetime(
date_string, "%Y-%m-%dT%H:%M:%S.%fZ")
class PixeldrainFileExtractor(PixeldrainExtractor):
"""Extractor for pixeldrain files"""
@@ -45,7 +41,7 @@ class PixeldrainFileExtractor(PixeldrainExtractor):
file = self.request_json(url + "/info")
file["url"] = url + "?download"
file["date"] = self.parse_datetime(file["date_upload"])
file["date"] = self.parse_datetime_iso(file["date_upload"])
text.nameext_from_url(file["name"], file)
yield Message.Directory, file
@@ -72,7 +68,7 @@ class PixeldrainAlbumExtractor(PixeldrainExtractor):
files = album["files"]
album["count"] = album["file_count"]
album["date"] = self.parse_datetime(album["date_created"])
album["date"] = self.parse_datetime_iso(album["date_created"])
if self.file_index:
idx = text.parse_int(self.file_index)
@@ -91,7 +87,7 @@ class PixeldrainAlbumExtractor(PixeldrainExtractor):
file["album"] = album
file["num"] = num
file["url"] = url = f"{self.root}/api/file/{file['id']}?download"
file["date"] = self.parse_datetime(file["date_upload"])
file["date"] = self.parse_datetime_iso(file["date_upload"])
text.nameext_from_url(file["name"], file)
yield Message.Url, url, file
@@ -112,7 +108,7 @@ class PixeldrainFolderExtractor(PixeldrainExtractor):
"mime_type" : data["file_type"],
"size" : data["file_size"],
"hash_sha256": data["sha256_sum"],
"date" : self.parse_datetime(data["created"]),
"date" : self.parse_datetime_iso(data["created"]),
}
def items(self):

View File

@@ -9,9 +9,8 @@
"""Extractors for https://www.pixiv.net/"""
from .common import Extractor, Message, Dispatch
from .. import text, util, exception
from .. import text, util, dt, exception
from ..cache import cache, memcache
from datetime import datetime, timedelta
import itertools
import hashlib
@@ -96,7 +95,7 @@ class PixivExtractor(Extractor):
if transform_tags:
transform_tags(work)
work["num"] = 0
work["date"] = text.parse_datetime(work["create_date"])
work["date"] = dt.parse_iso(work["create_date"])
work["rating"] = ratings.get(work["x_restrict"])
work["suffix"] = ""
work.update(metadata)
@@ -353,10 +352,10 @@ class PixivExtractor(Extractor):
if fmt in urls:
yield urls[fmt]
def _date_from_url(self, url, offset=timedelta(hours=9)):
def _date_from_url(self, url, offset=dt.timedelta(hours=9)):
try:
_, _, _, _, _, y, m, d, H, M, S, _ = url.split("/")
return datetime(
return dt.datetime(
int(y), int(m), int(d), int(H), int(M), int(S)) - offset
except Exception:
return None
@@ -715,8 +714,7 @@ class PixivRankingExtractor(PixivExtractor):
self.log.warning("invalid date '%s'", date)
date = None
if not date:
now = util.datetime_utcnow()
date = (now - timedelta(days=1)).strftime("%Y-%m-%d")
date = (dt.now() - dt.timedelta(days=1)).strftime("%Y-%m-%d")
self.date = date
self.type = type = query.get("content")
@@ -891,8 +889,7 @@ class PixivSketchExtractor(Extractor):
for post in self.posts():
media = post["media"]
post["post_id"] = post["id"]
post["date"] = text.parse_datetime(
post["created_at"], "%Y-%m-%dT%H:%M:%S.%f%z")
post["date"] = dt.parse_iso(post["created_at"])
util.delete_items(post, ("id", "media", "_links"))
yield Message.Directory, post
@@ -972,7 +969,7 @@ class PixivNovelExtractor(PixivExtractor):
if transform_tags:
transform_tags(novel)
novel["num"] = 0
novel["date"] = text.parse_datetime(novel["create_date"])
novel["date"] = dt.parse_iso(novel["create_date"])
novel["rating"] = ratings.get(novel["x_restrict"])
novel["suffix"] = ""
@@ -1154,7 +1151,7 @@ class PixivAppAPI():
"get_secure_url": "1",
}
time = util.datetime_utcnow().strftime("%Y-%m-%dT%H:%M:%S+00:00")
time = dt.now().strftime("%Y-%m-%dT%H:%M:%S+00:00")
headers = {
"X-Client-Time": time,
"X-Client-Hash": hashlib.md5(
@@ -1329,11 +1326,11 @@ class PixivAppAPI():
sort = params["sort"]
if sort == "date_desc":
date_key = "end_date"
date_off = timedelta(days=1)
date_off = dt.timedelta(days=1)
date_cmp = lambda lhs, rhs: lhs >= rhs # noqa E731
elif sort == "date_asc":
date_key = "start_date"
date_off = timedelta(days=-1)
date_off = dt.timedelta(days=-1)
date_cmp = lambda lhs, rhs: lhs <= rhs # noqa E731
else:
date_key = None
@@ -1360,8 +1357,8 @@ class PixivAppAPI():
if date_key and text.parse_int(params.get("offset")) >= 5000:
date_last = data["illusts"][-1]["create_date"]
date_val = (text.parse_datetime(
date_last) + date_off).strftime("%Y-%m-%d")
date_val = (dt.parse_iso(date_last) + date_off).strftime(
"%Y-%m-%d")
self.log.info("Reached 'offset' >= 5000; "
"Updating '%s' to '%s'", date_key, date_val)
params[date_key] = date_val

View File

@@ -9,8 +9,7 @@
"""Extractors for https://www.plurk.com/"""
from .common import Extractor, Message
from .. import text, util, exception
import datetime
from .. import text, util, dt, exception
class PlurkExtractor(Extractor):
@@ -88,12 +87,10 @@ class PlurkTimelineExtractor(PlurkExtractor):
while plurks:
yield from plurks
offset = datetime.datetime.strptime(
plurks[-1]["posted"], "%a, %d %b %Y %H:%M:%S %Z")
offset = dt.parse(plurks[-1]["posted"], "%a, %d %b %Y %H:%M:%S %Z")
data["offset"] = offset.strftime("%Y-%m-%dT%H:%M:%S.000Z")
response = self.request(
url, method="POST", headers=headers, data=data)
plurks = response.json()["plurks"]
plurks = self.request_json(
url, method="POST", headers=headers, data=data)["plurks"]
class PlurkPostExtractor(PlurkExtractor):

View File

@@ -150,8 +150,7 @@ class PornhubGifExtractor(PornhubExtractor):
"tags" : extr("data-context-tag='", "'").split(","),
"title": extr('"name": "', '"'),
"url" : extr('"contentUrl": "', '"'),
"date" : text.parse_datetime(
extr('"uploadDate": "', '"'), "%Y-%m-%d"),
"date" : self.parse_datetime_iso(extr('"uploadDate": "', '"')),
"viewkey" : extr('From this video: '
'<a href="/view_video.php?viewkey=', '"'),
"timestamp": extr('lass="directLink tstamp" rel="nofollow">', '<'),

View File

@@ -31,7 +31,7 @@ class PostmillExtractor(BaseExtractor):
title = text.unescape(extr(
'<meta property="og:title" content="', '">'))
date = text.parse_datetime(extr(
date = self.parse_datetime_iso(extr(
'<meta property="og:article:published_time" content="', '">'))
username = extr(
'<meta property="og:article:author" content="', '">')

View File

@@ -42,7 +42,7 @@ class RawkumaChapterExtractor(RawkumaBase, ChapterExtractor):
"chapter_minor": sep + minor,
"chapter_id" : text.parse_int(item["cid"]),
"title" : text.unescape(title),
"date" : text.parse_datetime(
"date" : self.parse_datetime(
date, "%Y-%m-%dWIB%H:%M:%S%z"),
"thumbnail" : item.get("t"),
"lang" : "ja",

Some files were not shown because too many files have changed in this diff Show More