Merge branch 'dt': move datetime utils into a separate module
- use 'datetime.fromisoformat()' when possible (#7671) - return a datetime-compatible object for invalid datetimes (instead of a 'str' value)
This commit is contained in:
115
gallery_dl/dt.py
Normal file
115
gallery_dl/dt.py
Normal file
@@ -0,0 +1,115 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
# Copyright 2025 Mike Fährmann
|
||||
#
|
||||
# This program is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License version 2 as
|
||||
# published by the Free Software Foundation.
|
||||
|
||||
"""Date/Time utilities"""
|
||||
|
||||
import sys
|
||||
import time
|
||||
from datetime import datetime, date, timedelta, timezone # noqa F401
|
||||
|
||||
|
||||
class NullDatetime(datetime):
    """Falsy 'datetime' subclass representing an invalid/unparsable date.

    Instances support regular datetime arithmetic and comparisons, but
    evaluate to False in boolean context and render as a fixed
    placeholder string.
    """

    def __bool__(self):
        # an invalid datetime is "empty"
        return False

    def __str__(self):
        return "[Invalid DateTime]"

    def __format__(self, format_spec):
        # any format spec is deliberately ignored; always show the placeholder
        return "[Invalid DateTime]"


# sentinel returned by the parsing helpers on failure
NONE = NullDatetime(1, 1, 1)
# Unix epoch as a naive UTC datetime
EPOCH = datetime(1970, 1, 1)
# timedelta of exactly one second, used for timestamp arithmetic
SECOND = timedelta(seconds=1)
|
||||
|
||||
|
||||
def normalize(dt):
    """Return 'dt' as a naive UTC datetime without sub-second precision."""
    if dt.tzinfo is None:
        # already naive; strip microseconds only when present,
        # otherwise hand the object back unchanged
        return dt.replace(microsecond=0) if dt.microsecond else dt
    # timezone-aware: shift to UTC, then drop tzinfo and microseconds
    return dt.astimezone(timezone.utc).replace(tzinfo=None, microsecond=0)
|
||||
|
||||
|
||||
def convert(value):
    """Convert 'value' to a naive UTC datetime object

    Accepts falsy values (-> NONE), datetime objects, ISO 8601 strings,
    and anything parseable as a Unix timestamp.
    """
    if not value:
        return NONE
    if isinstance(value, datetime):
        return normalize(value)
    if isinstance(value, str):
        result = parse_iso(value)
        if result is not NONE:
            return result
    # fall back to interpreting 'value' as a Unix timestamp
    return parse_ts(value)
|
||||
|
||||
|
||||
def parse(dt_string, format):
    """Parse 'dt_string' according to 'format'; return NONE on failure."""
    try:
        parsed = datetime.strptime(dt_string, format)
        return normalize(parsed)
    except Exception:
        return NONE
|
||||
|
||||
|
||||
if sys.hexversion >= 0x30c0000:
    # Python >= 3.12

    def parse_iso(dt_string):
        """Parse 'dt_string' as ISO 8601 value"""
        try:
            return normalize(datetime.fromisoformat(dt_string))
        except Exception:
            return NONE

    def from_ts(ts=None):
        """Convert Unix timestamp to naive UTC datetime"""
        # replacement for the deprecated 'datetime.utcfromtimestamp()'
        tm = time.gmtime(ts)
        return datetime(tm[0], tm[1], tm[2], tm[3], tm[4], tm[5])

    now = from_ts

else:
    # Python <= 3.11

    def parse_iso(dt_string):
        """Parse 'dt_string' as ISO 8601 value"""
        try:
            if dt_string[-1] == "Z":
                # 'fromisoformat()' before 3.11 rejects a trailing 'Z'
                dt_string = dt_string[:-1]
            elif dt_string[-5] in "+-":
                # insert ':' into '+HHMM' offsets for older 'fromisoformat()'
                dt_string = f"{dt_string[:-2]}:{dt_string[-2:]}"
            return normalize(datetime.fromisoformat(dt_string))
        except Exception:
            return NONE

    from_ts = datetime.utcfromtimestamp
    now = datetime.utcnow
|
||||
|
||||
|
||||
def parse_ts(ts, default=NONE):
    """Create a datetime object from a Unix timestamp 'ts'.

    Returns 'default' when 'ts' cannot be converted to an integer
    or lies outside the supported timestamp range.
    """
    try:
        seconds = int(ts)
        return from_ts(seconds)
    except Exception:
        return default
|
||||
|
||||
|
||||
def to_ts(dt):
    """Convert naive UTC datetime 'dt' to a Unix timestamp (float)."""
    # dividing two timedeltas yields a float number of seconds
    delta = dt - EPOCH
    return delta / SECOND
|
||||
|
||||
|
||||
def to_ts_string(dt):
    """Convert naive UTC datetime 'dt' to a Unix timestamp string.

    Returns "" when 'dt' does not support datetime arithmetic.
    """
    try:
        # floor division of two timedeltas yields an int
        seconds = (dt - EPOCH) // SECOND
    except Exception:
        return ""
    return str(seconds)
|
||||
@@ -46,7 +46,7 @@ class _2chThreadExtractor(Extractor):
|
||||
for post in posts:
|
||||
if files := post.get("files"):
|
||||
post["post_name"] = post["name"]
|
||||
post["date"] = text.parse_timestamp(post["timestamp"])
|
||||
post["date"] = self.parse_timestamp(post["timestamp"])
|
||||
del post["files"]
|
||||
del post["name"]
|
||||
|
||||
|
||||
@@ -65,7 +65,7 @@ class _2chenThreadExtractor(Extractor):
|
||||
extr = text.extract_from(post)
|
||||
return {
|
||||
"name" : text.unescape(extr("<span>", "</span>")),
|
||||
"date" : text.parse_datetime(
|
||||
"date" : self.parse_datetime(
|
||||
extr("<time", "<").partition(">")[2],
|
||||
"%d %b %Y (%a) %H:%M:%S"
|
||||
),
|
||||
|
||||
@@ -7,7 +7,7 @@
|
||||
"""Extractors for https://4archive.org/"""
|
||||
|
||||
from .common import Extractor, Message
|
||||
from .. import text, util
|
||||
from .. import text, dt
|
||||
|
||||
|
||||
class _4archiveThreadExtractor(Extractor):
|
||||
@@ -37,7 +37,7 @@ class _4archiveThreadExtractor(Extractor):
|
||||
|
||||
for post in posts:
|
||||
post.update(data)
|
||||
post["time"] = int(util.datetime_to_timestamp(post["date"]))
|
||||
post["time"] = int(dt.to_ts(post["date"]))
|
||||
yield Message.Directory, post
|
||||
if "url" in post:
|
||||
yield Message.Url, post["url"], text.nameext_from_url(
|
||||
@@ -61,10 +61,9 @@ class _4archiveThreadExtractor(Extractor):
|
||||
extr = text.extract_from(post)
|
||||
data = {
|
||||
"name": extr('class="name">', "</span>"),
|
||||
"date": text.parse_datetime(
|
||||
"date": self.parse_datetime_iso(
|
||||
(extr('class="dateTime">', "<") or
|
||||
extr('class="dateTime postNum" >', "<")).strip(),
|
||||
"%Y-%m-%d %H:%M:%S"),
|
||||
extr('class="dateTime postNum" >', "<")).strip()),
|
||||
"no" : text.parse_int(extr(">Post No.", "<")),
|
||||
}
|
||||
if 'class="file"' in post:
|
||||
|
||||
@@ -9,9 +9,8 @@
|
||||
"""Extractors for https://8chan.moe/"""
|
||||
|
||||
from .common import Extractor, Message
|
||||
from .. import text, util
|
||||
from .. import text, dt
|
||||
from ..cache import memcache
|
||||
from datetime import timedelta
|
||||
import itertools
|
||||
|
||||
BASE_PATTERN = r"(?:https?://)?8chan\.(moe|se|cc)"
|
||||
@@ -44,7 +43,7 @@ class _8chanExtractor(Extractor):
|
||||
def cookies_prepare(self):
|
||||
# fetch captcha cookies
|
||||
# (necessary to download without getting interrupted)
|
||||
now = util.datetime_utcnow()
|
||||
now = dt.now()
|
||||
url = self.root + "/captcha.js"
|
||||
params = {"d": now.strftime("%a %b %d %Y %H:%M:%S GMT+0000 (UTC)")}
|
||||
self.request(url, params=params).content
|
||||
@@ -57,7 +56,7 @@ class _8chanExtractor(Extractor):
|
||||
if cookie.domain.endswith(domain):
|
||||
cookie.expires = None
|
||||
if cookie.name == "captchaexpiration":
|
||||
cookie.value = (now + timedelta(30, 300)).strftime(
|
||||
cookie.value = (now + dt.timedelta(30, 300)).strftime(
|
||||
"%a, %d %b %Y %H:%M:%S GMT")
|
||||
|
||||
return self.cookies
|
||||
|
||||
@@ -85,8 +85,7 @@ class _8musesAlbumExtractor(Extractor):
|
||||
"parent" : text.parse_int(album["parentId"]),
|
||||
"views" : text.parse_int(album["numberViews"]),
|
||||
"likes" : text.parse_int(album["numberLikes"]),
|
||||
"date" : text.parse_datetime(
|
||||
album["updatedAt"], "%Y-%m-%dT%H:%M:%S.%fZ"),
|
||||
"date" : self.parse_datetime_iso(album["updatedAt"]),
|
||||
}
|
||||
|
||||
def _unobfuscate(self, data):
|
||||
|
||||
@@ -33,7 +33,7 @@ class AdultempireGalleryExtractor(GalleryExtractor):
|
||||
"gallery_id": text.parse_int(self.gallery_id),
|
||||
"title" : text.unescape(extr('title="', '"')),
|
||||
"studio" : extr(">studio</small>", "<").strip(),
|
||||
"date" : text.parse_datetime(extr(
|
||||
"date" : self.parse_datetime(extr(
|
||||
">released</small>", "<").strip(), "%m/%d/%Y"),
|
||||
"actors" : sorted(text.split_html(extr(
|
||||
'<ul class="item-details item-cast-list ', '</ul>'))[1:]),
|
||||
|
||||
@@ -33,7 +33,7 @@ class AgnphExtractor(booru.BooruExtractor):
|
||||
self.cookies.set("confirmed_age", "true", domain="agn.ph")
|
||||
|
||||
def _prepare(self, post):
|
||||
post["date"] = text.parse_timestamp(post["created_at"])
|
||||
post["date"] = self.parse_timestamp(post["created_at"])
|
||||
post["status"] = post["status"].strip()
|
||||
post["has_children"] = ("true" in post["has_children"])
|
||||
|
||||
|
||||
@@ -182,11 +182,11 @@ class Ao3WorkExtractor(Ao3Extractor):
|
||||
extr('<dd class="freeform tags">', "</dd>")),
|
||||
"lang" : extr('<dd class="language" lang="', '"'),
|
||||
"series" : extr('<dd class="series">', "</dd>"),
|
||||
"date" : text.parse_datetime(
|
||||
extr('<dd class="published">', "<"), "%Y-%m-%d"),
|
||||
"date_completed": text.parse_datetime(
|
||||
extr('>Completed:</dt><dd class="status">', "<"), "%Y-%m-%d"),
|
||||
"date_updated" : text.parse_timestamp(
|
||||
"date" : self.parse_datetime_iso(extr(
|
||||
'<dd class="published">', "<")),
|
||||
"date_completed": self.parse_datetime_iso(extr(
|
||||
'>Completed:</dt><dd class="status">', "<")),
|
||||
"date_updated" : self.parse_timestamp(
|
||||
path.rpartition("updated_at=")[2]),
|
||||
"words" : text.parse_int(
|
||||
extr('<dd class="words">', "<").replace(",", "")),
|
||||
|
||||
@@ -49,8 +49,7 @@ class ArcalivePostExtractor(ArcaliveExtractor):
|
||||
files = self._extract_files(post)
|
||||
|
||||
post["count"] = len(files)
|
||||
post["date"] = text.parse_datetime(
|
||||
post["createdAt"][:19], "%Y-%m-%dT%H:%M:%S")
|
||||
post["date"] = self.parse_datetime_iso(post["createdAt"][:19])
|
||||
post["post_url"] = post_url = \
|
||||
f"{self.root}/b/{post['boardSlug']}/{post['id']}"
|
||||
post["_http_headers"] = {"Referer": post_url + "?p=1"}
|
||||
|
||||
@@ -126,8 +126,7 @@ class ArtstationExtractor(Extractor):
|
||||
data["title"] = text.unescape(data["title"])
|
||||
data["description"] = text.unescape(text.remove_html(
|
||||
data["description"]))
|
||||
data["date"] = text.parse_datetime(
|
||||
data["created_at"], "%Y-%m-%dT%H:%M:%S.%f%z")
|
||||
data["date"] = self.parse_datetime_iso(data["created_at"])
|
||||
|
||||
assets = data["assets"]
|
||||
del data["assets"]
|
||||
|
||||
@@ -9,10 +9,9 @@
|
||||
"""Extractors for https://aryion.com/"""
|
||||
|
||||
from .common import Extractor, Message
|
||||
from .. import text, util, exception
|
||||
from .. import text, util, dt, exception
|
||||
from ..cache import cache
|
||||
from email.utils import parsedate_tz
|
||||
from datetime import datetime
|
||||
|
||||
BASE_PATTERN = r"(?:https?://)?(?:www\.)?aryion\.com/g4"
|
||||
|
||||
@@ -156,7 +155,7 @@ class AryionExtractor(Extractor):
|
||||
"artist": artist,
|
||||
"path" : text.split_html(extr(
|
||||
"cookiecrumb'>", '</span'))[4:-1:2],
|
||||
"date" : datetime(*parsedate_tz(lmod)[:6]),
|
||||
"date" : dt.datetime(*parsedate_tz(lmod)[:6]),
|
||||
"size" : text.parse_int(clen),
|
||||
"views" : text.parse_int(extr("Views</b>:", "<").replace(",", "")),
|
||||
"width" : text.parse_int(extr("Resolution</b>:", "x")),
|
||||
|
||||
@@ -123,7 +123,7 @@ class BatotoChapterExtractor(BatotoBase, ChapterExtractor):
|
||||
"chapter_minor" : minor,
|
||||
"chapter_string": info,
|
||||
"chapter_id" : text.parse_int(self.chapter_id),
|
||||
"date" : text.parse_timestamp(extr(' time="', '"')[:-3]),
|
||||
"date" : self.parse_timestamp(extr(' time="', '"')[:-3]),
|
||||
}
|
||||
|
||||
def images(self, page):
|
||||
@@ -167,8 +167,7 @@ class BatotoMangaExtractor(BatotoBase, MangaExtractor):
|
||||
|
||||
data["chapter"] = text.parse_int(chapter)
|
||||
data["chapter_minor"] = sep + minor
|
||||
data["date"] = text.parse_datetime(
|
||||
extr('time="', '"'), "%Y-%m-%dT%H:%M:%S.%fZ")
|
||||
data["date"] = self.parse_datetime_iso(extr('time="', '"'))
|
||||
|
||||
url = f"{self.root}/title/{href}"
|
||||
results.append((url, data.copy()))
|
||||
@@ -188,9 +187,9 @@ def _manga_info(self, manga_id, page=None):
|
||||
"manga" : data["name"][1],
|
||||
"manga_id" : text.parse_int(manga_id),
|
||||
"manga_slug" : data["slug"][1],
|
||||
"manga_date" : text.parse_timestamp(
|
||||
"manga_date" : self.parse_timestamp(
|
||||
data["dateCreate"][1] // 1000),
|
||||
"manga_date_updated": text.parse_timestamp(
|
||||
"manga_date_updated": self.parse_timestamp(
|
||||
data["dateUpdate"][1] / 1000),
|
||||
"author" : json_list(data["authors"]),
|
||||
"artist" : json_list(data["artists"]),
|
||||
|
||||
@@ -67,7 +67,7 @@ class BehanceExtractor(Extractor):
|
||||
tags = [tag["title"] for tag in tags]
|
||||
data["tags"] = tags
|
||||
|
||||
data["date"] = text.parse_timestamp(
|
||||
data["date"] = self.parse_timestamp(
|
||||
data.get("publishedOn") or data.get("conceived_on") or 0)
|
||||
|
||||
if creator := data.get("creator"):
|
||||
|
||||
@@ -144,8 +144,8 @@ class BellazonExtractor(Extractor):
|
||||
"title": schema["headline"],
|
||||
"views": stats[0]["userInteractionCount"],
|
||||
"posts": stats[1]["userInteractionCount"],
|
||||
"date" : text.parse_datetime(schema["datePublished"]),
|
||||
"date_updated": text.parse_datetime(schema["dateModified"]),
|
||||
"date" : self.parse_datetime_iso(schema["datePublished"]),
|
||||
"date_updated": self.parse_datetime_iso(schema["dateModified"]),
|
||||
"description" : text.unescape(schema["text"]).strip(),
|
||||
"section" : path[-2],
|
||||
"author" : author["name"],
|
||||
@@ -169,7 +169,7 @@ class BellazonExtractor(Extractor):
|
||||
post = {
|
||||
"id": extr('id="elComment_', '"'),
|
||||
"author_url": extr(" href='", "'"),
|
||||
"date": text.parse_datetime(extr("datetime='", "'")),
|
||||
"date": self.parse_datetime_iso(extr("datetime='", "'")),
|
||||
"content": extr("<!-- Post content -->", "\n\t\t</div>"),
|
||||
}
|
||||
|
||||
|
||||
@@ -40,7 +40,7 @@ class BloggerExtractor(BaseExtractor):
|
||||
blog = self.api.blog_by_url("http://" + self.blog)
|
||||
blog["pages"] = blog["pages"]["totalItems"]
|
||||
blog["posts"] = blog["posts"]["totalItems"]
|
||||
blog["date"] = text.parse_datetime(blog["published"])
|
||||
blog["date"] = self.parse_datetime_iso(blog["published"])
|
||||
del blog["selfLink"]
|
||||
|
||||
findall_image = util.re(
|
||||
@@ -65,7 +65,7 @@ class BloggerExtractor(BaseExtractor):
|
||||
post["author"] = post["author"]["displayName"]
|
||||
post["replies"] = post["replies"]["totalItems"]
|
||||
post["content"] = text.remove_html(content)
|
||||
post["date"] = text.parse_datetime(post["published"])
|
||||
post["date"] = self.parse_datetime_iso(post["published"])
|
||||
del post["selfLink"]
|
||||
del post["blog"]
|
||||
|
||||
|
||||
@@ -135,8 +135,7 @@ class BlueskyExtractor(Extractor):
|
||||
|
||||
post["instance"] = self.instance
|
||||
post["post_id"] = self._pid(post)
|
||||
post["date"] = text.parse_datetime(
|
||||
post["createdAt"][:19], "%Y-%m-%dT%H:%M:%S")
|
||||
post["date"] = self.parse_datetime_iso(post["createdAt"][:19])
|
||||
|
||||
def _extract_files(self, post):
|
||||
if "embed" not in post:
|
||||
|
||||
@@ -78,7 +78,7 @@ class BoostyExtractor(Extractor):
|
||||
post["links"] = links = []
|
||||
|
||||
if "createdAt" in post:
|
||||
post["date"] = text.parse_timestamp(post["createdAt"])
|
||||
post["date"] = self.parse_timestamp(post["createdAt"])
|
||||
|
||||
for block in post["data"]:
|
||||
try:
|
||||
|
||||
@@ -70,8 +70,7 @@ class BoothItemExtractor(BoothExtractor):
|
||||
url + ".json", headers=headers, interval=False)
|
||||
|
||||
item["booth_category"] = item.pop("category", None)
|
||||
item["date"] = text.parse_datetime(
|
||||
item["published_at"], "%Y-%m-%dT%H:%M:%S.%f%z")
|
||||
item["date"] = self.parse_datetime_iso(item["published_at"])
|
||||
item["tags"] = [t["name"] for t in item["tags"]]
|
||||
|
||||
shop = item["shop"]
|
||||
|
||||
@@ -168,7 +168,7 @@ class BunkrAlbumExtractor(LolisafeAlbumExtractor):
|
||||
item, 'name: "', ".")
|
||||
file["size"] = text.parse_int(text.extr(
|
||||
item, "size: ", " ,\n"))
|
||||
file["date"] = text.parse_datetime(text.extr(
|
||||
file["date"] = self.parse_datetime(text.extr(
|
||||
item, 'timestamp: "', '"'), "%H:%M:%S %d/%m/%Y")
|
||||
|
||||
yield file
|
||||
|
||||
@@ -28,7 +28,7 @@ class CatboxAlbumExtractor(GalleryExtractor):
|
||||
return {
|
||||
"album_id" : self.page_url.rpartition("/")[2],
|
||||
"album_name" : text.unescape(extr("<h1>", "<")),
|
||||
"date" : text.parse_datetime(extr(
|
||||
"date" : self.parse_datetime(extr(
|
||||
"<p>Created ", "<"), "%B %d %Y"),
|
||||
"description": text.unescape(extr("<p>", "<")),
|
||||
}
|
||||
|
||||
@@ -79,8 +79,7 @@ class CheveretoImageExtractor(CheveretoExtractor):
|
||||
"url" : url,
|
||||
"album": text.remove_html(extr(
|
||||
"Added to <a", "</a>").rpartition(">")[2]),
|
||||
"date" : text.parse_datetime(extr(
|
||||
'<span title="', '"'), "%Y-%m-%d %H:%M:%S"),
|
||||
"date" : self.parse_datetime_iso(extr('<span title="', '"')),
|
||||
"user" : extr('username: "', '"'),
|
||||
}
|
||||
|
||||
@@ -116,8 +115,7 @@ class CheveretoVideoExtractor(CheveretoExtractor):
|
||||
'class="far fa-clock"></i>', "—"),
|
||||
"album": text.remove_html(extr(
|
||||
"Added to <a", "</a>").rpartition(">")[2]),
|
||||
"date" : text.parse_datetime(extr(
|
||||
'<span title="', '"'), "%Y-%m-%d %H:%M:%S"),
|
||||
"date" : self.parse_datetime_iso(extr('<span title="', '"')),
|
||||
"user" : extr('username: "', '"'),
|
||||
}
|
||||
|
||||
|
||||
@@ -61,7 +61,7 @@ class CienArticleExtractor(CienExtractor):
|
||||
post["post_url"] = url
|
||||
post["post_id"] = text.parse_int(post_id)
|
||||
post["count"] = len(files)
|
||||
post["date"] = text.parse_datetime(post["datePublished"])
|
||||
post["date"] = self.parse_datetime_iso(post["datePublished"])
|
||||
|
||||
try:
|
||||
post["author"]["id"] = text.parse_int(author_id)
|
||||
|
||||
@@ -86,8 +86,7 @@ class CivitaiExtractor(Extractor):
|
||||
images = self.api.images_post(post["id"])
|
||||
|
||||
post = self.api.post(post["id"])
|
||||
post["date"] = text.parse_datetime(
|
||||
post["publishedAt"], "%Y-%m-%dT%H:%M:%S.%fZ")
|
||||
post["date"] = self.parse_datetime_iso(post["publishedAt"])
|
||||
data = {
|
||||
"post": post,
|
||||
"user": post.pop("user"),
|
||||
@@ -122,8 +121,7 @@ class CivitaiExtractor(Extractor):
|
||||
data["post"] = post = self._extract_meta_post(file)
|
||||
if post:
|
||||
post.pop("user", None)
|
||||
file["date"] = text.parse_datetime(
|
||||
file["createdAt"], "%Y-%m-%dT%H:%M:%S.%fZ")
|
||||
file["date"] = self.parse_datetime_iso(file["createdAt"])
|
||||
|
||||
data["url"] = url = self._url(file)
|
||||
text.nameext_from_url(url, data)
|
||||
@@ -180,8 +178,7 @@ class CivitaiExtractor(Extractor):
|
||||
if "id" not in file and data["filename"].isdecimal():
|
||||
file["id"] = text.parse_int(data["filename"])
|
||||
if "date" not in file:
|
||||
file["date"] = text.parse_datetime(
|
||||
file["createdAt"], "%Y-%m-%dT%H:%M:%S.%fZ")
|
||||
file["date"] = self.parse_datetime_iso(file["createdAt"])
|
||||
if self._meta_generation:
|
||||
file["generation"] = self._extract_meta_generation(file)
|
||||
yield data
|
||||
@@ -216,8 +213,7 @@ class CivitaiExtractor(Extractor):
|
||||
def _extract_meta_post(self, image):
|
||||
try:
|
||||
post = self.api.post(image["postId"])
|
||||
post["date"] = text.parse_datetime(
|
||||
post["publishedAt"], "%Y-%m-%dT%H:%M:%S.%fZ")
|
||||
post["date"] = self.parse_datetime_iso(post["publishedAt"])
|
||||
return post
|
||||
except Exception as exc:
|
||||
return self.log.traceback(exc)
|
||||
@@ -278,8 +274,7 @@ class CivitaiModelExtractor(CivitaiExtractor):
|
||||
versions = (version,)
|
||||
|
||||
for version in versions:
|
||||
version["date"] = text.parse_datetime(
|
||||
version["createdAt"], "%Y-%m-%dT%H:%M:%S.%fZ")
|
||||
version["date"] = self.parse_datetime_iso(version["createdAt"])
|
||||
|
||||
data = {
|
||||
"model" : model,
|
||||
@@ -593,8 +588,7 @@ class CivitaiGeneratedExtractor(CivitaiExtractor):
|
||||
self._require_auth()
|
||||
|
||||
for gen in self.api.orchestrator_queryGeneratedImages():
|
||||
gen["date"] = text.parse_datetime(
|
||||
gen["createdAt"], "%Y-%m-%dT%H:%M:%S.%fZ")
|
||||
gen["date"] = self.parse_datetime_iso(gen["createdAt"])
|
||||
yield Message.Directory, gen
|
||||
for step in gen.pop("steps", ()):
|
||||
for image in step.pop("images", ()):
|
||||
|
||||
@@ -114,10 +114,8 @@ class ComickChapterExtractor(ComickBase, ChapterExtractor):
|
||||
"chapter_hid" : ch["hid"],
|
||||
"chapter_string": chstr,
|
||||
"group" : ch["group_name"],
|
||||
"date" : text.parse_datetime(
|
||||
ch["created_at"][:19], "%Y-%m-%dT%H:%M:%S"),
|
||||
"date_updated" : text.parse_datetime(
|
||||
ch["updated_at"][:19], "%Y-%m-%dT%H:%M:%S"),
|
||||
"date" : self.parse_datetime_iso(ch["created_at"][:19]),
|
||||
"date_updated" : self.parse_datetime_iso(ch["updated_at"][:19]),
|
||||
"lang" : ch["lang"],
|
||||
}
|
||||
|
||||
|
||||
@@ -60,6 +60,6 @@ class ComicvineTagExtractor(BooruExtractor):
|
||||
_file_url = operator.itemgetter("original")
|
||||
|
||||
def _prepare(self, post):
|
||||
post["date"] = text.parse_datetime(
|
||||
post["date"] = self.parse_datetime(
|
||||
post["dateCreated"], "%a, %b %d %Y")
|
||||
post["tags"] = [tag["name"] for tag in post["tags"] if tag["name"]]
|
||||
|
||||
@@ -19,11 +19,10 @@ import getpass
|
||||
import logging
|
||||
import requests
|
||||
import threading
|
||||
from datetime import datetime
|
||||
from xml.etree import ElementTree
|
||||
from requests.adapters import HTTPAdapter
|
||||
from .message import Message
|
||||
from .. import config, output, text, util, cache, exception
|
||||
from .. import config, output, text, util, dt, cache, exception
|
||||
urllib3 = requests.packages.urllib3
|
||||
|
||||
|
||||
@@ -64,6 +63,10 @@ class Extractor():
|
||||
else:
|
||||
self.category = CATEGORY_MAP[self.category]
|
||||
|
||||
self.parse_datetime = dt.parse
|
||||
self.parse_datetime_iso = dt.parse_iso
|
||||
self.parse_timestamp = dt.parse_ts
|
||||
|
||||
self._cfgpath = ("extractor", self.category, self.subcategory)
|
||||
self._parentdir = ""
|
||||
|
||||
@@ -313,9 +316,9 @@ class Extractor():
|
||||
seconds = float(seconds)
|
||||
until = now + seconds
|
||||
elif until:
|
||||
if isinstance(until, datetime):
|
||||
if isinstance(until, dt.datetime):
|
||||
# convert to UTC timestamp
|
||||
until = util.datetime_to_timestamp(until)
|
||||
until = dt.to_ts(until)
|
||||
else:
|
||||
until = float(until)
|
||||
seconds = until - now
|
||||
@@ -327,7 +330,7 @@ class Extractor():
|
||||
return
|
||||
|
||||
if reason:
|
||||
t = datetime.fromtimestamp(until).time()
|
||||
t = dt.datetime.fromtimestamp(until).time()
|
||||
isotime = f"{t.hour:02}:{t.minute:02}:{t.second:02}"
|
||||
self.log.info("Waiting until %s (%s)", isotime, reason)
|
||||
time.sleep(seconds)
|
||||
@@ -652,7 +655,7 @@ class Extractor():
|
||||
self.log.warning(
|
||||
"cookies: %s/%s expired at %s",
|
||||
cookie.domain.lstrip("."), cookie.name,
|
||||
datetime.fromtimestamp(cookie.expires))
|
||||
dt.datetime.fromtimestamp(cookie.expires))
|
||||
continue
|
||||
|
||||
elif diff <= 86400:
|
||||
@@ -694,7 +697,7 @@ class Extractor():
|
||||
ts = self.config(key, default)
|
||||
if isinstance(ts, str):
|
||||
try:
|
||||
ts = int(datetime.strptime(ts, fmt).timestamp())
|
||||
ts = int(dt.parse(ts, fmt).timestamp())
|
||||
except ValueError as exc:
|
||||
self.log.warning("Unable to parse '%s': %s", key, exc)
|
||||
ts = default
|
||||
|
||||
@@ -47,7 +47,7 @@ class CyberdropAlbumExtractor(lolisafe.LolisafeAlbumExtractor):
|
||||
"album_name" : text.unescape(extr('title="', '"')),
|
||||
"album_size" : text.parse_bytes(extr(
|
||||
'<p class="title">', "B")),
|
||||
"date" : text.parse_datetime(extr(
|
||||
"date" : self.parse_datetime(extr(
|
||||
'<p class="title">', '<'), "%d.%m.%Y"),
|
||||
"description": text.unescape(text.unescape( # double
|
||||
desc.rpartition(" [R")[0])),
|
||||
|
||||
@@ -113,7 +113,7 @@ class CyberfileFileExtractor(CyberfileExtractor):
|
||||
"Filesize:", "</tr>"))[:-1]),
|
||||
"tags" : text.split_html(extr(
|
||||
"Keywords:", "</tr>")),
|
||||
"date" : text.parse_datetime(text.remove_html(extr(
|
||||
"date" : self.parse_datetime(text.remove_html(extr(
|
||||
"Uploaded:", "</tr>")), "%d/%m/%Y %H:%M:%S"),
|
||||
"permissions": text.remove_html(extr(
|
||||
"Permissions:", "</tr>")).split(" & "),
|
||||
|
||||
@@ -9,8 +9,7 @@
|
||||
"""Extractors for https://danbooru.donmai.us/ and other Danbooru instances"""
|
||||
|
||||
from .common import BaseExtractor, Message
|
||||
from .. import text, util
|
||||
import datetime
|
||||
from .. import text, util, dt
|
||||
|
||||
|
||||
class DanbooruExtractor(BaseExtractor):
|
||||
@@ -69,8 +68,7 @@ class DanbooruExtractor(BaseExtractor):
|
||||
continue
|
||||
|
||||
text.nameext_from_url(url, post)
|
||||
post["date"] = text.parse_datetime(
|
||||
post["created_at"], "%Y-%m-%dT%H:%M:%S.%f%z")
|
||||
post["date"] = dt.parse_iso(post["created_at"])
|
||||
|
||||
post["tags"] = (
|
||||
post["tag_string"].split(" ")
|
||||
@@ -357,11 +355,11 @@ class DanbooruPopularExtractor(DanbooruExtractor):
|
||||
def metadata(self):
|
||||
self.params = params = text.parse_query(self.groups[-1])
|
||||
scale = params.get("scale", "day")
|
||||
date = params.get("date") or datetime.date.today().isoformat()
|
||||
date = params.get("date") or dt.date.today().isoformat()
|
||||
|
||||
if scale == "week":
|
||||
date = datetime.date.fromisoformat(date)
|
||||
date = (date - datetime.timedelta(days=date.weekday())).isoformat()
|
||||
date = dt.date.fromisoformat(date)
|
||||
date = (date - dt.timedelta(days=date.weekday())).isoformat()
|
||||
elif scale == "month":
|
||||
date = date[:-3]
|
||||
|
||||
|
||||
@@ -68,7 +68,7 @@ class DankefuerslesenChapterExtractor(DankefuerslesenBase, ChapterExtractor):
|
||||
"chapter_minor": minor,
|
||||
"group" : manga["groups"][group_id].split(" & "),
|
||||
"group_id" : text.parse_int(group_id),
|
||||
"date" : text.parse_timestamp(data["release_date"][group_id]),
|
||||
"date" : self.parse_timestamp(data["release_date"][group_id]),
|
||||
"lang" : util.NONE,
|
||||
"language" : util.NONE,
|
||||
}
|
||||
|
||||
@@ -9,7 +9,7 @@
|
||||
"""Extractors for https://www.deviantart.com/"""
|
||||
|
||||
from .common import Extractor, Message, Dispatch
|
||||
from .. import text, util, exception
|
||||
from .. import text, util, dt, exception
|
||||
from ..cache import cache, memcache
|
||||
import collections
|
||||
import mimetypes
|
||||
@@ -259,7 +259,7 @@ class DeviantartExtractor(Extractor):
|
||||
|
||||
deviation["published_time"] = text.parse_int(
|
||||
deviation["published_time"])
|
||||
deviation["date"] = text.parse_timestamp(
|
||||
deviation["date"] = self.parse_timestamp(
|
||||
deviation["published_time"])
|
||||
|
||||
if self.comments:
|
||||
@@ -1187,8 +1187,8 @@ class DeviantartStatusExtractor(DeviantartExtractor):
|
||||
deviation["username"] = deviation["author"]["username"]
|
||||
deviation["_username"] = deviation["username"].lower()
|
||||
|
||||
deviation["date"] = dt = text.parse_datetime(deviation["ts"])
|
||||
deviation["published_time"] = int(util.datetime_to_timestamp(dt))
|
||||
deviation["date"] = d = self.parse_datetime_iso(deviation["ts"])
|
||||
deviation["published_time"] = int(dt.to_ts(d))
|
||||
|
||||
deviation["da_category"] = "Status"
|
||||
deviation["category_path"] = "status"
|
||||
|
||||
@@ -72,9 +72,7 @@ class DiscordExtractor(Extractor):
|
||||
"author_files": [],
|
||||
"message": self.extract_message_text(message),
|
||||
"message_id": message["id"],
|
||||
"date": text.parse_datetime(
|
||||
message["timestamp"], "%Y-%m-%dT%H:%M:%S.%f%z"
|
||||
),
|
||||
"date": self.parse_datetime_iso(message["timestamp"]),
|
||||
"files": []
|
||||
})
|
||||
|
||||
|
||||
@@ -62,7 +62,7 @@ class DynastyscansChapterExtractor(DynastyscansBase, ChapterExtractor):
|
||||
"author" : text.remove_html(author),
|
||||
"group" : (text.remove_html(group) or
|
||||
text.extr(group, ' alt="', '"')),
|
||||
"date" : text.parse_datetime(extr(
|
||||
"date" : self.parse_datetime(extr(
|
||||
'"icon-calendar"></i> ', '<'), "%b %d, %Y"),
|
||||
"tags" : text.split_html(extr(
|
||||
"class='tags'>", "<div id='chapter-actions'")),
|
||||
@@ -166,8 +166,6 @@ class DynastyscansAnthologyExtractor(DynastyscansSearchExtractor):
|
||||
data["scanlator"] = content[1].text[11:]
|
||||
data["tags"] = content[2].text[6:].lower().split(", ")
|
||||
data["title"] = element[5].text
|
||||
data["date"] = text.parse_datetime(
|
||||
element[1].text, "%Y-%m-%dT%H:%M:%S%z")
|
||||
data["date_updated"] = text.parse_datetime(
|
||||
element[2].text, "%Y-%m-%dT%H:%M:%S%z")
|
||||
data["date"] = self.parse_datetime_iso(element[1].text)
|
||||
data["date_updated"] = self.parse_datetime_iso(element[2].text)
|
||||
yield Message.Queue, element[4].text, data
|
||||
|
||||
@@ -51,8 +51,7 @@ class E621Extractor(danbooru.DanbooruExtractor):
|
||||
|
||||
post["filename"] = file["md5"]
|
||||
post["extension"] = file["ext"]
|
||||
post["date"] = text.parse_datetime(
|
||||
post["created_at"], "%Y-%m-%dT%H:%M:%S.%f%z")
|
||||
post["date"] = self.parse_datetime_iso(post["created_at"])
|
||||
|
||||
post.update(data)
|
||||
yield Message.Directory, post
|
||||
|
||||
@@ -96,7 +96,7 @@ class EromeAlbumExtractor(EromeExtractor):
|
||||
if not date:
|
||||
ts = text.extr(group, '?v=', '"')
|
||||
if len(ts) > 1:
|
||||
date = text.parse_timestamp(ts)
|
||||
date = self.parse_timestamp(ts)
|
||||
|
||||
data = {
|
||||
"album_id": album_id,
|
||||
|
||||
@@ -216,7 +216,7 @@ class ExhentaiGalleryExtractor(ExhentaiExtractor):
|
||||
def _items_hitomi(self):
|
||||
if self.config("metadata", False):
|
||||
data = self.metadata_from_api()
|
||||
data["date"] = text.parse_timestamp(data["posted"])
|
||||
data["date"] = self.parse_timestamp(data["posted"])
|
||||
else:
|
||||
data = {}
|
||||
|
||||
@@ -233,7 +233,7 @@ class ExhentaiGalleryExtractor(ExhentaiExtractor):
|
||||
data = self.metadata_from_page(page)
|
||||
if self.config("metadata", False):
|
||||
data.update(self.metadata_from_api())
|
||||
data["date"] = text.parse_timestamp(data["posted"])
|
||||
data["date"] = self.parse_timestamp(data["posted"])
|
||||
if self.config("tags", False):
|
||||
tags = collections.defaultdict(list)
|
||||
for tag in data["tags"]:
|
||||
@@ -258,8 +258,8 @@ class ExhentaiGalleryExtractor(ExhentaiExtractor):
|
||||
"_" : extr('<div id="gdc"><div class="cs ct', '"'),
|
||||
"eh_category" : extr('>', '<'),
|
||||
"uploader" : extr('<div id="gdn">', '</div>'),
|
||||
"date" : text.parse_datetime(extr(
|
||||
'>Posted:</td><td class="gdt2">', '</td>'), "%Y-%m-%d %H:%M"),
|
||||
"date" : self.parse_datetime_iso(extr(
|
||||
'>Posted:</td><td class="gdt2">', '</td>')),
|
||||
"parent" : extr(
|
||||
'>Parent:</td><td class="gdt2"><a href="', '"'),
|
||||
"expunged" : "Yes" != extr(
|
||||
|
||||
@@ -108,7 +108,7 @@ class FacebookExtractor(Extractor):
|
||||
'"message":{"delight_ranges"',
|
||||
'"},"message_preferred_body"'
|
||||
).rsplit('],"text":"', 1)[-1]),
|
||||
"date": text.parse_timestamp(
|
||||
"date": self.parse_timestamp(
|
||||
text.extr(photo_page, '\\"publish_time\\":', ',') or
|
||||
text.extr(photo_page, '"created_time":', ',')
|
||||
),
|
||||
@@ -172,7 +172,7 @@ class FacebookExtractor(Extractor):
|
||||
"user_id": text.extr(
|
||||
video_page, '"owner":{"__typename":"User","id":"', '"'
|
||||
),
|
||||
"date": text.parse_timestamp(text.extr(
|
||||
"date": self.parse_timestamp(text.extr(
|
||||
video_page, '\\"publish_time\\":', ','
|
||||
)),
|
||||
"type": "video"
|
||||
|
||||
@@ -128,7 +128,7 @@ class FanboxExtractor(Extractor):
|
||||
if file.get("extension", "").lower() in exts
|
||||
]
|
||||
|
||||
post["date"] = text.parse_datetime(post["publishedDatetime"])
|
||||
post["date"] = self.parse_datetime_iso(post["publishedDatetime"])
|
||||
post["text"] = content_body.get("text") if content_body else None
|
||||
post["isCoverImage"] = False
|
||||
|
||||
|
||||
@@ -35,7 +35,7 @@ class FanslyExtractor(Extractor):
|
||||
for post in self.posts():
|
||||
files = self._extract_files(post)
|
||||
post["count"] = len(files)
|
||||
post["date"] = text.parse_timestamp(post["createdAt"])
|
||||
post["date"] = self.parse_timestamp(post["createdAt"])
|
||||
|
||||
yield Message.Directory, post
|
||||
for post["num"], file in enumerate(files, 1):
|
||||
@@ -117,8 +117,8 @@ class FanslyExtractor(Extractor):
|
||||
file = {
|
||||
**variant,
|
||||
"format": variant["type"],
|
||||
"date": text.parse_timestamp(media["createdAt"]),
|
||||
"date_updated": text.parse_timestamp(media["updatedAt"]),
|
||||
"date": self.parse_timestamp(media["createdAt"]),
|
||||
"date_updated": self.parse_timestamp(media["updatedAt"]),
|
||||
}
|
||||
|
||||
if "metadata" in location:
|
||||
|
||||
@@ -101,7 +101,7 @@ class FantiaExtractor(Extractor):
|
||||
"comment": resp["comment"],
|
||||
"rating": resp["rating"],
|
||||
"posted_at": resp["posted_at"],
|
||||
"date": text.parse_datetime(
|
||||
"date": self.parse_datetime(
|
||||
resp["posted_at"], "%a, %d %b %Y %H:%M:%S %z"),
|
||||
"fanclub_id": resp["fanclub"]["id"],
|
||||
"fanclub_user_id": resp["fanclub"]["user"]["id"],
|
||||
|
||||
@@ -98,7 +98,7 @@ class FlickrImageExtractor(FlickrExtractor):
|
||||
photo["comments"] = text.parse_int(photo["comments"]["_content"])
|
||||
photo["description"] = photo["description"]["_content"]
|
||||
photo["tags"] = [t["raw"] for t in photo["tags"]["tag"]]
|
||||
photo["date"] = text.parse_timestamp(photo["dateuploaded"])
|
||||
photo["date"] = self.parse_timestamp(photo["dateuploaded"])
|
||||
photo["views"] = text.parse_int(photo["views"])
|
||||
photo["id"] = text.parse_int(photo["id"])
|
||||
|
||||
@@ -489,7 +489,7 @@ class FlickrAPI(oauth.OAuth1API):
|
||||
def _extract_format(self, photo):
|
||||
photo["description"] = photo["description"]["_content"].strip()
|
||||
photo["views"] = text.parse_int(photo["views"])
|
||||
photo["date"] = text.parse_timestamp(photo["dateupload"])
|
||||
photo["date"] = self.parse_timestamp(photo["dateupload"])
|
||||
photo["tags"] = photo["tags"].split()
|
||||
|
||||
self._extract_metadata(photo)
|
||||
|
||||
@@ -143,7 +143,7 @@ class FuraffinityExtractor(Extractor):
|
||||
data["folders"] = () # folders not present in old layout
|
||||
|
||||
data["user"] = self.user or data["artist_url"]
|
||||
data["date"] = text.parse_timestamp(data["filename"].partition(".")[0])
|
||||
data["date"] = self.parse_timestamp(data["filename"].partition(".")[0])
|
||||
data["description"] = self._process_description(data["_description"])
|
||||
data["thumbnail"] = (f"https://t.furaffinity.net/{post_id}@600-"
|
||||
f"{path.rsplit('/', 2)[1]}.jpg")
|
||||
|
||||
@@ -55,8 +55,7 @@ class Furry34Extractor(BooruExtractor):
|
||||
|
||||
def _prepare(self, post):
|
||||
post.pop("files", None)
|
||||
post["date"] = text.parse_datetime(
|
||||
post["created"], "%Y-%m-%dT%H:%M:%S.%fZ")
|
||||
post["date"] = self.parse_datetime_iso(post["created"])
|
||||
post["filename"], _, post["format"] = post["filename"].rpartition(".")
|
||||
if "tags" in post:
|
||||
post["tags"] = [t["value"] for t in post["tags"]]
|
||||
|
||||
@@ -246,7 +246,7 @@ class GelbooruFavoriteExtractor(GelbooruBase,
|
||||
|
||||
for fav in favs:
|
||||
for post in self._api_request({"id": fav["favorite"]}):
|
||||
post["date_favorited"] = text.parse_timestamp(fav["added"])
|
||||
post["date_favorited"] = self.parse_timestamp(fav["added"])
|
||||
yield post
|
||||
|
||||
params["pid"] += 1
|
||||
@@ -273,7 +273,7 @@ class GelbooruFavoriteExtractor(GelbooruBase,
|
||||
|
||||
for fav in favs:
|
||||
for post in self._api_request({"id": fav["favorite"]}):
|
||||
post["date_favorited"] = text.parse_timestamp(fav["added"])
|
||||
post["date_favorited"] = self.parse_timestamp(fav["added"])
|
||||
yield post
|
||||
|
||||
params["pid"] -= 1
|
||||
|
||||
@@ -35,8 +35,7 @@ class GelbooruV01Extractor(booru.BooruExtractor):
|
||||
}
|
||||
|
||||
post["md5"] = post["file_url"].rpartition("/")[2].partition(".")[0]
|
||||
post["date"] = text.parse_datetime(
|
||||
post["created_at"], "%Y-%m-%d %H:%M:%S")
|
||||
post["date"] = self.parse_datetime_iso(post["created_at"])
|
||||
|
||||
return post
|
||||
|
||||
|
||||
@@ -122,7 +122,7 @@ class GelbooruV02Extractor(booru.BooruExtractor):
|
||||
|
||||
def _prepare(self, post):
|
||||
post["tags"] = post["tags"].strip()
|
||||
post["date"] = text.parse_datetime(
|
||||
post["date"] = self.parse_datetime(
|
||||
post["created_at"], "%a %b %d %H:%M:%S %z %Y")
|
||||
|
||||
def _html(self, post):
|
||||
|
||||
@@ -52,7 +52,7 @@ class GirlsreleasedSetExtractor(GirlsreleasedExtractor):
|
||||
"id": json["id"],
|
||||
"site": json["site"],
|
||||
"model": [model for _, model in json["models"]],
|
||||
"date": text.parse_timestamp(json["date"]),
|
||||
"date": self.parse_timestamp(json["date"]),
|
||||
"count": len(json["images"]),
|
||||
"url": "https://girlsreleased.com/set/" + json["id"],
|
||||
}
|
||||
|
||||
@@ -101,9 +101,8 @@ class GirlswithmusclePostExtractor(GirlswithmuscleExtractor):
|
||||
"model": model,
|
||||
"model_list": self._parse_model_list(model),
|
||||
"tags": text.split_html(tags)[1::2],
|
||||
"date": text.parse_datetime(
|
||||
text.extr(page, 'class="hover-time" title="', '"')[:19],
|
||||
"%Y-%m-%d %H:%M:%S"),
|
||||
"date": self.parse_datetime_iso(text.extr(
|
||||
page, 'class="hover-time" title="', '"')[:19]),
|
||||
"is_favorite": self._parse_is_favorite(page),
|
||||
"source_filename": source,
|
||||
"uploader": uploader,
|
||||
|
||||
@@ -34,7 +34,7 @@ class HatenablogExtractor(Extractor):
|
||||
|
||||
def _handle_article(self, article: str):
|
||||
extr = text.extract_from(article)
|
||||
date = text.parse_datetime(extr('<time datetime="', '"'))
|
||||
date = self.parse_datetime_iso(extr('<time datetime="', '"'))
|
||||
entry_link = text.unescape(extr('<a href="', '"'))
|
||||
entry = entry_link.partition("/entry/")[2]
|
||||
title = text.unescape(extr('>', '<'))
|
||||
|
||||
@@ -86,7 +86,7 @@ class HentaifoundryExtractor(Extractor):
|
||||
.replace("\r\n", "\n")),
|
||||
"ratings" : [text.unescape(r) for r in text.extract_iter(extr(
|
||||
"class='ratings_box'", "</div>"), "title='", "'")],
|
||||
"date" : text.parse_datetime(extr("datetime='", "'")),
|
||||
"date" : self.parse_datetime_iso(extr("datetime='", "'")),
|
||||
"views" : text.parse_int(extr(">Views</span>", "<")),
|
||||
"score" : text.parse_int(extr(">Vote Score</span>", "<")),
|
||||
"media" : text.unescape(extr(">Media</span>", "<").strip()),
|
||||
@@ -126,7 +126,7 @@ class HentaifoundryExtractor(Extractor):
|
||||
"title" : text.unescape(extr(
|
||||
"<div class='titlebar'>", "</a>").rpartition(">")[2]),
|
||||
"author" : text.unescape(extr('alt="', '"')),
|
||||
"date" : text.parse_datetime(extr(
|
||||
"date" : self.parse_datetime(extr(
|
||||
">Updated<", "</span>").rpartition(">")[2], "%B %d, %Y"),
|
||||
"status" : extr("class='indent'>", "<"),
|
||||
}
|
||||
|
||||
@@ -35,8 +35,7 @@ class HentaihandGalleryExtractor(GalleryExtractor):
|
||||
"language" : info["language"]["name"],
|
||||
"lang" : util.language_to_code(info["language"]["name"]),
|
||||
"tags" : [t["slug"] for t in info["tags"]],
|
||||
"date" : text.parse_datetime(
|
||||
info["uploaded_at"], "%Y-%m-%d"),
|
||||
"date" : self.parse_datetime_iso(info["uploaded_at"]),
|
||||
}
|
||||
for key in ("artists", "authors", "groups", "characters",
|
||||
"relationships", "parodies"):
|
||||
|
||||
@@ -84,7 +84,7 @@ class HitomiGalleryExtractor(HitomiExtractor, GalleryExtractor):
|
||||
"type" : info["type"].capitalize(),
|
||||
"language" : language,
|
||||
"lang" : util.language_to_code(language),
|
||||
"date" : text.parse_datetime(date, "%Y-%m-%d %H:%M:%S%z"),
|
||||
"date" : self.parse_datetime_iso(date),
|
||||
"tags" : tags,
|
||||
"artist" : [o["artist"] for o in iget("artists") or ()],
|
||||
"group" : [o["group"] for o in iget("groups") or ()],
|
||||
|
||||
@@ -53,11 +53,9 @@ class ImagechestGalleryExtractor(GalleryExtractor):
|
||||
def _metadata_api(self, page):
|
||||
post = self.api.post(self.gallery_id)
|
||||
|
||||
post["date"] = text.parse_datetime(
|
||||
post["created"], "%Y-%m-%dT%H:%M:%S.%fZ")
|
||||
post["date"] = self.parse_datetime_iso(post["created"])
|
||||
for img in post["images"]:
|
||||
img["date"] = text.parse_datetime(
|
||||
img["created"], "%Y-%m-%dT%H:%M:%S.%fZ")
|
||||
img["date"] = self.parse_datetime_iso(img["created"])
|
||||
|
||||
post["gallery_id"] = self.gallery_id
|
||||
post.pop("image_count", None)
|
||||
|
||||
@@ -159,8 +159,7 @@ class ImgbbImageExtractor(ImgbbExtractor):
|
||||
"width" : text.parse_int(extr('"og:image:width" content="', '"')),
|
||||
"height": text.parse_int(extr('"og:image:height" content="', '"')),
|
||||
"album" : extr("Added to <a", "</a>"),
|
||||
"date" : text.parse_datetime(extr(
|
||||
'<span title="', '"'), "%Y-%m-%d %H:%M:%S"),
|
||||
"date" : self.parse_datetime_iso(extr('<span title="', '"')),
|
||||
"user" : util.json_loads(extr(
|
||||
"CHV.obj.resource=", "};") + "}").get("user"),
|
||||
}
|
||||
|
||||
@@ -31,7 +31,7 @@ class ImgthGalleryExtractor(GalleryExtractor):
|
||||
"title": text.unescape(extr("<h1>", "</h1>")),
|
||||
"count": text.parse_int(extr(
|
||||
"total of images in this gallery: ", " ")),
|
||||
"date" : text.parse_datetime(
|
||||
"date" : self.parse_datetime(
|
||||
extr("created on ", " by <")
|
||||
.replace("th, ", " ", 1).replace("nd, ", " ", 1)
|
||||
.replace("st, ", " ", 1), "%B %d %Y at %H:%M"),
|
||||
|
||||
@@ -38,7 +38,7 @@ class ImgurExtractor(Extractor):
|
||||
|
||||
image["url"] = url = \
|
||||
f"https://i.imgur.com/{image['id']}.{image['ext']}"
|
||||
image["date"] = text.parse_datetime(image["created_at"])
|
||||
image["date"] = self.parse_datetime_iso(image["created_at"])
|
||||
image["_http_validate"] = self._validate
|
||||
text.nameext_from_url(url, image)
|
||||
|
||||
@@ -106,7 +106,7 @@ class ImgurAlbumExtractor(ImgurExtractor):
|
||||
|
||||
del album["media"]
|
||||
count = len(images)
|
||||
album["date"] = text.parse_datetime(album["created_at"])
|
||||
album["date"] = self.parse_datetime_iso(album["created_at"])
|
||||
|
||||
try:
|
||||
del album["ad_url"]
|
||||
|
||||
@@ -35,8 +35,8 @@ class InkbunnyExtractor(Extractor):
|
||||
|
||||
for post in self.posts():
|
||||
post.update(metadata)
|
||||
post["date"] = text.parse_datetime(
|
||||
post["create_datetime"] + "00", "%Y-%m-%d %H:%M:%S.%f%z")
|
||||
post["date"] = self.parse_datetime_iso(
|
||||
post["create_datetime"][:19])
|
||||
post["tags"] = [kw["keyword_name"] for kw in post["keywords"]]
|
||||
post["ratings"] = [r["name"] for r in post["ratings"]]
|
||||
files = post["files"]
|
||||
@@ -52,8 +52,8 @@ class InkbunnyExtractor(Extractor):
|
||||
for post["num"], file in enumerate(files, 1):
|
||||
post.update(file)
|
||||
post["deleted"] = (file["deleted"] == "t")
|
||||
post["date"] = text.parse_datetime(
|
||||
file["create_datetime"] + "00", "%Y-%m-%d %H:%M:%S.%f%z")
|
||||
post["date"] = self.parse_datetime_iso(
|
||||
file["create_datetime"][:19])
|
||||
text.nameext_from_url(file["file_name"], post)
|
||||
|
||||
url = file["file_url_full"]
|
||||
|
||||
@@ -173,7 +173,7 @@ class InstagramExtractor(Extractor):
|
||||
post_url = f"{self.root}/stories/highlights/{reel_id}/"
|
||||
data = {
|
||||
"user" : post.get("user"),
|
||||
"expires": text.parse_timestamp(expires),
|
||||
"expires": self.parse_timestamp(expires),
|
||||
"post_id": reel_id,
|
||||
"post_shortcode": shortcode_from_id(reel_id),
|
||||
"post_url": post_url,
|
||||
@@ -224,7 +224,7 @@ class InstagramExtractor(Extractor):
|
||||
data["owner_id"] = owner["pk"]
|
||||
data["username"] = owner.get("username")
|
||||
data["fullname"] = owner.get("full_name")
|
||||
data["post_date"] = data["date"] = text.parse_timestamp(
|
||||
data["post_date"] = data["date"] = self.parse_timestamp(
|
||||
post.get("taken_at") or post.get("created_at") or post.get("seen"))
|
||||
data["_files"] = files = []
|
||||
for num, item in enumerate(items, 1):
|
||||
@@ -278,7 +278,7 @@ class InstagramExtractor(Extractor):
|
||||
|
||||
media = {
|
||||
"num" : num,
|
||||
"date" : text.parse_timestamp(item.get("taken_at") or
|
||||
"date" : self.parse_timestamp(item.get("taken_at") or
|
||||
media.get("taken_at") or
|
||||
post.get("taken_at")),
|
||||
"media_id" : item["pk"],
|
||||
@@ -299,7 +299,7 @@ class InstagramExtractor(Extractor):
|
||||
if "reshared_story_media_author" in item:
|
||||
media["author"] = item["reshared_story_media_author"]
|
||||
if "expiring_at" in item:
|
||||
media["expires"] = text.parse_timestamp(post["expiring_at"])
|
||||
media["expires"] = self.parse_timestamp(post["expiring_at"])
|
||||
|
||||
self._extract_tagged_users(item, media)
|
||||
files.append(media)
|
||||
@@ -342,7 +342,7 @@ class InstagramExtractor(Extractor):
|
||||
"post_id" : post["id"],
|
||||
"post_shortcode": post["shortcode"],
|
||||
"post_url" : f"{self.root}/p/{post['shortcode']}/",
|
||||
"post_date" : text.parse_timestamp(post["taken_at_timestamp"]),
|
||||
"post_date" : self.parse_timestamp(post["taken_at_timestamp"]),
|
||||
"description": text.parse_unicode_escapes("\n".join(
|
||||
edge["node"]["text"]
|
||||
for edge in post["edge_media_to_caption"]["edges"]
|
||||
@@ -634,7 +634,7 @@ class InstagramStoriesTrayExtractor(InstagramExtractor):
|
||||
def items(self):
|
||||
base = f"{self.root}/stories/id:"
|
||||
for story in self.api.reels_tray():
|
||||
story["date"] = text.parse_timestamp(story["latest_reel_media"])
|
||||
story["date"] = self.parse_timestamp(story["latest_reel_media"])
|
||||
story["_extractor"] = InstagramStoriesExtractor
|
||||
yield Message.Queue, f"{base}{story['id']}/", story
|
||||
|
||||
|
||||
@@ -36,8 +36,8 @@ class IssuuPublicationExtractor(IssuuBase, GalleryExtractor):
|
||||
'{"":' + data.replace('\\"', '"')))
|
||||
|
||||
doc = data["initialDocumentData"]["document"]
|
||||
doc["date"] = text.parse_datetime(
|
||||
doc["originalPublishDateInISOString"], "%Y-%m-%dT%H:%M:%S.%fZ")
|
||||
doc["date"] = self.parse_datetime_iso(
|
||||
doc["originalPublishDateInISOString"])
|
||||
|
||||
self.count = text.parse_int(doc["pageCount"])
|
||||
self.base = (f"https://image.isu.pub/{doc['revisionId']}-"
|
||||
|
||||
@@ -32,8 +32,7 @@ class ItakuExtractor(Extractor):
|
||||
def items(self):
|
||||
if images := self.images():
|
||||
for image in images:
|
||||
image["date"] = text.parse_datetime(
|
||||
image["date_added"], "%Y-%m-%dT%H:%M:%S.%fZ")
|
||||
image["date"] = self.parse_datetime_iso(image["date_added"])
|
||||
for category, tags in image.pop("categorized_tags").items():
|
||||
image[f"tags_{category.lower()}"] = [
|
||||
t["name"] for t in tags]
|
||||
@@ -60,15 +59,14 @@ class ItakuExtractor(Extractor):
|
||||
for post in posts:
|
||||
images = post.pop("gallery_images") or ()
|
||||
post["count"] = len(images)
|
||||
post["date"] = text.parse_datetime(
|
||||
post["date_added"], "%Y-%m-%dT%H:%M:%S.%fZ")
|
||||
post["date"] = self.parse_datetime_iso(post["date_added"])
|
||||
post["tags"] = [t["name"] for t in post["tags"]]
|
||||
|
||||
yield Message.Directory, post
|
||||
for post["num"], image in enumerate(images, 1):
|
||||
post["file"] = image
|
||||
image["date"] = text.parse_datetime(
|
||||
image["date_added"], "%Y-%m-%dT%H:%M:%S.%fZ")
|
||||
image["date"] = self.parse_datetime_iso(
|
||||
image["date_added"])
|
||||
|
||||
url = image["image"]
|
||||
yield Message.Url, url, text.nameext_from_url(url, post)
|
||||
|
||||
@@ -122,10 +122,10 @@ class IwaraExtractor(Extractor):
|
||||
info["file_id"] = file_info.get("id")
|
||||
info["filename"] = filename
|
||||
info["extension"] = extension
|
||||
info["date"] = text.parse_datetime(
|
||||
file_info.get("createdAt"), "%Y-%m-%dT%H:%M:%S.%fZ")
|
||||
info["date_updated"] = text.parse_datetime(
|
||||
file_info.get("updatedAt"), "%Y-%m-%dT%H:%M:%S.%fZ")
|
||||
info["date"] = self.parse_datetime_iso(
|
||||
file_info.get("createdAt"))
|
||||
info["date_updated"] = self.parse_datetime_iso(
|
||||
file_info.get("updatedAt"))
|
||||
info["mime"] = file_info.get("mime")
|
||||
info["size"] = file_info.get("size")
|
||||
info["width"] = file_info.get("width")
|
||||
@@ -144,8 +144,7 @@ class IwaraExtractor(Extractor):
|
||||
"status" : user.get("status"),
|
||||
"role" : user.get("role"),
|
||||
"premium": user.get("premium"),
|
||||
"date" : text.parse_datetime(
|
||||
user.get("createdAt"), "%Y-%m-%dT%H:%M:%S.000Z"),
|
||||
"date" : self.parse_datetime_iso(user.get("createdAt")),
|
||||
"description": profile.get("body"),
|
||||
}
|
||||
|
||||
|
||||
@@ -32,8 +32,7 @@ class KabeuchiUserExtractor(Extractor):
|
||||
if post.get("is_ad") or not post["image1"]:
|
||||
continue
|
||||
|
||||
post["date"] = text.parse_datetime(
|
||||
post["created_at"], "%Y-%m-%d %H:%M:%S")
|
||||
post["date"] = self.parse_datetime_iso(post["created_at"])
|
||||
yield Message.Directory, post
|
||||
|
||||
for key in keys:
|
||||
|
||||
@@ -244,7 +244,7 @@ class KemonoExtractor(Extractor):
|
||||
def _parse_datetime(self, date_string):
|
||||
if len(date_string) > 19:
|
||||
date_string = date_string[:19]
|
||||
return text.parse_datetime(date_string, "%Y-%m-%dT%H:%M:%S")
|
||||
return self.parse_datetime_iso(date_string)
|
||||
|
||||
def _revisions(self, posts):
|
||||
return itertools.chain.from_iterable(
|
||||
|
||||
@@ -119,8 +119,7 @@ class LensdumpImageExtractor(LensdumpBase, Extractor):
|
||||
'property="image:width" content="', '"')),
|
||||
"height": text.parse_int(extr(
|
||||
'property="image:height" content="', '"')),
|
||||
"date" : text.parse_datetime(extr(
|
||||
'<span title="', '"'), "%Y-%m-%d %H:%M:%S"),
|
||||
"date" : self.parse_datetime_iso(extr('<span title="', '"')),
|
||||
}
|
||||
|
||||
text.nameext_from_url(data["url"], data)
|
||||
|
||||
@@ -45,7 +45,7 @@ class LivedoorExtractor(Extractor):
|
||||
"title" : text.unescape(extr('dc:title="', '"')),
|
||||
"categories" : extr('dc:subject="', '"').partition(",")[::2],
|
||||
"description": extr('dc:description="', '"'),
|
||||
"date" : text.parse_datetime(extr('dc:date="', '"')),
|
||||
"date" : self.parse_datetime_iso(extr('dc:date="', '"')),
|
||||
"tags" : text.split_html(tags)[1:] if tags else [],
|
||||
"user" : self.user,
|
||||
"body" : body,
|
||||
|
||||
@@ -29,7 +29,7 @@ class LofterExtractor(Extractor):
|
||||
post = post["post"]
|
||||
|
||||
post["blog_name"] = post["blogInfo"]["blogName"]
|
||||
post["date"] = text.parse_timestamp(post["publishTime"] // 1000)
|
||||
post["date"] = self.parse_timestamp(post["publishTime"] // 1000)
|
||||
post_type = post["type"]
|
||||
|
||||
# Article
|
||||
|
||||
@@ -69,7 +69,7 @@ class LusciousAlbumExtractor(LusciousExtractor):
|
||||
image["thumbnail"] = ""
|
||||
|
||||
image["tags"] = [item["text"] for item in image["tags"]]
|
||||
image["date"] = text.parse_timestamp(image["created"])
|
||||
image["date"] = self.parse_timestamp(image["created"])
|
||||
image["id"] = text.parse_int(image["id"])
|
||||
|
||||
url = (image["url_to_original"] or image["url_to_video"]
|
||||
@@ -188,7 +188,7 @@ fragment AlbumStandard on Album {
|
||||
album["created_by"] = album["created_by"]["display_name"]
|
||||
|
||||
album["id"] = text.parse_int(album["id"])
|
||||
album["date"] = text.parse_timestamp(album["created"])
|
||||
album["date"] = self.parse_timestamp(album["created"])
|
||||
|
||||
return album
|
||||
|
||||
|
||||
@@ -47,8 +47,7 @@ class MadokamiMangaExtractor(MadokamiExtractor):
|
||||
"path": text.unescape(extr('href="', '"')),
|
||||
"chapter_string": text.unescape(extr(">", "<")),
|
||||
"size": text.parse_bytes(extr("<td>", "</td>")),
|
||||
"date": text.parse_datetime(
|
||||
extr("<td>", "</td>").strip(), "%Y-%m-%d %H:%M"),
|
||||
"date": self.parse_datetime_iso(extr("<td>", "</td>").strip()),
|
||||
})
|
||||
|
||||
if self.config("chapter-reverse"):
|
||||
|
||||
@@ -68,7 +68,7 @@ class MangadexExtractor(Extractor):
|
||||
"chapter" : text.parse_int(chnum),
|
||||
"chapter_minor": f"{sep}{minor}",
|
||||
"chapter_id": chapter["id"],
|
||||
"date" : text.parse_datetime(cattributes["publishAt"]),
|
||||
"date" : self.parse_datetime_iso(cattributes["publishAt"]),
|
||||
"group" : [group["attributes"]["name"]
|
||||
for group in relationships["scanlation_group"]],
|
||||
"lang" : lang,
|
||||
@@ -109,8 +109,8 @@ class MangadexCoversExtractor(MangadexExtractor):
|
||||
"cover" : cattributes["fileName"],
|
||||
"lang" : cattributes.get("locale"),
|
||||
"volume" : text.parse_int(cattributes["volume"]),
|
||||
"date" : text.parse_datetime(cattributes["createdAt"]),
|
||||
"date_updated": text.parse_datetime(cattributes["updatedAt"]),
|
||||
"date" : self.parse_datetime_iso(cattributes["createdAt"]),
|
||||
"date_updated": self.parse_datetime_iso(cattributes["updatedAt"]),
|
||||
}
|
||||
|
||||
|
||||
@@ -454,7 +454,7 @@ def _manga_info(self, uuid):
|
||||
"manga_id": manga["id"],
|
||||
"manga_titles": [t.popitem()[1]
|
||||
for t in mattr.get("altTitles") or ()],
|
||||
"manga_date" : text.parse_datetime(mattr.get("createdAt")),
|
||||
"manga_date" : self.parse_datetime_iso(mattr.get("createdAt")),
|
||||
"description" : (mattr["description"].get("en") or
|
||||
next(iter(mattr["description"].values()), "")),
|
||||
"demographic": mattr.get("publicationDemographic"),
|
||||
|
||||
@@ -99,7 +99,7 @@ class MangafoxMangaExtractor(MangaExtractor):
|
||||
"chapter" : text.parse_int(chapter),
|
||||
"chapter_minor" : minor or "",
|
||||
"chapter_string": cstr,
|
||||
"date" : text.parse_datetime(
|
||||
"date" : self.parse_datetime(
|
||||
extr('right">', '</span>'), "%b %d, %Y"),
|
||||
}
|
||||
chapter.update(data)
|
||||
|
||||
@@ -50,10 +50,10 @@ class ManganeloChapterExtractor(ManganeloExtractor, ChapterExtractor):
|
||||
extr = text.extract_from(page)
|
||||
|
||||
data = {
|
||||
"date" : text.parse_datetime(extr(
|
||||
'"datePublished": "', '"')[:19], "%Y-%m-%dT%H:%M:%S"),
|
||||
"date_updated": text.parse_datetime(extr(
|
||||
'"dateModified": "', '"')[:19], "%Y-%m-%dT%H:%M:%S"),
|
||||
"date" : self.parse_datetime_iso(extr(
|
||||
'"datePublished": "', '"')[:19]),
|
||||
"date_updated": self.parse_datetime_iso(extr(
|
||||
'"dateModified": "', '"')[:19]),
|
||||
"manga_id" : text.parse_int(extr("comic_id =", ";")),
|
||||
"chapter_id" : text.parse_int(extr("chapter_id =", ";")),
|
||||
"manga" : extr("comic_name =", ";").strip('" '),
|
||||
@@ -99,7 +99,7 @@ class ManganeloMangaExtractor(ManganeloExtractor, MangaExtractor):
|
||||
manga = text.unescape(extr("<h1>", "<"))
|
||||
author = text.remove_html(extr("<li>Author(s) :", "</a>"))
|
||||
status = extr("<li>Status :", "<").strip()
|
||||
update = text.parse_datetime(extr(
|
||||
update = self.parse_datetime(extr(
|
||||
"<li>Last updated :", "<").strip(), "%b-%d-%Y %I:%M:%S %p")
|
||||
tags = text.split_html(extr(">Genres :", "</li>"))[::2]
|
||||
|
||||
@@ -121,7 +121,7 @@ class ManganeloMangaExtractor(ManganeloExtractor, MangaExtractor):
|
||||
"chapter" : text.parse_int(chapter),
|
||||
"chapter_minor": (sep and ".") + minor,
|
||||
"title" : title.partition(": ")[2],
|
||||
"date" : text.parse_datetime(date, "%b-%d-%Y %H:%M"),
|
||||
"date" : self.parse_datetime(date, "%b-%d-%Y %H:%M"),
|
||||
"lang" : "en",
|
||||
"language": "English",
|
||||
}))
|
||||
|
||||
@@ -101,7 +101,7 @@ class MangaparkChapterExtractor(MangaparkBase, ChapterExtractor):
|
||||
"language" : util.code_to_language(lang),
|
||||
"source" : chapter["srcTitle"],
|
||||
"source_id" : chapter["sourceId"],
|
||||
"date" : text.parse_timestamp(chapter["dateCreate"] // 1000),
|
||||
"date" : self.parse_timestamp(chapter["dateCreate"] // 1000),
|
||||
}
|
||||
|
||||
def images(self, _):
|
||||
@@ -138,7 +138,7 @@ class MangaparkMangaExtractor(MangaparkBase, Extractor):
|
||||
"language" : util.code_to_language(lang),
|
||||
"source" : chapter["srcTitle"],
|
||||
"source_id" : chapter["sourceId"],
|
||||
"date" : text.parse_timestamp(
|
||||
"date" : self.parse_timestamp(
|
||||
chapter["dateCreate"] // 1000),
|
||||
"_extractor": MangaparkChapterExtractor,
|
||||
}
|
||||
|
||||
@@ -40,10 +40,8 @@ class MangataroChapterExtractor(MangataroBase, ChapterExtractor):
|
||||
"chapter_minor": str(round(minor, 5))[1:] if minor else "",
|
||||
"chapter_id" : text.parse_int(chapter_id),
|
||||
"chapter_url" : comic["url"],
|
||||
"date" : text.parse_datetime(
|
||||
comic["datePublished"], "%Y-%m-%dT%H:%M:%S%z"),
|
||||
"date_updated" : text.parse_datetime(
|
||||
comic["dateModified"], "%Y-%m-%dT%H:%M:%S%z"),
|
||||
"date" : self.parse_datetime_iso(comic["datePublished"]),
|
||||
"date_updated" : self.parse_datetime_iso(comic["dateModified"]),
|
||||
}
|
||||
|
||||
def images(self, page):
|
||||
|
||||
@@ -119,7 +119,7 @@ class MangoxoAlbumExtractor(MangoxoExtractor):
|
||||
"album": {
|
||||
"id": self.album_id,
|
||||
"name": text.unescape(title),
|
||||
"date": text.parse_datetime(date.strip(), "%Y.%m.%d %H:%M"),
|
||||
"date": self.parse_datetime(date.strip(), "%Y.%m.%d %H:%M"),
|
||||
"description": text.unescape(descr),
|
||||
},
|
||||
"count": text.parse_int(count),
|
||||
|
||||
@@ -64,8 +64,7 @@ class MastodonExtractor(BaseExtractor):
|
||||
|
||||
status["count"] = len(attachments)
|
||||
status["tags"] = [tag["name"] for tag in status["tags"]]
|
||||
status["date"] = text.parse_datetime(
|
||||
status["created_at"][:19], "%Y-%m-%dT%H:%M:%S")
|
||||
status["date"] = self.parse_datetime_iso(status["created_at"][:19])
|
||||
|
||||
yield Message.Directory, status
|
||||
for status["num"], media in enumerate(attachments, 1):
|
||||
@@ -319,10 +318,8 @@ class MastodonAPI():
|
||||
if code == 404:
|
||||
raise exception.NotFoundError()
|
||||
if code == 429:
|
||||
self.extractor.wait(until=text.parse_datetime(
|
||||
response.headers["x-ratelimit-reset"],
|
||||
"%Y-%m-%dT%H:%M:%S.%fZ",
|
||||
))
|
||||
self.extractor.wait(until=self.parse_datetime_iso(
|
||||
response.headers["x-ratelimit-reset"]))
|
||||
continue
|
||||
raise exception.AbortExtraction(response.json().get("error"))
|
||||
|
||||
|
||||
@@ -48,13 +48,11 @@ class MisskeyExtractor(BaseExtractor):
|
||||
note["instance"] = self.instance
|
||||
note["instance_remote"] = note["user"]["host"]
|
||||
note["count"] = len(files)
|
||||
note["date"] = text.parse_datetime(
|
||||
note["createdAt"], "%Y-%m-%dT%H:%M:%S.%f%z")
|
||||
note["date"] = self.parse_datetime_iso(note["createdAt"])
|
||||
|
||||
yield Message.Directory, note
|
||||
for note["num"], file in enumerate(files, 1):
|
||||
file["date"] = text.parse_datetime(
|
||||
file["createdAt"], "%Y-%m-%dT%H:%M:%S.%f%z")
|
||||
file["date"] = self.parse_datetime_iso(file["createdAt"])
|
||||
note["file"] = file
|
||||
url = file["url"]
|
||||
yield Message.Url, url, text.nameext_from_url(url, note)
|
||||
|
||||
@@ -9,9 +9,8 @@
|
||||
"""Extractors for Moebooru based sites"""
|
||||
|
||||
from .booru import BooruExtractor
|
||||
from .. import text, util
|
||||
from .. import text, util, dt
|
||||
import collections
|
||||
import datetime
|
||||
|
||||
|
||||
class MoebooruExtractor(BooruExtractor):
|
||||
@@ -21,7 +20,7 @@ class MoebooruExtractor(BooruExtractor):
|
||||
page_start = 1
|
||||
|
||||
def _prepare(self, post):
|
||||
post["date"] = text.parse_timestamp(post["created_at"])
|
||||
post["date"] = dt.parse_ts(post["created_at"])
|
||||
|
||||
def _html(self, post):
|
||||
url = f"{self.root}/post/show/{post['id']}"
|
||||
@@ -164,14 +163,14 @@ class MoebooruPopularExtractor(MoebooruExtractor):
|
||||
date = (f"{params['year']:>04}-{params.get('month', '01'):>02}-"
|
||||
f"{params.get('day', '01'):>02}")
|
||||
else:
|
||||
date = datetime.date.today().isoformat()
|
||||
date = dt.date.today().isoformat()
|
||||
|
||||
scale = self.scale
|
||||
if scale.startswith("by_"):
|
||||
scale = scale[3:]
|
||||
if scale == "week":
|
||||
date = datetime.date.fromisoformat(date)
|
||||
date = (date - datetime.timedelta(days=date.weekday())).isoformat()
|
||||
date = dt.date.fromisoformat(date)
|
||||
date = (date - dt.timedelta(days=date.weekday())).isoformat()
|
||||
elif scale == "month":
|
||||
date = date[:-3]
|
||||
|
||||
|
||||
@@ -9,9 +9,8 @@
|
||||
"""Extractors for https://motherless.com/"""
|
||||
|
||||
from .common import Extractor, Message
|
||||
from .. import text, util, exception
|
||||
from .. import text, dt, exception
|
||||
from ..cache import memcache
|
||||
from datetime import timedelta
|
||||
|
||||
BASE_PATTERN = r"(?:https?://)?motherless\.com"
|
||||
|
||||
@@ -115,14 +114,14 @@ class MotherlessExtractor(Extractor):
|
||||
|
||||
return data
|
||||
|
||||
def _parse_datetime(self, dt):
|
||||
if " ago" not in dt:
|
||||
return text.parse_datetime(dt, "%d %b %Y")
|
||||
def _parse_datetime(self, dt_string):
|
||||
if " ago" not in dt_string:
|
||||
return dt.parse(dt_string, "%d %b %Y")
|
||||
|
||||
value = text.parse_int(dt[:-5])
|
||||
delta = timedelta(0, value*3600) if dt[-5] == "h" else timedelta(value)
|
||||
return (util.datetime_utcnow() - delta).replace(
|
||||
hour=0, minute=0, second=0)
|
||||
value = text.parse_int(dt_string[:-5])
|
||||
delta = (dt.timedelta(0, value*3600) if dt_string[-5] == "h" else
|
||||
dt.timedelta(value))
|
||||
return (dt.now() - delta).replace(hour=0, minute=0, second=0)
|
||||
|
||||
@memcache(keyarg=2)
|
||||
def _extract_gallery_title(self, page, gallery_id):
|
||||
|
||||
@@ -9,8 +9,7 @@
|
||||
"""Extractors for https://blog.naver.com/"""
|
||||
|
||||
from .common import GalleryExtractor, Extractor, Message
|
||||
from .. import text, util
|
||||
import datetime
|
||||
from .. import text, util, dt
|
||||
import time
|
||||
|
||||
|
||||
@@ -67,11 +66,11 @@ class NaverBlogPostExtractor(NaverBlogBase, GalleryExtractor):
|
||||
|
||||
return data
|
||||
|
||||
def _parse_datetime(self, date_string):
|
||||
if "전" in date_string:
|
||||
def _parse_datetime(self, dt_string):
|
||||
if "전" in dt_string:
|
||||
ts = time.gmtime()
|
||||
return datetime.datetime(ts.tm_year, ts.tm_mon, ts.tm_mday)
|
||||
return text.parse_datetime(date_string, "%Y. %m. %d. %H:%M")
|
||||
return dt.datetime(ts.tm_year, ts.tm_mon, ts.tm_mday)
|
||||
return dt.parse(dt_string, "%Y. %m. %d. %H:%M")
|
||||
|
||||
def images(self, page):
|
||||
files = []
|
||||
|
||||
@@ -31,17 +31,17 @@ class NaverChzzkExtractor(Extractor):
|
||||
data["uid"] = data["objectId"]
|
||||
data["user"] = comment["user"]
|
||||
data["count"] = len(files)
|
||||
data["date"] = text.parse_datetime(
|
||||
data["date"] = self.parse_datetime(
|
||||
data["createdDate"], "%Y%m%d%H%M%S")
|
||||
|
||||
yield Message.Directory, data
|
||||
for data["num"], file in enumerate(files, 1):
|
||||
if extra := file.get("extraJson"):
|
||||
file.update(util.json_loads(extra))
|
||||
file["date"] = text.parse_datetime(
|
||||
file["createdDate"], "%Y-%m-%dT%H:%M:%S.%f%z")
|
||||
file["date_updated"] = text.parse_datetime(
|
||||
file["updatedDate"], "%Y-%m-%dT%H:%M:%S.%f%z")
|
||||
file["date"] = self.parse_datetime_iso(
|
||||
file["createdDate"])
|
||||
file["date_updated"] = self.parse_datetime_iso(
|
||||
file["updatedDate"])
|
||||
data["file"] = file
|
||||
url = file["attachValue"]
|
||||
yield Message.Url, url, text.nameext_from_url(url, data)
|
||||
|
||||
@@ -59,8 +59,8 @@ class NekohousePostExtractor(NekohouseExtractor):
|
||||
'class="scrape__user-name', '</').rpartition(">")[2].strip()),
|
||||
"title" : text.unescape(extr(
|
||||
'class="scrape__title', '</').rpartition(">")[2]),
|
||||
"date" : text.parse_datetime(extr(
|
||||
'datetime="', '"')[:19], "%Y-%m-%d %H:%M:%S"),
|
||||
"date" : self.parse_datetime_iso(extr(
|
||||
'datetime="', '"')[:19]),
|
||||
"content": text.unescape(extr(
|
||||
'class="scrape__content">', "</div>").strip()),
|
||||
}
|
||||
|
||||
@@ -9,7 +9,7 @@
|
||||
"""Extractors for https://www.newgrounds.com/"""
|
||||
|
||||
from .common import Extractor, Message, Dispatch
|
||||
from .. import text, util, exception
|
||||
from .. import text, util, dt, exception
|
||||
from ..cache import cache
|
||||
import itertools
|
||||
|
||||
@@ -218,7 +218,7 @@ class NewgroundsExtractor(Extractor):
|
||||
"description": text.unescape(extr(':description" content="', '"')),
|
||||
"type" : "art",
|
||||
"_type" : "i",
|
||||
"date" : text.parse_datetime(extr(
|
||||
"date" : dt.parse_iso(extr(
|
||||
'itemprop="datePublished" content="', '"')),
|
||||
"rating" : extr('class="rated-', '"'),
|
||||
"url" : full('src="', '"'),
|
||||
@@ -268,7 +268,7 @@ class NewgroundsExtractor(Extractor):
|
||||
"description": text.unescape(extr(':description" content="', '"')),
|
||||
"type" : "audio",
|
||||
"_type" : "a",
|
||||
"date" : text.parse_datetime(extr(
|
||||
"date" : dt.parse_iso(extr(
|
||||
'itemprop="datePublished" content="', '"')),
|
||||
"url" : extr('{"url":"', '"').replace("\\/", "/"),
|
||||
"index" : text.parse_int(index),
|
||||
@@ -287,7 +287,7 @@ class NewgroundsExtractor(Extractor):
|
||||
src = src.replace("\\/", "/")
|
||||
formats = ()
|
||||
type = extr(',"description":"', '"')
|
||||
date = text.parse_datetime(extr(
|
||||
date = dt.parse_iso(extr(
|
||||
'itemprop="datePublished" content="', '"'))
|
||||
if type:
|
||||
type = type.rpartition(" ")[2].lower()
|
||||
@@ -302,7 +302,7 @@ class NewgroundsExtractor(Extractor):
|
||||
sources = self.request_json(url, headers=headers)["sources"]
|
||||
formats = self._video_formats(sources)
|
||||
src = next(formats, "")
|
||||
date = text.parse_timestamp(src.rpartition("?")[2])
|
||||
date = self.parse_timestamp(src.rpartition("?")[2])
|
||||
type = "movie"
|
||||
|
||||
return {
|
||||
|
||||
@@ -9,7 +9,7 @@
|
||||
"""Extractors for nijie instances"""
|
||||
|
||||
from .common import BaseExtractor, Message, Dispatch, AsynchronousMixin
|
||||
from .. import text, exception
|
||||
from .. import text, dt, exception
|
||||
from ..cache import cache
|
||||
|
||||
|
||||
@@ -82,8 +82,9 @@ class NijieExtractor(AsynchronousMixin, BaseExtractor):
|
||||
"title" : keywords[0].strip(),
|
||||
"description": text.unescape(extr(
|
||||
'"description": "', '"').replace("&", "&")),
|
||||
"date" : text.parse_datetime(extr(
|
||||
'"datePublished": "', '"'), "%a %b %d %H:%M:%S %Y", 9),
|
||||
"date" : dt.parse(extr(
|
||||
'"datePublished": "', '"'), "%a %b %d %H:%M:%S %Y"
|
||||
) - dt.timedelta(hours=9),
|
||||
"artist_id" : text.parse_int(extr('/members.php?id=', '"')),
|
||||
"artist_name": keywords[1],
|
||||
"tags" : keywords[2:-1],
|
||||
@@ -101,9 +102,9 @@ class NijieExtractor(AsynchronousMixin, BaseExtractor):
|
||||
"artist_id" : text.parse_int(extr('members.php?id=', '"')),
|
||||
"artist_name": keywords[1],
|
||||
"tags" : keywords[2:-1],
|
||||
"date" : text.parse_datetime(extr(
|
||||
"itemprop='datePublished' content=", "<").rpartition(">")[2],
|
||||
"%Y-%m-%d %H:%M:%S", 9),
|
||||
"date" : dt.parse_iso(extr(
|
||||
"itemprop='datePublished' content=", "<").rpartition(">")[2]
|
||||
) - dt.timedelta(hours=9),
|
||||
}
|
||||
|
||||
def _extract_images(self, image_id, page):
|
||||
|
||||
@@ -114,7 +114,7 @@ class NitterExtractor(BaseExtractor):
|
||||
return {
|
||||
"author" : author,
|
||||
"user" : self.user_obj or author,
|
||||
"date" : text.parse_datetime(
|
||||
"date" : self.parse_datetime(
|
||||
extr('title="', '"'), "%b %d, %Y · %I:%M %p %Z"),
|
||||
"tweet_id": link.rpartition("/")[2].partition("#")[0],
|
||||
"content": extr('class="tweet-content', "</div").partition(">")[2],
|
||||
@@ -142,7 +142,7 @@ class NitterExtractor(BaseExtractor):
|
||||
return {
|
||||
"author" : author,
|
||||
"user" : self.user_obj or author,
|
||||
"date" : text.parse_datetime(
|
||||
"date" : self.parse_datetime(
|
||||
extr('title="', '"'), "%b %d, %Y · %I:%M %p %Z"),
|
||||
"tweet_id": link.rpartition("/")[2].partition("#")[0],
|
||||
"content" : extr('class="quote-text', "</div").partition(">")[2],
|
||||
@@ -173,7 +173,7 @@ class NitterExtractor(BaseExtractor):
|
||||
"nick" : extr('title="', '"'),
|
||||
"name" : extr('title="@', '"'),
|
||||
"description" : extr('<p dir="auto">', '<'),
|
||||
"date" : text.parse_datetime(
|
||||
"date" : self.parse_datetime(
|
||||
extr('class="profile-joindate"><span title="', '"'),
|
||||
"%I:%M %p - %d %b %Y"),
|
||||
"statuses_count" : text.parse_int(extr(
|
||||
|
||||
@@ -9,7 +9,7 @@
|
||||
"""Extractors for https://nozomi.la/"""
|
||||
|
||||
from .common import Extractor, Message
|
||||
from .. import text
|
||||
from .. import text, dt
|
||||
|
||||
|
||||
def decode_nozomi(n):
|
||||
@@ -49,10 +49,9 @@ class NozomiExtractor(Extractor):
|
||||
post["character"] = self._list(post.get("character"))
|
||||
|
||||
try:
|
||||
post["date"] = text.parse_datetime(
|
||||
post["date"] + ":00", "%Y-%m-%d %H:%M:%S%z")
|
||||
post["date"] = dt.parse_iso(post["date"] + ":00")
|
||||
except Exception:
|
||||
post["date"] = None
|
||||
post["date"] = dt.NONE
|
||||
|
||||
post.update(data)
|
||||
|
||||
|
||||
@@ -53,8 +53,7 @@ class PahealExtractor(Extractor):
|
||||
extr("<source src='", "'")),
|
||||
"uploader": text.unquote(extr(
|
||||
"class='username' href='/user/", "'")),
|
||||
"date" : text.parse_datetime(
|
||||
extr("datetime='", "'"), "%Y-%m-%dT%H:%M:%S%z"),
|
||||
"date" : self.parse_datetime_iso(extr("datetime='", "'")),
|
||||
"source" : text.unescape(text.extr(
|
||||
extr(">Source Link<", "</td>"), "href='", "'")),
|
||||
}
|
||||
@@ -133,7 +132,7 @@ class PahealTagExtractor(PahealExtractor):
|
||||
"duration" : text.parse_float(duration[:-1]),
|
||||
"tags" : text.unescape(tags),
|
||||
"size" : text.parse_bytes(size[:-1]),
|
||||
"date" : text.parse_datetime(date, "%B %d, %Y; %H:%M"),
|
||||
"date" : self.parse_datetime(date, "%B %d, %Y; %H:%M"),
|
||||
"filename" : f"{pid} - {tags}",
|
||||
"extension": ext,
|
||||
}
|
||||
|
||||
@@ -9,7 +9,7 @@
|
||||
"""Extractors for https://www.patreon.com/"""
|
||||
|
||||
from .common import Extractor, Message
|
||||
from .. import text, util, exception
|
||||
from .. import text, util, dt, exception
|
||||
from ..cache import memcache
|
||||
import collections
|
||||
import itertools
|
||||
@@ -177,8 +177,7 @@ class PatreonExtractor(Extractor):
|
||||
post, included, "attachments")
|
||||
attr["attachments_media"] = self._files(
|
||||
post, included, "attachments_media")
|
||||
attr["date"] = text.parse_datetime(
|
||||
attr["published_at"], "%Y-%m-%dT%H:%M:%S.%f%z")
|
||||
attr["date"] = self.parse_datetime_iso(attr["published_at"])
|
||||
|
||||
try:
|
||||
attr["campaign"] = (included["campaign"][
|
||||
@@ -226,8 +225,7 @@ class PatreonExtractor(Extractor):
|
||||
user = response.json()["data"]
|
||||
attr = user["attributes"]
|
||||
attr["id"] = user["id"]
|
||||
attr["date"] = text.parse_datetime(
|
||||
attr["created"], "%Y-%m-%dT%H:%M:%S.%f%z")
|
||||
attr["date"] = self.parse_datetime_iso(attr["created"])
|
||||
return attr
|
||||
|
||||
def _collection(self, collection_id):
|
||||
@@ -236,8 +234,7 @@ class PatreonExtractor(Extractor):
|
||||
coll = data["data"]
|
||||
attr = coll["attributes"]
|
||||
attr["id"] = coll["id"]
|
||||
attr["date"] = text.parse_datetime(
|
||||
attr["created_at"], "%Y-%m-%dT%H:%M:%S.%f%z")
|
||||
attr["date"] = self.parse_datetime_iso(attr["created_at"])
|
||||
return attr
|
||||
|
||||
def _filename(self, url):
|
||||
@@ -445,8 +442,7 @@ class PatreonUserExtractor(PatreonExtractor):
|
||||
|
||||
def posts(self):
|
||||
if date_max := self._get_date_min_max(None, None)[1]:
|
||||
self._cursor = cursor = \
|
||||
util.datetime_from_timestamp(date_max).isoformat()
|
||||
self._cursor = cursor = dt.from_ts(date_max).isoformat()
|
||||
self._init_cursor = lambda: cursor
|
||||
|
||||
url = self._build_url("stream", (
|
||||
|
||||
@@ -35,8 +35,7 @@ class PexelsExtractor(Extractor):
|
||||
post["type"] = attr["type"]
|
||||
|
||||
post.update(metadata)
|
||||
post["date"] = text.parse_datetime(
|
||||
post["created_at"][:-5], "%Y-%m-%dT%H:%M:%S")
|
||||
post["date"] = self.parse_datetime_iso(post["created_at"][:-5])
|
||||
|
||||
if "image" in post:
|
||||
url, _, query = post["image"]["download_link"].partition("?")
|
||||
|
||||
@@ -36,8 +36,7 @@ class PhilomenaExtractor(BooruExtractor):
|
||||
return url
|
||||
|
||||
def _prepare(self, post):
|
||||
post["date"] = text.parse_datetime(
|
||||
post["created_at"][:19], "%Y-%m-%dT%H:%M:%S")
|
||||
post["date"] = self.parse_datetime_iso(post["created_at"][:19])
|
||||
|
||||
|
||||
BASE_PATTERN = PhilomenaExtractor.update({
|
||||
|
||||
@@ -29,8 +29,7 @@ class PhotovogueUserExtractor(Extractor):
|
||||
for photo in self.photos():
|
||||
url = photo["gallery_image"]
|
||||
photo["title"] = photo["title"].strip()
|
||||
photo["date"] = text.parse_datetime(
|
||||
photo["date"], "%Y-%m-%dT%H:%M:%S.%f%z")
|
||||
photo["date"] = self.parse_datetime_iso(photo["date"])
|
||||
|
||||
yield Message.Directory, photo
|
||||
yield Message.Url, url, text.nameext_from_url(url, photo)
|
||||
|
||||
@@ -29,8 +29,7 @@ class PicartoGalleryExtractor(Extractor):
|
||||
|
||||
def items(self):
|
||||
for post in self.posts():
|
||||
post["date"] = text.parse_datetime(
|
||||
post["created_at"], "%Y-%m-%d %H:%M:%S")
|
||||
post["date"] = self.parse_datetime_iso(post["created_at"])
|
||||
variations = post.pop("variations", ())
|
||||
yield Message.Directory, post
|
||||
|
||||
|
||||
@@ -26,8 +26,7 @@ class PiczelExtractor(Extractor):
|
||||
def items(self):
|
||||
for post in self.posts():
|
||||
post["tags"] = [t["title"] for t in post["tags"] if t["title"]]
|
||||
post["date"] = text.parse_datetime(
|
||||
post["created_at"], "%Y-%m-%dT%H:%M:%S.%f%z")
|
||||
post["date"] = self.parse_datetime_iso(post["created_at"])
|
||||
|
||||
if post["multi"]:
|
||||
images = post["images"]
|
||||
|
||||
@@ -48,8 +48,7 @@ class PillowfortExtractor(Extractor):
|
||||
for url in inline(post["content"]):
|
||||
files.append({"url": url})
|
||||
|
||||
post["date"] = text.parse_datetime(
|
||||
post["created_at"], "%Y-%m-%dT%H:%M:%S.%f%z")
|
||||
post["date"] = self.parse_datetime_iso(post["created_at"])
|
||||
post["post_id"] = post.pop("id")
|
||||
post["count"] = len(files)
|
||||
yield Message.Directory, post
|
||||
@@ -76,8 +75,7 @@ class PillowfortExtractor(Extractor):
|
||||
if "id" not in file:
|
||||
post["id"] = post["hash"]
|
||||
if "created_at" in file:
|
||||
post["date"] = text.parse_datetime(
|
||||
file["created_at"], "%Y-%m-%dT%H:%M:%S.%f%z")
|
||||
post["date"] = self.parse_datetime_iso(file["created_at"])
|
||||
|
||||
yield msgtype, url, post
|
||||
|
||||
|
||||
@@ -24,10 +24,6 @@ class PixeldrainExtractor(Extractor):
|
||||
if api_key := self.config("api-key"):
|
||||
self.session.auth = util.HTTPBasicAuth("", api_key)
|
||||
|
||||
def parse_datetime(self, date_string):
|
||||
return text.parse_datetime(
|
||||
date_string, "%Y-%m-%dT%H:%M:%S.%fZ")
|
||||
|
||||
|
||||
class PixeldrainFileExtractor(PixeldrainExtractor):
|
||||
"""Extractor for pixeldrain files"""
|
||||
@@ -45,7 +41,7 @@ class PixeldrainFileExtractor(PixeldrainExtractor):
|
||||
file = self.request_json(url + "/info")
|
||||
|
||||
file["url"] = url + "?download"
|
||||
file["date"] = self.parse_datetime(file["date_upload"])
|
||||
file["date"] = self.parse_datetime_iso(file["date_upload"])
|
||||
|
||||
text.nameext_from_url(file["name"], file)
|
||||
yield Message.Directory, file
|
||||
@@ -72,7 +68,7 @@ class PixeldrainAlbumExtractor(PixeldrainExtractor):
|
||||
|
||||
files = album["files"]
|
||||
album["count"] = album["file_count"]
|
||||
album["date"] = self.parse_datetime(album["date_created"])
|
||||
album["date"] = self.parse_datetime_iso(album["date_created"])
|
||||
|
||||
if self.file_index:
|
||||
idx = text.parse_int(self.file_index)
|
||||
@@ -91,7 +87,7 @@ class PixeldrainAlbumExtractor(PixeldrainExtractor):
|
||||
file["album"] = album
|
||||
file["num"] = num
|
||||
file["url"] = url = f"{self.root}/api/file/{file['id']}?download"
|
||||
file["date"] = self.parse_datetime(file["date_upload"])
|
||||
file["date"] = self.parse_datetime_iso(file["date_upload"])
|
||||
text.nameext_from_url(file["name"], file)
|
||||
yield Message.Url, url, file
|
||||
|
||||
@@ -112,7 +108,7 @@ class PixeldrainFolderExtractor(PixeldrainExtractor):
|
||||
"mime_type" : data["file_type"],
|
||||
"size" : data["file_size"],
|
||||
"hash_sha256": data["sha256_sum"],
|
||||
"date" : self.parse_datetime(data["created"]),
|
||||
"date" : self.parse_datetime_iso(data["created"]),
|
||||
}
|
||||
|
||||
def items(self):
|
||||
|
||||
@@ -9,9 +9,8 @@
|
||||
"""Extractors for https://www.pixiv.net/"""
|
||||
|
||||
from .common import Extractor, Message, Dispatch
|
||||
from .. import text, util, exception
|
||||
from .. import text, util, dt, exception
|
||||
from ..cache import cache, memcache
|
||||
from datetime import datetime, timedelta
|
||||
import itertools
|
||||
import hashlib
|
||||
|
||||
@@ -96,7 +95,7 @@ class PixivExtractor(Extractor):
|
||||
if transform_tags:
|
||||
transform_tags(work)
|
||||
work["num"] = 0
|
||||
work["date"] = text.parse_datetime(work["create_date"])
|
||||
work["date"] = dt.parse_iso(work["create_date"])
|
||||
work["rating"] = ratings.get(work["x_restrict"])
|
||||
work["suffix"] = ""
|
||||
work.update(metadata)
|
||||
@@ -353,10 +352,10 @@ class PixivExtractor(Extractor):
|
||||
if fmt in urls:
|
||||
yield urls[fmt]
|
||||
|
||||
def _date_from_url(self, url, offset=timedelta(hours=9)):
|
||||
def _date_from_url(self, url, offset=dt.timedelta(hours=9)):
|
||||
try:
|
||||
_, _, _, _, _, y, m, d, H, M, S, _ = url.split("/")
|
||||
return datetime(
|
||||
return dt.datetime(
|
||||
int(y), int(m), int(d), int(H), int(M), int(S)) - offset
|
||||
except Exception:
|
||||
return None
|
||||
@@ -715,8 +714,7 @@ class PixivRankingExtractor(PixivExtractor):
|
||||
self.log.warning("invalid date '%s'", date)
|
||||
date = None
|
||||
if not date:
|
||||
now = util.datetime_utcnow()
|
||||
date = (now - timedelta(days=1)).strftime("%Y-%m-%d")
|
||||
date = (dt.now() - dt.timedelta(days=1)).strftime("%Y-%m-%d")
|
||||
self.date = date
|
||||
|
||||
self.type = type = query.get("content")
|
||||
@@ -891,8 +889,7 @@ class PixivSketchExtractor(Extractor):
|
||||
for post in self.posts():
|
||||
media = post["media"]
|
||||
post["post_id"] = post["id"]
|
||||
post["date"] = text.parse_datetime(
|
||||
post["created_at"], "%Y-%m-%dT%H:%M:%S.%f%z")
|
||||
post["date"] = dt.parse_iso(post["created_at"])
|
||||
util.delete_items(post, ("id", "media", "_links"))
|
||||
|
||||
yield Message.Directory, post
|
||||
@@ -972,7 +969,7 @@ class PixivNovelExtractor(PixivExtractor):
|
||||
if transform_tags:
|
||||
transform_tags(novel)
|
||||
novel["num"] = 0
|
||||
novel["date"] = text.parse_datetime(novel["create_date"])
|
||||
novel["date"] = dt.parse_iso(novel["create_date"])
|
||||
novel["rating"] = ratings.get(novel["x_restrict"])
|
||||
novel["suffix"] = ""
|
||||
|
||||
@@ -1154,7 +1151,7 @@ class PixivAppAPI():
|
||||
"get_secure_url": "1",
|
||||
}
|
||||
|
||||
time = util.datetime_utcnow().strftime("%Y-%m-%dT%H:%M:%S+00:00")
|
||||
time = dt.now().strftime("%Y-%m-%dT%H:%M:%S+00:00")
|
||||
headers = {
|
||||
"X-Client-Time": time,
|
||||
"X-Client-Hash": hashlib.md5(
|
||||
@@ -1329,11 +1326,11 @@ class PixivAppAPI():
|
||||
sort = params["sort"]
|
||||
if sort == "date_desc":
|
||||
date_key = "end_date"
|
||||
date_off = timedelta(days=1)
|
||||
date_off = dt.timedelta(days=1)
|
||||
date_cmp = lambda lhs, rhs: lhs >= rhs # noqa E731
|
||||
elif sort == "date_asc":
|
||||
date_key = "start_date"
|
||||
date_off = timedelta(days=-1)
|
||||
date_off = dt.timedelta(days=-1)
|
||||
date_cmp = lambda lhs, rhs: lhs <= rhs # noqa E731
|
||||
else:
|
||||
date_key = None
|
||||
@@ -1360,8 +1357,8 @@ class PixivAppAPI():
|
||||
|
||||
if date_key and text.parse_int(params.get("offset")) >= 5000:
|
||||
date_last = data["illusts"][-1]["create_date"]
|
||||
date_val = (text.parse_datetime(
|
||||
date_last) + date_off).strftime("%Y-%m-%d")
|
||||
date_val = (dt.parse_iso(date_last) + date_off).strftime(
|
||||
"%Y-%m-%d")
|
||||
self.log.info("Reached 'offset' >= 5000; "
|
||||
"Updating '%s' to '%s'", date_key, date_val)
|
||||
params[date_key] = date_val
|
||||
|
||||
@@ -9,8 +9,7 @@
|
||||
"""Extractors for https://www.plurk.com/"""
|
||||
|
||||
from .common import Extractor, Message
|
||||
from .. import text, util, exception
|
||||
import datetime
|
||||
from .. import text, util, dt, exception
|
||||
|
||||
|
||||
class PlurkExtractor(Extractor):
|
||||
@@ -88,12 +87,10 @@ class PlurkTimelineExtractor(PlurkExtractor):
|
||||
while plurks:
|
||||
yield from plurks
|
||||
|
||||
offset = datetime.datetime.strptime(
|
||||
plurks[-1]["posted"], "%a, %d %b %Y %H:%M:%S %Z")
|
||||
offset = dt.parse(plurks[-1]["posted"], "%a, %d %b %Y %H:%M:%S %Z")
|
||||
data["offset"] = offset.strftime("%Y-%m-%dT%H:%M:%S.000Z")
|
||||
response = self.request(
|
||||
url, method="POST", headers=headers, data=data)
|
||||
plurks = response.json()["plurks"]
|
||||
plurks = self.request_json(
|
||||
url, method="POST", headers=headers, data=data)["plurks"]
|
||||
|
||||
|
||||
class PlurkPostExtractor(PlurkExtractor):
|
||||
|
||||
@@ -150,8 +150,7 @@ class PornhubGifExtractor(PornhubExtractor):
|
||||
"tags" : extr("data-context-tag='", "'").split(","),
|
||||
"title": extr('"name": "', '"'),
|
||||
"url" : extr('"contentUrl": "', '"'),
|
||||
"date" : text.parse_datetime(
|
||||
extr('"uploadDate": "', '"'), "%Y-%m-%d"),
|
||||
"date" : self.parse_datetime_iso(extr('"uploadDate": "', '"')),
|
||||
"viewkey" : extr('From this video: '
|
||||
'<a href="/view_video.php?viewkey=', '"'),
|
||||
"timestamp": extr('lass="directLink tstamp" rel="nofollow">', '<'),
|
||||
|
||||
@@ -31,7 +31,7 @@ class PostmillExtractor(BaseExtractor):
|
||||
|
||||
title = text.unescape(extr(
|
||||
'<meta property="og:title" content="', '">'))
|
||||
date = text.parse_datetime(extr(
|
||||
date = self.parse_datetime_iso(extr(
|
||||
'<meta property="og:article:published_time" content="', '">'))
|
||||
username = extr(
|
||||
'<meta property="og:article:author" content="', '">')
|
||||
|
||||
@@ -42,7 +42,7 @@ class RawkumaChapterExtractor(RawkumaBase, ChapterExtractor):
|
||||
"chapter_minor": sep + minor,
|
||||
"chapter_id" : text.parse_int(item["cid"]),
|
||||
"title" : text.unescape(title),
|
||||
"date" : text.parse_datetime(
|
||||
"date" : self.parse_datetime(
|
||||
date, "%Y-%m-%dWIB%H:%M:%S%z"),
|
||||
"thumbnail" : item.get("t"),
|
||||
"lang" : "ja",
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user