[dt] update 'parse_datetime' calls with one argument
This commit is contained in:
@@ -74,7 +74,7 @@ if sys.hexversion < 0x30c0000:
|
||||
except Exception:
|
||||
return NONE
|
||||
|
||||
def parse_compat(dt_string, format):
|
||||
def parse_compat(dt_string, format="%Y-%m-%dT%H:%M:%S%z"):
|
||||
"""Parse 'dt_string' as ISO 8601 value using 'format'"""
|
||||
return parse(dt_string, format)
|
||||
|
||||
@@ -90,7 +90,7 @@ else:
|
||||
except Exception:
|
||||
return NONE
|
||||
|
||||
def parse_compat(dt_string, format):
|
||||
def parse_compat(dt_string, format=None):
|
||||
"""Parse 'dt_string' as ISO 8601 value"""
|
||||
return parse_iso(dt_string)
|
||||
|
||||
|
||||
@@ -61,10 +61,9 @@ class _4archiveThreadExtractor(Extractor):
|
||||
extr = text.extract_from(post)
|
||||
data = {
|
||||
"name": extr('class="name">', "</span>"),
|
||||
"date": self.parse_datetime(
|
||||
"date": self.parse_datetime_iso(
|
||||
(extr('class="dateTime">', "<") or
|
||||
extr('class="dateTime postNum" >', "<")).strip(),
|
||||
"%Y-%m-%d %H:%M:%S"),
|
||||
extr('class="dateTime postNum" >', "<")).strip()),
|
||||
"no" : text.parse_int(extr(">Post No.", "<")),
|
||||
}
|
||||
if 'class="file"' in post:
|
||||
|
||||
@@ -141,8 +141,8 @@ class BellazonExtractor(Extractor):
|
||||
"title": schema["headline"],
|
||||
"views": stats[0]["userInteractionCount"],
|
||||
"posts": stats[1]["userInteractionCount"],
|
||||
"date" : self.parse_datetime(schema["datePublished"]),
|
||||
"date_updated": self.parse_datetime(schema["dateModified"]),
|
||||
"date" : self.parse_datetime_iso(schema["datePublished"]),
|
||||
"date_updated": self.parse_datetime_iso(schema["dateModified"]),
|
||||
"description" : text.unescape(schema["text"]).strip(),
|
||||
"section" : path[-2],
|
||||
"author" : author["name"],
|
||||
@@ -162,7 +162,7 @@ class BellazonExtractor(Extractor):
|
||||
post = {
|
||||
"id": extr('id="elComment_', '"'),
|
||||
"author_url": extr(" href='", "'"),
|
||||
"date": self.parse_datetime(extr("datetime='", "'")),
|
||||
"date": self.parse_datetime_iso(extr("datetime='", "'")),
|
||||
"content": extr("<!-- Post content -->", "\n\t\t</div>"),
|
||||
}
|
||||
|
||||
|
||||
@@ -40,7 +40,7 @@ class BloggerExtractor(BaseExtractor):
|
||||
blog = self.api.blog_by_url("http://" + self.blog)
|
||||
blog["pages"] = blog["pages"]["totalItems"]
|
||||
blog["posts"] = blog["posts"]["totalItems"]
|
||||
blog["date"] = self.parse_datetime(blog["published"])
|
||||
blog["date"] = self.parse_datetime_iso(blog["published"])
|
||||
del blog["selfLink"]
|
||||
|
||||
findall_image = util.re(
|
||||
@@ -65,7 +65,7 @@ class BloggerExtractor(BaseExtractor):
|
||||
post["author"] = post["author"]["displayName"]
|
||||
post["replies"] = post["replies"]["totalItems"]
|
||||
post["content"] = text.remove_html(content)
|
||||
post["date"] = self.parse_datetime(post["published"])
|
||||
post["date"] = self.parse_datetime_iso(post["published"])
|
||||
del post["selfLink"]
|
||||
del post["blog"]
|
||||
|
||||
|
||||
@@ -61,7 +61,7 @@ class CienArticleExtractor(CienExtractor):
|
||||
post["post_url"] = url
|
||||
post["post_id"] = text.parse_int(post_id)
|
||||
post["count"] = len(files)
|
||||
post["date"] = self.parse_datetime(post["datePublished"])
|
||||
post["date"] = self.parse_datetime_iso(post["datePublished"])
|
||||
|
||||
try:
|
||||
post["author"]["id"] = text.parse_int(author_id)
|
||||
|
||||
@@ -1187,7 +1187,7 @@ class DeviantartStatusExtractor(DeviantartExtractor):
|
||||
deviation["username"] = deviation["author"]["username"]
|
||||
deviation["_username"] = deviation["username"].lower()
|
||||
|
||||
deviation["date"] = d = self.parse_datetime(deviation["ts"])
|
||||
deviation["date"] = d = self.parse_datetime_iso(deviation["ts"])
|
||||
deviation["published_time"] = int(dt.to_ts(d))
|
||||
|
||||
deviation["da_category"] = "Status"
|
||||
|
||||
@@ -72,9 +72,7 @@ class DiscordExtractor(Extractor):
|
||||
"author_files": [],
|
||||
"message": self.extract_message_text(message),
|
||||
"message_id": message["id"],
|
||||
"date": self.parse_datetime(
|
||||
message["timestamp"], "%Y-%m-%dT%H:%M:%S.%f%z"
|
||||
),
|
||||
"date": self.parse_datetime_iso(message["timestamp"]),
|
||||
"files": []
|
||||
})
|
||||
|
||||
|
||||
@@ -128,7 +128,7 @@ class FanboxExtractor(Extractor):
|
||||
if file.get("extension", "").lower() in exts
|
||||
]
|
||||
|
||||
post["date"] = self.parse_datetime(post["publishedDatetime"])
|
||||
post["date"] = self.parse_datetime_iso(post["publishedDatetime"])
|
||||
post["text"] = content_body.get("text") if content_body else None
|
||||
post["isCoverImage"] = False
|
||||
|
||||
|
||||
@@ -34,7 +34,7 @@ class HatenablogExtractor(Extractor):
|
||||
|
||||
def _handle_article(self, article: str):
|
||||
extr = text.extract_from(article)
|
||||
date = self.parse_datetime(extr('<time datetime="', '"'))
|
||||
date = self.parse_datetime_iso(extr('<time datetime="', '"'))
|
||||
entry_link = text.unescape(extr('<a href="', '"'))
|
||||
entry = entry_link.partition("/entry/")[2]
|
||||
title = text.unescape(extr('>', '<'))
|
||||
|
||||
@@ -86,7 +86,7 @@ class HentaifoundryExtractor(Extractor):
|
||||
.replace("\r\n", "\n")),
|
||||
"ratings" : [text.unescape(r) for r in text.extract_iter(extr(
|
||||
"class='ratings_box'", "</div>"), "title='", "'")],
|
||||
"date" : self.parse_datetime(extr("datetime='", "'")),
|
||||
"date" : self.parse_datetime_iso(extr("datetime='", "'")),
|
||||
"views" : text.parse_int(extr(">Views</span>", "<")),
|
||||
"score" : text.parse_int(extr(">Vote Score</span>", "<")),
|
||||
"media" : text.unescape(extr(">Media</span>", "<").strip()),
|
||||
|
||||
@@ -38,7 +38,7 @@ class ImgurExtractor(Extractor):
|
||||
|
||||
image["url"] = url = \
|
||||
f"https://i.imgur.com/{image['id']}.{image['ext']}"
|
||||
image["date"] = self.parse_datetime(image["created_at"])
|
||||
image["date"] = self.parse_datetime_iso(image["created_at"])
|
||||
image["_http_validate"] = self._validate
|
||||
text.nameext_from_url(url, image)
|
||||
|
||||
@@ -106,7 +106,7 @@ class ImgurAlbumExtractor(ImgurExtractor):
|
||||
|
||||
del album["media"]
|
||||
count = len(images)
|
||||
album["date"] = self.parse_datetime(album["created_at"])
|
||||
album["date"] = self.parse_datetime_iso(album["created_at"])
|
||||
|
||||
try:
|
||||
del album["ad_url"]
|
||||
|
||||
@@ -45,7 +45,7 @@ class LivedoorExtractor(Extractor):
|
||||
"title" : text.unescape(extr('dc:title="', '"')),
|
||||
"categories" : extr('dc:subject="', '"').partition(",")[::2],
|
||||
"description": extr('dc:description="', '"'),
|
||||
"date" : self.parse_datetime(extr('dc:date="', '"')),
|
||||
"date" : self.parse_datetime_iso(extr('dc:date="', '"')),
|
||||
"tags" : text.split_html(tags)[1:] if tags else [],
|
||||
"user" : self.user,
|
||||
"body" : body,
|
||||
|
||||
@@ -68,7 +68,7 @@ class MangadexExtractor(Extractor):
|
||||
"chapter" : text.parse_int(chnum),
|
||||
"chapter_minor": f"{sep}{minor}",
|
||||
"chapter_id": chapter["id"],
|
||||
"date" : self.parse_datetime(cattributes["publishAt"]),
|
||||
"date" : self.parse_datetime_iso(cattributes["publishAt"]),
|
||||
"group" : [group["attributes"]["name"]
|
||||
for group in relationships["scanlation_group"]],
|
||||
"lang" : lang,
|
||||
@@ -109,8 +109,8 @@ class MangadexCoversExtractor(MangadexExtractor):
|
||||
"cover" : cattributes["fileName"],
|
||||
"lang" : cattributes.get("locale"),
|
||||
"volume" : text.parse_int(cattributes["volume"]),
|
||||
"date" : self.parse_datetime(cattributes["createdAt"]),
|
||||
"date_updated": self.parse_datetime(cattributes["updatedAt"]),
|
||||
"date" : self.parse_datetime_iso(cattributes["createdAt"]),
|
||||
"date_updated": self.parse_datetime_iso(cattributes["updatedAt"]),
|
||||
}
|
||||
|
||||
|
||||
@@ -454,7 +454,7 @@ def _manga_info(self, uuid):
|
||||
"manga_id": manga["id"],
|
||||
"manga_titles": [t.popitem()[1]
|
||||
for t in mattr.get("altTitles") or ()],
|
||||
"manga_date" : self.parse_datetime(mattr.get("createdAt")),
|
||||
"manga_date" : self.parse_datetime_iso(mattr.get("createdAt")),
|
||||
"description" : (mattr["description"].get("en") or
|
||||
next(iter(mattr["description"].values()), "")),
|
||||
"demographic": mattr.get("publicationDemographic"),
|
||||
|
||||
@@ -64,8 +64,7 @@ class MastodonExtractor(BaseExtractor):
|
||||
|
||||
status["count"] = len(attachments)
|
||||
status["tags"] = [tag["name"] for tag in status["tags"]]
|
||||
status["date"] = self.parse_datetime(
|
||||
status["created_at"][:19], "%Y-%m-%dT%H:%M:%S")
|
||||
status["date"] = self.parse_datetime_iso(status["created_at"][:19])
|
||||
|
||||
yield Message.Directory, status
|
||||
for status["num"], media in enumerate(attachments, 1):
|
||||
@@ -319,10 +318,8 @@ class MastodonAPI():
|
||||
if code == 404:
|
||||
raise exception.NotFoundError()
|
||||
if code == 429:
|
||||
self.extractor.wait(until=self.parse_datetime(
|
||||
response.headers["x-ratelimit-reset"],
|
||||
"%Y-%m-%dT%H:%M:%S.%fZ",
|
||||
))
|
||||
self.extractor.wait(until=self.parse_datetime_iso(
|
||||
response.headers["x-ratelimit-reset"]))
|
||||
continue
|
||||
raise exception.AbortExtraction(response.json().get("error"))
|
||||
|
||||
|
||||
@@ -9,7 +9,7 @@
|
||||
"""Extractors for https://www.newgrounds.com/"""
|
||||
|
||||
from .common import Extractor, Message, Dispatch
|
||||
from .. import text, util, exception
|
||||
from .. import text, util, dt, exception
|
||||
from ..cache import cache
|
||||
import itertools
|
||||
|
||||
@@ -218,7 +218,7 @@ class NewgroundsExtractor(Extractor):
|
||||
"description": text.unescape(extr(':description" content="', '"')),
|
||||
"type" : "art",
|
||||
"_type" : "i",
|
||||
"date" : self.parse_datetime(extr(
|
||||
"date" : dt.parse_compat(extr(
|
||||
'itemprop="datePublished" content="', '"')),
|
||||
"rating" : extr('class="rated-', '"'),
|
||||
"url" : full('src="', '"'),
|
||||
@@ -268,7 +268,7 @@ class NewgroundsExtractor(Extractor):
|
||||
"description": text.unescape(extr(':description" content="', '"')),
|
||||
"type" : "audio",
|
||||
"_type" : "a",
|
||||
"date" : self.parse_datetime(extr(
|
||||
"date" : dt.parse_compat(extr(
|
||||
'itemprop="datePublished" content="', '"')),
|
||||
"url" : extr('{"url":"', '"').replace("\\/", "/"),
|
||||
"index" : text.parse_int(index),
|
||||
@@ -287,7 +287,7 @@ class NewgroundsExtractor(Extractor):
|
||||
src = src.replace("\\/", "/")
|
||||
formats = ()
|
||||
type = extr(',"description":"', '"')
|
||||
date = self.parse_datetime(extr(
|
||||
date = dt.parse_compat(extr(
|
||||
'itemprop="datePublished" content="', '"'))
|
||||
if type:
|
||||
type = type.rpartition(" ")[2].lower()
|
||||
|
||||
@@ -9,7 +9,7 @@
|
||||
"""Extractors for nijie instances"""
|
||||
|
||||
from .common import BaseExtractor, Message, Dispatch, AsynchronousMixin
|
||||
from .. import text, exception
|
||||
from .. import text, dt, exception
|
||||
from ..cache import cache
|
||||
|
||||
|
||||
@@ -82,8 +82,9 @@ class NijieExtractor(AsynchronousMixin, BaseExtractor):
|
||||
"title" : keywords[0].strip(),
|
||||
"description": text.unescape(extr(
|
||||
'"description": "', '"').replace("&amp;", "&")),
|
||||
"date" : self.parse_datetime(extr(
|
||||
'"datePublished": "', '"'), "%a %b %d %H:%M:%S %Y", 9),
|
||||
"date" : dt.parse(extr(
|
||||
'"datePublished": "', '"'), "%a %b %d %H:%M:%S %Y"
|
||||
) - dt.timedelta(hours=9),
|
||||
"artist_id" : text.parse_int(extr('/members.php?id=', '"')),
|
||||
"artist_name": keywords[1],
|
||||
"tags" : keywords[2:-1],
|
||||
@@ -101,9 +102,9 @@ class NijieExtractor(AsynchronousMixin, BaseExtractor):
|
||||
"artist_id" : text.parse_int(extr('members.php?id=', '"')),
|
||||
"artist_name": keywords[1],
|
||||
"tags" : keywords[2:-1],
|
||||
"date" : self.parse_datetime(extr(
|
||||
"itemprop='datePublished' content=", "<").rpartition(">")[2],
|
||||
"%Y-%m-%d %H:%M:%S", 9),
|
||||
"date" : dt.parse_iso(extr(
|
||||
"itemprop='datePublished' content=", "<").rpartition(">")[2]
|
||||
) - dt.timedelta(hours=9),
|
||||
}
|
||||
|
||||
def _extract_images(self, image_id, page):
|
||||
|
||||
@@ -31,7 +31,7 @@ class PostmillExtractor(BaseExtractor):
|
||||
|
||||
title = text.unescape(extr(
|
||||
'<meta property="og:title" content="', '">'))
|
||||
date = self.parse_datetime(extr(
|
||||
date = self.parse_datetime_iso(extr(
|
||||
'<meta property="og:article:published_time" content="', '">'))
|
||||
username = extr(
|
||||
'<meta property="og:article:author" content="', '">')
|
||||
|
||||
@@ -97,7 +97,7 @@ class ReactorExtractor(BaseExtractor):
|
||||
return
|
||||
|
||||
num = 0
|
||||
date = self.parse_datetime(data["datePublished"])
|
||||
date = self.parse_datetime_iso(data["datePublished"])
|
||||
user = data["author"]["name"]
|
||||
description = text.unescape(data["description"])
|
||||
title, _, tags = text.unescape(data["headline"]).partition(" / ")
|
||||
|
||||
@@ -40,7 +40,7 @@ class SankakucomplexArticleExtractor(SankakucomplexExtractor):
|
||||
extr('property="og:title" content="', '"')),
|
||||
"description": text.unescape(
|
||||
extr('property="og:description" content="', '"')),
|
||||
"date" : self.parse_datetime(
|
||||
"date" : self.parse_datetime_iso(
|
||||
extr('property="article:published_time" content="', '"')),
|
||||
}
|
||||
content = extr('<div class="entry-content">', '</article>')
|
||||
|
||||
@@ -98,7 +98,7 @@ class SimpcityExtractor(Extractor):
|
||||
"id" : url_t[url_t.rfind(".")+1:-1],
|
||||
"url" : url_t,
|
||||
"title": schema["headline"],
|
||||
"date" : self.parse_datetime(schema["datePublished"]),
|
||||
"date" : self.parse_datetime_iso(schema["datePublished"]),
|
||||
"views": stats[0]["userInteractionCount"],
|
||||
"posts": stats[1]["userInteractionCount"],
|
||||
"tags" : (schema["keywords"].split(", ")
|
||||
@@ -119,7 +119,7 @@ class SimpcityExtractor(Extractor):
|
||||
"author": extr('data-author="', '"'),
|
||||
"id": extr('data-content="post-', '"'),
|
||||
"author_url": extr('itemprop="url" content="', '"'),
|
||||
"date": self.parse_datetime(extr('datetime="', '"')),
|
||||
"date": self.parse_datetime_iso(extr('datetime="', '"')),
|
||||
"content": extr('<div itemprop="text">',
|
||||
'<div class="js-selectToQuote').strip(),
|
||||
}
|
||||
|
||||
@@ -89,7 +89,7 @@ class TapasEpisodeExtractor(TapasExtractor):
|
||||
|
||||
html = data["html"]
|
||||
episode["series"] = self._extract_series(html)
|
||||
episode["date"] = self.parse_datetime(episode["publish_date"])
|
||||
episode["date"] = self.parse_datetime_iso(episode["publish_date"])
|
||||
yield Message.Directory, episode
|
||||
|
||||
if episode["book"]:
|
||||
|
||||
@@ -27,9 +27,8 @@ class TelegraphGalleryExtractor(GalleryExtractor):
|
||||
'property="og:title" content="', '"')),
|
||||
"description": text.unescape(extr(
|
||||
'property="og:description" content="', '"')),
|
||||
"date": self.parse_datetime(extr(
|
||||
'property="article:published_time" content="', '"'),
|
||||
"%Y-%m-%dT%H:%M:%S%z"),
|
||||
"date": self.parse_datetime_iso(extr(
|
||||
'property="article:published_time" content="', '"')),
|
||||
"author": text.unescape(extr(
|
||||
'property="article:author" content="', '"')),
|
||||
"post_url": text.unescape(extr(
|
||||
|
||||
@@ -23,7 +23,7 @@ class TungstenExtractor(Extractor):
|
||||
def items(self):
|
||||
for post in self.posts():
|
||||
url = post["original_url"]
|
||||
post["date"] = self.parse_datetime(post["created_at"])
|
||||
post["date"] = self.parse_datetime_iso(post["created_at"])
|
||||
post["filename"] = url[url.rfind("/")+1:]
|
||||
post["extension"] = "webp"
|
||||
yield Message.Directory, post
|
||||
|
||||
@@ -41,7 +41,7 @@ class UnsplashExtractor(Extractor):
|
||||
if metadata:
|
||||
photo.update(metadata)
|
||||
photo["extension"] = "jpg"
|
||||
photo["date"] = self.parse_datetime(photo["created_at"])
|
||||
photo["date"] = self.parse_datetime_iso(photo["created_at"])
|
||||
if "tags" in photo:
|
||||
photo["tags"] = [t["title"] for t in photo["tags"]]
|
||||
|
||||
|
||||
@@ -24,8 +24,7 @@ class WeasylExtractor(Extractor):
|
||||
# Some submissions don't have content and can be skipped
|
||||
if "submission" in data["media"]:
|
||||
data["url"] = data["media"]["submission"][0]["url"]
|
||||
data["date"] = self.parse_datetime(
|
||||
data["posted_at"][:19], "%Y-%m-%dT%H:%M:%S")
|
||||
data["date"] = self.parse_datetime_iso(data["posted_at"][:19])
|
||||
text.nameext_from_url(data["url"], data)
|
||||
return True
|
||||
return False
|
||||
@@ -42,7 +41,7 @@ class WeasylExtractor(Extractor):
|
||||
f"{self.root}/api/journals/{journalid}/view")
|
||||
data["extension"] = "html"
|
||||
data["html"] = "text:" + data["content"]
|
||||
data["date"] = self.parse_datetime(data["posted_at"])
|
||||
data["date"] = self.parse_datetime_iso(data["posted_at"])
|
||||
return data
|
||||
|
||||
def submissions(self, owner_login, folderid=None):
|
||||
|
||||
@@ -76,7 +76,7 @@ class ZerochanExtractor(BooruExtractor):
|
||||
data = {
|
||||
"id" : text.parse_int(entry_id),
|
||||
"file_url": jsonld["contentUrl"],
|
||||
"date" : self.parse_datetime(jsonld["datePublished"]),
|
||||
"date" : self.parse_datetime_iso(jsonld["datePublished"]),
|
||||
"width" : text.parse_int(jsonld["width"][:-3]),
|
||||
"height" : text.parse_int(jsonld["height"][:-3]),
|
||||
"size" : text.parse_bytes(jsonld["contentSize"][:-1]),
|
||||
|
||||
Reference in New Issue
Block a user