[weasyl] update and simplify
- simplify 'pattern' regexps
- parse 'posted_at' as 'date'
- use unaltered 'title' (use the format string {title!l:R /_/} to lowercase it and replace spaces with underscores, as before)
This commit is contained in:
@@ -129,7 +129,7 @@ Twitter https://twitter.com/ |twitter-C|
|
|||||||
VSCO https://vsco.co/ Collections, individual Images, User Profiles
|
VSCO https://vsco.co/ Collections, individual Images, User Profiles
|
||||||
Wallhaven https://wallhaven.cc/ individual Images, Search Results Optional (`API Key <configuration.rst#extractorwallhavenapi-key>`__)
|
Wallhaven https://wallhaven.cc/ individual Images, Search Results Optional (`API Key <configuration.rst#extractorwallhavenapi-key>`__)
|
||||||
Warosu https://warosu.org/ Threads
|
Warosu https://warosu.org/ Threads
|
||||||
Weasyl https://www.weasyl.com/ |weasyl-C|
|
Weasyl https://www.weasyl.com/ Folders, Journals, Submissions
|
||||||
Webtoon https://www.webtoons.com/ Comics, Episodes
|
Webtoon https://www.webtoons.com/ Comics, Episodes
|
||||||
Weibo https://www.weibo.com/ Images from Statuses, User Profiles
|
Weibo https://www.weibo.com/ Images from Statuses, User Profiles
|
||||||
WikiArt.org https://www.wikiart.org/ Artists, Artist Listings, Artworks
|
WikiArt.org https://www.wikiart.org/ Artists, Artist Listings, Artworks
|
||||||
@@ -155,14 +155,13 @@ Turboimagehost https://www.turboimagehost.com/ individual Images
|
|||||||
.. |deviantart-C| replace:: Collections, Deviations, Favorites, Folders, Galleries, Journals, Popular Images, Scraps, Sta.sh, User Profiles
|
.. |deviantart-C| replace:: Collections, Deviations, Favorites, Folders, Galleries, Journals, Popular Images, Scraps, Sta.sh, User Profiles
|
||||||
.. |flickr-C| replace:: Albums, Favorites, Galleries, Groups, individual Images, Search Results, User Profiles
|
.. |flickr-C| replace:: Albums, Favorites, Galleries, Groups, individual Images, Search Results, User Profiles
|
||||||
.. |furaffinity-C| replace:: Favorites, Galleries, Posts, Scraps, Search Results, User Profiles
|
.. |furaffinity-C| replace:: Favorites, Galleries, Posts, Scraps, Search Results, User Profiles
|
||||||
.. |hentaifoundry-C| replace:: Favorites, individual Images, Popular Images, Recent Images, Scraps, User Profiles
|
.. |hentaifoundry-C| replace:: Favorites, Galleries, individual Images, Popular Images, Recent Images, Scraps, User Profiles
|
||||||
.. |imgur-C| replace:: Albums, Favorites, Galleries, individual Images, Search Results, Subreddits, Tag Searches, User Profiles
|
.. |imgur-C| replace:: Albums, Favorites, Galleries, individual Images, Search Results, Subreddits, Tag Searches, User Profiles
|
||||||
.. |instagram-C| replace:: Channels, individual Images, Saved Posts, Stories, Tag Searches, User Profiles
|
.. |instagram-C| replace:: Channels, individual Images, Saved Posts, Stories, Tag Searches, User Profiles
|
||||||
.. |newgrounds-C| replace:: Art, Audio, Favorites, individual Images, Media Files, Movies, User Profiles
|
.. |newgrounds-C| replace:: Art, Audio, Favorites, individual Images, Media Files, Movies, User Profiles
|
||||||
.. |nijie-C| replace:: Doujin, Favorites, individual Images, User Profiles
|
.. |nijie-C| replace:: Doujin, Favorites, Illustrations, individual Images, User Profiles
|
||||||
.. |pixiv-C| replace:: Favorites, Follows, pixiv.me Links, Rankings, Search Results, User Profiles, individual Images
|
.. |pixiv-C| replace:: Favorites, Follows, pixiv.me Links, Rankings, Search Results, User Profiles, individual Images
|
||||||
.. |reddit-C| replace:: individual Images, Submissions, Subreddits, User Profiles
|
.. |reddit-C| replace:: individual Images, Submissions, Subreddits, User Profiles
|
||||||
.. |smugmug-C| replace:: Albums, individual Images, Images from Users and Folders
|
.. |smugmug-C| replace:: Albums, individual Images, Images from Users and Folders
|
||||||
.. |twitter-C| replace:: Bookmarks, Likes, Media Timelines, Search Results, Timelines, Tweets
|
.. |twitter-C| replace:: Bookmarks, Likes, Media Timelines, Search Results, Timelines, Tweets
|
||||||
.. |weasyl-C| replace:: Folders, Journals, Journals, Submissions, Submissions
|
|
||||||
.. |yuki-S| replace:: yuki.la 4chan archive
|
.. |yuki-S| replace:: yuki.la 4chan archive
|
||||||
|
|||||||
@@ -8,7 +8,6 @@
|
|||||||
|
|
||||||
from .common import Extractor, Message
|
from .common import Extractor, Message
|
||||||
from .. import text
|
from .. import text
|
||||||
import re
|
|
||||||
|
|
||||||
BASE_PATTERN = r"(?:https://)?(?:www\.)?weasyl.com/"
|
BASE_PATTERN = r"(?:https://)?(?:www\.)?weasyl.com/"
|
||||||
|
|
||||||
@@ -16,19 +15,18 @@ BASE_PATTERN = r"(?:https://)?(?:www\.)?weasyl.com/"
|
|||||||
class WeasylExtractor(Extractor):
|
class WeasylExtractor(Extractor):
|
||||||
category = "weasyl"
|
category = "weasyl"
|
||||||
directory_fmt = ("{category}", "{owner_login}")
|
directory_fmt = ("{category}", "{owner_login}")
|
||||||
filename_fmt = "{submitid}_{title}.{extension}"
|
filename_fmt = "{submitid} {title}.{extension}"
|
||||||
archive_fmt = "{submitid}"
|
archive_fmt = "{submitid}"
|
||||||
root = "https://www.weasyl.com"
|
root = "https://www.weasyl.com"
|
||||||
|
|
||||||
def __init__(self, match):
|
|
||||||
Extractor.__init__(self, match)
|
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def populate_submission(data):
|
def populate_submission(data):
|
||||||
# Some submissions don't have content and can be skipped
|
# Some submissions don't have content and can be skipped
|
||||||
if "submission" in data["media"]:
|
if "submission" in data["media"]:
|
||||||
data["url"] = data["media"]["submission"][0]["url"]
|
data["url"] = data["media"]["submission"][0]["url"]
|
||||||
data["extension"] = text.ext_from_url(data["url"])
|
data["date"] = text.parse_datetime(
|
||||||
|
data["posted_at"][:19], "%Y-%m-%dT%H:%M:%S")
|
||||||
|
text.nameext_from_url(data["url"], data)
|
||||||
return True
|
return True
|
||||||
return False
|
return False
|
||||||
|
|
||||||
@@ -36,100 +34,110 @@ class WeasylExtractor(Extractor):
|
|||||||
return self.request(
|
return self.request(
|
||||||
"{}/api/submissions/{}/view".format(self.root, submitid)).json()
|
"{}/api/submissions/{}/view".format(self.root, submitid)).json()
|
||||||
|
|
||||||
def retrieve_journal(self, id):
|
def retrieve_journal(self, journalid):
|
||||||
data = self.request(
|
data = self.request(
|
||||||
"{}/api/journals/{}/view".format(self.root, id)).json()
|
"{}/api/journals/{}/view".format(self.root, journalid)).json()
|
||||||
data["extension"] = "html"
|
data["extension"] = "html"
|
||||||
data["html"] = "text:" + data["content"]
|
data["html"] = "text:" + data["content"]
|
||||||
|
data["date"] = text.parse_datetime(data["posted_at"])
|
||||||
return data
|
return data
|
||||||
|
|
||||||
def submissions(self):
|
def submissions(self, owner_login, folderid=None):
|
||||||
nextid = 0
|
url = "{}/api/users/{}/gallery".format(self.root, owner_login)
|
||||||
while nextid is not None:
|
params = {
|
||||||
url = "{}/api/users/{}/gallery?nextid={}".format(
|
"nextid" : None,
|
||||||
self.root, self.owner_login, nextid
|
"folderid": folderid,
|
||||||
)
|
}
|
||||||
folderid = self.folderid if hasattr(self, "folderid") else None
|
|
||||||
if folderid:
|
while True:
|
||||||
url += "&folderid={}".format(self.folderid)
|
data = self.request(url, params=params).json()
|
||||||
json = self.request(url).json()
|
for submission in data["submissions"]:
|
||||||
for data in json["submissions"]:
|
if self.populate_submission(submission):
|
||||||
if self.populate_submission(data):
|
submission["folderid"] = folderid
|
||||||
data["folderid"] = folderid
|
|
||||||
# Do any submissions have more than one url? If so
|
# Do any submissions have more than one url? If so
|
||||||
# a urllist of the submission array urls would work.
|
# a urllist of the submission array urls would work.
|
||||||
yield Message.Url, data["url"], data
|
yield Message.Url, submission["url"], submission
|
||||||
nextid = json["nextid"]
|
if not data["nextid"]:
|
||||||
|
return
|
||||||
|
params["nextid"] = data["nextid"]
|
||||||
|
|
||||||
|
|
||||||
class WeasylSubmissionExtractor(WeasylExtractor):
|
class WeasylSubmissionExtractor(WeasylExtractor):
|
||||||
subcategory = "submission"
|
subcategory = "submission"
|
||||||
pattern = (BASE_PATTERN +
|
pattern = BASE_PATTERN + r"(?:~[\w-]+/submissions|submission)/(\d+)"
|
||||||
r"(?:~[\w-]+/submissions|submission)/(\d+)/?([\w-]+)?")
|
|
||||||
test = (
|
test = (
|
||||||
"https://www.weasyl.com/submission/2031/a-wesley", {
|
("https://www.weasyl.com/~fiz/submissions/2031/a-wesley", {
|
||||||
"keyword": {
|
"pattern": "https://cdn.weasyl.com/~fiz/submissions/2031/41ebc1c29"
|
||||||
"url": "https://cdn.weasyl.com/~fiz/submissions/2031/41ebc1c29"
|
|
||||||
"40be928532785dfbf35c37622664d2fbb8114c3b063df969562fc5"
|
"40be928532785dfbf35c37622664d2fbb8114c3b063df969562fc5"
|
||||||
"1/fiz-a-wesley.png",
|
"1/fiz-a-wesley.png",
|
||||||
}
|
"keyword": {
|
||||||
}
|
"comments" : int,
|
||||||
|
"date" : "dt:2012-04-20 00:38:04",
|
||||||
|
"description" : "<p>(flex)</p>",
|
||||||
|
"favorites" : int,
|
||||||
|
"folder_name" : "Wesley Stuff",
|
||||||
|
"folderid" : 2081,
|
||||||
|
"friends_only": False,
|
||||||
|
"owner" : "Fiz",
|
||||||
|
"owner_login" : "fiz",
|
||||||
|
"rating" : "general",
|
||||||
|
"submitid" : 2031,
|
||||||
|
"subtype" : "visual",
|
||||||
|
"tags" : list,
|
||||||
|
"title" : "A Wesley!",
|
||||||
|
"type" : "submission",
|
||||||
|
"views" : int,
|
||||||
|
},
|
||||||
|
}),
|
||||||
|
("https://www.weasyl.com/submission/2031/a-wesley"),
|
||||||
)
|
)
|
||||||
|
|
||||||
def __init__(self, match):
|
def __init__(self, match):
|
||||||
WeasylExtractor.__init__(self, match)
|
WeasylExtractor.__init__(self, match)
|
||||||
self.submitid = int(match.group(1))
|
self.submitid = match.group(1)
|
||||||
if len(match.groups()) == 3:
|
|
||||||
self.title = match.group(2)
|
|
||||||
|
|
||||||
def items(self):
|
def items(self):
|
||||||
yield Message.Version, 1
|
|
||||||
data = self.request_submission(self.submitid)
|
data = self.request_submission(self.submitid)
|
||||||
yield Message.Directory, data
|
|
||||||
if self.populate_submission(data):
|
if self.populate_submission(data):
|
||||||
|
yield Message.Directory, data
|
||||||
yield Message.Url, data["url"], data
|
yield Message.Url, data["url"], data
|
||||||
|
|
||||||
|
|
||||||
class WeasylSubmissionsExtractor(WeasylExtractor):
|
class WeasylSubmissionsExtractor(WeasylExtractor):
|
||||||
subcategory = "submissions"
|
subcategory = "submissions"
|
||||||
pattern = BASE_PATTERN + r"(?:~([\w-]+)/?|submissions/([\w-]+))$"
|
pattern = BASE_PATTERN + r"(?:~|submissions/)([\w-]+)/?$"
|
||||||
test = (
|
test = (
|
||||||
("https://www.weasyl.com/~tanidareal", {
|
("https://www.weasyl.com/~tanidareal", {
|
||||||
"count": ">= 200"
|
"count": ">= 200"
|
||||||
}),
|
}),
|
||||||
("https://www.weasyl.com/submissions/tanidareal", {
|
("https://www.weasyl.com/submissions/tanidareal"),
|
||||||
"count": ">= 200"
|
|
||||||
})
|
|
||||||
)
|
)
|
||||||
|
|
||||||
def __init__(self, match):
|
def __init__(self, match):
|
||||||
WeasylExtractor.__init__(self, match)
|
WeasylExtractor.__init__(self, match)
|
||||||
self.owner_login = match.group(1) if match.group(1) else match.group(2)
|
self.owner_login = match.group(1)
|
||||||
|
|
||||||
def items(self):
|
def items(self):
|
||||||
yield Message.Version, 1
|
yield Message.Version, 1
|
||||||
yield Message.Directory, {"owner_login": self.owner_login}
|
yield Message.Directory, {"owner_login": self.owner_login}
|
||||||
yield from self.submissions()
|
yield from self.submissions(self.owner_login)
|
||||||
|
|
||||||
|
|
||||||
class WeasylFolderExtractor(WeasylExtractor):
|
class WeasylFolderExtractor(WeasylExtractor):
|
||||||
subcategory = "folder"
|
subcategory = "folder"
|
||||||
directory_fmt = ("{category}", "{owner_login}", "{folder_name}")
|
directory_fmt = ("{category}", "{owner_login}", "{folder_name}")
|
||||||
pattern = BASE_PATTERN + r"submissions/([\w-]+)\?folderid=(\d+)"
|
pattern = BASE_PATTERN + r"submissions/([\w-]+)\?folderid=(\d+)"
|
||||||
test = (
|
test = ("https://www.weasyl.com/submissions/tanidareal?folderid=7403", {
|
||||||
"https://www.weasyl.com/submissions/tanidareal?folderid=7403", {
|
"count": ">= 12"
|
||||||
"count": ">= 12"
|
})
|
||||||
}
|
|
||||||
)
|
|
||||||
|
|
||||||
def __init__(self, match):
|
def __init__(self, match):
|
||||||
WeasylExtractor.__init__(self, match)
|
WeasylExtractor.__init__(self, match)
|
||||||
self.owner_login = match.group(1)
|
self.owner_login, self.folderid = match.groups()
|
||||||
self.folderid = int(match.group(2))
|
|
||||||
|
|
||||||
def items(self):
|
def items(self):
|
||||||
yield Message.Version, 1
|
yield Message.Version, 1
|
||||||
iter = self.submissions()
|
iter = self.submissions(self.owner_login, self.folderid)
|
||||||
# Folder names are only on single submission api calls
|
# Folder names are only on single submission api calls
|
||||||
msg, url, data = next(iter)
|
msg, url, data = next(iter)
|
||||||
details = self.request_submission(data["submitid"])
|
details = self.request_submission(data["submitid"])
|
||||||
@@ -140,53 +148,37 @@ class WeasylFolderExtractor(WeasylExtractor):
|
|||||||
|
|
||||||
class WeasylJournalExtractor(WeasylExtractor):
|
class WeasylJournalExtractor(WeasylExtractor):
|
||||||
subcategory = "journal"
|
subcategory = "journal"
|
||||||
filename_fmt = "{journalid}_{title}.{extension}"
|
filename_fmt = "{journalid} {title}.{extension}"
|
||||||
archive_fmt = "{journalid}"
|
archive_fmt = "{journalid}"
|
||||||
pattern = BASE_PATTERN + r"journal/(\d+)/?([\w-]+)?"
|
pattern = BASE_PATTERN + r"journal/(\d+)"
|
||||||
test = (
|
test = ("https://www.weasyl.com/journal/17647/bbcode", {
|
||||||
("https://www.weasyl.com/journal/17647", {
|
"keyword": {
|
||||||
"keyword": {
|
"title" : "BBCode",
|
||||||
"content":
|
"date" : "dt:2013-09-19 23:11:23",
|
||||||
"<p><a>javascript:alert(42);</a></p><p>No more of that!</p>",
|
"content": "<p><a>javascript:alert(42);</a></p>"
|
||||||
"title": "bbcode",
|
"<p>No more of that!</p>",
|
||||||
}
|
},
|
||||||
}),
|
})
|
||||||
("https://www.weasyl.com/journal/17647/bbcode", {
|
|
||||||
"keyword": {
|
|
||||||
"content":
|
|
||||||
"<p><a>javascript:alert(42);</a></p><p>No more of that!</p>",
|
|
||||||
"title": "bbcode",
|
|
||||||
}
|
|
||||||
})
|
|
||||||
)
|
|
||||||
|
|
||||||
def __init__(self, match):
|
def __init__(self, match):
|
||||||
WeasylExtractor.__init__(self, match)
|
WeasylExtractor.__init__(self, match)
|
||||||
self.journalid = int(match.group(1))
|
self.journalid = match.group(1)
|
||||||
if match.group(2):
|
|
||||||
self.title = match.group(2)
|
|
||||||
|
|
||||||
def items(self):
|
def items(self):
|
||||||
yield Message.Version, 1
|
|
||||||
data = self.retrieve_journal(self.journalid)
|
data = self.retrieve_journal(self.journalid)
|
||||||
if hasattr(self, "title"):
|
yield Message.Version, 1
|
||||||
data["title"] = self.title
|
|
||||||
else:
|
|
||||||
data["title"] = data["title"].lower()
|
|
||||||
yield Message.Directory, data
|
yield Message.Directory, data
|
||||||
yield Message.Url, data["html"], data
|
yield Message.Url, data["html"], data
|
||||||
|
|
||||||
|
|
||||||
class WeasylJournalsExtractor(WeasylExtractor):
|
class WeasylJournalsExtractor(WeasylExtractor):
|
||||||
subcategory = "journals"
|
subcategory = "journals"
|
||||||
filename_fmt = "{journalid}_{title}.{extension}"
|
filename_fmt = "{journalid} {title}.{extension}"
|
||||||
archive_fmt = "{journalid}"
|
archive_fmt = "{journalid}"
|
||||||
pattern = BASE_PATTERN + r"journals/([\w-]+)"
|
pattern = BASE_PATTERN + r"journals/([\w-]+)"
|
||||||
test = (
|
test = ("https://www.weasyl.com/journals/charmander", {
|
||||||
"https://www.weasyl.com/journals/charmander", {
|
"count": ">= 2",
|
||||||
"count": ">= 2",
|
})
|
||||||
}
|
|
||||||
)
|
|
||||||
|
|
||||||
def __init__(self, match):
|
def __init__(self, match):
|
||||||
WeasylExtractor.__init__(self, match)
|
WeasylExtractor.__init__(self, match)
|
||||||
@@ -195,12 +187,9 @@ class WeasylJournalsExtractor(WeasylExtractor):
|
|||||||
def items(self):
|
def items(self):
|
||||||
yield Message.Version, 1
|
yield Message.Version, 1
|
||||||
yield Message.Directory, {"owner_login": self.owner_login}
|
yield Message.Directory, {"owner_login": self.owner_login}
|
||||||
response = self.request("{}/journals/{}".format(
|
|
||||||
self.root, self.owner_login
|
|
||||||
))
|
|
||||||
|
|
||||||
for journal in re.finditer(r'"/journal/(\d+)/([\w-]+)"',
|
url = "{}/journals/{}".format(self.root, self.owner_login)
|
||||||
response.text):
|
page = self.request(url).text
|
||||||
data = self.retrieve_journal(int(journal.group(1)))
|
for journalid in text.extract_iter(page, 'href="/journal/', '/'):
|
||||||
data["title"] = journal.group(2)
|
data = self.retrieve_journal(journalid)
|
||||||
yield Message.Url, data["html"], data
|
yield Message.Url, data["html"], data
|
||||||
|
|||||||
@@ -143,6 +143,10 @@ SUBCATEGORY_MAP = {
|
|||||||
"wikiart": {
|
"wikiart": {
|
||||||
"artists": "Artist Listings",
|
"artists": "Artist Listings",
|
||||||
},
|
},
|
||||||
|
"weasyl": {
|
||||||
|
"journals" : "",
|
||||||
|
"submissions": "",
|
||||||
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
_OAUTH = " (`OAuth <https://github.com/mikf/gallery-dl#oauth>`__)"
|
_OAUTH = " (`OAuth <https://github.com/mikf/gallery-dl#oauth>`__)"
|
||||||
|
|||||||
Reference in New Issue
Block a user