[artstation] add album extractor (#80)
This commit is contained in:
@@ -10,17 +10,17 @@ Site URL Capabilities
|
|||||||
arch.b4k.co https://arch.b4k.co/ Threads
|
arch.b4k.co https://arch.b4k.co/ Threads
|
||||||
Archive of Sins https://archiveofsins.com/ Threads
|
Archive of Sins https://archiveofsins.com/ Threads
|
||||||
Archived.Moe https://archived.moe/ Threads
|
Archived.Moe https://archived.moe/ Threads
|
||||||
ArtStation https://www.artstation.com/ Images from Users, individual Images, Likes
|
ArtStation https://www.artstation.com/ |Images from Use-1|
|
||||||
Danbooru https://danbooru.donmai.us/ Pools, Popular Images, Posts, Tag-Searches
|
Danbooru https://danbooru.donmai.us/ Pools, Popular Images, Posts, Tag-Searches
|
||||||
Desuarchive https://desuarchive.org/ Threads
|
Desuarchive https://desuarchive.org/ Threads
|
||||||
DeviantArt https://www.deviantart.com/ |Collections, De-1| Optional (OAuth)
|
DeviantArt https://www.deviantart.com/ |Collections, De-2| Optional (OAuth)
|
||||||
Doki Reader https://kobato.hologfx.com/ Chapters, Manga
|
Doki Reader https://kobato.hologfx.com/ Chapters, Manga
|
||||||
Dynasty Reader https://dynasty-scans.com/ Chapters
|
Dynasty Reader https://dynasty-scans.com/ Chapters
|
||||||
e621 https://e621.net/ Pools, Popular Images, Posts, Tag-Searches
|
e621 https://e621.net/ Pools, Popular Images, Posts, Tag-Searches
|
||||||
ExHentai https://exhentai.org/ Galleries Optional
|
ExHentai https://exhentai.org/ Galleries Optional
|
||||||
Fallen Angels Scans https://www.fascans.com/ Chapters, Manga
|
Fallen Angels Scans https://www.fascans.com/ Chapters, Manga
|
||||||
Fireden https://boards.fireden.net/ Threads
|
Fireden https://boards.fireden.net/ Threads
|
||||||
Flickr https://www.flickr.com/ |Images from Use-2| Optional (OAuth)
|
Flickr https://www.flickr.com/ |Images from Use-3| Optional (OAuth)
|
||||||
Futaba Channel https://www.2chan.net/ Threads
|
Futaba Channel https://www.2chan.net/ Threads
|
||||||
Gelbooru https://gelbooru.com/ Pools, Posts, Tag-Searches
|
Gelbooru https://gelbooru.com/ Pools, Posts, Tag-Searches
|
||||||
Gfycat https://gfycat.com/ individual Images
|
Gfycat https://gfycat.com/ individual Images
|
||||||
@@ -57,7 +57,7 @@ nijie https://nijie.info/ Images from Users, indi
|
|||||||
Nyafuu Archive https://archive.nyafuu.org/ Threads
|
Nyafuu Archive https://archive.nyafuu.org/ Threads
|
||||||
Pawoo https://pawoo.net Images from Users, Images from Statuses
|
Pawoo https://pawoo.net Images from Users, Images from Statuses
|
||||||
Pinterest https://www.pinterest.com Boards, Pins, pin.it Links
|
Pinterest https://www.pinterest.com Boards, Pins, pin.it Links
|
||||||
Pixiv https://www.pixiv.net/ |Images from Use-3| Required
|
Pixiv https://www.pixiv.net/ |Images from Use-4| Required
|
||||||
PowerManga https://powermanga.org/ Chapters, Manga
|
PowerManga https://powermanga.org/ Chapters, Manga
|
||||||
Pure Mashiro http://reader.puremashiro.moe/ Chapters, Manga
|
Pure Mashiro http://reader.puremashiro.moe/ Chapters, Manga
|
||||||
Read Comic Online http://readcomiconline.to/ Comic-Issues, Comics
|
Read Comic Online http://readcomiconline.to/ Comic-Issues, Comics
|
||||||
@@ -91,6 +91,7 @@ Turboimagehost https://turboimagehost.com/ individual Images
|
|||||||
==================== =================================== ================================================== ================
|
==================== =================================== ================================================== ================
|
||||||
|
|
||||||
.. |http://www.thes-0| replace:: http://www.thespectrum.net/manga_scans/
|
.. |http://www.thes-0| replace:: http://www.thespectrum.net/manga_scans/
|
||||||
.. |Collections, De-1| replace:: Collections, Deviations, Favorites, Folders, Galleries, Journals
|
.. |Images from Use-1| replace:: Images from Users, Albums, individual Images, Likes
|
||||||
.. |Images from Use-2| replace:: Images from Users, Albums, Favorites, Galleries, Groups, individual Images, Search Results
|
.. |Collections, De-2| replace:: Collections, Deviations, Favorites, Folders, Galleries, Journals
|
||||||
.. |Images from Use-3| replace:: Images from Users, Bookmarks, Favorites, pixiv.me Links, Rankings, Individual Images
|
.. |Images from Use-3| replace:: Images from Users, Albums, Favorites, Galleries, Groups, individual Images, Search Results
|
||||||
|
.. |Images from Use-4| replace:: Images from Users, Bookmarks, Favorites, pixiv.me Links, Rankings, Individual Images
|
||||||
|
|||||||
@@ -17,8 +17,8 @@ import string
|
|||||||
class ArtstationExtractor(Extractor):
|
class ArtstationExtractor(Extractor):
|
||||||
"""Base class for artstation extractors"""
|
"""Base class for artstation extractors"""
|
||||||
category = "artstation"
|
category = "artstation"
|
||||||
directory_fmt = ["{category}", "{username}"]
|
|
||||||
filename_fmt = "{category}_{id}_{asset[id]}_{title}.{extension}"
|
filename_fmt = "{category}_{id}_{asset[id]}_{title}.{extension}"
|
||||||
|
directory_fmt = ["{category}", "{userinfo[username]}"]
|
||||||
archive_fmt = "{asset[id]}"
|
archive_fmt = "{asset[id]}"
|
||||||
root = "https://www.artstation.com"
|
root = "https://www.artstation.com"
|
||||||
per_page = 50
|
per_page = 50
|
||||||
@@ -29,18 +29,15 @@ class ArtstationExtractor(Extractor):
|
|||||||
self.external = self.config("external", False)
|
self.external = self.config("external", False)
|
||||||
|
|
||||||
def items(self):
|
def items(self):
|
||||||
userinfo = None
|
data = self.metadata()
|
||||||
yield Message.Version, 1
|
yield Message.Version, 1
|
||||||
|
yield Message.Directory, data
|
||||||
|
|
||||||
for project_id in self.projects():
|
for project_id in self.projects():
|
||||||
for asset in self.get_project_assets(project_id):
|
for asset in self.get_project_assets(project_id):
|
||||||
if not userinfo:
|
|
||||||
userinfo = self.get_user_info(
|
|
||||||
self.user or asset["user"]["username"])
|
|
||||||
yield Message.Directory, userinfo
|
|
||||||
|
|
||||||
adict = asset["asset"]
|
adict = asset["asset"]
|
||||||
asset["userinfo"] = userinfo
|
if data:
|
||||||
|
asset.update(data)
|
||||||
|
|
||||||
if adict["has_image"]:
|
if adict["has_image"]:
|
||||||
url = adict["image_url"]
|
url = adict["image_url"]
|
||||||
@@ -51,6 +48,10 @@ class ArtstationExtractor(Extractor):
|
|||||||
url = text.extract(adict["player_embedded"], '"', '"')[0]
|
url = text.extract(adict["player_embedded"], '"', '"')[0]
|
||||||
yield Message.Queue, url, asset
|
yield Message.Queue, url, asset
|
||||||
|
|
||||||
|
def metadata(self):
|
||||||
|
"""Return general metadata"""
|
||||||
|
return {"userinfo": self.get_user_info(self.user)}
|
||||||
|
|
||||||
def projects(self):
|
def projects(self):
|
||||||
"""Return an iterable containing all relevant project IDs"""
|
"""Return an iterable containing all relevant project IDs"""
|
||||||
|
|
||||||
@@ -111,7 +112,7 @@ class ArtstationUserExtractor(ArtstationExtractor):
|
|||||||
"""Extractor for all projects of an artstation user"""
|
"""Extractor for all projects of an artstation user"""
|
||||||
subcategory = "user"
|
subcategory = "user"
|
||||||
pattern = [r"(?:https?://)?(?:www\.)?artstation\.com"
|
pattern = [r"(?:https?://)?(?:www\.)?artstation\.com"
|
||||||
r"/(?!artwork|projects)([^/?&#]+)/?$",
|
r"/(?!artwork|projects)([^/?&#]+)(?:/albums/all)?/?$",
|
||||||
r"(?:https?://)?((?!www)\w+)\.artstation\.com"
|
r"(?:https?://)?((?!www)\w+)\.artstation\.com"
|
||||||
r"(?:/(?:projects/?)?)?$"]
|
r"(?:/(?:projects/?)?)?$"]
|
||||||
test = [
|
test = [
|
||||||
@@ -120,6 +121,7 @@ class ArtstationUserExtractor(ArtstationExtractor):
|
|||||||
r"/images/images/\d+/\d+/\d+/large/[^/]+",
|
r"/images/images/\d+/\d+/\d+/large/[^/]+",
|
||||||
"count": ">= 6",
|
"count": ">= 6",
|
||||||
}),
|
}),
|
||||||
|
("https://www.artstation.com/gaerikim/albums/all/", None),
|
||||||
("https://gaerikim.artstation.com/", None),
|
("https://gaerikim.artstation.com/", None),
|
||||||
("https://gaerikim.artstation.com/projects/", None),
|
("https://gaerikim.artstation.com/projects/", None),
|
||||||
]
|
]
|
||||||
@@ -129,10 +131,55 @@ class ArtstationUserExtractor(ArtstationExtractor):
|
|||||||
return self._pagination(url)
|
return self._pagination(url)
|
||||||
|
|
||||||
|
|
||||||
|
class ArtstationAlbumExtractor(ArtstationExtractor):
|
||||||
|
"""Extractor for all projects in an artstation album"""
|
||||||
|
subcategory = "album"
|
||||||
|
directory_fmt = ["{category}", "{userinfo[username]}", "Albums",
|
||||||
|
"{album[id]} - {album[title]}"]
|
||||||
|
archive_fmt = "a_{album[id]}_{asset[id]}"
|
||||||
|
pattern = [r"(?:https?://)?(?:www\.)?artstation\.com"
|
||||||
|
r"/(?!artwork|projects)([^/?&#]+)/albums/(\d+)",
|
||||||
|
r"(?:https?://)?((?!www)\w+)\.artstation\.com"
|
||||||
|
r"/albums/(\d+)"]
|
||||||
|
test = [
|
||||||
|
("https://www.artstation.com/huimeiye/albums/770899", {
|
||||||
|
"count": 2,
|
||||||
|
}),
|
||||||
|
("https://www.artstation.com/huimeiye/albums/770898", {
|
||||||
|
"exception": exception.NotFoundError,
|
||||||
|
}),
|
||||||
|
("https://huimeiye.artstation.com/albums/770899", None),
|
||||||
|
]
|
||||||
|
|
||||||
|
def __init__(self, match):
|
||||||
|
ArtstationExtractor.__init__(self, match)
|
||||||
|
self.album_id = util.safe_int(match.group(2))
|
||||||
|
|
||||||
|
def metadata(self):
|
||||||
|
userinfo = self.get_user_info(self.user)
|
||||||
|
album = None
|
||||||
|
|
||||||
|
for album in userinfo["albums_with_community_projects"]:
|
||||||
|
if album["id"] == self.album_id:
|
||||||
|
break
|
||||||
|
else:
|
||||||
|
raise exception.NotFoundError("album")
|
||||||
|
|
||||||
|
return {
|
||||||
|
"userinfo": userinfo,
|
||||||
|
"album": album
|
||||||
|
}
|
||||||
|
|
||||||
|
def projects(self):
|
||||||
|
url = "{}/users/{}/projects.json?album_id={}".format(
|
||||||
|
self.root, self.user, self.album_id)
|
||||||
|
return self._pagination(url)
|
||||||
|
|
||||||
|
|
||||||
class ArtstationLikesExtractor(ArtstationExtractor):
|
class ArtstationLikesExtractor(ArtstationExtractor):
|
||||||
"""Extractor for liked projects of an artstation user"""
|
"""Extractor for liked projects of an artstation user"""
|
||||||
subcategory = "likes"
|
subcategory = "likes"
|
||||||
directory_fmt = ["{category}", "{username}", "Likes"]
|
directory_fmt = ["{category}", "{userinfo[username]}", "Likes"]
|
||||||
archive_fmt = "f_{userinfo[id]}_{asset[id]}"
|
archive_fmt = "f_{userinfo[id]}_{asset[id]}"
|
||||||
pattern = [r"(?:https?://)?(?:www\.)?artstation\.com"
|
pattern = [r"(?:https?://)?(?:www\.)?artstation\.com"
|
||||||
r"/(?!artwork|projects)([^/?&#]+)/likes/?"]
|
r"/(?!artwork|projects)([^/?&#]+)/likes/?"]
|
||||||
@@ -175,6 +222,21 @@ class ArtstationImageExtractor(ArtstationExtractor):
|
|||||||
def __init__(self, match):
|
def __init__(self, match):
|
||||||
ArtstationExtractor.__init__(self)
|
ArtstationExtractor.__init__(self)
|
||||||
self.project_id = match.group(1)
|
self.project_id = match.group(1)
|
||||||
|
self.assets = None
|
||||||
|
|
||||||
|
def metadata(self):
|
||||||
|
self.assets = [
|
||||||
|
asset.copy()
|
||||||
|
for asset in ArtstationExtractor.get_project_assets(
|
||||||
|
self, self.project_id
|
||||||
|
)
|
||||||
|
]
|
||||||
|
|
||||||
|
self.user = self.assets[0]["user"]["username"]
|
||||||
|
return ArtstationExtractor.metadata(self)
|
||||||
|
|
||||||
def projects(self):
|
def projects(self):
|
||||||
return (self.project_id,)
|
return (self.project_id,)
|
||||||
|
|
||||||
|
def get_project_assets(self, project_id):
|
||||||
|
return self.assets
|
||||||
|
|||||||
Reference in New Issue
Block a user