[piczel] fix extraction (#6735)
- fix pagination - update API endpoints - provide 'count' metadata field - use BASE_PATTERN and self.groups[…]
This commit is contained in:
@@ -11,6 +11,8 @@
|
||||
from .common import Extractor, Message
|
||||
from .. import text
|
||||
|
||||
BASE_PATTERN = r"(?:https?://)?(?:www\.)?piczel\.tv"
|
||||
|
||||
|
||||
class PiczelExtractor(Extractor):
|
||||
"""Base class for piczel extractors"""
|
||||
@@ -30,6 +32,7 @@ class PiczelExtractor(Extractor):
|
||||
if post["multi"]:
|
||||
images = post["images"]
|
||||
del post["images"]
|
||||
post["count"] = len(images)
|
||||
yield Message.Directory, post
|
||||
for post["num"], image in enumerate(images):
|
||||
if "id" in image:
|
||||
@@ -39,6 +42,7 @@ class PiczelExtractor(Extractor):
|
||||
yield Message.Url, url, text.nameext_from_url(url, post)
|
||||
|
||||
else:
|
||||
post["count"] = 1
|
||||
yield Message.Directory, post
|
||||
post["num"] = 0
|
||||
url = post["image"]["url"]
|
||||
@@ -47,35 +51,27 @@ class PiczelExtractor(Extractor):
|
||||
def posts(self):
|
||||
"""Return an iterable with all relevant post objects"""
|
||||
|
||||
def _pagination(self, url, folder_id=None):
|
||||
params = {
|
||||
"from_id" : None,
|
||||
"folder_id": folder_id,
|
||||
}
|
||||
def _pagination(self, url, pnum=1):
|
||||
params = {"page": pnum}
|
||||
|
||||
while True:
|
||||
data = self.request(url, params=params).json()
|
||||
if not data:
|
||||
return
|
||||
params["from_id"] = data[-1]["id"]
|
||||
|
||||
for post in data:
|
||||
if not folder_id or folder_id == post["folder_id"]:
|
||||
yield post
|
||||
yield from data["data"]
|
||||
|
||||
params["page"] = data["meta"]["next_page"]
|
||||
if not params["page"]:
|
||||
return
|
||||
|
||||
|
||||
class PiczelUserExtractor(PiczelExtractor):
|
||||
"""Extractor for all images from a user's gallery"""
|
||||
subcategory = "user"
|
||||
pattern = r"(?:https?://)?(?:www\.)?piczel\.tv/gallery/([^/?#]+)/?$"
|
||||
pattern = BASE_PATTERN + r"/gallery/([^/?#]+)/?$"
|
||||
example = "https://piczel.tv/gallery/USER"
|
||||
|
||||
def __init__(self, match):
|
||||
PiczelExtractor.__init__(self, match)
|
||||
self.user = match.group(1)
|
||||
|
||||
def posts(self):
|
||||
url = "{}/api/users/{}/gallery".format(self.root_api, self.user)
|
||||
url = "{}/api/users/{}/gallery".format(self.root_api, self.groups[0])
|
||||
return self._pagination(url)
|
||||
|
||||
|
||||
@@ -84,29 +80,20 @@ class PiczelFolderExtractor(PiczelExtractor):
|
||||
subcategory = "folder"
|
||||
directory_fmt = ("{category}", "{user[username]}", "{folder[name]}")
|
||||
archive_fmt = "f{folder[id]}_{id}_{num}"
|
||||
pattern = (r"(?:https?://)?(?:www\.)?piczel\.tv"
|
||||
r"/gallery/(?!image)([^/?#]+)/(\d+)")
|
||||
pattern = BASE_PATTERN + r"/gallery/(?!image/)[^/?#]+/(\d+)"
|
||||
example = "https://piczel.tv/gallery/USER/12345"
|
||||
|
||||
def __init__(self, match):
|
||||
PiczelExtractor.__init__(self, match)
|
||||
self.user, self.folder_id = match.groups()
|
||||
|
||||
def posts(self):
|
||||
url = "{}/api/users/{}/gallery".format(self.root_api, self.user)
|
||||
return self._pagination(url, int(self.folder_id))
|
||||
url = "{}/api/gallery/folder/{}".format(self.root_api, self.groups[0])
|
||||
return self._pagination(url)
|
||||
|
||||
|
||||
class PiczelImageExtractor(PiczelExtractor):
|
||||
"""Extractor for individual images"""
|
||||
subcategory = "image"
|
||||
pattern = r"(?:https?://)?(?:www\.)?piczel\.tv/gallery/image/(\d+)"
|
||||
pattern = BASE_PATTERN + r"/gallery/image/(\d+)"
|
||||
example = "https://piczel.tv/gallery/image/12345"
|
||||
|
||||
def __init__(self, match):
|
||||
PiczelExtractor.__init__(self, match)
|
||||
self.image_id = match.group(1)
|
||||
|
||||
def posts(self):
|
||||
url = "{}/api/gallery/{}".format(self.root_api, self.image_id)
|
||||
url = "{}/api/gallery/{}".format(self.root_api, self.groups[0])
|
||||
return (self.request(url).json(),)
|
||||
|
||||
@@ -9,27 +9,35 @@ from gallery_dl.extractor import piczel
|
||||
|
||||
__tests__ = (
|
||||
{
|
||||
"#url" : "https://piczel.tv/gallery/Bikupan",
|
||||
"#category": ("", "piczel", "user"),
|
||||
"#class" : piczel.PiczelUserExtractor,
|
||||
"#range" : "1-100",
|
||||
"#count" : ">= 100",
|
||||
"#url" : "https://piczel.tv/gallery/Bikupan",
|
||||
"#class": piczel.PiczelUserExtractor,
|
||||
"#range": "1-100",
|
||||
"#count": ">= 100",
|
||||
},
|
||||
|
||||
{
|
||||
"#url" : "https://piczel.tv/gallery/Lulena/1114",
|
||||
"#category": ("", "piczel", "folder"),
|
||||
"#class" : piczel.PiczelFolderExtractor,
|
||||
"#count" : ">= 4",
|
||||
"#url" : "https://piczel.tv/gallery/Lulena/1114",
|
||||
"#class": piczel.PiczelFolderExtractor,
|
||||
"#urls" : (
|
||||
"https://piczel.tv/static/uploads/gallery_image/32920/image/11194/1544126403-Lulena.png",
|
||||
"https://piczel.tv/static/uploads/gallery_image/32920/image/8008/1533616260-Lulena.png",
|
||||
"https://piczel.tv/static/uploads/plain_image/32920/image/3761/3761-Lulena.png",
|
||||
"https://piczel.tv/static/uploads/plain_image/32920/image/3762/3762-Lulena.png",
|
||||
"https://piczel.tv/static/uploads/gallery_image/32920/image/7991/1533513024-Lulena.png",
|
||||
"https://piczel.tv/static/uploads/gallery_image/32920/image/7806/1532236348-Lulena.png",
|
||||
"https://piczel.tv/static/uploads/gallery_image/32920/image/7800/1532235785-Lulena.png",
|
||||
),
|
||||
|
||||
"folder_id": 1114,
|
||||
},
|
||||
|
||||
{
|
||||
"#url" : "https://piczel.tv/gallery/image/7807",
|
||||
"#category": ("", "piczel", "image"),
|
||||
"#class" : piczel.PiczelImageExtractor,
|
||||
"#pattern" : r"https://(\w+\.)?piczel\.tv/static/uploads/gallery_image/32920/image/7807/1532236438-Lulena\.png",
|
||||
"#url" : "https://piczel.tv/gallery/image/7807",
|
||||
"#class": piczel.PiczelImageExtractor,
|
||||
"#urls" : "https://piczel.tv/static/uploads/gallery_image/32920/image/7807/1532236438-Lulena.png",
|
||||
"#sha1_content": "df9a053a24234474a19bce2b7e27e0dec23bff87",
|
||||
|
||||
"count" : 1,
|
||||
"created_at" : "2018-07-22T05:13:58.000Z",
|
||||
"date" : "dt:2018-07-22 05:13:58",
|
||||
"description" : None,
|
||||
@@ -54,4 +62,51 @@ __tests__ = (
|
||||
"views" : int,
|
||||
},
|
||||
|
||||
{
|
||||
"#url" : "https://piczel.tv/gallery/image/8008",
|
||||
"#comment": "multi",
|
||||
"#class" : piczel.PiczelImageExtractor,
|
||||
"#urls" : (
|
||||
"https://piczel.tv/static/uploads/gallery_image/32920/image/8008/1533616260-Lulena.png",
|
||||
"https://piczel.tv/static/uploads/plain_image/32920/image/3761/3761-Lulena.png",
|
||||
"https://piczel.tv/static/uploads/plain_image/32920/image/3762/3762-Lulena.png",
|
||||
),
|
||||
|
||||
"count" : 3,
|
||||
"created_at" : "2018-08-07T04:31:00.000Z",
|
||||
"curated" : False,
|
||||
"date" : "dt:2018-08-07 04:31:00",
|
||||
"description": "8/7/18",
|
||||
"extension" : "png",
|
||||
"favorites_count": range(3, 10),
|
||||
"folder_id" : 1114,
|
||||
"width" : None,
|
||||
"height" : None,
|
||||
"id" : 8008,
|
||||
"is_flash" : False,
|
||||
"is_video" : False,
|
||||
"multi" : True,
|
||||
"nsfw" : True,
|
||||
"num" : {0, 1, 2},
|
||||
"password_protected" : False,
|
||||
"published_at" : "2018-08-07T04:31:00.000Z",
|
||||
"rendered_description": "<p>8/7/18</p>",
|
||||
"status" : "published",
|
||||
"thumbnail" : None,
|
||||
"title" : "❤",
|
||||
"views" : 314,
|
||||
"tags" : [
|
||||
"original",
|
||||
"Orc",
|
||||
"tanlines",
|
||||
],
|
||||
"user" : {
|
||||
"follower_count": range(15, 25),
|
||||
"id" : 32920,
|
||||
"premium?": False,
|
||||
"role" : "user",
|
||||
"username": "Lulena",
|
||||
},
|
||||
},
|
||||
|
||||
)
|
||||
|
||||
Reference in New Issue
Block a user