[piczel] fix extraction (#6735)

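- fix pagination (request results by 'page' number and follow 'next_page' instead of using 'from_id' cursors)
- update API endpoints (folders are now fetched from /api/gallery/folder/ID)
- provide 'count' metadata field
- use BASE_PATTERN and self.groups[…] instead of repeated URL prefixes and per-class __init__ overrides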
This commit is contained in:
Mike Fährmann
2024-12-27 15:08:08 +01:00
parent 167a726972
commit bc7e95684d
2 changed files with 86 additions and 44 deletions

View File

@@ -11,6 +11,8 @@
from .common import Extractor, Message
from .. import text
BASE_PATTERN = r"(?:https?://)?(?:www\.)?piczel\.tv"
class PiczelExtractor(Extractor):
"""Base class for piczel extractors"""
@@ -30,6 +32,7 @@ class PiczelExtractor(Extractor):
if post["multi"]:
images = post["images"]
del post["images"]
post["count"] = len(images)
yield Message.Directory, post
for post["num"], image in enumerate(images):
if "id" in image:
@@ -39,6 +42,7 @@ class PiczelExtractor(Extractor):
yield Message.Url, url, text.nameext_from_url(url, post)
else:
post["count"] = 1
yield Message.Directory, post
post["num"] = 0
url = post["image"]["url"]
@@ -47,35 +51,27 @@ class PiczelExtractor(Extractor):
def posts(self):
"""Return an iterable with all relevant post objects"""
def _pagination(self, url, folder_id=None):
params = {
"from_id" : None,
"folder_id": folder_id,
}
def _pagination(self, url, pnum=1):
params = {"page": pnum}
while True:
data = self.request(url, params=params).json()
if not data:
return
params["from_id"] = data[-1]["id"]
for post in data:
if not folder_id or folder_id == post["folder_id"]:
yield post
yield from data["data"]
params["page"] = data["meta"]["next_page"]
if not params["page"]:
return
class PiczelUserExtractor(PiczelExtractor):
"""Extractor for all images from a user's gallery"""
subcategory = "user"
pattern = r"(?:https?://)?(?:www\.)?piczel\.tv/gallery/([^/?#]+)/?$"
pattern = BASE_PATTERN + r"/gallery/([^/?#]+)/?$"
example = "https://piczel.tv/gallery/USER"
def __init__(self, match):
PiczelExtractor.__init__(self, match)
self.user = match.group(1)
def posts(self):
url = "{}/api/users/{}/gallery".format(self.root_api, self.user)
url = "{}/api/users/{}/gallery".format(self.root_api, self.groups[0])
return self._pagination(url)
@@ -84,29 +80,20 @@ class PiczelFolderExtractor(PiczelExtractor):
subcategory = "folder"
directory_fmt = ("{category}", "{user[username]}", "{folder[name]}")
archive_fmt = "f{folder[id]}_{id}_{num}"
pattern = (r"(?:https?://)?(?:www\.)?piczel\.tv"
r"/gallery/(?!image)([^/?#]+)/(\d+)")
pattern = BASE_PATTERN + r"/gallery/(?!image/)[^/?#]+/(\d+)"
example = "https://piczel.tv/gallery/USER/12345"
def __init__(self, match):
PiczelExtractor.__init__(self, match)
self.user, self.folder_id = match.groups()
def posts(self):
url = "{}/api/users/{}/gallery".format(self.root_api, self.user)
return self._pagination(url, int(self.folder_id))
url = "{}/api/gallery/folder/{}".format(self.root_api, self.groups[0])
return self._pagination(url)
class PiczelImageExtractor(PiczelExtractor):
"""Extractor for individual images"""
subcategory = "image"
pattern = r"(?:https?://)?(?:www\.)?piczel\.tv/gallery/image/(\d+)"
pattern = BASE_PATTERN + r"/gallery/image/(\d+)"
example = "https://piczel.tv/gallery/image/12345"
def __init__(self, match):
PiczelExtractor.__init__(self, match)
self.image_id = match.group(1)
def posts(self):
url = "{}/api/gallery/{}".format(self.root_api, self.image_id)
url = "{}/api/gallery/{}".format(self.root_api, self.groups[0])
return (self.request(url).json(),)

View File

@@ -9,27 +9,35 @@ from gallery_dl.extractor import piczel
__tests__ = (
{
"#url" : "https://piczel.tv/gallery/Bikupan",
"#category": ("", "piczel", "user"),
"#class" : piczel.PiczelUserExtractor,
"#range" : "1-100",
"#count" : ">= 100",
"#url" : "https://piczel.tv/gallery/Bikupan",
"#class": piczel.PiczelUserExtractor,
"#range": "1-100",
"#count": ">= 100",
},
{
"#url" : "https://piczel.tv/gallery/Lulena/1114",
"#category": ("", "piczel", "folder"),
"#class" : piczel.PiczelFolderExtractor,
"#count" : ">= 4",
"#url" : "https://piczel.tv/gallery/Lulena/1114",
"#class": piczel.PiczelFolderExtractor,
"#urls" : (
"https://piczel.tv/static/uploads/gallery_image/32920/image/11194/1544126403-Lulena.png",
"https://piczel.tv/static/uploads/gallery_image/32920/image/8008/1533616260-Lulena.png",
"https://piczel.tv/static/uploads/plain_image/32920/image/3761/3761-Lulena.png",
"https://piczel.tv/static/uploads/plain_image/32920/image/3762/3762-Lulena.png",
"https://piczel.tv/static/uploads/gallery_image/32920/image/7991/1533513024-Lulena.png",
"https://piczel.tv/static/uploads/gallery_image/32920/image/7806/1532236348-Lulena.png",
"https://piczel.tv/static/uploads/gallery_image/32920/image/7800/1532235785-Lulena.png",
),
"folder_id": 1114,
},
{
"#url" : "https://piczel.tv/gallery/image/7807",
"#category": ("", "piczel", "image"),
"#class" : piczel.PiczelImageExtractor,
"#pattern" : r"https://(\w+\.)?piczel\.tv/static/uploads/gallery_image/32920/image/7807/1532236438-Lulena\.png",
"#url" : "https://piczel.tv/gallery/image/7807",
"#class": piczel.PiczelImageExtractor,
"#urls" : "https://piczel.tv/static/uploads/gallery_image/32920/image/7807/1532236438-Lulena.png",
"#sha1_content": "df9a053a24234474a19bce2b7e27e0dec23bff87",
"count" : 1,
"created_at" : "2018-07-22T05:13:58.000Z",
"date" : "dt:2018-07-22 05:13:58",
"description" : None,
@@ -54,4 +62,51 @@ __tests__ = (
"views" : int,
},
{
"#url" : "https://piczel.tv/gallery/image/8008",
"#comment": "multi",
"#class" : piczel.PiczelImageExtractor,
"#urls" : (
"https://piczel.tv/static/uploads/gallery_image/32920/image/8008/1533616260-Lulena.png",
"https://piczel.tv/static/uploads/plain_image/32920/image/3761/3761-Lulena.png",
"https://piczel.tv/static/uploads/plain_image/32920/image/3762/3762-Lulena.png",
),
"count" : 3,
"created_at" : "2018-08-07T04:31:00.000Z",
"curated" : False,
"date" : "dt:2018-08-07 04:31:00",
"description": "8/7/18",
"extension" : "png",
"favorites_count": range(3, 10),
"folder_id" : 1114,
"width" : None,
"height" : None,
"id" : 8008,
"is_flash" : False,
"is_video" : False,
"multi" : True,
"nsfw" : True,
"num" : {0, 1, 2},
"password_protected" : False,
"published_at" : "2018-08-07T04:31:00.000Z",
"rendered_description": "<p>8/7/18</p>",
"status" : "published",
"thumbnail" : None,
"title" : "",
"views" : 314,
"tags" : [
"original",
"Orc",
"tanlines",
],
"user" : {
"follower_count": range(15, 25),
"id" : 32920,
"premium?": False,
"role" : "user",
"username": "Lulena",
},
},
)