[listal] add 'image' & 'people' extractors (#1589 #8921)

* listal extractor
* add listal to init
* fix flake8 & formatting & extractor names/subcategories

* remove 're' import
* remove 'datetime' import
* update & simplify extractors
* update supportedsites
* add tests

---------

Co-authored-by: Mike Fährmann <mike_faehrmann@web.de>
This commit is contained in:
SubmarineScurvy
2026-01-27 10:26:41 -07:00
committed by GitHub
parent eaaa25b6e4
commit ef8f2869e7
5 changed files with 140 additions and 0 deletions

View File

@@ -619,6 +619,12 @@ Consider all listed sites to potentially be NSFW.
<td>Galleries</td>
<td></td>
</tr>
<tr id="listal" title="listal">
<td>Listal</td>
<td>https://listal.com</td>
<td>individual Images, People</td>
<td></td>
</tr>
<tr id="livedoor" title="livedoor">
<td>livedoor Blog</td>
<td>http://blog.livedoor.jp/</td>

View File

@@ -119,6 +119,7 @@ modules = [
"lensdump",
"lexica",
"lightroom",
"listal",
"livedoor",
"lofter",
"luscious",

View File

@@ -0,0 +1,85 @@
# -*- coding: utf-8 -*-
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
"""Extractors for https://listal.com"""
from .common import Extractor, Message
from .. import text
# Regex prefix matching the listal.com host with an optional "http(s)://"
# scheme and an optional "www." subdomain; the extractor classes in this
# module append their own path expression to it.
BASE_PATTERN = r"(?:https?://)?(?:www\.)?listal\.com"
class ListalExtractor(Extractor):
    """Base class for Listal extractor

    Subclasses supply image_ids(), an iterable of image IDs. items()
    turns every ID into one Directory and one Url message.
    """
    category = "listal"
    root = "https://www.listal.com"
    directory_fmt = ("{category}", "{title}")
    filename_fmt = "{id}_{filename}.{extension}"
    archive_fmt = "{id}/{filename}"

    def items(self):
        """Yield Directory and Url messages for each ID from image_ids()"""
        for image_id in self.image_ids():
            img = self._extract_image(image_id)
            url = img["url"]
            # fills in the URL-derived name fields ('filename' and
            # 'extension' are used by filename_fmt)
            text.nameext_from_url(url, img)
            yield Message.Directory, "", img
            yield Message.Url, url, img

    def _pagination(self, base_url, pnum=None):
        """Yield the response text of consecutive listing pages

        Without 'pnum', the first request goes to 'base_url' itself and
        later ones to '{base_url}/2', '{base_url}/3', and so on.
        With 'pnum', requests start at '{base_url}/{pnum}' and continue
        upwards from there.

        Iteration ends after the first page that contains the
        "<span class='nextprev'>Next" marker.
        """
        if pnum is None:
            url = base_url
            pnum = 1
        else:
            url = f"{base_url}/{pnum}"

        while True:
            page = self.request(url).text
            yield page

            # NOTE(review): this condition used to read
            # 'pnum is None or ...', but 'pnum' is always a number at
            # this point, so that test could never be true and was
            # dropped. If an explicit 'pnum' was meant to fetch only
            # that single page, this needs a dedicated flag - confirm.
            #
            # Presumably "Next" rendered as a plain <span> (not a link)
            # marks the last page - verify against the site markup.
            if "<span class='nextprev'>Next" in page:
                return
            pnum += 1
            url = f"{base_url}/{pnum}"

    def _extract_image(self, image_id):
        """Request the view page of 'image_id' and return its metadata

        The returned dict has the keys 'id', 'url', 'title', 'width',
        'height', 'author_url', 'author', and 'date'.
        """
        # NOTE(review): the trailing "h" presumably selects the
        # full-size variant of the view page - confirm
        url = f"{self.root}/viewimage/{image_id}h"
        page = self.request(url).text
        extr = text.extract_from(page)

        # The extr() calls appear to rely on running in page order
        # (e.g. 'author' is whatever sits between the next ">" and "<"
        # after the author link), so keep the entries in this order.
        return {
            "id"        : image_id,
            "url"       : extr("<div><center><img src='", "'"),
            "title"     : text.unescape(extr('title="', '"')),
            "width"     : text.parse_int(extr("width='", "'")),
            "height"    : text.parse_int(extr("height='", "'")),
            "author_url": extr("Added by <a href='", "'"),
            "author"    : text.unescape(extr(">", "<")),
            "date"      : self.parse_datetime(extr(
                " ago on ", "<"), "%d %B %Y %H:%M"),
        }
class ListalImageExtractor(ListalExtractor):
    """Extractor for listal pictures"""
    subcategory = "image"
    pattern = BASE_PATTERN + r"/viewimage/(\d+)"
    example = "https://www.listal.com/viewimage/12345678"

    def image_ids(self):
        """Return a 1-tuple with the image ID captured from the URL"""
        image_id = self.groups[0]
        return (image_id,)
class ListalPeopleExtractor(ListalExtractor):
    """Extractor for listal people pictures"""
    subcategory = "people"
    pattern = BASE_PATTERN + r"/([^/?#]+)/pictures"
    example = "https://www.listal.com/NAME/pictures"

    def image_ids(self):
        """Yield every image ID referenced on the '/NAME/pictures' pages"""
        name = self.groups[0]
        pictures_url = f"{self.root}/{name}/pictures"

        for html in self._pagination(pictures_url):
            # each ID is the text between "listal.com/viewimage/"
            # and the next single quote
            for image_id in text.extract_iter(
                    html, "listal.com/viewimage/", "'"):
                yield image_id

View File

@@ -240,6 +240,7 @@ SUBCATEGORY_MAP = {
"issue" : "Comic Issues",
"manga" : "Manga",
"media" : "Media Files",
"people" : "People",
"popular": "Popular Images",
"recent" : "Recent Images",
"saved" : "Saved Posts",

47
test/results/listal.py Normal file
View File

@@ -0,0 +1,47 @@
# -*- coding: utf-8 -*-
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
from gallery_dl.extractor import listal
# Expected results for the listal extractors. Keys starting with "#"
# configure the test run itself; all other keys are checked against the
# metadata produced for each file.
__tests__ = (
# single image: exact metadata values
{
    "#url"      : "https://www.listal.com/viewimage/29620846",
    "#class"    : listal.ListalImageExtractor,
    "#results"  : "https://ilarge.lisimg.com/image/29620846/1030full-jim-carrey.jpg",

    "author"    : "sinaia16",
    "author_url": "https://sinaia16.listal.com",
    "date"      : "dt:2024-07-18 18:50:00",
    "extension" : "jpg",
    "filename"  : "1030full-jim-carrey",
    "height"    : 1037,
    "id"        : "29620846",
    "title"     : "Jim Carrey",
    "url"       : "https://ilarge.lisimg.com/image/29620846/1030full-jim-carrey.jpg",
    "width"     : 1030,
},

# person gallery: first 10 files, checked by type/pattern only
{
    "#url"      : "https://www.listal.com/jim-carrey/pictures",
    "#class"    : listal.ListalPeopleExtractor,
    "#pattern"  : r"https://i\w+\.lisimg\.com/image/\d+/\d+full-.+\.jpg",
    "#range"    : "1-10",
    "#count"    : 10,

    "author"    : str,
    # dots escaped so that they only match a literal "."
    "author_url": r"re:https://\w+\.listal\.com",
    "date"      : "type:datetime",
    "extension" : "jpg",
    "filename"  : str,
    "width"     : range(200, 2000),
    "height"    : range(200, 2000),
    "id"        : r"re:\d+",
    "title"     : "Jim Carrey",
    "url"       : r"re:https://.+",
},

)