[listal] add 'image' & 'people' extractors (#1589 #8921)

* listal extractor
* add listal to init
* fix flake8 & formatting & extractor names/subcategories
* remove 're' import
* remove 'datetime' import
* update & simplify extractors
* update supportedsites
* add tests

---------

Co-authored-by: Mike Fährmann <mike_faehrmann@web.de>
2026-01-27 10:26:41 -07:00
parent eaaa25b6e4
commit ef8f2869e7
5 changed files with 140 additions and 0 deletions
--- a/docs/supportedsites.md
+++ b/docs/supportedsites.md
@@ -619,6 +619,12 @@ Consider all listed sites to potentially be NSFW.
    <td>Galleries</td>
    <td></td>
 </tr>
+<tr id="listal" title="listal">
+    <td>Listal</td>
+    <td>https://listal.com</td>
+    <td>individual Images, People</td>
+    <td></td>
+</tr>
 <tr id="livedoor" title="livedoor">
    <td>livedoor Blog</td>
    <td>http://blog.livedoor.jp/</td>
--- a/gallery_dl/extractor/__init__.py
+++ b/gallery_dl/extractor/__init__.py
@@ -119,6 +119,7 @@ modules = [
    "lensdump",
    "lexica",
    "lightroom",
+    "listal",
    "livedoor",
    "lofter",
    "luscious",
--- a/gallery_dl/extractor/listal.py
+++ b/gallery_dl/extractor/listal.py
@@ -0,0 +1,85 @@
+# -*- coding: utf-8 -*-
+
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+
+"""Extractors for https://listal.com"""
+
+from .common import Extractor, Message
+from .. import text
+
+BASE_PATTERN = r"(?:https?://)?(?:www\.)?listal\.com"  # scheme/www optional
+
+
+class ListalExtractor(Extractor):
+    """Base class for listal extractors; subclasses supply image_ids()"""
+    category = "listal"
+    root = "https://www.listal.com"
+    directory_fmt = ("{category}", "{title}")
+    filename_fmt = "{id}_{filename}.{extension}"
+    archive_fmt = "{id}/{filename}"
+
+    def items(self):
+        """Yield a Directory and a Url message for each image ID"""
+        for image_id in self.image_ids():
+            img = self._extract_image(image_id)
+            url = img["url"]
+            text.nameext_from_url(url, img)
+            yield Message.Directory, "", img
+            yield Message.Url, url, img
+
+    def _pagination(self, base_url, pnum=None):
+        """Yield page HTML from 'base_url' (or 'base_url/pnum') onwards"""
+        if pnum is None:
+            url, pnum = base_url, 1
+        else:
+            url = f"{base_url}/{pnum}"
+        while True:
+            page = self.request(url).text
+            yield page
+            # 'pnum' is never None here, so the loop ends only once a page
+            # contains this marker (presumably a disabled "Next" control)
+            if "<span class='nextprev'>Next" in page:
+                return
+            pnum += 1
+            url = f"{base_url}/{pnum}"
+
+    def _extract_image(self, image_id):
+        """Request an image's 'viewimage' page and return its metadata"""
+        url = f"{self.root}/viewimage/{image_id}h"
+        page = self.request(url).text
+        extr = text.extract_from(page)
+        return {
+            "id"        : image_id,
+            "url"       : extr("<div><center><img src='", "'"),
+            "title"     : text.unescape(extr('title="', '"')),
+            "width"     : text.parse_int(extr("width='", "'")),
+            "height"    : text.parse_int(extr("height='", "'")),
+            "author_url": extr("Added by <a href='", "'"),
+            "author"    : text.unescape(extr(">", "<")),
+            "date"      : self.parse_datetime(extr(
+                " ago on ", "<"), "%d %B %Y %H:%M"),
+        }
+
+
+class ListalImageExtractor(ListalExtractor):
+    """Extractor for a single listal image"""
+    subcategory = "image"
+    pattern = rf"{BASE_PATTERN}/viewimage/(\d+)"
+    example = "https://www.listal.com/viewimage/12345678"
+
+    def image_ids(self):
+        return (self.groups[0],)  # one-element tuple: the ID from the URL
+
+
+class ListalPeopleExtractor(ListalExtractor):
+    """Extractor for all pictures of a person on listal"""
+    subcategory = "people"
+    pattern = rf"{BASE_PATTERN}/([^/?#]+)/pictures"
+    example = "https://www.listal.com/NAME/pictures"
+
+    def image_ids(self):
+        """Yield image IDs taken from 'viewimage' links on every page"""
+        for html in self._pagination(f"{self.root}/{self.groups[0]}/pictures"):
+            yield from text.extract_iter(html, "listal.com/viewimage/", "'")
--- a/scripts/supportedsites.py
+++ b/scripts/supportedsites.py
@@ -240,6 +240,7 @@ SUBCATEGORY_MAP = {
    "issue"  : "Comic Issues",
    "manga"  : "Manga",
    "media"  : "Media Files",
+    "people" : "People",
    "popular": "Popular Images",
    "recent" : "Recent Images",
    "saved"  : "Saved Posts",
--- a/test/results/listal.py
+++ b/test/results/listal.py
@@ -0,0 +1,47 @@
+# -*- coding: utf-8 -*-
+
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+
+from gallery_dl.extractor import listal
+
+
+__tests__ = (
+{
+    "#url"     : "https://www.listal.com/viewimage/29620846",
+    "#class"   : listal.ListalImageExtractor,
+    "#results" : "https://ilarge.lisimg.com/image/29620846/1030full-jim-carrey.jpg",
+
+    "author"    : "sinaia16",
+    "author_url": "https://sinaia16.listal.com",
+    "date"      : "dt:2024-07-18 18:50:00",
+    "extension" : "jpg",
+    "filename"  : "1030full-jim-carrey",
+    "height"    : 1037,
+    "id"        : "29620846",
+    "title"     : "Jim Carrey",
+    "url"       : "https://ilarge.lisimg.com/image/29620846/1030full-jim-carrey.jpg",
+    "width"     : 1030,
+},
+
+{
+    "#url"     : "https://www.listal.com/jim-carrey/pictures",
+    "#class"   : listal.ListalPeopleExtractor,
+    "#pattern" : r"https://i\w+\.lisimg\.com/image/\d+/\d+full-.+\.jpg",
+    "#range"   : "1-10",
+    "#count"   : 10,
+
+    "author"    : str,
+    "author_url": r"re:https://\w+\.listal\.com",
+    "date"      : "type:datetime",
+    "extension" : "jpg",
+    "filename"  : str,
+    "width"     : range(200, 2000),
+    "height"    : range(200, 2000),
+    "id"        : r"re:\d+",
+    "title"     : "Jim Carrey",
+    "url"       : r"re:https://.+",
+},
+
+)