From ef8f2869e71732f5f76998b340dd945c56527acb Mon Sep 17 00:00:00 2001 From: SubmarineScurvy <240775660+SubmarineScurvy@users.noreply.github.com> Date: Tue, 27 Jan 2026 10:26:41 -0700 Subject: [PATCH] [listal] add 'image' & 'people' extractors (#1589 #8921) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * listal extractor * add listal to init * fix flake8 & formatting & extractor names/subcategories * remove 're' import * remove 'datetime' import * update & simplify extractors * update supportedsites * add tests --------- Co-authored-by: Mike Fährmann --- docs/supportedsites.md | 6 +++ gallery_dl/extractor/__init__.py | 1 + gallery_dl/extractor/listal.py | 85 ++++++++++++++++++++++++++++++++ scripts/supportedsites.py | 1 + test/results/listal.py | 47 ++++++++++++++++++ 5 files changed, 140 insertions(+) create mode 100644 gallery_dl/extractor/listal.py create mode 100644 test/results/listal.py diff --git a/docs/supportedsites.md b/docs/supportedsites.md index 679363a7..1432c5ca 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -619,6 +619,12 @@ Consider all listed sites to potentially be NSFW. Galleries + + Listal + https://listal.com + individual Images, People + + livedoor Blog http://blog.livedoor.jp/ diff --git a/gallery_dl/extractor/__init__.py b/gallery_dl/extractor/__init__.py index 5990fa7a..a179513e 100644 --- a/gallery_dl/extractor/__init__.py +++ b/gallery_dl/extractor/__init__.py @@ -119,6 +119,7 @@ modules = [ "lensdump", "lexica", "lightroom", + "listal", "livedoor", "lofter", "luscious", diff --git a/gallery_dl/extractor/listal.py b/gallery_dl/extractor/listal.py new file mode 100644 index 00000000..05b5b6f2 --- /dev/null +++ b/gallery_dl/extractor/listal.py @@ -0,0 +1,85 @@ +# -*- coding: utf-8 -*- + +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License version 2 as +# published by the Free Software Foundation. 
+ +"""Extractors for https://listal.com""" + +from .common import Extractor, Message +from .. import text + +BASE_PATTERN = r"(?:https?://)?(?:www\.)?listal\.com" + + +class ListalExtractor(Extractor): + """Base class for Listal extractor""" + category = "listal" + root = "https://www.listal.com" + directory_fmt = ("{category}", "{title}") + filename_fmt = "{id}_{filename}.{extension}" + archive_fmt = "{id}/{filename}" + + def items(self): + for image_id in self.image_ids(): + img = self._extract_image(image_id) + url = img["url"] + text.nameext_from_url(url, img) + yield Message.Directory, "", img + yield Message.Url, url, img + + def _pagination(self, base_url, pnum=None): + if pnum is None: + url = base_url + pnum = 1 + else: + url = f"{base_url}/{pnum}" + + while True: + page = self.request(url).text + + yield page + + if pnum is None or "Next" in page: + return + pnum += 1 + url = f"{base_url}/{pnum}" + + def _extract_image(self, image_id): + url = f"{self.root}/viewimage/{image_id}h" + page = self.request(url).text + extr = text.extract_from(page) + + return { + "id" : image_id, + "url" : extr("
", "<")), + "date" : self.parse_datetime(extr( + " ago on ", "<"), "%d %B %Y %H:%M"), + } + + +class ListalImageExtractor(ListalExtractor): + """Extractor for listal pictures""" + subcategory = "image" + pattern = BASE_PATTERN + r"/viewimage/(\d+)" + example = "https://www.listal.com/viewimage/12345678" + + def image_ids(self): + return (self.groups[0],) + + +class ListalPeopleExtractor(ListalExtractor): + """Extractor for listal people pictures""" + subcategory = "people" + pattern = BASE_PATTERN + r"/([^/?#]+)/pictures" + example = "https://www.listal.com/NAME/pictures" + + def image_ids(self): + url = f"{self.root}/{self.groups[0]}/pictures" + for page in self._pagination(url): + yield from text.extract_iter(page, "listal.com/viewimage/", "'") diff --git a/scripts/supportedsites.py b/scripts/supportedsites.py index f674c8e5..c46827eb 100755 --- a/scripts/supportedsites.py +++ b/scripts/supportedsites.py @@ -240,6 +240,7 @@ SUBCATEGORY_MAP = { "issue" : "Comic Issues", "manga" : "Manga", "media" : "Media Files", + "people" : "People", "popular": "Popular Images", "recent" : "Recent Images", "saved" : "Saved Posts", diff --git a/test/results/listal.py b/test/results/listal.py new file mode 100644 index 00000000..a8ea21ce --- /dev/null +++ b/test/results/listal.py @@ -0,0 +1,47 @@ +# -*- coding: utf-8 -*- + +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License version 2 as +# published by the Free Software Foundation. 
+
+from gallery_dl.extractor import listal
+
+
+__tests__ = (
+{
+    "#url"     : "https://www.listal.com/viewimage/29620846",
+    "#class"   : listal.ListalImageExtractor,
+    "#results" : "https://ilarge.lisimg.com/image/29620846/1030full-jim-carrey.jpg",
+
+    "author"    : "sinaia16",
+    "author_url": "https://sinaia16.listal.com",
+    "date"      : "dt:2024-07-18 18:50:00",
+    "extension" : "jpg",
+    "filename"  : "1030full-jim-carrey",
+    "height"    : 1037,
+    "id"        : "29620846",
+    "title"     : "Jim Carrey",
+    "url"       : "https://ilarge.lisimg.com/image/29620846/1030full-jim-carrey.jpg",
+    "width"     : 1030,
+},
+
+{
+    "#url"     : "https://www.listal.com/jim-carrey/pictures",
+    "#class"   : listal.ListalPeopleExtractor,
+    "#pattern" : r"https://i\w+\.lisimg\.com/image/\d+/\d+full-.+\.jpg",
+    "#range"   : "1-10",
+    "#count"   : 10,
+
+    "author"    : str,
+    "author_url": r"re:https://\w+\.listal\.com",  # dots must be literal
+    "date"      : "type:datetime",
+    "extension" : "jpg",
+    "filename"  : str,
+    "width"     : range(200, 2000),
+    "height"    : range(200, 2000),
+    "id"        : r"re:\d+",
+    "title"     : "Jim Carrey",
+    "url"       : r"re:https://.+",
+},
+
+)