* listal extractor * add listal to init * fix flake8 & formatting & extractor names/subcategories * remove 're' import * remove 'datetime' import * update & simplify extractors * update supportedsites * add tests --------- Co-authored-by: Mike Fährmann <mike_faehrmann@web.de>
This commit is contained in:
@@ -619,6 +619,12 @@ Consider all listed sites to potentially be NSFW.
|
||||
<td>Galleries</td>
|
||||
<td></td>
|
||||
</tr>
|
||||
<tr id="listal" title="listal">
|
||||
<td>Listal</td>
|
||||
<td>https://listal.com</td>
|
||||
<td>individual Images, People</td>
|
||||
<td></td>
|
||||
</tr>
|
||||
<tr id="livedoor" title="livedoor">
|
||||
<td>livedoor Blog</td>
|
||||
<td>http://blog.livedoor.jp/</td>
|
||||
|
||||
@@ -119,6 +119,7 @@ modules = [
|
||||
"lensdump",
|
||||
"lexica",
|
||||
"lightroom",
|
||||
"listal",
|
||||
"livedoor",
|
||||
"lofter",
|
||||
"luscious",
|
||||
|
||||
85
gallery_dl/extractor/listal.py
Normal file
85
gallery_dl/extractor/listal.py
Normal file
@@ -0,0 +1,85 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
# This program is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License version 2 as
|
||||
# published by the Free Software Foundation.
|
||||
|
||||
"""Extractors for https://listal.com"""
|
||||
|
||||
from .common import Extractor, Message
|
||||
from .. import text
|
||||
|
||||
BASE_PATTERN = r"(?:https?://)?(?:www\.)?listal\.com"
|
||||
|
||||
|
||||
class ListalExtractor(Extractor):
    """Base class for Listal extractors

    items() calls self.image_ids(), which is not defined here;
    subclasses must provide it and return an iterable of image IDs.
    """
    category = "listal"
    root = "https://www.listal.com"
    directory_fmt = ("{category}", "{title}")
    filename_fmt = "{id}_{filename}.{extension}"
    archive_fmt = "{id}/{filename}"

    def items(self):
        """Yield a Directory and a Url message for every image ID"""
        for image_id in self.image_ids():
            img = self._extract_image(image_id)
            url = img["url"]
            # adds filename/extension fields derived from the URL
            # (see the 'filename' and 'extension' keys used in filename_fmt)
            text.nameext_from_url(url, img)
            yield Message.Directory, "", img
            yield Message.Url, url, img

    def _pagination(self, base_url, pnum=None):
        """Yield the HTML text of consecutive listing pages

        The first request goes to 'base_url' itself when 'pnum' is None
        and to '{base_url}/{pnum}' otherwise; every following request
        uses '{base_url}/{pnum}' with 'pnum' incremented by one.

        Iteration ends after the first page that contains
        "<span class='nextprev'>Next" (presumably the disabled "Next"
        control shown on the last page -- verify against the site).
        """
        if pnum is None:
            url = base_url
            pnum = 1
        else:
            url = f"{base_url}/{pnum}"

        while True:
            page = self.request(url).text
            yield page

            # 'pnum' is always an integer here (None was replaced by 1
            # above), so only the page content decides when to stop
            if "<span class='nextprev'>Next" in page:
                return
            pnum += 1
            url = f"{base_url}/{pnum}"

    def _extract_image(self, image_id):
        """Fetch the '/viewimage/{image_id}h' page and return its metadata

        Returns a dict with the keys 'id', 'url', 'title', 'width',
        'height', 'author_url', 'author' and 'date'.
        """
        url = f"{self.root}/viewimage/{image_id}h"
        page = self.request(url).text
        extr = text.extract_from(page)

        # NOTE(review): 'extr' presumably consumes the page sequentially,
        # so the entries below must stay in document order -- confirm
        # against text.extract_from before reordering
        return {
            "id"        : image_id,
            "url"       : extr("<div><center><img src='", "'"),
            "title"     : text.unescape(extr('title="', '"')),
            "width"     : text.parse_int(extr("width='", "'")),
            "height"    : text.parse_int(extr("height='", "'")),
            "author_url": extr("Added by <a href='", "'"),
            "author"    : text.unescape(extr(">", "<")),
            "date"      : self.parse_datetime(extr(
                " ago on ", "<"), "%d %B %Y %H:%M"),
        }
|
||||
|
||||
|
||||
class ListalImageExtractor(ListalExtractor):
    """Extractor for a single listal image page"""
    subcategory = "image"
    pattern = BASE_PATTERN + r"/viewimage/(\d+)"
    example = "https://www.listal.com/viewimage/12345678"

    def image_ids(self):
        """Return the first match group as a one-element tuple"""
        image_id = self.groups[0]
        return (image_id,)
|
||||
|
||||
|
||||
class ListalPeopleExtractor(ListalExtractor):
    """Extractor for the pictures listed on a listal person page"""
    subcategory = "people"
    pattern = BASE_PATTERN + r"/([^/?#]+)/pictures"
    example = "https://www.listal.com/NAME/pictures"

    def image_ids(self):
        """Generate every image ID found on the paginated picture listing"""
        name = self.groups[0]
        for page in self._pagination(f"{self.root}/{name}/pictures"):
            # each ID is the text between 'listal.com/viewimage/' and
            # the closing single quote of the link
            for image_id in text.extract_iter(
                    page, "listal.com/viewimage/", "'"):
                yield image_id
|
||||
@@ -240,6 +240,7 @@ SUBCATEGORY_MAP = {
|
||||
"issue" : "Comic Issues",
|
||||
"manga" : "Manga",
|
||||
"media" : "Media Files",
|
||||
"people" : "People",
|
||||
"popular": "Popular Images",
|
||||
"recent" : "Recent Images",
|
||||
"saved" : "Saved Posts",
|
||||
|
||||
47
test/results/listal.py
Normal file
47
test/results/listal.py
Normal file
@@ -0,0 +1,47 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
# This program is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License version 2 as
|
||||
# published by the Free Software Foundation.
|
||||
|
||||
from gallery_dl.extractor import listal
|
||||
|
||||
|
||||
# Expected results for the listal extractors: one entry per test URL.
# Keys starting with '#' configure the test run, all other keys are
# metadata fields checked against the extractor output.
__tests__ = (
{
    "#url"     : "https://www.listal.com/viewimage/29620846",
    "#class"   : listal.ListalImageExtractor,
    "#results" : "https://ilarge.lisimg.com/image/29620846/1030full-jim-carrey.jpg",

    "author"    : "sinaia16",
    "author_url": "https://sinaia16.listal.com",
    "date"      : "dt:2024-07-18 18:50:00",
    "extension" : "jpg",
    "filename"  : "1030full-jim-carrey",
    "height"    : 1037,
    "id"        : "29620846",
    "title"     : "Jim Carrey",
    "url"       : "https://ilarge.lisimg.com/image/29620846/1030full-jim-carrey.jpg",
    "width"     : 1030,
},

{
    "#url"     : "https://www.listal.com/jim-carrey/pictures",
    "#class"   : listal.ListalPeopleExtractor,
    "#pattern" : r"https://i\w+\.lisimg\.com/image/\d+/\d+full-.+\.jpg",
    "#range"   : "1-10",
    "#count"   : 10,

    "author"    : str,
    # dots are escaped so only a literal '<user>.listal.com' host matches
    "author_url": r"re:https://\w+\.listal\.com",
    "date"      : "type:datetime",
    "extension" : "jpg",
    "filename"  : str,
    "width"     : range(200, 2000),
    "height"    : range(200, 2000),
    "id"        : r"re:\d+",
    "title"     : "Jim Carrey",
    "url"       : r"re:https://.+",
},

)
|
||||
Reference in New Issue
Block a user