diff --git a/docs/supportedsites.md b/docs/supportedsites.md index 1e3e66d3..48ec0f4b 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -694,7 +694,7 @@ Consider all listed sites to potentially be NSFW. Pictoa https://pictoa.com/ - Galleries, individual Images + Albums, individual Images diff --git a/gallery_dl/extractor/__init__.py b/gallery_dl/extractor/__init__.py index 6dd978ea..9a7ca535 100644 --- a/gallery_dl/extractor/__init__.py +++ b/gallery_dl/extractor/__init__.py @@ -129,8 +129,8 @@ modules = [ "pexels", "philomena", "photovogue", - "pictoa", "picarto", + "pictoa", "piczel", "pillowfort", "pinterest", diff --git a/gallery_dl/extractor/pictoa.py b/gallery_dl/extractor/pictoa.py index 14991114..a8008cf8 100644 --- a/gallery_dl/extractor/pictoa.py +++ b/gallery_dl/extractor/pictoa.py @@ -1,4 +1,5 @@ # -*- coding: utf-8 -*- + # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License version 2 as # published by the Free Software Foundation. @@ -7,104 +8,71 @@ from .common import Extractor, Message from .. import text -import re BASE_PATTERN = r"(?:https?://)?(?:[\w]+\.)?pictoa\.com(?:\.de)?" + class PictoaExtractor(Extractor): """Base class for pictoa extractors""" category = "pictoa" root = "https://pictoa.com" + directory_fmt = ("{category}", "{album_id} {album_title}") + filename_fmt = "{id}.{extension}" + archive_fmt = "{id}" + class PictoaImageExtractor(PictoaExtractor): """Extractor for single images from pictoa.com""" subcategory = "image" - pattern = BASE_PATTERN + r"/albums/([^/\.?#]+)/([^/\.?#]+).html" - filename_fmt = "{id}.{extension}" - directory_fmt = ("{category}", "{album[id]}") - archive_fmt = "{image_id}" - example = "https://www.pictoa.com/albums/name-2693203/12345.html" - - def __init__(self, match): - PictoaExtractor.__init__(self, match) - self.album_id = match.group(1) - self.image_id = match.group(2) + pattern = BASE_PATTERN + r"/albums/(?:[\w-]+-)?(\d+)/(\d+)" + example = "https://www.pictoa.com/albums/NAME-12345/12345.html" def items(self): - url = f"{self.root}/albums/{self.album_id}/{self.image_id}.html" + album_id, image_id = self.groups + + url = "{}/albums/{}/{}.html".format(self.root, album_id, image_id) page = self.request(url).text - container = text.extract(page, '
")[0] - album_title = text.extract(page, '', '')[0] - - # grab the id out of the title (handiest place to get it) - htmltitle = text.extract(page, '', '')[0] - album_id = text.extract(htmltitle, '#', ' ')[0] - - # tags - taghunk = text.extract(page, '