From f34210828062e5c7a481cce7d06d029d52ae6409 Mon Sep 17 00:00:00 2001 From: nunonda Date: Wed, 23 Apr 2025 17:37:40 -0700 Subject: [PATCH] Adding in a first pass at a pictoa extractor Adds support for galleries and individual Images --- docs/supportedsites.md | 6 ++ gallery_dl/extractor/__init__.py | 1 + gallery_dl/extractor/pictoa.py | 110 +++++++++++++++++++++++++++++++ test/results/pictoa.py | 49 ++++++++++++++ 4 files changed, 166 insertions(+) create mode 100644 gallery_dl/extractor/pictoa.py create mode 100644 test/results/pictoa.py diff --git a/docs/supportedsites.md b/docs/supportedsites.md index 95dd34ec..1e3e66d3 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -691,6 +691,12 @@ Consider all listed sites to potentially be NSFW. Galleries + + Pictoa + https://pictoa.com/ + Galleries, individual Images + + Piczel https://piczel.tv/ diff --git a/gallery_dl/extractor/__init__.py b/gallery_dl/extractor/__init__.py index d0476c9a..6dd978ea 100644 --- a/gallery_dl/extractor/__init__.py +++ b/gallery_dl/extractor/__init__.py @@ -129,6 +129,7 @@ modules = [ "pexels", "philomena", "photovogue", + "pictoa", "picarto", "piczel", "pillowfort", diff --git a/gallery_dl/extractor/pictoa.py b/gallery_dl/extractor/pictoa.py new file mode 100644 index 00000000..14991114 --- /dev/null +++ b/gallery_dl/extractor/pictoa.py @@ -0,0 +1,110 @@ +# -*- coding: utf-8 -*- +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License version 2 as +# published by the Free Software Foundation. + +"""Extractors for https://pictoa.com/""" + +from .common import Extractor, Message +from .. import text +import re + +BASE_PATTERN = r"(?:https?://)?(?:[\w]+\.)?pictoa\.com(?:\.de)?" + +class PictoaExtractor(Extractor): + """Base class for pictoa extractors""" + category = "pictoa" + root = "https://pictoa.com" + +class PictoaImageExtractor(PictoaExtractor): + """Extractor for single images from pictoa.com""" + subcategory = "image" + pattern = BASE_PATTERN + r"/albums/([^/\.?#]+)/([^/\.?#]+).html" + filename_fmt = "{id}.{extension}" + directory_fmt = ("{category}", "{album[id]}") + archive_fmt = "{image_id}" + example = "https://www.pictoa.com/albums/name-2693203/12345.html" + + def __init__(self, match): + PictoaExtractor.__init__(self, match) + self.album_id = match.group(1) + self.image_id = match.group(2) + + def items(self): + url = f"{self.root}/albums/{self.album_id}/{self.image_id}.html" + page = self.request(url).text + container = text.extract(page, '
")[0] + album_title = text.extract(page, '', '')[0] + + # grab the id out of the title (handiest place to get it) + htmltitle = text.extract(page, '', '')[0] + album_id = text.extract(htmltitle, '#', ' ')[0] + + # tags + taghunk = text.extract(page, '