From f13ab25233adfa7f1e02f3ff9584bd4ecd8897fc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mike=20F=C3=A4hrmann?= Date: Mon, 15 Sep 2025 20:19:24 +0200 Subject: [PATCH] [imgpile] add support (#5044) --- docs/supportedsites.md | 6 ++ gallery_dl/extractor/__init__.py | 1 + gallery_dl/extractor/imgpile.py | 119 +++++++++++++++++++++++++++++++ scripts/supportedsites.py | 1 + test/results/imgpile.py | 59 +++++++++++++++ 5 files changed, 186 insertions(+) create mode 100644 gallery_dl/extractor/imgpile.py create mode 100644 test/results/imgpile.py diff --git a/docs/supportedsites.md b/docs/supportedsites.md index 71ec3701..e881d7c1 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -457,6 +457,12 @@ Consider all listed sites to potentially be NSFW. Galleries, individual Images + + imgpile + https://imgpile.com/ + Posts, User Profiles + + imgth https://imgth.com/ diff --git a/gallery_dl/extractor/__init__.py b/gallery_dl/extractor/__init__.py index b32fcd11..ef663a35 100644 --- a/gallery_dl/extractor/__init__.py +++ b/gallery_dl/extractor/__init__.py @@ -88,6 +88,7 @@ modules = [ "imagefap", "imgbb", "imgbox", + "imgpile", "imgth", "imgur", "imhentai", diff --git a/gallery_dl/extractor/imgpile.py b/gallery_dl/extractor/imgpile.py new file mode 100644 index 00000000..9fc3a9c6 --- /dev/null +++ b/gallery_dl/extractor/imgpile.py @@ -0,0 +1,119 @@ +# -*- coding: utf-8 -*- + +# Copyright 2025 Mike Fährmann +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License version 2 as +# published by the Free Software Foundation. + +"""Extractors for https://imgpile.com/""" + +from .common import Extractor, Message +from .. 
BASE_PATTERN = r"(?:https?://)?(?:www\.)?imgpile\.com"


class ImgpileExtractor(Extractor):
    """Base class for imgpile extractors"""
    category = "imgpile"
    root = "https://imgpile.com"
    # files are grouped per post: <category>/<author>/<title> (<post slug>)
    directory_fmt = ("{category}", "{post[author]}",
                     "{post[title]} ({post[id_slug]})")
    # archive key combines the post slug with the numeric file ID
    archive_fmt = "{post[id_slug]}_{id}"

    def items(self):
        """Yield nothing; subclasses provide the actual implementation"""


class ImgpilePostExtractor(ImgpileExtractor):
    """Extractor for individual imgpile posts"""
    subcategory = "post"
    pattern = rf"{BASE_PATTERN}/p/(\w+)"
    example = "https://imgpile.com/p/AbCdEfG"

    def items(self):
        """Yield one Directory message and one Url message per file

        The post page is fetched and scraped for post metadata, which is
        stored under the 'post' key of every yielded metadata dict.
        File-level fields ('id', 'id_slug', 'url', 'num') are stored at
        the top level.
        """
        post_id = self.groups[0]
        url = f"{self.root}/p/{post_id}"
        page = self.request(url).text
        # NOTE(review): extr() appears to consume the page sequentially
        # (_extract_files() relies on repeated calls returning successive
        # matches), so the lookups below must stay in page order
        extr = text.extract_from(page)

        post = {
            "id_slug": post_id,
            # the title is the content of <title> without the
            # ' - imgpile' suffix; an empty start marker would return
            # everything from the start of the page instead
            "title"  : text.unescape(extr("<title>", " - imgpile<")),
            "id"     : text.parse_int(extr('data-post-id="', '"')),
            "author" : extr('/u/', '"'),
            "score"  : text.parse_int(text.remove_html(extr(
                'class="post-score">', "</"))),
            # drop thousands separators before parsing the view count
            "views"  : text.parse_int(extr(
                'class="meta-value">', "<").replace(",", "")),
            "tags"   : text.split_html(extr(
                " <!-- Tags -->", '<!-- "')),
        }

        files = self._extract_files(extr)
        data = {"post": post}
        data["count"] = post["count"] = len(files)

        yield Message.Directory, data
        # 'num' is written straight into the shared metadata dict
        for data["num"], file in enumerate(files, 1):
            data.update(file)
            url = file["url"]
            yield Message.Url, url, text.nameext_from_url(url, data)

    def _extract_files(self, extr):
        """Return a list of file dicts collected through 'extr'

        'extr' is the extraction function created in items(). It is
        called until it no longer returns a 'post-media' section.
        Each returned dict has the keys 'id_slug', 'id', and 'url'.
        """
        files = []

        while True:
            media = extr('lass="post-media', '</div>')
            if not media:
                break
            files.append({
                "id_slug": text.extr(media, 'data-id="', '"'),
                "id"     : text.parse_int(text.extr(
                    media, 'data-media-id="', '"')),
                # 'http' is part of the search marker and therefore
                # has to be prepended to the extracted remainder again
                "url": f"""http{text.extr(media, '<a href="http', '"')}""",
            })
        return files


class ImgpileUserExtractor(ImgpileExtractor):
    """Extractor for the posts of an imgpile user"""
    subcategory = "user"
    pattern = rf"{BASE_PATTERN}/u/([^/?#]+)"
    example = "https://imgpile.com/u/USER"

    def items(self):
        """Yield a Queue message for every post returned by the posts API

        Results are requested from '/api/v1/posts' and each post URL is
        handed over to ImgpilePostExtractor. Pagination follows the
        'next' link of each response until there is none.
        """
        url = f"{self.root}/api/v1/posts"
        params = {
            "limit"     : "100",
            "sort"      : "latest",
            "period"    : "all",
            "visibility": "public",
            # "moderation_status": "approved",
            "username"  : self.groups[0],
        }
        headers = {
            "Accept"        : "application/json",
            # "Referer"     : "https://imgpile.com/u/USER",
            "Content-Type"  : "application/json",
            # "X-CSRF-TOKEN": "",
            "Sec-Fetch-Dest": "empty",
            "Sec-Fetch-Mode": "cors",
            "Sec-Fetch-Site": "same-origin",
        }

        base = f"{self.root}/p/"
        while True:
            data = self.request_json(url, params=params, headers=headers)

            if params is not None:
                # first response only: stop sending 'params' (later
                # requests use the 'next' URL as-is) and store the
                # reported total number of posts
                params = None
                self.kwdict["total"] = data["meta"]["total"]

            for item in data["data"]:
                item["_extractor"] = ImgpilePostExtractor
                url = f"{base}{item['slug']}"
                yield Message.Queue, url, item

            url = data["links"].get("next")
            if not url:
                return
from gallery_dl.extractor import imgpile


__tests__ = (
# single post containing three files
{
    "#url"     : "https://imgpile.com/p/bbjiXrl",
    "#class"   : imgpile.ImgpilePostExtractor,
    "#results" : (
        "https://cdn.imgpile.com/f/BobTUou.jpg",
        "https://cdn.imgpile.com/f/Wr9cQFK.jpg",
        "https://cdn.imgpile.com/f/VevZbjw.png",
    ),

    "id"       : {3518940, 3518941, 3518942},
    "id_slug"  : {"BobTUou", "Wr9cQFK", "VevZbjw"},
    "count"    : 3,
    "num"      : range(1, 3),
    "filename" : str,
    "extension": {"jpg", "png"},
    # dots in the host name are escaped so they only match literal dots
    "url"      : r"re:https://cdn\.imgpile\.com/f/\w+\.(jpg|png)",
    "post"     : {
        "author" : "zilla_64",
        "count"  : 3,
        "id"     : 105411,
        "id_slug": "bbjiXrl",
        "score"  : range(-5, 5),
        "title"  : "Mecha-King Ghidorah scans",
        "views"  : range(8_300, 12_000),
        "tags"   : [
            "text",
            "description",
            "Godzilla",
            "battle",
            "story",
            "article",
            "monsters",
            "characters",
            "device",
            "time",
            "space",
            "mecha",
        ],
    },
},

# user profile: every result must be a post URL
{
    "#url"     : "https://imgpile.com/u/zilla_64",
    "#class"   : imgpile.ImgpileUserExtractor,
    "#pattern" : imgpile.ImgpilePostExtractor.pattern,
    "#count"   : 16,
},

)