diff --git a/docs/supportedsites.md b/docs/supportedsites.md index 241b1126..36226654 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -91,6 +91,12 @@ Consider all listed sites to potentially be NSFW. Galleries + + Agnph + https://agn.ph/ + Posts, Tag Searches + + Architizer https://architizer.com/ diff --git a/gallery_dl/extractor/__init__.py b/gallery_dl/extractor/__init__.py index 6aff1f3f..beb6fb47 100644 --- a/gallery_dl/extractor/__init__.py +++ b/gallery_dl/extractor/__init__.py @@ -22,6 +22,7 @@ modules = [ "8chan", "8muses", "adultempire", + "agnph", "architizer", "artstation", "aryion", diff --git a/gallery_dl/extractor/agnph.py b/gallery_dl/extractor/agnph.py new file mode 100644 index 00000000..7bceb9a6 --- /dev/null +++ b/gallery_dl/extractor/agnph.py @@ -0,0 +1,83 @@ +# -*- coding: utf-8 -*- + +# Copyright 2024 Mike Fährmann +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License version 2 as +# published by the Free Software Foundation. + +"""Extractors for https://agn.ph/""" + +from . import booru +from .. 
from .. import text

from xml.etree import ElementTree

BASE_PATTERN = r"(?:https?://)?agn\.ph"


class AgnphExtractor(booru.BooruExtractor):
    """Base class for agn.ph extractors"""
    category = "agnph"
    root = "https://agn.ph"
    page_start = 1
    per_page = 45

    def _prepare(self, post):
        """Normalize the metadata fields of 'post' in place"""
        post["date"] = text.parse_timestamp(post["created_at"])
        # _xml_to_dict() stores None for XML elements without text,
        # so fall back to "" before using string operations
        post["status"] = (post["status"] or "").strip()
        post["has_children"] = ("true" in (post["has_children"] or ""))

    def _xml_to_dict(self, xml):
        """Map the tag of each child element of 'xml' to its text"""
        return {element.tag: element.text for element in xml}

    def _pagination(self, url, params):
        """Yield one dict per post from consecutive result pages of 'url'

        'params' is modified in place: "api" is set to "xml" and
        "page" is incremented before every follow-up request.
        """
        params["api"] = "xml"
        if "page" in params:
            # offset a user-supplied page number by 'page_start'
            params["page"] = \
                self.page_start + text.parse_int(params["page"]) - 1
        else:
            params["page"] = self.page_start

        while True:
            data = self.request(url, params=params).text
            root = ElementTree.fromstring(data)

            # An empty page can never advance 'offset + len(root)' towards
            # 'count'; stop here instead of requesting pages forever.
            if len(root) == 0:
                return

            yield from map(self._xml_to_dict, root)

            # the root element's attributes carry the position
            # of this page within the complete result set
            attrib = root.attrib
            if int(attrib["offset"]) + len(root) >= int(attrib["count"]):
                return

            params["page"] += 1


class AgnphTagExtractor(AgnphExtractor):
    """Extractor for agn.ph tag searches"""
    subcategory = "tag"
    directory_fmt = ("{category}", "{search_tags}")
    archive_fmt = "t_{search_tags}_{id}"
    pattern = BASE_PATTERN + r"/gallery/post/(?:\?([^#]+))?$"
    example = "https://agn.ph/gallery/post/?search=TAG"

    def __init__(self, match):
        AgnphExtractor.__init__(self, match)
        # first pattern group: the URL's query string (without '?')
        self.params = text.parse_query(self.groups[0])

    def metadata(self):
        """Return the search term as 'search_tags' ("" if there is none)"""
        return {"search_tags": self.params.get("search") or ""}

    def posts(self):
        """Return an iterator over all posts matching the query"""
        url = self.root + "/gallery/post/"
        # pass a copy, since _pagination() modifies its 'params' argument
        return self._pagination(url, self.params.copy())


class AgnphPostExtractor(AgnphExtractor):
    """Extractor for individual agn.ph posts"""
    subcategory = "post"
    archive_fmt = "{id}"
    pattern = BASE_PATTERN + r"/gallery/post/show/(\d+)"
    example = "https://agn.ph/gallery/post/show/12345/"

    def posts(self):
        """Return a 1-tuple containing the requested post's metadata"""
        url = "{}/gallery/post/show/{}/?api=xml".format(
            self.root, self.groups[0])
        post = ElementTree.fromstring(self.request(url).text)
        return (self._xml_to_dict(post),)
# -*- coding: utf-8 -*-

# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.

from gallery_dl.extractor import agnph


# Test cases for the agnph extractors: keys starting with "#" describe
# the test itself, all other keys are expected post metadata values.
__tests__ = (
    # tag search
    {
        "#url": "https://agn.ph/gallery/post/?search=azuu",
        "#category": ("booru", "agnph", "tag"),
        "#class": agnph.AgnphTagExtractor,
        "#pattern": r"http://agn\.ph/gallery/data/../../\w{32}\.jpg",
        "#count": ">= 50",
    },

    # single post
    {
        "#url": "https://agn.ph/gallery/post/show/501604/",
        "#category": ("booru", "agnph", "post"),
        "#class": agnph.AgnphPostExtractor,
        "#urls": "http://agn.ph/gallery/data/7d/a5/7da50021f3e86f6cf1c215652060d772.png",
        "#sha1_content": "93c8b2d3f53e891ad8fa68d5f60f8c7a70acd836",

        "artist": "reyn_goldfur",
        "created_at": "1722041591",
        "creator_id": "-1",
        "date": "dt:2024-07-27 00:53:11",
        "description": None,
        "fav_count": "0",
        "file_ext": "png",
        "file_url": "http://agn.ph/gallery/data/7d/a5/7da50021f3e86f6cf1c215652060d772.png",
        "has_children": False,
        "height": "1000",
        "id": "501604",
        "md5": "7da50021f3e86f6cf1c215652060d772",
        "num_comments": "0",
        "parent_id": None,
        "rating": "e",
        "source": "https://inkbunny.net/s/2886519",
        "status": "approved",
        "tags": "anthro female hisuian_sneasel regional_form reyn_goldfur shelly_the_sneasel sneasel solo",
        "thumbnail_url": "http://agn.ph/gallery/data/thumb/7d/a5/7da50021f3e86f6cf1c215652060d772.png",
        "width": "953",
    },
)