[agnph] add 'tag' and 'post' extractors (#5284, #5890)

This commit is contained in:
Mike Fährmann
2024-07-27 12:17:47 +02:00
parent 304bb4bbb8
commit 226ead728e
4 changed files with 139 additions and 0 deletions

View File

@@ -91,6 +91,12 @@ Consider all listed sites to potentially be NSFW.
<td>Galleries</td>
<td></td>
</tr>
<tr>
<td>Agnph</td>
<td>https://agn.ph/</td>
<td>Posts, Tag Searches</td>
<td></td>
</tr>
<tr>
<td>Architizer</td>
<td>https://architizer.com/</td>

View File

@@ -22,6 +22,7 @@ modules = [
"8chan",
"8muses",
"adultempire",
"agnph",
"architizer",
"artstation",
"aryion",

View File

@@ -0,0 +1,83 @@
# -*- coding: utf-8 -*-
# Copyright 2024 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
"""Extractors for https://agn.ph/"""
from . import booru
from .. import text
from xml.etree import ElementTree
# Regex prefix matching the agn.ph host with an optional http(s) scheme.
# The extractor classes in this module build their 'pattern' attribute
# by appending a path regex to it.
BASE_PATTERN = r"(?:https?://)?agn\.ph"
class AgnphExtractor(booru.BooruExtractor):
    # Shared base for the agn.ph extractors: site constants plus helpers
    # for post normalization and for reading the site's XML API.
    category = "agnph"
    root = "https://agn.ph"
    page_start = 1
    per_page = 45

    def _prepare(self, post):
        """Normalize selected fields of 'post' in place

        Adds 'date' (parsed from 'created_at'), strips surrounding
        whitespace from 'status', and replaces 'has_children' with a bool
        telling whether its original value contained "true".
        """
        created = post["created_at"]
        post["date"] = text.parse_timestamp(created)

        status = post["status"]
        post["status"] = status.strip()

        children = post["has_children"]
        post["has_children"] = ("true" in children)

    def _xml_to_dict(self, xml):
        """Return a dict mapping each child element's tag to its text"""
        result = {}
        for child in xml:
            result[child.tag] = child.text
        return result

    def _pagination(self, url, params):
        """Yield one dict per post from consecutive XML result pages

        'params' is modified in place: 'api' is set to "xml" and 'page'
        is replaced by an integer that gets incremented after each request.
        Iteration ends once the root element's 'offset' attribute plus the
        number of its children reaches its 'count' attribute.
        """
        params["api"] = "xml"

        # a user-supplied page number is shifted relative to 'page_start'
        first_page = self.page_start
        if "page" in params:
            first_page = first_page + text.parse_int(params["page"]) - 1
        params["page"] = first_page

        while True:
            response = self.request(url, params=params)
            tree = ElementTree.fromstring(response.text)

            for entry in tree:
                yield self._xml_to_dict(entry)

            info = tree.attrib
            consumed = int(info["offset"]) + len(tree)
            if consumed >= int(info["count"]):
                return

            params["page"] += 1
class AgnphTagExtractor(AgnphExtractor):
    # Extractor for agn.ph gallery listings / tag searches.
    # The optional query string of the matched URL is captured by 'pattern'
    # and used as the request parameters.
    subcategory = "tag"
    directory_fmt = ("{category}", "{search_tags}")
    archive_fmt = "t_{search_tags}_{id}"
    pattern = BASE_PATTERN + r"/gallery/post/(?:\?([^#]+))?$"
    example = "https://agn.ph/gallery/post/?search=TAG"

    def __init__(self, match):
        super().__init__(match)
        query = self.groups[0]
        self.params = text.parse_query(query)

    def metadata(self):
        """Return the 'search' query value (or "") as 'search_tags'"""
        search = self.params.get("search")
        if not search:
            search = ""
        return {"search_tags": search}

    def posts(self):
        """Return a generator over all posts of the gallery listing"""
        url = self.root + "/gallery/post/"
        # hand over a copy, since _pagination() modifies its parameters
        params = self.params.copy()
        return self._pagination(url, params)
class AgnphPostExtractor(AgnphExtractor):
    # Extractor for a single agn.ph post, identified by the numeric ID
    # captured from the URL.
    subcategory = "post"
    archive_fmt = "{id}"
    pattern = BASE_PATTERN + r"/gallery/post/show/(\d+)"
    example = "https://agn.ph/gallery/post/show/12345/"

    def posts(self):
        """Fetch the post's XML document and return it as a 1-tuple"""
        url = "{}/gallery/post/show/{}/?api=xml".format(
            self.root, self.groups[0])
        response = self.request(url)
        element = ElementTree.fromstring(response.text)
        post = self._xml_to_dict(element)
        return (post,)

49
test/results/agnph.py Normal file
View File

@@ -0,0 +1,49 @@
# -*- coding: utf-8 -*-
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
from gallery_dl.extractor import agnph
# Expected results for the agnph extractors.
# Keys starting with '#' describe the test itself (input URL, extractor
# class, expected file URLs, ...); all other keys are expected metadata
# values of the extracted post.
__tests__ = (
# Tag search: only the shape of the file URLs and a minimum number of
# results are checked. The unescaped '..' in "#pattern" are regex
# wildcards standing in for the two 2-character directory names.
{
"#url" : "https://agn.ph/gallery/post/?search=azuu",
"#category": ("booru", "agnph", "tag"),
"#class" : agnph.AgnphTagExtractor,
"#pattern" : r"http://agn\.ph/gallery/data/../../\w{32}\.jpg",
"#count" : ">= 50",
},
# Single post: exact file URL, content hash, and full metadata.
# Values taken directly from the XML are strings or None;
# 'date' and 'has_children' are the fields converted by _prepare().
{
"#url" : "https://agn.ph/gallery/post/show/501604/",
"#category": ("booru", "agnph", "post"),
"#class" : agnph.AgnphPostExtractor,
"#urls" : "http://agn.ph/gallery/data/7d/a5/7da50021f3e86f6cf1c215652060d772.png",
"#sha1_content": "93c8b2d3f53e891ad8fa68d5f60f8c7a70acd836",
"artist" : "reyn_goldfur",
"created_at" : "1722041591",
"creator_id" : "-1",
"date" : "dt:2024-07-27 00:53:11",
"description" : None,
"fav_count" : "0",
"file_ext" : "png",
"file_url" : "http://agn.ph/gallery/data/7d/a5/7da50021f3e86f6cf1c215652060d772.png",
"has_children": False,
"height" : "1000",
"id" : "501604",
"md5" : "7da50021f3e86f6cf1c215652060d772",
"num_comments": "0",
"parent_id" : None,
"rating" : "e",
"source" : "https://inkbunny.net/s/2886519",
"status" : "approved",
"tags" : "anthro female hisuian_sneasel regional_form reyn_goldfur shelly_the_sneasel sneasel solo",
"thumbnail_url": "http://agn.ph/gallery/data/thumb/7d/a5/7da50021f3e86f6cf1c215652060d772.png",
"width" : "953",
},
)