From 952fcb1b8bf3bc1c2a1642e01f79a3e48f4136c9 Mon Sep 17 00:00:00 2001 From: missionfloyd Date: Wed, 30 Jul 2025 11:38:33 -0600 Subject: [PATCH] [xasiat] add support (#4161 #5929 #7934) * [xasiat] Album extractor * [xasiat] Tags, categories, Models * [xasiat] Tests * update 'album' extractor - provide 'album_id' metadata - use redirected 'album_url' value - update metadata extraction in general * extend test result data --- docs/supportedsites.md | 6 ++ gallery_dl/extractor/__init__.py | 1 + gallery_dl/extractor/xasiat.py | 103 +++++++++++++++++++++++++++++++ test/results/xasiat.py | 85 +++++++++++++++++++++++++ 4 files changed, 195 insertions(+) create mode 100644 gallery_dl/extractor/xasiat.py create mode 100644 test/results/xasiat.py diff --git a/docs/supportedsites.md b/docs/supportedsites.md index 2330928f..2118af66 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -1175,6 +1175,12 @@ Consider all listed sites to potentially be NSFW. Galleries + + Xasiat + https://www.xasiat.com + Albums, Categories, Models, Tag Searches + + Xfolio https://xfolio.jp/ diff --git a/gallery_dl/extractor/__init__.py b/gallery_dl/extractor/__init__.py index 804fb02d..70e79fe9 100644 --- a/gallery_dl/extractor/__init__.py +++ b/gallery_dl/extractor/__init__.py @@ -211,6 +211,7 @@ modules = [ "wikiart", "wikifeet", "wikimedia", + "xasiat", "xfolio", "xhamster", "xvideos", diff --git a/gallery_dl/extractor/xasiat.py b/gallery_dl/extractor/xasiat.py new file mode 100644 index 00000000..6aa31685 --- /dev/null +++ b/gallery_dl/extractor/xasiat.py @@ -0,0 +1,103 @@ +# -*- coding: utf-8 -*- + +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License version 2 as +# published by the Free Software Foundation. + +"""Extractors for https://www.xasiat.com""" + +from .common import Extractor, Message +from .. import text, util +import time + +BASE_PATTERN = r"(?:https?://)?(?:www\.)?xasiat\.com((?:/fr|/ja)?/albums" + + +class XasiatExtractor(Extractor): + category = "xasiat" + directory_fmt = ("{category}", "{title}") + archive_fmt = "{album_url}_{num}" + root = "https://www.xasiat.com" + + def items(self): + data = {"_extractor": XasiatAlbumExtractor} + for url in self.posts(): + yield Message.Queue, url, data + + def posts(self): + return self._pagination(self.groups[0]) + + def _pagination(self, path, pnum=1): + url = f"{self.root}{path}/" + find_posts = util.re(r'class="item ">\s*Next" in page: + return + + pnum += 1 + + +class XasiatAlbumExtractor(XasiatExtractor): + subcategory = "album" + pattern = BASE_PATTERN + r"/(\d+)/[^/?#]+)" + example = "https://www.xasiat.com/albums/12345/TITLE/" + + def items(self): + path, album_id = self.groups + url = f"{self.root}{path}/" + response = self.request(url) + extr = text.extract_from(response.text) + + title = extr("

", "<") + info = extr('class="info-content"', "") + images = extr('class="images"', "") + + urls = list(text.extract_iter(images, 'href="', '"')) + + data = { + "title": text.unescape(title), + "model": util.re( + r'top_models1">\s*(.+)\s*\s*(.+)\s*\s*(.+)\s*