class SexcomSearchExtractor(SexcomExtractor):
    """Extractor for search results on www.sex.com

    Two URL layouts are matched, each optionally followed by a query
    string: "/<type>s/<term>" and "/search/<type>s", where <type> is one
    of "pic", "gif" or "video".
    """
    subcategory = "search"
    directory_fmt = ("{category}", "search", "{search[query]}")
    # Capture groups of the pattern below:
    #   1 - everything after the host (path plus optional query string)
    #   2 - media type of a "/<type>s/<term>" URL
    #   3 - <term> of a "/<type>s/<term>" URL
    #   4 - media type of a "/search/<type>s" URL
    #   5 - query string, without the leading "?"
    pattern = (r"(?:https?://)?(?:www\.)?sex\.com/((?:"
               r"(pic|gif|video)s/([^/?&#]+)|search/(pic|gif|video)s"
               r")/?(?:\?([^#]+))?)")
    test = (
        ("https://www.sex.com/search/pics?query=ecchi", {
            "range": "1-10",
            "count": 10,
        }),
        ("https://www.sex.com/videos/hentai/", {
            "range": "1-10",
            "count": 10,
        }),
    )

    def __init__(self, match):
        """Store the request path and build the 'search' metadata dict"""
        SexcomExtractor.__init__(self, match)
        path, path_type, path_term, search_type, query_string = \
            match.group(1, 2, 3, 4, 5)
        self.path = path

        params = text.parse_query(query_string)
        # Only one of the two type groups can have matched, since they
        # belong to different branches of the alternation.
        params["type"] = path_type or search_type
        # An explicit "query" URL parameter wins; otherwise fall back to
        # the term taken from the path (empty string if there is none).
        params.setdefault("query", path_term or "")
        self.search = params

    def metadata(self):
        """Return the search parameters under the 'search' key"""
        return {"search": self.search}

    def pins(self):
        """Return the paginated results for the matched search path"""
        return self._pagination("{}/{}".format(self.root, self.path))