diff --git a/docs/supportedsites.rst b/docs/supportedsites.rst
index 4bb124d7..2f40592d 100644
--- a/docs/supportedsites.rst
+++ b/docs/supportedsites.rst
@@ -87,7 +87,7 @@ rule #34 https://rule34.paheal.net/ Posts, Tag-Searches
 Rule 34 https://rule34.xxx/ Pools, Posts, Tag-Searches
 Safebooru https://safebooru.org/ Pools, Posts, Tag-Searches
 Sankaku Channel https://chan.sankakucomplex.com/ Pools, Posts, Tag-Searches Optional
-Sankaku Complex https://www.sankakucomplex.com/ Articles
+Sankaku Complex https://www.sankakucomplex.com/ Articles, Tag-Searches
 Sen Manga https://raw.senmanga.com/ Chapters
 Sense-Scans http://sensescans.com/reader/ Chapters, Manga
 Sex.com https://www.sex.com/ Boards, Pins, Search Results
diff --git a/gallery_dl/extractor/sankakucomplex.py b/gallery_dl/extractor/sankakucomplex.py
index 3d7c1dfb..3b91f05f 100644
--- a/gallery_dl/extractor/sankakucomplex.py
+++ b/gallery_dl/extractor/sankakucomplex.py
@@ -84,3 +84,44 @@ class SankakucomplexArticleExtractor(SankakucomplexExtractor):
                     "num" : num,
                 }))
                 urls.add(url)
+
+
+class SankakucomplexTagExtractor(SankakucomplexExtractor):
+    """Extractor for sankakucomplex blog articles by tag, category or author"""
+    subcategory = "tag"
+    pattern = (r"(?:https?://)?www\.sankakucomplex\.com"
+               r"/((?:tag|category|author)/[^/&?#]+)")
+    test = (
+        ("https://www.sankakucomplex.com/tag/cosplay/", {
+            "range": "1-50",
+            "pattern": SankakucomplexArticleExtractor.pattern,
+        }),
+        ("https://www.sankakucomplex.com/category/anime/"),
+        ("https://www.sankakucomplex.com/author/rift/page/5/"),
+    )
+
+    def items(self):
+        """Queue every article linked from the paginated listing.
+
+        Requests page 1, 2, ... of the listing and yields one
+        Message.Queue entry per 'data-direct' URL, delegating each to
+        SankakucomplexArticleExtractor. Stops at the first 404 response.
+        """
+        pnum = 1
+        last = None
+        data = {"_extractor": SankakucomplexArticleExtractor}
+
+        while True:
+            # self.root/self.path: presumably set by the base class -- confirm
+            url = "{}/{}/page/{}/".format(self.root, self.path, pnum)
+            response = self.request(url, expect=(404,))
+            if response.status_code == 404:
+                # treat a 404 as the end of the listing
+                return
+            page = response.text
+            for article in text.extract_iter(page, 'data-direct="', '"'):
+                # skip a link that repeats the one just queued
+                if article != last:
+                    last = article
+                    yield Message.Queue, article, data
+            pnum += 1