From f72318e593c399add259997400d31f4f8d7af258 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mike=20F=C3=A4hrmann?= Date: Mon, 13 Nov 2017 20:46:24 +0100 Subject: [PATCH] [seiga] support more than 200 images Due to API restrictions and/or missing knowledge about and documentation of API usage, it was only possible to retrieve the latest 200 images of a niconico seiga user with said API. The new approach manually visits each HTML page and gets its information from there. --- gallery_dl/extractor/seiga.py | 46 ++++++++++++++++++----------------- test/test_extractors.py | 1 + 2 files changed, 25 insertions(+), 22 deletions(-) diff --git a/gallery_dl/extractor/seiga.py b/gallery_dl/extractor/seiga.py index 4d917b2e..d0de92d9 100644 --- a/gallery_dl/extractor/seiga.py +++ b/gallery_dl/extractor/seiga.py @@ -11,7 +11,6 @@ from .common import Extractor, Message from .. import text, exception from ..cache import cache -from xml.etree import ElementTree class SeigaExtractor(Extractor): @@ -74,11 +73,12 @@ class SeigaUserExtractor(SeigaExtractor): r"user/illust/(\d+)")] test = [ ("http://seiga.nicovideo.jp/user/illust/39537793", { - "keyword": "a716bf534b4191dc58ddbff51494b72a9cf58285", + "pattern": r"https://lohas\.nicoseiga\.jp/priv/[0-9a-f]+/\d+/\d+", + "count": 2, }), ("http://seiga.nicovideo.jp/user/illust/79433", { "url": "da39a3ee5e6b4b0d3255bfef95601890afd80709", - "keyword": "187b77728381d072466af7f7ebcc479a0830ce25", + "count": 0, }), ] @@ -90,25 +90,27 @@ class SeigaUserExtractor(SeigaExtractor): return {"user_id": self.user_id} def get_images(self): - keymap = {0: "image_id", 2: "title", 3: "description", - 7: "summary", 8: "genre", 18: "date"} - url = "http://seiga.nicovideo.jp/api/user/data?id=" + self.user_id - response = self.request(url) - try: - root = ElementTree.fromstring(response.text) - except ElementTree.ParseError: - self.log.debug("xml parsing error; removing control characters") - xmldata = text.clean_xml(response.text) - root = ElementTree.fromstring(xmldata) - if root[0].text == "0": - return [] - return [ - { - key: image[index].text - for index, key in keymap.items() - } - for image in root[1] - ] + url = "http://seiga.nicovideo.jp/user/illust/" + self.user_id + params = {"target": "illust_all", "page": 1} + + while True: + cnt = 0 + page = self.request(url, params=params).text + + for info in text.extract_iter( + page, '
  • ', '
  • '), + ("views" , '', ''), + ("comments", '', ''), + ("clips" , '', ''), + ))[0] + cnt += 1 + + if cnt < 40: + return + params["page"] += 1 class SeigaImageExtractor(SeigaExtractor): diff --git a/test/test_extractors.py b/test/test_extractors.py index 791d7c27..ad268eeb 100644 --- a/test/test_extractors.py +++ b/test/test_extractors.py @@ -83,6 +83,7 @@ skip = [ "archivedmoe", "archiveofsins", "thebarchive", # temporary issues "mangazuki", + "hentaifoundry", # invalid SSL cert ] # enable selective testing for direct calls if __name__ == '__main__' and len(sys.argv) > 1: