# -*- coding: utf-8 -*-

# Copyright 2018-2020 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.

"""Extractors for https://idol.sankakucomplex.com/"""

from .sankaku import SankakuExtractor
from .common import Message
from .. import text, util, exception
import collections
import random
import time
import re


class IdolcomplexExtractor(SankakuExtractor):
    """Base class for idolcomplex extractors"""
    category = "idolcomplex"
    # Host name; also used to build 'root' below.
    cookiedomain = "idol.sankakucomplex.com"
    # Base URL that post pages are requested from.
    root = "https://" + cookiedomain
    subdomain = "idol"

    def __init__(self, match):
        """Initialize extractor state and read its config options.

        'match' is forwarded unchanged to SankakuExtractor.__init__().

        Config options read here:
          tags     -- stored as self.extags (default: False)
          wait-min -- stored as self.wait_min (default: 3.0)
          wait-max -- stored as self.wait_max (default: 6.0)
        """
        SankakuExtractor.__init__(self, match)
        self.logged_in = True
        self.start_page = 1
        self.start_post = 0
        self.extags = self.config("tags", False)
        self.wait_min = self.config("wait-min", 3.0)
        self.wait_max = self.config("wait-max", 6.0)
        # Never let the upper bound fall below the lower bound.
        if self.wait_max < self.wait_min:
            self.wait_max = self.wait_min

    def items(self):
        """Yield a Directory and a Url message for every relevant post.

        login(), metadata() and wait() are not defined in the visible
        part of this file -- presumably inherited or defined further
        down; verify against the full source.
        """
        self.login()
        data = self.metadata()

        # Presumably skips the first 'start_post' ids (see skip()) --
        # confirm against util.advance().
        for post_id in util.advance(self.post_ids(), self.start_post):
            self.wait()
            post = self._parse_post(post_id)
            url = post["file_url"]
            # Values from metadata() overwrite same-named post fields.
            post.update(data)
            # Presumably adds filename/extension fields derived from
            # 'url' to 'post' -- confirm against text.nameext_from_url().
            text.nameext_from_url(url, post)
            yield Message.Directory, post
            yield Message.Url, url, post

    def skip(self, num):
        """Add 'num' to the start offset used by items(); return 'num'."""
        self.start_post += num
        return num

    def post_ids(self):
        """Return an iterable containing all relevant post ids"""

    def _parse_post(self, post_id):
        """Extract metadata of a single post

        'post_id' has to be a string; it is appended to the post URL.
        """
        url = self.root + "/post/show/" + post_id
        page = self.request(url, retries=10).text
        extr = text.extract

        # NOTE(review): the source text is cut off at this point, in the
        # middle of the statement reproduced verbatim below. The rest of
        # this method (and of the file) is missing and has to be restored
        # from the original; until then this method returns None.
        #
        #   tags , pos = extr(page, "