diff --git a/docs/supportedsites.rst b/docs/supportedsites.rst index 7e1c8de6..68c61c76 100644 --- a/docs/supportedsites.rst +++ b/docs/supportedsites.rst @@ -136,6 +136,7 @@ Imxto https://imx.to/ individual Images Pixhost https://pixhost.to/ individual Images Postimg https://postimages.org/ individual Images Turboimagehost https://www.turboimagehost.com/ individual Images +かべうち https://kabe-uchiroom.com/ User Profiles もえぴりあ https://vanilla-rock.com/ Posts, Tag-Searches 半次元 https://bcy.net/ Posts, User Profiles ==================== =================================== ================================================== ================ diff --git a/gallery_dl/extractor/__init__.py b/gallery_dl/extractor/__init__.py index dc52959a..74c553d7 100644 --- a/gallery_dl/extractor/__init__.py +++ b/gallery_dl/extractor/__init__.py @@ -54,6 +54,7 @@ modules = [ "imgur", "instagram", "issuu", + "kabeuchi", "keenspot", "khinsider", "kissmanga", diff --git a/gallery_dl/extractor/kabeuchi.py b/gallery_dl/extractor/kabeuchi.py new file mode 100644 index 00000000..a8702f14 --- /dev/null +++ b/gallery_dl/extractor/kabeuchi.py @@ -0,0 +1,92 @@ +# -*- coding: utf-8 -*- + +# Copyright 2020 Mike Fährmann +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License version 2 as +# published by the Free Software Foundation. + +"""Extractors for https://kabe-uchiroom.com/""" + +from .common import Extractor, Message +from .. import text, exception + + +class KabeuchiUserExtractor(Extractor): + """Extractor for all posts of a user on kabe-uchiroom.com""" + category = "kabeuchi" + subcategory = "user" + directory_fmt = ("{category}", "{twitter_user_id} {twitter_id}") + filename_fmt = "{id}_{num:>02}{title:?_//}.{extension}" + archive_fmt = "{id}_{num}" + root = "https://kabe-uchiroom.com" + pattern = r"(?:https?://)?kabe-uchiroom\.com/mypage/?\?id=(\d+)" + test = ( + ("https://kabe-uchiroom.com/mypage/?id=919865303848255493", { + "pattern": (r"https://kabe-uchiroom\.com/accounts/upfile/3/" + r"919865303848255493/\w+\.jpe?g"), + "count": ">= 24", + }), + ("https://kabe-uchiroom.com/mypage/?id=123456789", { + "exception": exception.NotFoundError, + }), + ) + + def __init__(self, match): + Extractor.__init__(self, match) + self.user_id = match.group(1) + + def items(self): + base = "{}/accounts/upfile/{}/{}/".format( + self.root, self.user_id[-1], self.user_id) + keys = ("image1", "image2", "image3", "image4", "image5", "image6") + + for post in self.posts(): + if post.get("is_ad") or not post["image1"]: + continue + + post["date"] = text.parse_datetime( + post["created_at"], "%Y-%m-%d %H:%M:%S") + yield Message.Directory, post + + for key in keys: + name = post[key] + if not name: + break + url = base + name + post["num"] = ord(key[-1]) - 48 + yield Message.Url, url, text.nameext_from_url(name, post) + + def posts(self): + url = "{}/mypage/?id={}".format(self.root, self.user_id) + response = self.request(url) + if response.history and response.url == self.root + "/": + raise exception.NotFoundError("user") + target_id = text.extract(response.text, 'user_friend_id = "', '"')[0] + return self._pagination(target_id) + + def _pagination(self, target_id): + url = "{}/get_posts.php".format(self.root) + data = { + "user_id" : "0", + "target_id" : target_id, + "type" : "uploads", + "sort_type" : "0", + "category_id": "all", + "latest_post": "", + "page_num" : 0, + } + + while True: + info = self.request(url, method="POST", data=data).json() + datas = info["datas"] + + if not datas or not isinstance(datas, list): + return + yield from datas + + last_id = datas[-1]["id"] + if last_id == info["last_data"]: + return + data["latest_post"] = last_id + data["page_num"] += 1 diff --git a/scripts/supportedsites.py b/scripts/supportedsites.py index 0e6e6b30..947b670f 100755 --- a/scripts/supportedsites.py +++ b/scripts/supportedsites.py @@ -45,6 +45,7 @@ CATEGORY_MAP = { "imgth" : "imgth", "imgur" : "imgur", "jaiminisbox" : "Jaimini's Box", + "kabeuchi" : "かべうち", "kireicake" : "Kirei Cake", "kissmanga" : "KissManga", "lineblog" : "LINE BLOG",