From 0466fcab4cd8841dfd14ce84a8bff9cc822123f6 Mon Sep 17 00:00:00 2001 From: hdk5 Date: Thu, 5 Dec 2024 17:38:57 +0200 Subject: [PATCH 1/2] [lofter]: add initial support --- gallery_dl/extractor/__init__.py | 1 + gallery_dl/extractor/lofter.py | 150 +++++++++++++++++++++++++++++++ 2 files changed, 151 insertions(+) create mode 100644 gallery_dl/extractor/lofter.py diff --git a/gallery_dl/extractor/__init__.py b/gallery_dl/extractor/__init__.py index 594ce41a..0201b781 100644 --- a/gallery_dl/extractor/__init__.py +++ b/gallery_dl/extractor/__init__.py @@ -98,6 +98,7 @@ modules = [ "lexica", "lightroom", "livedoor", + "lofter", "luscious", "lynxchan", "mangadex", diff --git a/gallery_dl/extractor/lofter.py b/gallery_dl/extractor/lofter.py new file mode 100644 index 00000000..bb167761 --- /dev/null +++ b/gallery_dl/extractor/lofter.py @@ -0,0 +1,150 @@ +# -*- coding: utf-8 -*- + +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License version 2 as +# published by the Free Software Foundation. + +"""Extractors for https://www.lofter.com/""" + +from .common import Extractor, Message +from .. import text, util, exception + + +class LofterExtractor(Extractor): + """Base class for lofter extractors""" + category = "lofter" + root = "https://www.lofter.com" + directory_fmt = ("{category}", "{blog_name}") + filename_fmt = "{id}_{num}.{extension}" + archive_fmt = "{id}_{num}" + + def _init(self): + self.api = LofterAPI(self) + + def items(self): + for post in self.posts(): + if "post" in post: + post = post["post"] + + post["blog_name"] = post["blogInfo"]["blogName"] + + post_type = post["type"] + image_urls = [] + + # Article + if post_type == 1: + content = post["content"] + image_urls = text.extract_iter(content, ' + r"www\.lofter\.com/front/blog/home-page/([\w-]+)|" + # https://.lofter.com/ + r"([\w-]+)\.lofter\.com" + r")") + example = "https://blog_name.lofter.com/" + + def posts(self): + blog_name = self.groups[0] or self.groups[1] + posts = self.api.blog_posts(blog_name) + return posts + + +class LofterAPI(): + def __init__(self, extractor): + self.extractor = extractor + + def _call(self, endpoint, data): + url = "https://api.lofter.com{}".format(endpoint) + params = { + 'product': 'lofter-android-7.9.10' + } + response = self.extractor.request( + url, method="POST", params=params, data=data) + info = response.json() + + if info["meta"]["status"] != 200: + self.extractor.log.debug("Server response: %s", info) + raise exception.StopExtraction("API request failed") + + return info + + def blog_posts(self, blog_name): + endpoint = "/v2.0/blogHomePage.api" + params = { + "method": "getPostLists", + "offset": 0, + "limit": 200, + "blogdomain": "{}.lofter.com".format(blog_name), + } + + while True: + data = self._call(endpoint, params) + posts = data["response"]["posts"] + + for post in posts: + yield post + + if params["offset"] + len(posts) < data["response"]["offset"]: + break + + params["offset"] = data["response"]["offset"] + + def post(self, blog_id, post_id): + endpoint = "/oldapi/post/detail.api" + params = { + "targetblogid": blog_id, + "postid": post_id, + } + data = self._call(endpoint, params) + posts = data["response"]["posts"] + post = posts[0] + return post From 717081dabd945359bca1665424930c784b7ac1af Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mike=20F=C3=A4hrmann?= Date: Wed, 11 Dec 2024 20:39:01 +0100 Subject: [PATCH 2/2] [lofter] update - add tests - update docs/supportedsites - provide 'date' metadata - simplify/restructure some code --- docs/supportedsites.md | 6 +++ gallery_dl/extractor/lofter.py | 67 ++++++++++++++++------------------ scripts/supportedsites.py | 4 ++ test/results/lofter.py | 59 ++++++++++++++++++++++++++++++ 4 files changed, 101 insertions(+), 35 deletions(-) create mode 100644 test/results/lofter.py diff --git a/docs/supportedsites.md b/docs/supportedsites.md index f779217e..a6b5cfb1 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -547,6 +547,12 @@ Consider all listed sites to potentially be NSFW. Blogs, Posts + + LOFTER + https://www.lofter.com/ + Blog Posts, Posts + + Luscious https://members.luscious.net/ diff --git a/gallery_dl/extractor/lofter.py b/gallery_dl/extractor/lofter.py index bb167761..412b6b9f 100644 --- a/gallery_dl/extractor/lofter.py +++ b/gallery_dl/extractor/lofter.py @@ -27,9 +27,8 @@ class LofterExtractor(Extractor): post = post["post"] post["blog_name"] = post["blogInfo"]["blogName"] - + post["date"] = text.parse_timestamp(post["publishTime"] // 1000) post_type = post["type"] - image_urls = [] # Article if post_type == 1: @@ -56,6 +55,7 @@ class LofterExtractor(Extractor): image_urls = [x.partition("?")[0] for x in image_urls] else: + image_urls = () self.log.warning( "%s: Unsupported post type '%s'.", post["id"], post_type) @@ -73,7 +73,7 @@ class LofterPostExtractor(LofterExtractor): """Extractor for a lofter post""" subcategory = "post" pattern = r"(?:https?://)?[\w-]+\.lofter\.com/post/([0-9a-f]+)_([0-9a-f]+)" - example = "https://blog_name.lofter.com/post/12345678_90abcdef" + example = "https://BLOG.lofter.com/post/12345678_90abcdef" def posts(self): blog_id, post_id = self.groups @@ -89,21 +89,39 @@ class LofterBlogPostsExtractor(LofterExtractor): r"www\.lofter\.com/front/blog/home-page/([\w-]+)|" # https://.lofter.com/ r"([\w-]+)\.lofter\.com" - r")") - example = "https://blog_name.lofter.com/" + r")/?(?:$|\?|#)") + example = "https://BLOG.lofter.com/" def posts(self): blog_name = self.groups[0] or self.groups[1] - posts = self.api.blog_posts(blog_name) - return posts + return self.api.blog_posts(blog_name) class LofterAPI(): + def __init__(self, extractor): self.extractor = extractor + def blog_posts(self, blog_name): + endpoint = "/v2.0/blogHomePage.api" + params = { + "method": "getPostLists", + "offset": 0, + "limit": 200, + "blogdomain": blog_name + ".lofter.com", + } + return self._pagination(endpoint, params) + + def post(self, blog_id, post_id): + endpoint = "/oldapi/post/detail.api" + params = { + "targetblogid": blog_id, + "postid": post_id, + } + return self._call(endpoint, params)["posts"][0] + def _call(self, endpoint, data): - url = "https://api.lofter.com{}".format(endpoint) + url = "https://api.lofter.com" + endpoint params = { 'product': 'lofter-android-7.9.10' } @@ -115,36 +133,15 @@ class LofterAPI(): self.extractor.log.debug("Server response: %s", info) raise exception.StopExtraction("API request failed") - return info - - def blog_posts(self, blog_name): - endpoint = "/v2.0/blogHomePage.api" - params = { - "method": "getPostLists", - "offset": 0, - "limit": 200, - "blogdomain": "{}.lofter.com".format(blog_name), - } + return info["response"] + def _pagination(self, endpoint, params): while True: data = self._call(endpoint, params) - posts = data["response"]["posts"] + posts = data["posts"] - for post in posts: - yield post + yield from posts - if params["offset"] + len(posts) < data["response"]["offset"]: + if params["offset"] + len(posts) < data["offset"]: break - - params["offset"] = data["response"]["offset"] - - def post(self, blog_id, post_id): - endpoint = "/oldapi/post/detail.api" - params = { - "targetblogid": blog_id, - "postid": post_id, - } - data = self._call(endpoint, params) - posts = data["response"]["posts"] - post = posts[0] - return post + params["offset"] = data["offset"] diff --git a/scripts/supportedsites.py b/scripts/supportedsites.py index aafdb0d2..a1012665 100755 --- a/scripts/supportedsites.py +++ b/scripts/supportedsites.py @@ -86,6 +86,7 @@ CATEGORY_MAP = { "kemonoparty" : "Kemono", "koharu" : "SchaleNetwork", "livedoor" : "livedoor Blog", + "lofter" : "LOFTER", "ohpolly" : "Oh Polly", "omgmiamiswimwear": "Omg Miami Swimwear", "mangadex" : "MangaDex", @@ -265,6 +266,9 @@ SUBCATEGORY_MAP = { "lensdump": { "albums": "", }, + "lofter": { + "blog-posts": "Blog Posts", + }, "mangadex": { "feed" : "Followed Feed", }, diff --git a/test/results/lofter.py b/test/results/lofter.py new file mode 100644 index 00000000..99a8f570 --- /dev/null +++ b/test/results/lofter.py @@ -0,0 +1,59 @@ +# -*- coding: utf-8 -*- + +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License version 2 as +# published by the Free Software Foundation. + +from gallery_dl.extractor import lofter + + +__tests__ = ( +{ + "#url" : "https://gengar563.lofter.com/post/1e82da8c_1c98dae1b", + "#class": lofter.LofterPostExtractor, + "#urls" : ( + "https://imglf3.lf127.net/img/S1d2QlVsWkJhSW1qcnpIS0ZSa3ZJQ1RxY0lYaU1UUE9tQ0NvUE9rVXFpOFFEVzMwbnQ4aEFnPT0.jpg", + "https://imglf3.lf127.net/img/S1d2QlVsWkJhSW1qcnpIS0ZSa3ZJRWlXYTRVOEpXTU9TSGt3TjBDQ0JFZVpZMEJtWjFneVNBPT0.png", + "https://imglf6.lf127.net/img/S1d2QlVsWkJhSW1qcnpIS0ZSa3ZJR1d3Y2VvbTNTQlIvdFU1WWlqZHEzbjI4MFVNZVdoN3VBPT0.png", + "https://imglf6.lf127.net/img/S1d2QlVsWkJhSW1qcnpIS0ZSa3ZJTi83NDRDUjNvd3hySGxEZFovd2hwbi9oaG9NQ1hOUkZ3PT0.png", + "https://imglf4.lf127.net/img/S1d2QlVsWkJhSW1qcnpIS0ZSa3ZJUFczb2RKSVlpMHJkNy9kc3BSQVQvQm5DNzB4eVhxay9nPT0.png", + "https://imglf4.lf127.net/img/S1d2QlVsWkJhSW1qcnpIS0ZSa3ZJSStJZE9RYnJURktHazdIVHNNMjQ5eFJldHVTQy9XbDB3PT0.png", + "https://imglf3.lf127.net/img/S1d2QlVsWkJhSW1qcnpIS0ZSa3ZJSzFCWFlnUWgzb01DcUdpT1lreG5yQjJVMkhGS09HNGR3PT0.png", + ), + + "blog_name": "gengar563", + "content" : "

发了三次发不出有毒……

\n

二部运动au  性转ac注意

\n

失去耐心.jpg

", + "date" : "dt:2020-06-04 12:51:42", + "id" : 7676472859, +}, + +{ + "#url" : "https://wooden-brain.lofter.com/post/1e60de5b_1c9bf8efb", + "#comment": "video", + "#class" : lofter.LofterPostExtractor, + "#urls" : ( + "https://vodm2lzexwq.vod.126.net/vodm2lzexwq/Pc5jg1nL_3039990631_sd.mp4?resId=254486990bfa2cd7aa860229db639341_3039990631_1&sign=4j02HTHXqNfhaF%2B%2FO14Ny%2F9SMNZj%2FIjpJDCqXfYa4aM%3D", + ), + + "blog_name": "wooden-brain", + "date" : "dt:2020-06-24 11:01:59", + "id" : 7679741691, +}, + +{ + "#url" : "https://gengar563.lofter.com/", + "#class": lofter.LofterBlogPostsExtractor, + "#range": "1-25", + "#count": 25, + + "blog_name": "gengar563", + "date" : "type:datetime", + "id" : int, +}, + +{ + "#url" : "https://www.lofter.com/front/blog/home-page/gengar563", + "#class": lofter.LofterBlogPostsExtractor, +}, + +)