[fanbox] add 'metadata' option (#4921)

extracts 'plan' and extended 'user' metadata
This commit is contained in:
Mike Fährmann
2024-01-04 15:01:33 +01:00
parent 00570028a3
commit 7eaf648f2e
3 changed files with 107 additions and 6 deletions

View File

@@ -1569,7 +1569,7 @@ Default
``false``
Example
* ``notes,pools``
* ``["notes", "pools"``
* ``["notes", "pools"]``
Description
Extract additional metadata (notes, pool metadata) if available.
@@ -1711,6 +1711,21 @@ Description
* ``false``: Ignore embeds.
extractor.fanbox.metadata
-------------------------
Type
* ``bool``
* ``string``
* ``list`` of ``strings``
Default
``false``
Example
* ``user,plan``
* ``["user", "plan"]``
Description
Extract ``plan`` and extended ``user`` metadata.
``true`` enables both; a comma-separated string or a list of strings selects individual types.
extractor.flickr.access-token & .access-token-secret
----------------------------------------------------
Type

View File

@@ -8,6 +8,7 @@
from .common import Extractor, Message
from .. import text
from ..cache import memcache
import re
BASE_PATTERN = (
@@ -27,8 +28,20 @@ class FanboxExtractor(Extractor):
_warning = True
def _init(self):
self.headers = {"Origin": self.root}
self.embeds = self.config("embeds", True)
includes = self.config("metadata")
if includes:
if isinstance(includes, str):
includes = includes.split(",")
elif not isinstance(includes, (list, tuple)):
includes = ("user", "plan")
self._meta_user = ("user" in includes)
self._meta_plan = ("plan" in includes)
else:
self._meta_user = self._meta_plan = False
if self._warning:
if not self.cookies_check(("FANBOXSESSID",)):
self.log.warning("no 'FANBOXSESSID' cookie set")
@@ -43,11 +56,9 @@ class FanboxExtractor(Extractor):
"""Return all relevant post objects"""
def _pagination(self, url):
headers = {"Origin": self.root}
while url:
url = text.ensure_http_scheme(url)
body = self.request(url, headers=headers).json()["body"]
body = self.request(url, headers=self.headers).json()["body"]
for item in body["items"]:
try:
yield self._get_post_data(item["id"])
@@ -58,9 +69,8 @@ class FanboxExtractor(Extractor):
def _get_post_data(self, post_id):
"""Fetch and process post data"""
headers = {"Origin": self.root}
url = "https://api.fanbox.cc/post.info?postId="+post_id
post = self.request(url, headers=headers).json()["body"]
post = self.request(url, headers=self.headers).json()["body"]
content_body = post.pop("body", None)
if content_body:
@@ -98,8 +108,47 @@ class FanboxExtractor(Extractor):
post["text"] = content_body.get("text") if content_body else None
post["isCoverImage"] = False
if self._meta_user:
post["user"] = self._get_user_data(post["creatorId"])
if self._meta_plan:
plans = self._get_plan_data(post["creatorId"])
post["plan"] = plans[post["feeRequired"]]
return content_body, post
@memcache(keyarg=1)
def _get_user_data(self, creator_id):
    """Fetch creator metadata and flatten its nested 'user' object"""
    response = self.request(
        "https://api.fanbox.cc/creator.get",
        params={"creatorId": creator_id},
        headers=self.headers,
    )
    creator = response.json()["body"]

    # lift the fields of the nested 'user' object to the top level;
    # on a name clash the nested value replaces the top-level one
    nested = creator.pop("user")
    creator.update(nested)
    return creator
@memcache(keyarg=1)
def _get_plan_data(self, creator_id):
    """Return a creator's plans as a dict mapping 'fee' to plan object"""
    response = self.request(
        "https://api.fanbox.cc/plan.listCreator",
        params={"creatorId": creator_id},
        headers=self.headers,
    )
    data = response.json()

    # placeholder entry for fee 0; it gets replaced if the API response
    # itself lists a plan with a fee of 0
    free_plan = {
        "id"             : "",
        "title"          : "",
        "fee"            : 0,
        "description"    : "",
        "coverImageUrl"  : "",
        "creatorId"      : creator_id,
        "hasAdultContent": None,
        "paymentMethod"  : None,
    }
    plans_by_fee = {0: free_plan}

    for entry in data["body"]:
        # drop the 'user' object embedded in each plan
        del entry["user"]
        plans_by_fee[entry["fee"]] = entry

    return plans_by_fee
def _get_urls_from_post(self, content_body, post):
num = 0
cover_image = post.get("coverImageUrl")

View File

@@ -86,6 +86,43 @@ __tests__ = (
"content": r"re:(?s)^Greetings from FANBOX.\n \nAs of Monday, September 5th, 2022, we are happy to announce the start of the FANBOX hashtag event #MySetupTour ! \nAbout the event\nTo join this event .+ \nPlease check this page for further details regarding the Privacy & Terms.\nhttps://fanbox.pixiv.help/.+/10184952456601\n\n\nThank you for your continued support of FANBOX.$",
},
{
"#url" : "https://official-en.fanbox.cc/posts/7022572",
"#comment" : "'plan' and 'user' metadata (#4921)",
"#category": ("", "fanbox", "post"),
"#class" : fanbox.FanboxPostExtractor,
"#options" : {"metadata": True},
"plan": {
"coverImageUrl" : "",
"creatorId" : "official-en",
"description" : "",
"fee" : 0,
"hasAdultContent": None,
"id" : "",
"paymentMethod" : None,
"title" : "",
},
"user": {
"coverImageUrl" : "https://pixiv.pximg.net/c/1620x580_90_a2_g5/fanbox/public/images/creator/74349833/cover/n9mX8q4tUXHXXj7sK1RPWyUu.jpeg",
"creatorId" : "official-en",
"description" : "This is the official English pixivFANBOX account! \n(official Japanese account: https://official.fanbox.cc/ )\n\npixivFANBOX is a subscription service for building a reliable fan community where creators can nurture creative lifestyles together with their fans.\nFollowers can be notified of the updates from their favorite creators they are following. Supporters can enjoy closer communication with creators through exclusive content and their latest information.\n",
"hasAdultContent" : False,
"hasBoothShop" : False,
"iconUrl" : "https://pixiv.pximg.net/c/160x160_90_a2_g5/fanbox/public/images/user/74349833/icon/oJH0OoGoSixLrJXlnneNvC95.jpeg",
"isAcceptingRequest": False,
"isFollowed" : False,
"isStopped" : False,
"isSupported" : False,
"name" : "pixivFANBOX English",
"profileItems" : [],
"profileLinks" : [
"https://twitter.com/pixivfanbox",
],
"userId" : "74349833",
},
},
{
"#url" : "https://mochirong.fanbox.cc/posts/3746116",
"#comment" : "imageMap file order (#2718)",