[fanbox] add 'metadata' option (#4921)

extracts 'plan' and extended 'user' metadata
This commit is contained in:
Mike Fährmann
2024-01-04 15:01:33 +01:00
parent 00570028a3
commit 7eaf648f2e
3 changed files with 107 additions and 6 deletions

View File

@@ -1569,7 +1569,7 @@ Default
``false`` ``false``
Example Example
* ``notes,pools`` * ``notes,pools``
* ``["notes", "pools"`` * ``["notes", "pools"]``
Description Description
Extract additional metadata (notes, pool metadata) if available. Extract additional metadata (notes, pool metadata) if available.
@@ -1711,6 +1711,21 @@ Description
* ``false``: Ignore embeds. * ``false``: Ignore embeds.
extractor.fanbox.metadata
-------------------------
Type
* ``bool``
* ``string``
* ``list`` of ``strings``
Default
``false``
Example
* ``user,plan``
* ``["user", "plan"]``
Description
Extract ``plan`` and extended ``user`` metadata.
extractor.flickr.access-token & .access-token-secret extractor.flickr.access-token & .access-token-secret
---------------------------------------------------- ----------------------------------------------------
Type Type

View File

@@ -8,6 +8,7 @@
from .common import Extractor, Message from .common import Extractor, Message
from .. import text from .. import text
from ..cache import memcache
import re import re
BASE_PATTERN = ( BASE_PATTERN = (
@@ -27,8 +28,20 @@ class FanboxExtractor(Extractor):
_warning = True _warning = True
def _init(self): def _init(self):
self.headers = {"Origin": self.root}
self.embeds = self.config("embeds", True) self.embeds = self.config("embeds", True)
includes = self.config("metadata")
if includes:
if isinstance(includes, str):
includes = includes.split(",")
elif not isinstance(includes, (list, tuple)):
includes = ("user", "plan")
self._meta_user = ("user" in includes)
self._meta_plan = ("plan" in includes)
else:
self._meta_user = self._meta_plan = False
if self._warning: if self._warning:
if not self.cookies_check(("FANBOXSESSID",)): if not self.cookies_check(("FANBOXSESSID",)):
self.log.warning("no 'FANBOXSESSID' cookie set") self.log.warning("no 'FANBOXSESSID' cookie set")
@@ -43,11 +56,9 @@ class FanboxExtractor(Extractor):
"""Return all relevant post objects""" """Return all relevant post objects"""
def _pagination(self, url): def _pagination(self, url):
headers = {"Origin": self.root}
while url: while url:
url = text.ensure_http_scheme(url) url = text.ensure_http_scheme(url)
body = self.request(url, headers=headers).json()["body"] body = self.request(url, headers=self.headers).json()["body"]
for item in body["items"]: for item in body["items"]:
try: try:
yield self._get_post_data(item["id"]) yield self._get_post_data(item["id"])
@@ -58,9 +69,8 @@ class FanboxExtractor(Extractor):
def _get_post_data(self, post_id): def _get_post_data(self, post_id):
"""Fetch and process post data""" """Fetch and process post data"""
headers = {"Origin": self.root}
url = "https://api.fanbox.cc/post.info?postId="+post_id url = "https://api.fanbox.cc/post.info?postId="+post_id
post = self.request(url, headers=headers).json()["body"] post = self.request(url, headers=self.headers).json()["body"]
content_body = post.pop("body", None) content_body = post.pop("body", None)
if content_body: if content_body:
@@ -98,8 +108,47 @@ class FanboxExtractor(Extractor):
post["text"] = content_body.get("text") if content_body else None post["text"] = content_body.get("text") if content_body else None
post["isCoverImage"] = False post["isCoverImage"] = False
if self._meta_user:
post["user"] = self._get_user_data(post["creatorId"])
if self._meta_plan:
plans = self._get_plan_data(post["creatorId"])
post["plan"] = plans[post["feeRequired"]]
return content_body, post return content_body, post
@memcache(keyarg=1)
def _get_user_data(self, creator_id):
    """Fetch extended profile metadata for a creator.

    Requests 'creator.get' for 'creator_id' and returns the response
    "body" with its nested "user" object merged into the top level.

    NOTE(review): 'memcache(keyarg=1)' presumably caches the result
    per 'creator_id' -- confirm against the cache module.
    """
    response = self.request(
        "https://api.fanbox.cc/creator.get",
        params={"creatorId": creator_id},
        headers=self.headers,
    )
    profile = response.json()["body"]

    # Flatten: lift the fields of the nested "user" object into the
    # creator object itself and drop the now-redundant "user" key.
    nested = profile.pop("user")
    profile.update(nested)
    return profile
@memcache(keyarg=1)
def _get_plan_data(self, creator_id):
    """Return a creator's plans as a dict keyed by each plan's "fee".

    Requests 'plan.listCreator' for 'creator_id'. The result always
    contains a placeholder entry under fee 0 (empty id/title/description,
    'creatorId' set to the argument), so a lookup with a fee of 0
    succeeds even though it is not taken from the API response.
    Every plan object from the response "body" is stored under its
    own "fee" value after its embedded "user" object is removed.

    NOTE(review): 'memcache(keyarg=1)' presumably caches the result
    per 'creator_id' -- confirm against the cache module.
    """
    url = "https://api.fanbox.cc/plan.listCreator"
    params = {"creatorId": creator_id}
    data = self.request(url, params=params, headers=self.headers).json()

    # Placeholder for fee 0, mirroring the keys of a regular plan object.
    plans = {0: {
        "id"             : "",
        "title"          : "",
        "fee"            : 0,
        "description"    : "",
        "coverImageUrl"  : "",
        "creatorId"      : creator_id,
        "hasAdultContent": None,
        "paymentMethod"  : None,
    }}

    for plan in data["body"]:
        # Drop the embedded "user" object. pop() with a default is used
        # instead of 'del' so a plan without that key does not raise
        # KeyError and abort extraction over optional metadata.
        plan.pop("user", None)
        plans[plan["fee"]] = plan

    return plans
def _get_urls_from_post(self, content_body, post): def _get_urls_from_post(self, content_body, post):
num = 0 num = 0
cover_image = post.get("coverImageUrl") cover_image = post.get("coverImageUrl")

View File

@@ -86,6 +86,43 @@ __tests__ = (
"content": r"re:(?s)^Greetings from FANBOX.\n \nAs of Monday, September 5th, 2022, we are happy to announce the start of the FANBOX hashtag event #MySetupTour ! \nAbout the event\nTo join this event .+ \nPlease check this page for further details regarding the Privacy & Terms.\nhttps://fanbox.pixiv.help/.+/10184952456601\n\n\nThank you for your continued support of FANBOX.$", "content": r"re:(?s)^Greetings from FANBOX.\n \nAs of Monday, September 5th, 2022, we are happy to announce the start of the FANBOX hashtag event #MySetupTour ! \nAbout the event\nTo join this event .+ \nPlease check this page for further details regarding the Privacy & Terms.\nhttps://fanbox.pixiv.help/.+/10184952456601\n\n\nThank you for your continued support of FANBOX.$",
}, },
{
"#url" : "https://official-en.fanbox.cc/posts/7022572",
"#comment" : "'plan' and 'user' metadata (#4921)",
"#category": ("", "fanbox", "post"),
"#class" : fanbox.FanboxPostExtractor,
"#options" : {"metadata": True},
"plan": {
"coverImageUrl" : "",
"creatorId" : "official-en",
"description" : "",
"fee" : 0,
"hasAdultContent": None,
"id" : "",
"paymentMethod" : None,
"title" : "",
},
"user": {
"coverImageUrl" : "https://pixiv.pximg.net/c/1620x580_90_a2_g5/fanbox/public/images/creator/74349833/cover/n9mX8q4tUXHXXj7sK1RPWyUu.jpeg",
"creatorId" : "official-en",
"description" : "This is the official English pixivFANBOX account! \n(official Japanese account: https://official.fanbox.cc/ )\n\npixivFANBOX is a subscription service for building a reliable fan community where creators can nurture creative lifestyles together with their fans.\nFollowers can be notified of the updates from their favorite creators they are following. Supporters can enjoy closer communication with creators through exclusive content and their latest information.\n",
"hasAdultContent" : False,
"hasBoothShop" : False,
"iconUrl" : "https://pixiv.pximg.net/c/160x160_90_a2_g5/fanbox/public/images/user/74349833/icon/oJH0OoGoSixLrJXlnneNvC95.jpeg",
"isAcceptingRequest": False,
"isFollowed" : False,
"isStopped" : False,
"isSupported" : False,
"name" : "pixivFANBOX English",
"profileItems" : [],
"profileLinks" : [
"https://twitter.com/pixivfanbox",
],
"userId" : "74349833",
},
},
{ {
"#url" : "https://mochirong.fanbox.cc/posts/3746116", "#url" : "https://mochirong.fanbox.cc/posts/3746116",
"#comment" : "imageMap file order (#2718)", "#comment" : "imageMap file order (#2718)",