[fanbox] add 'metadata' option (#4921)
extracts 'plan' and extended 'user' metadata
This commit is contained in:
@@ -1569,7 +1569,7 @@ Default
|
|||||||
``false``
|
``false``
|
||||||
Example
|
Example
|
||||||
* ``notes,pools``
|
* ``notes,pools``
|
||||||
* ``["notes", "pools"``
|
* ``["notes", "pools"]``
|
||||||
Description
|
Description
|
||||||
Extract additional metadata (notes, pool metadata) if available.
|
Extract additional metadata (notes, pool metadata) if available.
|
||||||
|
|
||||||
@@ -1711,6 +1711,21 @@ Description
|
|||||||
* ``false``: Ignore embeds.
|
* ``false``: Ignore embeds.
|
||||||
|
|
||||||
|
|
||||||
|
extractor.fanbox.metadata
|
||||||
|
-------------------------
|
||||||
|
Type
|
||||||
|
* ``bool``
|
||||||
|
* ``string``
|
||||||
|
* ``list`` of ``strings``
|
||||||
|
Default
|
||||||
|
``false``
|
||||||
|
Example
|
||||||
|
* ``user,plan``
|
||||||
|
* ``["user", "plan"]``
|
||||||
|
Description
|
||||||
|
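Extract ``plan`` and extended ``user`` metadata. ``true`` extracts both; a comma-separated string or a list of strings selects which of the two to extract.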
|
||||||
|
|
||||||
|
|
||||||
extractor.flickr.access-token & .access-token-secret
|
extractor.flickr.access-token & .access-token-secret
|
||||||
----------------------------------------------------
|
----------------------------------------------------
|
||||||
Type
|
Type
|
||||||
|
|||||||
@@ -8,6 +8,7 @@
|
|||||||
|
|
||||||
from .common import Extractor, Message
|
from .common import Extractor, Message
|
||||||
from .. import text
|
from .. import text
|
||||||
|
from ..cache import memcache
|
||||||
import re
|
import re
|
||||||
|
|
||||||
BASE_PATTERN = (
|
BASE_PATTERN = (
|
||||||
@@ -27,8 +28,20 @@ class FanboxExtractor(Extractor):
|
|||||||
_warning = True
|
_warning = True
|
||||||
|
|
||||||
def _init(self):
|
def _init(self):
|
||||||
|
self.headers = {"Origin": self.root}
|
||||||
self.embeds = self.config("embeds", True)
|
self.embeds = self.config("embeds", True)
|
||||||
|
|
||||||
|
includes = self.config("metadata")
|
||||||
|
if includes:
|
||||||
|
if isinstance(includes, str):
|
||||||
|
includes = includes.split(",")
|
||||||
|
elif not isinstance(includes, (list, tuple)):
|
||||||
|
includes = ("user", "plan")
|
||||||
|
self._meta_user = ("user" in includes)
|
||||||
|
self._meta_plan = ("plan" in includes)
|
||||||
|
else:
|
||||||
|
self._meta_user = self._meta_plan = False
|
||||||
|
|
||||||
if self._warning:
|
if self._warning:
|
||||||
if not self.cookies_check(("FANBOXSESSID",)):
|
if not self.cookies_check(("FANBOXSESSID",)):
|
||||||
self.log.warning("no 'FANBOXSESSID' cookie set")
|
self.log.warning("no 'FANBOXSESSID' cookie set")
|
||||||
@@ -43,11 +56,9 @@ class FanboxExtractor(Extractor):
|
|||||||
"""Return all relevant post objects"""
|
"""Return all relevant post objects"""
|
||||||
|
|
||||||
def _pagination(self, url):
|
def _pagination(self, url):
|
||||||
headers = {"Origin": self.root}
|
|
||||||
|
|
||||||
while url:
|
while url:
|
||||||
url = text.ensure_http_scheme(url)
|
url = text.ensure_http_scheme(url)
|
||||||
body = self.request(url, headers=headers).json()["body"]
|
body = self.request(url, headers=self.headers).json()["body"]
|
||||||
for item in body["items"]:
|
for item in body["items"]:
|
||||||
try:
|
try:
|
||||||
yield self._get_post_data(item["id"])
|
yield self._get_post_data(item["id"])
|
||||||
@@ -58,9 +69,8 @@ class FanboxExtractor(Extractor):
|
|||||||
|
|
||||||
def _get_post_data(self, post_id):
|
def _get_post_data(self, post_id):
|
||||||
"""Fetch and process post data"""
|
"""Fetch and process post data"""
|
||||||
headers = {"Origin": self.root}
|
|
||||||
url = "https://api.fanbox.cc/post.info?postId="+post_id
|
url = "https://api.fanbox.cc/post.info?postId="+post_id
|
||||||
post = self.request(url, headers=headers).json()["body"]
|
post = self.request(url, headers=self.headers).json()["body"]
|
||||||
|
|
||||||
content_body = post.pop("body", None)
|
content_body = post.pop("body", None)
|
||||||
if content_body:
|
if content_body:
|
||||||
@@ -98,8 +108,47 @@ class FanboxExtractor(Extractor):
|
|||||||
post["text"] = content_body.get("text") if content_body else None
|
post["text"] = content_body.get("text") if content_body else None
|
||||||
post["isCoverImage"] = False
|
post["isCoverImage"] = False
|
||||||
|
|
||||||
|
if self._meta_user:
|
||||||
|
post["user"] = self._get_user_data(post["creatorId"])
|
||||||
|
if self._meta_plan:
|
||||||
|
plans = self._get_plan_data(post["creatorId"])
|
||||||
|
post["plan"] = plans[post["feeRequired"]]
|
||||||
|
|
||||||
return content_body, post
|
return content_body, post
|
||||||
|
|
||||||
|
@memcache(keyarg=1)
def _get_user_data(self, creator_id):
    """Fetch creator info for 'creator_id' with its 'user' object merged in"""
    response = self.request(
        "https://api.fanbox.cc/creator.get",
        params={"creatorId": creator_id},
        headers=self.headers,
    )
    creator = response.json()["body"]

    # lift the fields of the nested 'user' object to the top level
    nested = creator.pop("user")
    creator.update(nested)

    return creator
|
||||||
|
|
||||||
|
@memcache(keyarg=1)
def _get_plan_data(self, creator_id):
    """Return the plans of 'creator_id' as a dict keyed by their 'fee'"""
    response = self.request(
        "https://api.fanbox.cc/plan.listCreator",
        params={"creatorId": creator_id},
        headers=self.headers,
    )
    listing = response.json()

    # default entry for fee 0; the result is indexed
    # by a post's 'feeRequired' value
    by_fee = {
        0: {
            "id"             : "",
            "title"          : "",
            "fee"            : 0,
            "description"    : "",
            "coverImageUrl"  : "",
            "creatorId"      : creator_id,
            "hasAdultContent": None,
            "paymentMethod"  : None,
        },
    }

    for entry in listing["body"]:
        # drop the nested 'user' object from each plan
        entry.pop("user")
        by_fee[entry["fee"]] = entry

    return by_fee
|
||||||
|
|
||||||
def _get_urls_from_post(self, content_body, post):
|
def _get_urls_from_post(self, content_body, post):
|
||||||
num = 0
|
num = 0
|
||||||
cover_image = post.get("coverImageUrl")
|
cover_image = post.get("coverImageUrl")
|
||||||
|
|||||||
@@ -86,6 +86,43 @@ __tests__ = (
|
|||||||
"content": r"re:(?s)^Greetings from FANBOX.\n \nAs of Monday, September 5th, 2022, we are happy to announce the start of the FANBOX hashtag event #MySetupTour ! \nAbout the event\nTo join this event .+ \nPlease check this page for further details regarding the Privacy & Terms.\nhttps://fanbox.pixiv.help/.+/10184952456601\n\n\nThank you for your continued support of FANBOX.$",
|
"content": r"re:(?s)^Greetings from FANBOX.\n \nAs of Monday, September 5th, 2022, we are happy to announce the start of the FANBOX hashtag event #MySetupTour ! \nAbout the event\nTo join this event .+ \nPlease check this page for further details regarding the Privacy & Terms.\nhttps://fanbox.pixiv.help/.+/10184952456601\n\n\nThank you for your continued support of FANBOX.$",
|
||||||
},
|
},
|
||||||
|
|
||||||
|
{
|
||||||
|
"#url" : "https://official-en.fanbox.cc/posts/7022572",
|
||||||
|
"#comment" : "'plan' and 'user' metadata (#4921)",
|
||||||
|
"#category": ("", "fanbox", "post"),
|
||||||
|
"#class" : fanbox.FanboxPostExtractor,
|
||||||
|
"#options" : {"metadata": True},
|
||||||
|
|
||||||
|
"plan": {
|
||||||
|
"coverImageUrl" : "",
|
||||||
|
"creatorId" : "official-en",
|
||||||
|
"description" : "",
|
||||||
|
"fee" : 0,
|
||||||
|
"hasAdultContent": None,
|
||||||
|
"id" : "",
|
||||||
|
"paymentMethod" : None,
|
||||||
|
"title" : "",
|
||||||
|
},
|
||||||
|
"user": {
|
||||||
|
"coverImageUrl" : "https://pixiv.pximg.net/c/1620x580_90_a2_g5/fanbox/public/images/creator/74349833/cover/n9mX8q4tUXHXXj7sK1RPWyUu.jpeg",
|
||||||
|
"creatorId" : "official-en",
|
||||||
|
"description" : "This is the official English pixivFANBOX account! \n(official Japanese account: https://official.fanbox.cc/ )\n\npixivFANBOX is a subscription service for building a reliable fan community where creators can nurture creative lifestyles together with their fans.\nFollowers can be notified of the updates from their favorite creators they are following. Supporters can enjoy closer communication with creators through exclusive content and their latest information.\n",
|
||||||
|
"hasAdultContent" : False,
|
||||||
|
"hasBoothShop" : False,
|
||||||
|
"iconUrl" : "https://pixiv.pximg.net/c/160x160_90_a2_g5/fanbox/public/images/user/74349833/icon/oJH0OoGoSixLrJXlnneNvC95.jpeg",
|
||||||
|
"isAcceptingRequest": False,
|
||||||
|
"isFollowed" : False,
|
||||||
|
"isStopped" : False,
|
||||||
|
"isSupported" : False,
|
||||||
|
"name" : "pixivFANBOX English",
|
||||||
|
"profileItems" : [],
|
||||||
|
"profileLinks" : [
|
||||||
|
"https://twitter.com/pixivfanbox",
|
||||||
|
],
|
||||||
|
"userId" : "74349833",
|
||||||
|
},
|
||||||
|
},
|
||||||
|
|
||||||
{
|
{
|
||||||
"#url" : "https://mochirong.fanbox.cc/posts/3746116",
|
"#url" : "https://mochirong.fanbox.cc/posts/3746116",
|
||||||
"#comment" : "imageMap file order (#2718)",
|
"#comment" : "imageMap file order (#2718)",
|
||||||
|
|||||||
Reference in New Issue
Block a user