[behance] fix and update 'user' extractor (#4417)
This commit is contained in:
@@ -26,14 +26,36 @@ class BehanceExtractor(Extractor):
|
|||||||
def galleries(self):
|
def galleries(self):
|
||||||
"""Return all relevant gallery URLs"""
|
"""Return all relevant gallery URLs"""
|
||||||
|
|
||||||
@staticmethod
|
def _request_graphql(self, endpoint, variables):
|
||||||
def _update(data):
|
url = self.root + "/v3/graphql"
|
||||||
|
headers = {
|
||||||
|
"Origin" : self.root,
|
||||||
|
"Referer": self.root + "/",
|
||||||
|
"X-BCP" : "4c34489d-914c-46cd-b44c-dfd0e661136d",
|
||||||
|
"X-NewRelic-ID" : "VgUFVldbGwsFU1BRDwUBVw==",
|
||||||
|
"X-Requested-With": "XMLHttpRequest",
|
||||||
|
}
|
||||||
|
cookies = {
|
||||||
|
"bcp" : "4c34489d-914c-46cd-b44c-dfd0e661136d",
|
||||||
|
"gk_suid": "62735605",
|
||||||
|
"ilo0" : "true",
|
||||||
|
}
|
||||||
|
data = {
|
||||||
|
"query" : GRAPHQL_QUERIES[endpoint],
|
||||||
|
"variables": variables,
|
||||||
|
}
|
||||||
|
|
||||||
|
return self.request(url, method="POST", headers=headers,
|
||||||
|
cookies=cookies, json=data).json()["data"]
|
||||||
|
|
||||||
|
def _update(self, data):
|
||||||
# compress data to simple lists
|
# compress data to simple lists
|
||||||
if data["fields"] and isinstance(data["fields"][0], dict):
|
if data["fields"] and isinstance(data["fields"][0], dict):
|
||||||
data["fields"] = [
|
data["fields"] = [
|
||||||
field.get("name") or field.get("label")
|
field.get("name") or field.get("label")
|
||||||
for field in data["fields"]
|
for field in data["fields"]
|
||||||
]
|
]
|
||||||
|
|
||||||
data["owners"] = [
|
data["owners"] = [
|
||||||
owner.get("display_name") or owner.get("displayName")
|
owner.get("display_name") or owner.get("displayName")
|
||||||
for owner in data["owners"]
|
for owner in data["owners"]
|
||||||
@@ -193,7 +215,7 @@ class BehanceUserExtractor(BehanceExtractor):
|
|||||||
categorytransfer = True
|
categorytransfer = True
|
||||||
pattern = r"(?:https?://)?(?:www\.)?behance\.net/([^/?#]+)/?$"
|
pattern = r"(?:https?://)?(?:www\.)?behance\.net/([^/?#]+)/?$"
|
||||||
test = ("https://www.behance.net/alexstrohl", {
|
test = ("https://www.behance.net/alexstrohl", {
|
||||||
"count": ">= 8",
|
"count": ">= 11",
|
||||||
"pattern": BehanceGalleryExtractor.pattern,
|
"pattern": BehanceGalleryExtractor.pattern,
|
||||||
})
|
})
|
||||||
|
|
||||||
@@ -202,17 +224,20 @@ class BehanceUserExtractor(BehanceExtractor):
|
|||||||
self.user = match.group(1)
|
self.user = match.group(1)
|
||||||
|
|
||||||
def galleries(self):
|
def galleries(self):
|
||||||
url = "{}/{}/projects".format(self.root, self.user)
|
endpoint = "GetProfileProjects"
|
||||||
params = {"offset": 0}
|
variables = {
|
||||||
headers = {"X-Requested-With": "XMLHttpRequest"}
|
"username": self.user,
|
||||||
|
"after" : "MAo=",
|
||||||
|
}
|
||||||
|
|
||||||
while True:
|
while True:
|
||||||
data = self.request(url, params=params, headers=headers).json()
|
data = self._request_graphql(endpoint, variables)
|
||||||
work = data["profile"]["activeSection"]["work"]
|
items = data["user"]["profileProjects"]
|
||||||
yield from work["projects"]
|
yield from items["nodes"]
|
||||||
if not work["hasMore"]:
|
|
||||||
|
if not items["pageInfo"]["hasNextPage"]:
|
||||||
return
|
return
|
||||||
params["offset"] += len(work["projects"])
|
variables["after"] = items["pageInfo"]["endCursor"]
|
||||||
|
|
||||||
|
|
||||||
class BehanceCollectionExtractor(BehanceExtractor):
|
class BehanceCollectionExtractor(BehanceExtractor):
|
||||||
@@ -221,7 +246,7 @@ class BehanceCollectionExtractor(BehanceExtractor):
|
|||||||
categorytransfer = True
|
categorytransfer = True
|
||||||
pattern = r"(?:https?://)?(?:www\.)?behance\.net/collection/(\d+)"
|
pattern = r"(?:https?://)?(?:www\.)?behance\.net/collection/(\d+)"
|
||||||
test = ("https://www.behance.net/collection/71340149/inspiration", {
|
test = ("https://www.behance.net/collection/71340149/inspiration", {
|
||||||
"count": ">= 145",
|
"count": ">= 150",
|
||||||
"pattern": BehanceGalleryExtractor.pattern,
|
"pattern": BehanceGalleryExtractor.pattern,
|
||||||
})
|
})
|
||||||
|
|
||||||
@@ -230,21 +255,186 @@ class BehanceCollectionExtractor(BehanceExtractor):
|
|||||||
self.collection_id = match.group(1)
|
self.collection_id = match.group(1)
|
||||||
|
|
||||||
def galleries(self):
|
def galleries(self):
|
||||||
url = self.root + "/v3/graphql"
|
endpoint = "GetMoodboardItemsAndRecommendations"
|
||||||
headers = {
|
variables = {
|
||||||
"Origin" : self.root,
|
"afterItem": "MAo=",
|
||||||
"Referer": self.root + "/collection/" + self.collection_id,
|
"firstItem": 40,
|
||||||
"X-BCP" : "4c34489d-914c-46cd-b44c-dfd0e661136d",
|
"id" : int(self.collection_id),
|
||||||
"X-NewRelic-ID" : "VgUFVldbGwsFU1BRDwUBVw==",
|
"shouldGetItems" : True,
|
||||||
"X-Requested-With": "XMLHttpRequest",
|
"shouldGetMoodboardFields": False,
|
||||||
}
|
"shouldGetRecommendations": False,
|
||||||
cookies = {
|
|
||||||
"bcp" : "4c34489d-914c-46cd-b44c-dfd0e661136d",
|
|
||||||
"gk_suid": "66981391",
|
|
||||||
"ilo0" : "true",
|
|
||||||
}
|
}
|
||||||
|
|
||||||
query = """
|
while True:
|
||||||
|
data = self._request_graphql(endpoint, variables)
|
||||||
|
items = data["moodboard"]["items"]
|
||||||
|
|
||||||
|
for node in items["nodes"]:
|
||||||
|
yield node["entity"]
|
||||||
|
|
||||||
|
if not items["pageInfo"]["hasNextPage"]:
|
||||||
|
return
|
||||||
|
variables["afterItem"] = items["pageInfo"]["endCursor"]
|
||||||
|
|
||||||
|
|
||||||
|
GRAPHQL_QUERIES = {
|
||||||
|
"GetProfileProjects": """\
|
||||||
|
query GetProfileProjects($username: String, $after: String) {
|
||||||
|
user(username: $username) {
|
||||||
|
profileProjects(first: 12, after: $after) {
|
||||||
|
pageInfo {
|
||||||
|
endCursor
|
||||||
|
hasNextPage
|
||||||
|
}
|
||||||
|
nodes {
|
||||||
|
__typename
|
||||||
|
adminFlags {
|
||||||
|
mature_lock
|
||||||
|
privacy_lock
|
||||||
|
dmca_lock
|
||||||
|
flagged_lock
|
||||||
|
privacy_violation_lock
|
||||||
|
trademark_lock
|
||||||
|
spam_lock
|
||||||
|
eu_ip_lock
|
||||||
|
}
|
||||||
|
colors {
|
||||||
|
r
|
||||||
|
g
|
||||||
|
b
|
||||||
|
}
|
||||||
|
covers {
|
||||||
|
size_202 {
|
||||||
|
url
|
||||||
|
}
|
||||||
|
size_404 {
|
||||||
|
url
|
||||||
|
}
|
||||||
|
size_808 {
|
||||||
|
url
|
||||||
|
}
|
||||||
|
}
|
||||||
|
features {
|
||||||
|
url
|
||||||
|
name
|
||||||
|
featuredOn
|
||||||
|
ribbon {
|
||||||
|
image
|
||||||
|
image2x
|
||||||
|
image3x
|
||||||
|
}
|
||||||
|
}
|
||||||
|
fields {
|
||||||
|
id
|
||||||
|
label
|
||||||
|
slug
|
||||||
|
url
|
||||||
|
}
|
||||||
|
hasMatureContent
|
||||||
|
id
|
||||||
|
isFeatured
|
||||||
|
isHiddenFromWorkTab
|
||||||
|
isMatureReviewSubmitted
|
||||||
|
isOwner
|
||||||
|
isFounder
|
||||||
|
isPinnedToSubscriptionOverview
|
||||||
|
isPrivate
|
||||||
|
linkedAssets {
|
||||||
|
...sourceLinkFields
|
||||||
|
}
|
||||||
|
linkedAssetsCount
|
||||||
|
sourceFiles {
|
||||||
|
...sourceFileFields
|
||||||
|
}
|
||||||
|
matureAccess
|
||||||
|
modifiedOn
|
||||||
|
name
|
||||||
|
owners {
|
||||||
|
...OwnerFields
|
||||||
|
images {
|
||||||
|
size_50 {
|
||||||
|
url
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
premium
|
||||||
|
publishedOn
|
||||||
|
stats {
|
||||||
|
appreciations {
|
||||||
|
all
|
||||||
|
}
|
||||||
|
views {
|
||||||
|
all
|
||||||
|
}
|
||||||
|
comments {
|
||||||
|
all
|
||||||
|
}
|
||||||
|
}
|
||||||
|
slug
|
||||||
|
tools {
|
||||||
|
id
|
||||||
|
title
|
||||||
|
category
|
||||||
|
categoryLabel
|
||||||
|
categoryId
|
||||||
|
approved
|
||||||
|
url
|
||||||
|
backgroundColor
|
||||||
|
}
|
||||||
|
url
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fragment sourceFileFields on SourceFile {
|
||||||
|
__typename
|
||||||
|
sourceFileId
|
||||||
|
projectId
|
||||||
|
userId
|
||||||
|
title
|
||||||
|
assetId
|
||||||
|
renditionUrl
|
||||||
|
mimeType
|
||||||
|
size
|
||||||
|
category
|
||||||
|
licenseType
|
||||||
|
unitAmount
|
||||||
|
currency
|
||||||
|
tier
|
||||||
|
hidden
|
||||||
|
extension
|
||||||
|
hasUserPurchased
|
||||||
|
}
|
||||||
|
|
||||||
|
fragment sourceLinkFields on LinkedAsset {
|
||||||
|
__typename
|
||||||
|
name
|
||||||
|
premium
|
||||||
|
url
|
||||||
|
category
|
||||||
|
licenseType
|
||||||
|
}
|
||||||
|
|
||||||
|
fragment OwnerFields on User {
|
||||||
|
displayName
|
||||||
|
hasPremiumAccess
|
||||||
|
id
|
||||||
|
isFollowing
|
||||||
|
isProfileOwner
|
||||||
|
location
|
||||||
|
locationUrl
|
||||||
|
url
|
||||||
|
username
|
||||||
|
availabilityInfo {
|
||||||
|
availabilityTimeline
|
||||||
|
isAvailableFullTime
|
||||||
|
isAvailableFreelance
|
||||||
|
}
|
||||||
|
}
|
||||||
|
""",
|
||||||
|
|
||||||
|
"GetMoodboardItemsAndRecommendations": """\
|
||||||
query GetMoodboardItemsAndRecommendations(
|
query GetMoodboardItemsAndRecommendations(
|
||||||
$id: Int!
|
$id: Int!
|
||||||
$firstItem: Int!
|
$firstItem: Int!
|
||||||
@@ -289,13 +479,7 @@ fragment moodboardFields on Moodboard {
|
|||||||
url
|
url
|
||||||
isOwner
|
isOwner
|
||||||
owners {
|
owners {
|
||||||
id
|
...OwnerFields
|
||||||
displayName
|
|
||||||
url
|
|
||||||
firstName
|
|
||||||
location
|
|
||||||
locationUrl
|
|
||||||
isFollowing
|
|
||||||
images {
|
images {
|
||||||
size_50 {
|
size_50 {
|
||||||
url
|
url
|
||||||
@@ -320,6 +504,7 @@ fragment moodboardFields on Moodboard {
|
|||||||
}
|
}
|
||||||
|
|
||||||
fragment projectFields on Project {
|
fragment projectFields on Project {
|
||||||
|
__typename
|
||||||
id
|
id
|
||||||
isOwner
|
isOwner
|
||||||
publishedOn
|
publishedOn
|
||||||
@@ -348,13 +533,7 @@ fragment projectFields on Project {
|
|||||||
b
|
b
|
||||||
}
|
}
|
||||||
owners {
|
owners {
|
||||||
url
|
...OwnerFields
|
||||||
displayName
|
|
||||||
id
|
|
||||||
location
|
|
||||||
locationUrl
|
|
||||||
isProfileOwner
|
|
||||||
isFollowing
|
|
||||||
images {
|
images {
|
||||||
size_50 {
|
size_50 {
|
||||||
url
|
url
|
||||||
@@ -488,26 +667,23 @@ fragment nodesFields on MoodboardItem {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
"""
|
|
||||||
variables = {
|
|
||||||
"afterItem": "MAo=",
|
|
||||||
"firstItem": 40,
|
|
||||||
"id" : int(self.collection_id),
|
|
||||||
"shouldGetItems" : True,
|
|
||||||
"shouldGetMoodboardFields": False,
|
|
||||||
"shouldGetRecommendations": False,
|
|
||||||
}
|
|
||||||
data = {"query": query, "variables": variables}
|
|
||||||
|
|
||||||
while True:
|
fragment OwnerFields on User {
|
||||||
items = self.request(
|
displayName
|
||||||
url, method="POST", headers=headers,
|
hasPremiumAccess
|
||||||
cookies=cookies, json=data,
|
id
|
||||||
).json()["data"]["moodboard"]["items"]
|
isFollowing
|
||||||
|
isProfileOwner
|
||||||
|
location
|
||||||
|
locationUrl
|
||||||
|
url
|
||||||
|
username
|
||||||
|
availabilityInfo {
|
||||||
|
availabilityTimeline
|
||||||
|
isAvailableFullTime
|
||||||
|
isAvailableFreelance
|
||||||
|
}
|
||||||
|
}
|
||||||
|
""",
|
||||||
|
|
||||||
for node in items["nodes"]:
|
}
|
||||||
yield node["entity"]
|
|
||||||
|
|
||||||
if not items["pageInfo"]["hasNextPage"]:
|
|
||||||
return
|
|
||||||
variables["afterItem"] = items["pageInfo"]["endCursor"]
|
|
||||||
|
|||||||
Reference in New Issue
Block a user