[behance] fix collection extraction

This commit is contained in:
Mike Fährmann
2019-07-27 14:26:40 +02:00
parent 20f7b07312
commit 3969f9cbbd

View File

@@ -30,7 +30,8 @@ class BehanceExtractor(Extractor):
     @staticmethod
     def _update(data):
         # compress data to simple lists
-        data["fields"] = [field["name"] for field in data["fields"]]
+        if data["fields"] and isinstance(data["fields"][0], dict):
+            data["fields"] = [field["name"] for field in data["fields"]]
         data["owners"] = [owner["display_name"] for owner in data["owners"]]
         if "tags" in data:
             data["tags"] = [tag["title"] for tag in data["tags"]]
@@ -140,11 +141,11 @@ class BehanceUserExtractor(BehanceExtractor):
     def galleries(self):
         url = "{}/{}/projects".format(self.root, self.user)
-        headers = {"X-Requested-With": "XMLHttpRequest"}
         params = {"offset": 0}
+        headers = {"X-Requested-With": "XMLHttpRequest"}
         while True:
-            data = self.request(url, headers=headers, params=params).json()
+            data = self.request(url, params=params, headers=headers).json()
             work = data["profile"]["activeSection"]["work"]
             yield from work["projects"]
             if not work["hasMore"]:
@@ -157,8 +158,8 @@ class BehanceCollectionExtractor(BehanceExtractor):
     subcategory = "collection"
     categorytransfer = True
     pattern = r"(?:https?://)?(?:www\.)?behance\.net/collection/(\d+)"
-    test = ("https://www.behance.net/collection/170615607/Sky", {
-        "count": ">= 13",
+    test = ("https://www.behance.net/collection/71340149/inspiration", {
+        "count": ">= 145",
         "pattern": BehanceGalleryExtractor.pattern,
     })
@@ -168,12 +169,13 @@ class BehanceCollectionExtractor(BehanceExtractor):
     def galleries(self):
         url = "{}/collection/{}/a".format(self.root, self.collection_id)
+        params = {"offset": 0}
         headers = {"X-Requested-With": "XMLHttpRequest"}
-        params = {}
         while True:
-            data = self.request(url, headers=headers, params=params).json()
-            yield from data["output"]
-            if not data.get("offset"):
+            data = self.request(url, params=params, headers=headers).json()
+            for item in data["items"]:
+                yield item["project"]
+            if len(data["items"]) < 40:
                 return
-            params["offset"] = data["offset"]
+            params["offset"] += len(data["items"])