[fantia] refactor

- embed response data as hidden '_data' field (instead of returning/passing 'resp') - split _get_urls_from_post()
2023-06-15 19:33:59 +02:00
parent 6c8bf9a762
commit dc7af00014
1 changed files with 65 additions and 50 deletions
--- a/gallery_dl/extractor/fantia.py
+++ b/gallery_dl/extractor/fantia.py
@@ -31,15 +31,22 @@ class FantiaExtractor(Extractor):
            FantiaExtractor._warning = False

        for post_id in self.posts():
-            full_response, post = self._get_post_data(post_id)
-            yield Message.Directory, post
+            post = self._get_post_data(post_id)
            post["num"] = 0
-            for url, url_data in self._get_urls_from_post(full_response, post):
-                post["num"] += 1
-                fname = url_data["content_filename"] or url
-                text.nameext_from_url(fname, url_data)
-                url_data["file_url"] = url
-                yield Message.Url, url, url_data
+
+            for content in self._get_post_contents(post):
+                post["content_category"] = content["category"]
+                post["content_title"] = content["title"]
+                post["content_filename"] = content.get("filename", "")
+                post["content_id"] = content["id"]
+                yield Message.Directory, post
+
+                for url in self._get_content_urls(post, content):
+                    text.nameext_from_url(
+                        post["content_filename"] or url, post)
+                    post["file_url"] = url
+                    post["num"] += 1
+                    yield Message.Url, url, post

    def posts(self):
        """Return post IDs"""
@@ -71,7 +78,7 @@ class FantiaExtractor(Extractor):
        """Fetch and process post data"""
        url = self.root+"/api/v1/posts/"+post_id
        resp = self.request(url, headers=self.headers).json()["post"]
-        post = {
+        return {
            "post_id": resp["id"],
            "post_url": self.root + "/posts/" + str(resp["id"]),
            "post_title": resp["title"],
@@ -85,55 +92,63 @@ class FantiaExtractor(Extractor):
            "fanclub_user_name": resp["fanclub"]["user"]["name"],
            "fanclub_name": resp["fanclub"]["name"],
            "fanclub_url": self.root+"/fanclubs/"+str(resp["fanclub"]["id"]),
-            "tags": resp["tags"]
+            "tags": resp["tags"],
+            "_data": resp,
        }
-        return resp, post

-    def _get_urls_from_post(self, resp, post):
+    def _get_post_contents(self, post):
+        contents = post["_data"]["post_contents"]
+
+        try:
+            url = post["_data"]["thumb"]["original"]
+        except Exception:
+            pass
+        else:
+            contents.insert(0, {
+                "id": "thumb",
+                "title": "thumb",
+                "category": "thumb",
+                "download_uri": url,
+            })
+
+        return contents
+
+    def _get_content_urls(self, post, content):
        """Extract individual URL data from the response"""
-        if "thumb" in resp and resp["thumb"] and "original" in resp["thumb"]:
-            post["content_filename"] = ""
-            post["content_category"] = "thumb"
-            post["file_id"] = "thumb"
-            yield resp["thumb"]["original"], post
+        if "comment" in content:
+            post["content_comment"] = content["comment"]

-        for content in resp["post_contents"]:
-            post["content_category"] = content["category"]
-            post["content_title"] = content["title"]
-            post["content_filename"] = content.get("filename", "")
-            post["content_id"] = content["id"]
+        if "post_content_photos" in content:
+            for photo in content["post_content_photos"]:
+                post["file_id"] = photo["id"]
+                yield photo["url"]["original"]

-            if "comment" in content:
-                post["content_comment"] = content["comment"]
+        if "download_uri" in content:
+            post["file_id"] = content["id"]
+            url = content["download_uri"]
+            if url[0] == "/":
+                url = self.root + url
+            yield url

-            if "post_content_photos" in content:
-                for photo in content["post_content_photos"]:
-                    post["file_id"] = photo["id"]
-                    yield photo["url"]["original"], post
+        if content["category"] == "blog" and "comment" in content:
+            comment_json = util.json_loads(content["comment"])
+            ops = comment_json.get("ops") or ()

-            if "download_uri" in content:
-                post["file_id"] = content["id"]
-                yield self.root+"/"+content["download_uri"], post
+            # collect blogpost text first
+            blog_text = ""
+            for op in ops:
+                insert = op.get("insert")
+                if isinstance(insert, str):
+                    blog_text += insert
+            post["blogpost_text"] = blog_text

-            if content["category"] == "blog" and "comment" in content:
-                comment_json = util.json_loads(content["comment"])
-                ops = comment_json.get("ops", ())
-
-                # collect blogpost text first
-                blog_text = ""
-                for op in ops:
-                    insert = op.get("insert")
-                    if isinstance(insert, str):
-                        blog_text += insert
-                post["blogpost_text"] = blog_text
-
-                # collect images
-                for op in ops:
-                    insert = op.get("insert")
-                    if isinstance(insert, dict) and "fantiaImage" in insert:
-                        img = insert["fantiaImage"]
-                        post["file_id"] = img["id"]
-                        yield "https://fantia.jp" + img["original_url"], post
+            # collect images
+            for op in ops:
+                insert = op.get("insert")
+                if isinstance(insert, dict) and "fantiaImage" in insert:
+                    img = insert["fantiaImage"]
+                    post["file_id"] = img["id"]
+                    yield self.root + img["original_url"]


 class FantiaCreatorExtractor(FantiaExtractor):