[booru] refactor 'tags' and 'notes' extraction

- move HTML request for post pages into its own function
- move gelbooru_v02.py notes extraction to gelbooru.py since it only works there
- clean up some code
2022-10-31 12:01:19 +01:00
parent 48bbe1ccf6
commit 775895f44b
7 changed files with 57 additions and 75 deletions
--- a/gallery_dl/extractor/booru.py
+++ b/gallery_dl/extractor/booru.py
@@ -25,6 +25,7 @@ class BooruExtractor(BaseExtractor):
        data = self.metadata()
        tags = self.config("tags", False)
        notes = self.config("notes", False)
+        fetch_html = tags or notes

        for post in self.posts():
            try:
@@ -36,11 +37,13 @@ class BooruExtractor(BaseExtractor):
                               "(md5: %s)", post.get("id"), post.get("md5"))
                continue

-            page_html = None
-            if tags:
-                page_html = self._extended_tags(post)
-            if notes:
-                self._notes(post, page_html)
+            if fetch_html:
+                html = self._html(post)
+                if tags:
+                    self._tags(post, html)
+                if notes:
+                    self._notes(post, html)
+
            text.nameext_from_url(url, post)
            post.update(data)
            self._prepare(post)
@@ -67,16 +70,13 @@ class BooruExtractor(BaseExtractor):
    _file_url = operator.itemgetter("file_url")

    def _prepare(self, post):
-        """Prepare the 'post's metadata"""
+        """Prepare a 'post's metadata"""

-    def _extended_tags(self, post, page=None):
-        """Generate extended tag information
+    def _html(self, post):
+        """Return HTML content of a post"""

-        The return value of this function will be
-        passed to the _notes function as the page parameter.
-        This makes it possible to reuse the same HTML both for
-        extracting tags and notes.
-        """
+    def _tags(self, post, page):
+        """Extract extended tag metadata"""

-    def _notes(self, post, page=None):
-        """Generate information about notes"""
+    def _notes(self, post, page):
+        """Extract notes metadata"""