[booru] add an option to extract notes (only gelbooru for now) (#1457)

* [booru] add an option to extract notes (currently implemented only for gelbooru)

* appease linter

* [gelbooru] rename "text" to "body" in note extraction

* add a code comment about reusing the return value of _extended_tags
This commit is contained in:
thatfuckingbird
2021-04-13 23:40:24 +02:00
committed by GitHub
parent 78d7ee3ef4
commit dff03a6605
5 changed files with 74 additions and 3 deletions

View File

@@ -24,6 +24,7 @@ class BooruExtractor(BaseExtractor):
self.login()
data = self.metadata()
tags = self.config("tags", False)
notes = self.config("notes", False)
for post in self.posts():
try:
@@ -35,8 +36,11 @@ class BooruExtractor(BaseExtractor):
"(md5: %s)", post.get("id"), post.get("md5"))
continue
page_html = None
if tags:
self._extended_tags(post)
page_html = self._extended_tags(post)
if notes:
self._notes(post, page_html)
self._prepare(post)
post.update(data)
text.nameext_from_url(url, post)
@@ -66,4 +70,13 @@ class BooruExtractor(BaseExtractor):
"""Prepare the 'post's metadata"""
def _extended_tags(self, post, page=None):
"""Generate extended tag information"""
"""Generate extended tag information
The return value of this function will be
passed to the _notes function as the page parameter.
This makes it possible to reuse the same HTML both for
extracting tags and notes.
"""
def _notes(self, post, page=None):
"""Generate information about notes"""