[booru] add an option to extract notes (only gelbooru for now) (#1457)
* [booru] add an option to extract notes (currently implemented only for gelbooru) * appease linter * [gelbooru] rename "text" to "body" in note extraction * add a code comment about reusing return value of _extended_tags
This commit is contained in:
@@ -57,6 +57,31 @@ class GelbooruV02Extractor(booru.BooruExtractor):
|
||||
tags[tag_type].append(text.unquote(tag_name))
|
||||
for key, value in tags.items():
|
||||
post["tags_" + key] = " ".join(value)
|
||||
return page
|
||||
|
||||
def _notes(self, post, page=None):
    """Collect note annotations for 'post' and store them in post["notes"].

    'page' is the HTML text of the post's view page. When it is empty
    or not given, the page is requested from the site using post["id"].

    If the page has no '<section id="notes"' ... '</section>' part,
    the method returns without adding a "notes" key to 'post'.
    Otherwise post["notes"] becomes a list with one dict per
    '<article' ... '</article>' entry, holding the integer fields
    "width", "height", "x", "y" and the string field "body".
    """
    if not page:
        url = "{}/index.php?page=post&s=view&id={}".format(
            self.root, post["id"])
        page = self.request(url).text

    section = text.extract(page, '<section id="notes"', '</section>')[0]
    if not section:
        # nothing to parse; 'post' is intentionally left untouched
        return

    find = text.extract
    articles = text.extract_iter(section, '<article', '</article>')

    # each value is read from the matching data-* attribute of the entry
    post["notes"] = [
        {
            "width": int(find(article, 'data-width="', '"')[0]),
            "height": int(find(article, 'data-height="', '"')[0]),
            "x": int(find(article, 'data-x="', '"')[0]),
            "y": int(find(article, 'data-y="', '"')[0]),
            "body": find(article, 'data-body="', '"')[0],
        }
        for article in articles
    ]
|
||||
|
||||
|
||||
BASE_PATTERN = GelbooruV02Extractor.update({
|
||||
|
||||
Reference in New Issue
Block a user