[furaffinity] add 'descriptions' option (#1231)
This commit is contained in:
@@ -969,6 +969,19 @@ Description
|
|||||||
``"l"``, ...) to use as an upper limit.
|
``"l"``, ...) to use as an upper limit.
|
||||||
|
|
||||||
|
|
||||||
|
extractor.furaffinity.descriptions
|
||||||
|
----------------------------------
|
||||||
|
Type
|
||||||
|
``string``
|
||||||
|
Default
|
||||||
|
``"text"``
|
||||||
|
Description
|
||||||
|
Controls the format of ``description`` metadata fields.
|
||||||
|
|
||||||
|
* ``"text"``: Plain text with HTML tags removed
|
||||||
|
* ``"html"``: Raw HTML content
|
||||||
|
|
||||||
|
|
||||||
extractor.furaffinity.include
|
extractor.furaffinity.include
|
||||||
-----------------------------
|
-----------------------------
|
||||||
Type
|
Type
|
||||||
|
|||||||
@@ -29,6 +29,9 @@ class FuraffinityExtractor(Extractor):
|
|||||||
self.user = match.group(1)
|
self.user = match.group(1)
|
||||||
self.offset = 0
|
self.offset = 0
|
||||||
|
|
||||||
|
if self.config("descriptions") == "html":
|
||||||
|
self._process_description = lambda x: x.strip()
|
||||||
|
|
||||||
def items(self):
|
def items(self):
|
||||||
metadata = self.metadata()
|
metadata = self.metadata()
|
||||||
for post_id in util.advance(self.posts(), self.offset):
|
for post_id in util.advance(self.posts(), self.offset):
|
||||||
@@ -83,8 +86,8 @@ class FuraffinityExtractor(Extractor):
|
|||||||
if tags:
|
if tags:
|
||||||
# new site layout
|
# new site layout
|
||||||
data["tags"] = text.split_html(tags)
|
data["tags"] = text.split_html(tags)
|
||||||
data["description"] = text.unescape(rh(extr(
|
data["description"] = self._process_description(extr(
|
||||||
'class="section-body">', '</div>'), "", ""))
|
'class="section-body">', '</div>'))
|
||||||
data["views"] = pi(rh(extr('class="views">', '</span>')))
|
data["views"] = pi(rh(extr('class="views">', '</span>')))
|
||||||
data["favorites"] = pi(rh(extr('class="favorites">', '</span>')))
|
data["favorites"] = pi(rh(extr('class="favorites">', '</span>')))
|
||||||
data["comments"] = pi(rh(extr('class="comments">', '</span>')))
|
data["comments"] = pi(rh(extr('class="comments">', '</span>')))
|
||||||
@@ -109,12 +112,16 @@ class FuraffinityExtractor(Extractor):
|
|||||||
data["tags"] = text.split_html(extr(
|
data["tags"] = text.split_html(extr(
|
||||||
'id="keywords">', '</div>'))[::2]
|
'id="keywords">', '</div>'))[::2]
|
||||||
data["rating"] = extr('<img alt="', ' ')
|
data["rating"] = extr('<img alt="', ' ')
|
||||||
data["description"] = text.unescape(text.remove_html(extr(
|
data["description"] = self._process_description(extr(
|
||||||
"</table>", "</table>"), "", ""))
|
"</table>", "</table>"))
|
||||||
data["date"] = text.parse_timestamp(data["filename"].partition(".")[0])
|
data["date"] = text.parse_timestamp(data["filename"].partition(".")[0])
|
||||||
|
|
||||||
return data
|
return data
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def _process_description(description):
|
||||||
|
return text.unescape(text.remove_html(description, "", ""))
|
||||||
|
|
||||||
def _pagination(self):
|
def _pagination(self):
|
||||||
num = 1
|
num = 1
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user