[bellazon] add 'order-posts' option (#8248)
This commit is contained in:
@@ -1600,6 +1600,22 @@ Description
|
||||
``image``, ``video``, ``mediacollection``, ``embed``, ``text``.
|
||||
|
||||
|
||||
extractor.bellazon.order-posts
|
||||
------------------------------
|
||||
Type
|
||||
``string``
|
||||
Default
|
||||
``"desc"``
|
||||
Description
|
||||
Controls the order in which
|
||||
posts of a ``thread`` are processed.
|
||||
|
||||
``"asc"``
|
||||
Ascending order (oldest first)
|
||||
``"desc"`` | ``"reverse"``
|
||||
Descending order (newest first)
|
||||
|
||||
|
||||
extractor.bellazon.quoted
|
||||
-------------------------
|
||||
Type
|
||||
|
||||
@@ -156,7 +156,8 @@
|
||||
},
|
||||
"bellazon":
|
||||
{
|
||||
"quoted": false
|
||||
"order-posts": "desc",
|
||||
"quoted" : false
|
||||
},
|
||||
"bilibili":
|
||||
{
|
||||
|
||||
@@ -91,6 +91,28 @@ class BellazonExtractor(Extractor):
|
||||
pnum += 1
|
||||
url = f"{base}/page/{pnum}/"
|
||||
|
||||
def _pagination_reverse(self, base, pnum=None):
|
||||
base = f"{self.root}{base}"
|
||||
|
||||
url = f"{base}/page/9999/" # force redirect to highest page number
|
||||
with self.request(url) as response:
|
||||
parts = response.url.rsplit("/", 3)
|
||||
pnum = text.parse_int(parts[2]) if parts[1] == "page" else 1
|
||||
page = response.text
|
||||
|
||||
while True:
|
||||
yield page
|
||||
|
||||
pnum -= 1
|
||||
if pnum > 1:
|
||||
url = f"{base}/page/{pnum}/"
|
||||
elif pnum == 1:
|
||||
url = f"{base}/"
|
||||
else:
|
||||
return
|
||||
|
||||
page = self.request(url).text
|
||||
|
||||
def _parse_thread(self, page):
|
||||
schema = self._extract_jsonld(page)
|
||||
author = schema["author"]
|
||||
@@ -166,10 +188,22 @@ class BellazonThreadExtractor(BellazonExtractor):
|
||||
example = "https://www.bellazon.com/main/topic/123-SLUG/"
|
||||
|
||||
def posts(self):
    """Yield the parsed posts of a thread in the configured order.

    The "order-posts" option selects the direction: a value whose first
    character is neither "d" nor "r" walks the pages forward; an unset
    or empty value, or one starting with "d" or "r", walks the pages
    backwards and reverses the posts of each page.
    """
    order = self.config("order-posts")
    if order and order[0] not in ("d", "r"):
        pages = self._pagination(*self.groups)
        reverse = False
    else:
        pages = self._pagination_reverse(*self.groups)
        reverse = True

    for page in pages:
        # thread metadata is parsed once, from the first page received
        if "thread" not in self.kwdict:
            self.kwdict["thread"] = self._parse_thread(page)

        posts = text.extract_iter(page, "<article ", "</article>")
        if reverse:
            # extract_iter's result is turned into a list so the posts
            # of this page can be reversed in place
            posts = list(posts)
            posts.reverse()

        for html in posts:
            yield self._parse_post(html)
|
||||
|
||||
|
||||
|
||||
@@ -244,6 +244,7 @@ __tests__ = (
|
||||
"#url" : "https://www.bellazon.com/main/topic/1774-zhang-ziyi/",
|
||||
"#class" : bellazon.BellazonThreadExtractor,
|
||||
"#range" : "1-5",
|
||||
"#options" : {"order-posts": "asc"},
|
||||
"#results" : (
|
||||
"http://img292.echo.cx/my.php?image=4moon011rk.jpg",
|
||||
"http://img294.echo.cx/my.php?image=heroclip3jb.jpg",
|
||||
|
||||
Reference in New Issue
Block a user