[bellazon] add 'order-posts' option (#8248)
This commit is contained in:
@@ -1600,6 +1600,22 @@ Description
|
|||||||
``image``, ``video``, ``mediacollection``, ``embed``, ``text``.
|
``image``, ``video``, ``mediacollection``, ``embed``, ``text``.
|
||||||
|
|
||||||
|
|
||||||
|
extractor.bellazon.order-posts
|
||||||
|
------------------------------
|
||||||
|
Type
|
||||||
|
``string``
|
||||||
|
Default
|
||||||
|
``"desc"``
|
||||||
|
Description
|
||||||
|
Controls the order in which
|
||||||
|
posts of a ``thread`` are processed.
|
||||||
|
|
||||||
|
``"asc"``
|
||||||
|
Ascending order (oldest first)
|
||||||
|
``"desc"`` | ``"reverse"``
|
||||||
|
Descending order (newest first)
|
||||||
|
|
||||||
|
|
||||||
extractor.bellazon.quoted
|
extractor.bellazon.quoted
|
||||||
-------------------------
|
-------------------------
|
||||||
Type
|
Type
|
||||||
|
|||||||
@@ -156,7 +156,8 @@
|
|||||||
},
|
},
|
||||||
"bellazon":
|
"bellazon":
|
||||||
{
|
{
|
||||||
"quoted": false
|
"order-posts": "desc",
|
||||||
|
"quoted" : false
|
||||||
},
|
},
|
||||||
"bilibili":
|
"bilibili":
|
||||||
{
|
{
|
||||||
|
|||||||
@@ -91,6 +91,28 @@ class BellazonExtractor(Extractor):
|
|||||||
pnum += 1
|
pnum += 1
|
||||||
url = f"{base}/page/{pnum}/"
|
url = f"{base}/page/{pnum}/"
|
||||||
|
|
||||||
|
def _pagination_reverse(self, base, pnum=None):
    """Yield the HTML text of a thread's pages from the last page down to 1.

    'base' is appended to 'self.root' to form the thread URL.
    Any 'pnum' value passed in is ignored; the starting page number is
    taken from the final URL of the initial request instead.
    """
    prefix = f"{self.root}{base}"

    # Ask for an absurdly high page number; the final response URL
    # is then inspected to learn the real highest page number.
    with self.request(f"{prefix}/page/9999/") as response:
        segments = response.url.rsplit("/", 3)
        if segments[1] == "page":
            pnum = text.parse_int(segments[2])
        else:
            # no '/page/N/' suffix in the final URL -> treat as page 1
            pnum = 1
        html = response.text

    yield html

    # Walk the remaining pages downwards; page 1 has no '/page/N/' suffix.
    for number in range(pnum - 1, 0, -1):
        if number == 1:
            target = f"{prefix}/"
        else:
            target = f"{prefix}/page/{number}/"
        yield self.request(target).text
|
||||||
|
|
||||||
def _parse_thread(self, page):
|
def _parse_thread(self, page):
|
||||||
schema = self._extract_jsonld(page)
|
schema = self._extract_jsonld(page)
|
||||||
author = schema["author"]
|
author = schema["author"]
|
||||||
@@ -166,10 +188,22 @@ class BellazonThreadExtractor(BellazonExtractor):
|
|||||||
example = "https://www.bellazon.com/main/topic/123-SLUG/"
|
example = "https://www.bellazon.com/main/topic/123-SLUG/"
|
||||||
|
|
||||||
def posts(self):
|
def posts(self):
|
||||||
for page in self._pagination(*self.groups):
|
if (order := self.config("order-posts")) and \
|
||||||
|
order[0] not in ("d", "r"):
|
||||||
|
pages = self._pagination(*self.groups)
|
||||||
|
reverse = False
|
||||||
|
else:
|
||||||
|
pages = self._pagination_reverse(*self.groups)
|
||||||
|
reverse = True
|
||||||
|
|
||||||
|
for page in pages:
|
||||||
if "thread" not in self.kwdict:
|
if "thread" not in self.kwdict:
|
||||||
self.kwdict["thread"] = self._parse_thread(page)
|
self.kwdict["thread"] = self._parse_thread(page)
|
||||||
for html in text.extract_iter(page, "<article ", "</article>"):
|
posts = text.extract_iter(page, "<article ", "</article>")
|
||||||
|
if reverse:
|
||||||
|
posts = list(posts)
|
||||||
|
posts.reverse()
|
||||||
|
for html in posts:
|
||||||
yield self._parse_post(html)
|
yield self._parse_post(html)
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -244,6 +244,7 @@ __tests__ = (
|
|||||||
"#url" : "https://www.bellazon.com/main/topic/1774-zhang-ziyi/",
|
"#url" : "https://www.bellazon.com/main/topic/1774-zhang-ziyi/",
|
||||||
"#class" : bellazon.BellazonThreadExtractor,
|
"#class" : bellazon.BellazonThreadExtractor,
|
||||||
"#range" : "1-5",
|
"#range" : "1-5",
|
||||||
|
"#options" : {"order-posts": "asc"},
|
||||||
"#results" : (
|
"#results" : (
|
||||||
"http://img292.echo.cx/my.php?image=4moon011rk.jpg",
|
"http://img292.echo.cx/my.php?image=4moon011rk.jpg",
|
||||||
"http://img294.echo.cx/my.php?image=heroclip3jb.jpg",
|
"http://img294.echo.cx/my.php?image=heroclip3jb.jpg",
|
||||||
|
|||||||
Reference in New Issue
Block a user