[aryion] add 'recursive' option (fixes #832)
The 'recursive' option is enabled by default and makes the extractor recursively descend through all (sub)folders in an artist's gallery. The previous behavior of getting posts from the "Latest Updates" pages can be restored by disabling this option.
This commit is contained in:
@@ -546,6 +546,19 @@ Description Try to follow external URLs of embedded players.
|
||||
=========== =====
|
||||
|
||||
|
||||
extractor.aryion.recursive
|
||||
--------------------------
|
||||
=========== =====
|
||||
Type ``bool``
|
||||
Default ``true``
|
||||
Description Controls the post extraction strategy.
|
||||
|
||||
* ``true``: Start on users' main gallery pages and recursively
|
||||
descend into subfolders
|
||||
* ``false``: Get posts from "Latest Updates" pages
|
||||
=========== =====
|
||||
|
||||
|
||||
extractor.blogger.videos
|
||||
------------------------
|
||||
=========== =====
|
||||
|
||||
@@ -26,9 +26,24 @@ class AryionExtractor(Extractor):
|
||||
def __init__(self, match):
|
||||
Extractor.__init__(self, match)
|
||||
self.user = match.group(1)
|
||||
self.offset = 0
|
||||
self.recursive = True
|
||||
|
||||
def posts(self, url):
|
||||
def items(self):
|
||||
for post_id in self.posts():
|
||||
post = self._parse_post(post_id)
|
||||
if post:
|
||||
yield Message.Directory, post
|
||||
yield Message.Url, post["url"], post
|
||||
elif post is False and self.recursive:
|
||||
base = self.root + "/g4/view/"
|
||||
data = {"_extractor": AryionPostExtractor}
|
||||
for post_id in self._pagination(base + post_id):
|
||||
yield Message.Queue, base + post_id, data
|
||||
|
||||
def posts(self):
|
||||
"""Yield relevant post IDs"""
|
||||
|
||||
def _pagination(self, url):
|
||||
while True:
|
||||
page = self.request(url).text
|
||||
yield from text.extract_iter(
|
||||
@@ -39,11 +54,14 @@ class AryionExtractor(Extractor):
|
||||
return
|
||||
url = self.root + text.rextract(page, "href='", "'", pos)[0]
|
||||
|
||||
def parse_post(self, post_id):
|
||||
def _parse_post(self, post_id):
|
||||
url = "{}/g4/data.php?id={}".format(self.root, post_id)
|
||||
with self.request(url, method="HEAD", fatal=False) as response:
|
||||
|
||||
if response.status_code >= 400:
|
||||
self.log.warning(
|
||||
"Unable to fetch post %s ('%s %s')",
|
||||
post_id, response.status_code, response.reason)
|
||||
return None
|
||||
headers = response.headers
|
||||
|
||||
@@ -106,9 +124,11 @@ class AryionExtractor(Extractor):
|
||||
class AryionGalleryExtractor(AryionExtractor):
|
||||
"""Extractor for a user's gallery on eka's portal"""
|
||||
subcategory = "gallery"
|
||||
categorytransfer = True
|
||||
pattern = BASE_PATTERN + r"/(?:gallery/|user/|latest.php\?name=)([^/?&#]+)"
|
||||
test = (
|
||||
("https://aryion.com/g4/gallery/jameshoward", {
|
||||
"options": (("recursive", False),),
|
||||
"pattern": r"https://aryion\.com/g4/data\.php\?id=\d+$",
|
||||
"range": "48-52",
|
||||
"count": 5,
|
||||
@@ -117,17 +137,24 @@ class AryionGalleryExtractor(AryionExtractor):
|
||||
("https://aryion.com/g4/latest.php?name=jameshoward"),
|
||||
)
|
||||
|
||||
def __init__(self, match):
|
||||
AryionExtractor.__init__(self, match)
|
||||
self.recursive = self.config("recursive", True)
|
||||
self.offset = 0
|
||||
|
||||
def skip(self, num):
|
||||
if self.recursive:
|
||||
num = 0
|
||||
self.offset += num
|
||||
return num
|
||||
|
||||
def items(self):
|
||||
url = "{}/g4/latest.php?name={}".format(self.root, self.user)
|
||||
for post_id in util.advance(self.posts(url), self.offset):
|
||||
post = self.parse_post(post_id)
|
||||
if post:
|
||||
yield Message.Directory, post
|
||||
yield Message.Url, post["url"], post
|
||||
def posts(self):
|
||||
if self.recursive:
|
||||
url = "{}/g4/gallery/{}".format(self.root, self.user)
|
||||
return self._pagination(url)
|
||||
else:
|
||||
url = "{}/g4/latest.php?name={}".format(self.root, self.user)
|
||||
return util.advance(self._pagination(url), self.offset)
|
||||
|
||||
|
||||
class AryionPostExtractor(AryionExtractor):
|
||||
@@ -164,19 +191,6 @@ class AryionPostExtractor(AryionExtractor):
|
||||
}),
|
||||
)
|
||||
|
||||
def items(self):
|
||||
post_id = self.user
|
||||
self.user = None
|
||||
post = self.parse_post(post_id)
|
||||
|
||||
if post:
|
||||
yield Message.Directory, post
|
||||
yield Message.Url, post["url"], post
|
||||
|
||||
elif post is False:
|
||||
folder_url = "{}/g4/view/{}".format(self.root, post_id)
|
||||
data = {"_extractor": AryionPostExtractor}
|
||||
|
||||
for post_id in self.posts(folder_url):
|
||||
url = "{}/g4/view/{}".format(self.root, post_id)
|
||||
yield Message.Queue, url, data
|
||||
def posts(self):
|
||||
post_id, self.user = self.user, None
|
||||
return (post_id,)
|
||||
|
||||
Reference in New Issue
Block a user