[newgrounds] support page numbers in URLs (#6320)
Page numbers are now recognized in both URL forms:
- https://USER.newgrounds.com/art/?page=5
- https://USER.newgrounds.com/art/page/5
This commit is contained in:
@@ -14,6 +14,9 @@ from ..cache import cache
|
|||||||
import itertools
|
import itertools
|
||||||
import re
|
import re
|
||||||
|
|
||||||
|
BASE_PATTERN = r"(?:https?://)?(?:www\.)?newgrounds\.com"
|
||||||
|
USER_PATTERN = r"(?:https?://)?([\w-]+)\.newgrounds\.com"
|
||||||
|
|
||||||
|
|
||||||
class NewgroundsExtractor(Extractor):
|
class NewgroundsExtractor(Extractor):
|
||||||
"""Base class for newgrounds extractors"""
|
"""Base class for newgrounds extractors"""
|
||||||
@@ -93,7 +96,7 @@ class NewgroundsExtractor(Extractor):
|
|||||||
|
|
||||||
def posts(self):
|
def posts(self):
|
||||||
"""Return URLs of all relevant post pages"""
|
"""Return URLs of all relevant post pages"""
|
||||||
return self._pagination(self._path)
|
return self._pagination(self._path, self.groups[1])
|
||||||
|
|
||||||
def metadata(self):
|
def metadata(self):
|
||||||
"""Return general metadata"""
|
"""Return general metadata"""
|
||||||
@@ -334,10 +337,10 @@ class NewgroundsExtractor(Extractor):
|
|||||||
for fmt in formats:
|
for fmt in formats:
|
||||||
yield fmt[1][0]["src"]
|
yield fmt[1][0]["src"]
|
||||||
|
|
||||||
def _pagination(self, kind):
|
def _pagination(self, kind, pnum=1):
|
||||||
url = "{}/{}".format(self.user_root, kind)
|
url = "{}/{}".format(self.user_root, kind)
|
||||||
params = {
|
params = {
|
||||||
"page": 1,
|
"page": text.parse_int(pnum, 1),
|
||||||
"isAjaxRequest": "1",
|
"isAjaxRequest": "1",
|
||||||
}
|
}
|
||||||
headers = {
|
headers = {
|
||||||
@@ -400,8 +403,7 @@ class NewgroundsImageExtractor(NewgroundsExtractor):
|
|||||||
class NewgroundsMediaExtractor(NewgroundsExtractor):
|
class NewgroundsMediaExtractor(NewgroundsExtractor):
|
||||||
"""Extractor for a media file from newgrounds.com"""
|
"""Extractor for a media file from newgrounds.com"""
|
||||||
subcategory = "media"
|
subcategory = "media"
|
||||||
pattern = (r"(?:https?://)?(?:www\.)?newgrounds\.com"
|
pattern = BASE_PATTERN + r"(/(?:portal/view|audio/listen)/\d+)"
|
||||||
r"(/(?:portal/view|audio/listen)/\d+)")
|
|
||||||
example = "https://www.newgrounds.com/portal/view/12345"
|
example = "https://www.newgrounds.com/portal/view/12345"
|
||||||
|
|
||||||
def __init__(self, match):
|
def __init__(self, match):
|
||||||
@@ -416,35 +418,35 @@ class NewgroundsMediaExtractor(NewgroundsExtractor):
|
|||||||
class NewgroundsArtExtractor(NewgroundsExtractor):
|
class NewgroundsArtExtractor(NewgroundsExtractor):
|
||||||
"""Extractor for all images of a newgrounds user"""
|
"""Extractor for all images of a newgrounds user"""
|
||||||
subcategory = _path = "art"
|
subcategory = _path = "art"
|
||||||
pattern = r"(?:https?://)?([\w-]+)\.newgrounds\.com/art/?$"
|
pattern = USER_PATTERN + r"/art(?:(?:/page/|/?\?page=)(\d+))?/?$"
|
||||||
example = "https://USER.newgrounds.com/art"
|
example = "https://USER.newgrounds.com/art"
|
||||||
|
|
||||||
|
|
||||||
class NewgroundsAudioExtractor(NewgroundsExtractor):
|
class NewgroundsAudioExtractor(NewgroundsExtractor):
|
||||||
"""Extractor for all audio submissions of a newgrounds user"""
|
"""Extractor for all audio submissions of a newgrounds user"""
|
||||||
subcategory = _path = "audio"
|
subcategory = _path = "audio"
|
||||||
pattern = r"(?:https?://)?([\w-]+)\.newgrounds\.com/audio/?$"
|
pattern = USER_PATTERN + r"/audio(?:(?:/page/|/?\?page=)(\d+))?/?$"
|
||||||
example = "https://USER.newgrounds.com/audio"
|
example = "https://USER.newgrounds.com/audio"
|
||||||
|
|
||||||
|
|
||||||
class NewgroundsMoviesExtractor(NewgroundsExtractor):
|
class NewgroundsMoviesExtractor(NewgroundsExtractor):
|
||||||
"""Extractor for all movies of a newgrounds user"""
|
"""Extractor for all movies of a newgrounds user"""
|
||||||
subcategory = _path = "movies"
|
subcategory = _path = "movies"
|
||||||
pattern = r"(?:https?://)?([\w-]+)\.newgrounds\.com/movies/?$"
|
pattern = USER_PATTERN + r"/movies(?:(?:/page/|/?\?page=)(\d+))?/?$"
|
||||||
example = "https://USER.newgrounds.com/movies"
|
example = "https://USER.newgrounds.com/movies"
|
||||||
|
|
||||||
|
|
||||||
class NewgroundsGamesExtractor(NewgroundsExtractor):
|
class NewgroundsGamesExtractor(NewgroundsExtractor):
|
||||||
"""Extractor for a newgrounds user's games"""
|
"""Extractor for a newgrounds user's games"""
|
||||||
subcategory = _path = "games"
|
subcategory = _path = "games"
|
||||||
pattern = r"(?:https?://)?([\w-]+)\.newgrounds\.com/games/?$"
|
pattern = USER_PATTERN + r"/games(?:(?:/page/|/?\?page=)(\d+))?/?$"
|
||||||
example = "https://USER.newgrounds.com/games"
|
example = "https://USER.newgrounds.com/games"
|
||||||
|
|
||||||
|
|
||||||
class NewgroundsUserExtractor(NewgroundsExtractor):
|
class NewgroundsUserExtractor(NewgroundsExtractor):
|
||||||
"""Extractor for a newgrounds user profile"""
|
"""Extractor for a newgrounds user profile"""
|
||||||
subcategory = "user"
|
subcategory = "user"
|
||||||
pattern = r"(?:https?://)?([\w-]+)\.newgrounds\.com/?$"
|
pattern = USER_PATTERN + r"/?$"
|
||||||
example = "https://USER.newgrounds.com"
|
example = "https://USER.newgrounds.com"
|
||||||
|
|
||||||
def initialize(self):
|
def initialize(self):
|
||||||
@@ -464,25 +466,22 @@ class NewgroundsFavoriteExtractor(NewgroundsExtractor):
|
|||||||
"""Extractor for posts favorited by a newgrounds user"""
|
"""Extractor for posts favorited by a newgrounds user"""
|
||||||
subcategory = "favorite"
|
subcategory = "favorite"
|
||||||
directory_fmt = ("{category}", "{user}", "Favorites")
|
directory_fmt = ("{category}", "{user}", "Favorites")
|
||||||
pattern = (r"(?:https?://)?([\w-]+)\.newgrounds\.com"
|
pattern = (USER_PATTERN + r"/favorites(?!/following)(?:/(art|audio|movies)"
|
||||||
r"/favorites(?!/following)(?:/(art|audio|movies))?/?")
|
r"(?:(?:/page/|/?\?page=)(\d+))?)?")
|
||||||
example = "https://USER.newgrounds.com/favorites"
|
example = "https://USER.newgrounds.com/favorites"
|
||||||
|
|
||||||
def __init__(self, match):
|
|
||||||
NewgroundsExtractor.__init__(self, match)
|
|
||||||
self.kind = match.group(2)
|
|
||||||
|
|
||||||
def posts(self):
|
def posts(self):
|
||||||
if self.kind:
|
_, kind, pnum = self.groups
|
||||||
return self._pagination(self.kind)
|
if kind:
|
||||||
|
return self._pagination_favorites(kind, pnum)
|
||||||
return itertools.chain.from_iterable(
|
return itertools.chain.from_iterable(
|
||||||
self._pagination(k) for k in ("art", "audio", "movies")
|
self._pagination_favorites(k) for k in ("art", "audio", "movies")
|
||||||
)
|
)
|
||||||
|
|
||||||
def _pagination(self, kind):
|
def _pagination_favorites(self, kind, pnum=1):
|
||||||
url = "{}/favorites/{}".format(self.user_root, kind)
|
url = "{}/favorites/{}".format(self.user_root, kind)
|
||||||
params = {
|
params = {
|
||||||
"page": 1,
|
"page": text.parse_int(pnum, 1),
|
||||||
"isAjaxRequest": "1",
|
"isAjaxRequest": "1",
|
||||||
}
|
}
|
||||||
headers = {
|
headers = {
|
||||||
@@ -514,12 +513,13 @@ class NewgroundsFavoriteExtractor(NewgroundsExtractor):
|
|||||||
class NewgroundsFollowingExtractor(NewgroundsFavoriteExtractor):
|
class NewgroundsFollowingExtractor(NewgroundsFavoriteExtractor):
|
||||||
"""Extractor for a newgrounds user's favorited users"""
|
"""Extractor for a newgrounds user's favorited users"""
|
||||||
subcategory = "following"
|
subcategory = "following"
|
||||||
pattern = r"(?:https?://)?([\w-]+)\.newgrounds\.com/favorites/(following)"
|
pattern = USER_PATTERN + r"/favorites/(following)"
|
||||||
example = "https://USER.newgrounds.com/favorites/following"
|
example = "https://USER.newgrounds.com/favorites/following"
|
||||||
|
|
||||||
def items(self):
|
def items(self):
|
||||||
|
_, kind, pnum = self.groups
|
||||||
data = {"_extractor": NewgroundsUserExtractor}
|
data = {"_extractor": NewgroundsUserExtractor}
|
||||||
for url in self._pagination(self.kind):
|
for url in self._pagination_favorites(kind, pnum):
|
||||||
yield Message.Queue, url, data
|
yield Message.Queue, url, data
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
@@ -534,13 +534,12 @@ class NewgroundsSearchExtractor(NewgroundsExtractor):
|
|||||||
"""Extractor for newgrounds.com search results"""
|
"""Extractor for newgrounds.com search results"""
|
||||||
subcategory = "search"
|
subcategory = "search"
|
||||||
directory_fmt = ("{category}", "search", "{search_tags}")
|
directory_fmt = ("{category}", "search", "{search_tags}")
|
||||||
pattern = (r"(?:https?://)?(?:www\.)?newgrounds\.com"
|
pattern = BASE_PATTERN + r"/search/conduct/([^/?#]+)/?\?([^#]+)"
|
||||||
r"/search/conduct/([^/?#]+)/?\?([^#]+)")
|
|
||||||
example = "https://www.newgrounds.com/search/conduct/art?terms=QUERY"
|
example = "https://www.newgrounds.com/search/conduct/art?terms=QUERY"
|
||||||
|
|
||||||
def __init__(self, match):
|
def __init__(self, match):
|
||||||
NewgroundsExtractor.__init__(self, match)
|
NewgroundsExtractor.__init__(self, match)
|
||||||
self._path, query = match.groups()
|
self._path, query = self.groups
|
||||||
self.query = text.parse_query(query)
|
self.query = text.parse_query(query)
|
||||||
|
|
||||||
def posts(self):
|
def posts(self):
|
||||||
@@ -550,19 +549,20 @@ class NewgroundsSearchExtractor(NewgroundsExtractor):
|
|||||||
for s in suitabilities.split(",")}
|
for s in suitabilities.split(",")}
|
||||||
self.request(self.root + "/suitabilities",
|
self.request(self.root + "/suitabilities",
|
||||||
method="POST", data=data)
|
method="POST", data=data)
|
||||||
return self._pagination("/search/conduct/" + self._path, self.query)
|
return self._pagination_search(
|
||||||
|
"/search/conduct/" + self._path, self.query)
|
||||||
|
|
||||||
def metadata(self):
|
def metadata(self):
|
||||||
return {"search_tags": self.query.get("terms", "")}
|
return {"search_tags": self.query.get("terms", "")}
|
||||||
|
|
||||||
def _pagination(self, path, params):
|
def _pagination_search(self, path, params):
|
||||||
url = self.root + path
|
url = self.root + path
|
||||||
|
params["inner"] = "1"
|
||||||
|
params["page"] = text.parse_int(params.get("page"), 1)
|
||||||
headers = {
|
headers = {
|
||||||
"Accept": "application/json, text/javascript, */*; q=0.01",
|
"Accept": "application/json, text/javascript, */*; q=0.01",
|
||||||
"X-Requested-With": "XMLHttpRequest",
|
"X-Requested-With": "XMLHttpRequest",
|
||||||
}
|
}
|
||||||
params["inner"] = "1"
|
|
||||||
params["page"] = 1
|
|
||||||
|
|
||||||
while True:
|
while True:
|
||||||
data = self.request(url, params=params, headers=headers).json()
|
data = self.request(url, params=params, headers=headers).json()
|
||||||
|
|||||||
@@ -267,48 +267,67 @@ From The ZJ "Late """,
|
|||||||
|
|
||||||
{
|
{
|
||||||
"#url" : "https://tomfulp.newgrounds.com/art",
|
"#url" : "https://tomfulp.newgrounds.com/art",
|
||||||
"#category": ("", "newgrounds", "art"),
|
|
||||||
"#class" : newgrounds.NewgroundsArtExtractor,
|
"#class" : newgrounds.NewgroundsArtExtractor,
|
||||||
"#pattern" : newgrounds.NewgroundsImageExtractor.pattern,
|
"#pattern" : newgrounds.NewgroundsImageExtractor.pattern,
|
||||||
"#count" : ">= 3",
|
"#count" : ">= 3",
|
||||||
},
|
},
|
||||||
|
|
||||||
|
{
|
||||||
|
"#url" : "https://tomfulp.newgrounds.com/art/page/3",
|
||||||
|
"#class" : newgrounds.NewgroundsArtExtractor,
|
||||||
|
},
|
||||||
|
|
||||||
|
{
|
||||||
|
"#url" : "https://tomfulp.newgrounds.com/art?page=3",
|
||||||
|
"#class" : newgrounds.NewgroundsArtExtractor,
|
||||||
|
},
|
||||||
|
|
||||||
{
|
{
|
||||||
"#url" : "https://tomfulp.newgrounds.com/audio",
|
"#url" : "https://tomfulp.newgrounds.com/audio",
|
||||||
"#category": ("", "newgrounds", "audio"),
|
|
||||||
"#class" : newgrounds.NewgroundsAudioExtractor,
|
"#class" : newgrounds.NewgroundsAudioExtractor,
|
||||||
"#pattern" : r"https://(audio\.ngfiles\.com/\d+/\d+_.+\.mp3|uploads\.ungrounded\.net/.+\.png)",
|
"#pattern" : r"https://(audio\.ngfiles\.com/\d+/\d+_.+\.mp3|uploads\.ungrounded\.net/.+\.png)",
|
||||||
"#count" : ">= 10",
|
"#count" : ">= 10",
|
||||||
},
|
},
|
||||||
|
|
||||||
|
{
|
||||||
|
"#url" : "https://tomfulp.newgrounds.com/audio?page=3",
|
||||||
|
"#class" : newgrounds.NewgroundsAudioExtractor,
|
||||||
|
},
|
||||||
|
|
||||||
{
|
{
|
||||||
"#url" : "https://tomfulp.newgrounds.com/movies",
|
"#url" : "https://tomfulp.newgrounds.com/movies",
|
||||||
"#category": ("", "newgrounds", "movies"),
|
|
||||||
"#class" : newgrounds.NewgroundsMoviesExtractor,
|
"#class" : newgrounds.NewgroundsMoviesExtractor,
|
||||||
"#pattern" : r"https://uploads.ungrounded.net(/alternate)?/\d+/\d+_.+",
|
"#pattern" : r"https://uploads.ungrounded.net(/alternate)?/\d+/\d+_.+",
|
||||||
"#range" : "1-10",
|
"#range" : "1-10",
|
||||||
"#count" : 10,
|
"#count" : 10,
|
||||||
},
|
},
|
||||||
|
|
||||||
|
{
|
||||||
|
"#url" : "https://tomfulp.newgrounds.com/movies/?page=3",
|
||||||
|
"#class" : newgrounds.NewgroundsMoviesExtractor,
|
||||||
|
},
|
||||||
|
|
||||||
{
|
{
|
||||||
"#url" : "https://tomfulp.newgrounds.com/games",
|
"#url" : "https://tomfulp.newgrounds.com/games",
|
||||||
"#category": ("", "newgrounds", "games"),
|
|
||||||
"#class" : newgrounds.NewgroundsGamesExtractor,
|
"#class" : newgrounds.NewgroundsGamesExtractor,
|
||||||
"#pattern" : r"https://uploads.ungrounded.net(/alternate)?/(\d+/\d+_.+|tmp/.+)",
|
"#pattern" : r"https://uploads.ungrounded.net(/alternate)?/(\d+/\d+_.+|tmp/.+)",
|
||||||
"#range" : "1-10",
|
"#range" : "1-10",
|
||||||
"#count" : 10,
|
"#count" : 10,
|
||||||
},
|
},
|
||||||
|
|
||||||
|
{
|
||||||
|
"#url" : "https://tomfulp.newgrounds.com/games?page=3",
|
||||||
|
"#class" : newgrounds.NewgroundsGamesExtractor,
|
||||||
|
},
|
||||||
|
|
||||||
{
|
{
|
||||||
"#url" : "https://tomfulp.newgrounds.com",
|
"#url" : "https://tomfulp.newgrounds.com",
|
||||||
"#category": ("", "newgrounds", "user"),
|
|
||||||
"#class" : newgrounds.NewgroundsUserExtractor,
|
"#class" : newgrounds.NewgroundsUserExtractor,
|
||||||
"#urls" : "https://tomfulp.newgrounds.com/art",
|
"#urls" : "https://tomfulp.newgrounds.com/art",
|
||||||
},
|
},
|
||||||
|
|
||||||
{
|
{
|
||||||
"#url" : "https://tomfulp.newgrounds.com",
|
"#url" : "https://tomfulp.newgrounds.com",
|
||||||
"#category": ("", "newgrounds", "user"),
|
|
||||||
"#class" : newgrounds.NewgroundsUserExtractor,
|
"#class" : newgrounds.NewgroundsUserExtractor,
|
||||||
"#options" : {"include": "all"},
|
"#options" : {"include": "all"},
|
||||||
"#urls" : (
|
"#urls" : (
|
||||||
@@ -321,42 +340,47 @@ From The ZJ "Late """,
|
|||||||
|
|
||||||
{
|
{
|
||||||
"#url" : "https://tomfulp.newgrounds.com/favorites/art",
|
"#url" : "https://tomfulp.newgrounds.com/favorites/art",
|
||||||
"#category": ("", "newgrounds", "favorite"),
|
|
||||||
"#class" : newgrounds.NewgroundsFavoriteExtractor,
|
"#class" : newgrounds.NewgroundsFavoriteExtractor,
|
||||||
"#range" : "1-10",
|
"#range" : "1-10",
|
||||||
"#count" : ">= 10",
|
"#count" : ">= 10",
|
||||||
},
|
},
|
||||||
|
|
||||||
|
{
|
||||||
|
"#url" : "https://tomfulp.newgrounds.com/favorites/art?page=3",
|
||||||
|
"#class" : newgrounds.NewgroundsFavoriteExtractor,
|
||||||
|
},
|
||||||
|
|
||||||
{
|
{
|
||||||
"#url" : "https://tomfulp.newgrounds.com/favorites/audio",
|
"#url" : "https://tomfulp.newgrounds.com/favorites/audio",
|
||||||
"#category": ("", "newgrounds", "favorite"),
|
|
||||||
"#class" : newgrounds.NewgroundsFavoriteExtractor,
|
"#class" : newgrounds.NewgroundsFavoriteExtractor,
|
||||||
},
|
},
|
||||||
|
|
||||||
{
|
{
|
||||||
"#url" : "https://tomfulp.newgrounds.com/favorites/movies",
|
"#url" : "https://tomfulp.newgrounds.com/favorites/movies",
|
||||||
"#category": ("", "newgrounds", "favorite"),
|
|
||||||
"#class" : newgrounds.NewgroundsFavoriteExtractor,
|
"#class" : newgrounds.NewgroundsFavoriteExtractor,
|
||||||
},
|
},
|
||||||
|
|
||||||
{
|
{
|
||||||
"#url" : "https://tomfulp.newgrounds.com/favorites/",
|
"#url" : "https://tomfulp.newgrounds.com/favorites/",
|
||||||
"#category": ("", "newgrounds", "favorite"),
|
|
||||||
"#class" : newgrounds.NewgroundsFavoriteExtractor,
|
"#class" : newgrounds.NewgroundsFavoriteExtractor,
|
||||||
},
|
},
|
||||||
|
|
||||||
{
|
{
|
||||||
"#url" : "https://tomfulp.newgrounds.com/favorites/following",
|
"#url" : "https://tomfulp.newgrounds.com/favorites/following",
|
||||||
"#category": ("", "newgrounds", "following"),
|
|
||||||
"#class" : newgrounds.NewgroundsFollowingExtractor,
|
"#class" : newgrounds.NewgroundsFollowingExtractor,
|
||||||
"#pattern" : newgrounds.NewgroundsUserExtractor.pattern,
|
"#pattern" : newgrounds.NewgroundsUserExtractor.pattern,
|
||||||
"#range" : "76-125",
|
"#range" : "76-125",
|
||||||
"#count" : 50,
|
"#count" : 50,
|
||||||
},
|
},
|
||||||
|
|
||||||
|
{
|
||||||
|
"#url" : "https://tomfulp.newgrounds.com/favorites/following?page=3",
|
||||||
|
"#class" : newgrounds.NewgroundsFollowingExtractor,
|
||||||
|
},
|
||||||
|
|
||||||
|
|
||||||
{
|
{
|
||||||
"#url" : "https://www.newgrounds.com/search/conduct/art?terms=tree",
|
"#url" : "https://www.newgrounds.com/search/conduct/art?terms=tree",
|
||||||
"#category": ("", "newgrounds", "search"),
|
|
||||||
"#class" : newgrounds.NewgroundsSearchExtractor,
|
"#class" : newgrounds.NewgroundsSearchExtractor,
|
||||||
"#pattern" : newgrounds.NewgroundsImageExtractor.pattern,
|
"#pattern" : newgrounds.NewgroundsImageExtractor.pattern,
|
||||||
"#range" : "1-10",
|
"#range" : "1-10",
|
||||||
@@ -367,7 +391,6 @@ From The ZJ "Late """,
|
|||||||
|
|
||||||
{
|
{
|
||||||
"#url" : "https://www.newgrounds.com/search/conduct/movies?terms=tree",
|
"#url" : "https://www.newgrounds.com/search/conduct/movies?terms=tree",
|
||||||
"#category": ("", "newgrounds", "search"),
|
|
||||||
"#class" : newgrounds.NewgroundsSearchExtractor,
|
"#class" : newgrounds.NewgroundsSearchExtractor,
|
||||||
"#pattern" : r"https://uploads.ungrounded.net(/alternate)?/\d+/\d+",
|
"#pattern" : r"https://uploads.ungrounded.net(/alternate)?/\d+/\d+",
|
||||||
"#range" : "1-10",
|
"#range" : "1-10",
|
||||||
@@ -376,7 +399,6 @@ From The ZJ "Late """,
|
|||||||
|
|
||||||
{
|
{
|
||||||
"#url" : "https://www.newgrounds.com/search/conduct/audio?advanced=1&terms=tree+green+nature&match=tdtu&genre=5&suitabilities=e%2Cm",
|
"#url" : "https://www.newgrounds.com/search/conduct/audio?advanced=1&terms=tree+green+nature&match=tdtu&genre=5&suitabilities=e%2Cm",
|
||||||
"#category": ("", "newgrounds", "search"),
|
|
||||||
"#class" : newgrounds.NewgroundsSearchExtractor,
|
"#class" : newgrounds.NewgroundsSearchExtractor,
|
||||||
},
|
},
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user