match user profile handling on deviantart and newgrounds
This commit is contained in:
@@ -133,7 +133,7 @@ extractor.*.path-remove
|
||||
-----------------------
|
||||
=========== =====
|
||||
Type ``string``
|
||||
Default ``"\\u0000-\\u001f\\u007f"`` (ASCII control characters)
|
||||
Default ``"\u0000-\u001f\u007f"`` (ASCII control characters)
|
||||
Description Set of characters to remove from generated path names.
|
||||
|
||||
Note: In a set with 2 or more characters, ``[]^-\`` need to be
|
||||
@@ -551,16 +551,17 @@ Description Provide a ``folders`` metadata field that contains the names of all
|
||||
extractor.deviantart.include
|
||||
----------------------------
|
||||
=========== =====
|
||||
Type ``list`` of ``strings`` or ``string``
|
||||
Default ``["gallery"]``
|
||||
Description Selects the subcategories to include when processing a user profile.
|
||||
Type ``string`` or ``list`` of ``strings``
|
||||
Default ``"gallery"``
|
||||
Example * ``"favorite,journal,scraps"``
|
||||
* ``["favorite", "journal", "scraps"]``
|
||||
Description A (comma-separated) list of subcategories to include
|
||||
when processing a user profile.
|
||||
|
||||
Possible values are ``"gallery"``, ``"scraps"``, ``"journal"``,
|
||||
``"favorite"``.
|
||||
Possible values are
|
||||
``"gallery"``, ``"scraps"``, ``"journal"``, ``"favorite"``.
|
||||
|
||||
It is also possible to use a string with the initial character of
|
||||
each subcategory, i.e. ``"gsj"`` for
|
||||
``["gallery", "scraps", "journal"]``
|
||||
You can use ``"all"`` instead of listing all values separately.
|
||||
=========== =====
|
||||
|
||||
|
||||
@@ -782,6 +783,23 @@ Description Controls how to handle redirects to CAPTCHA pages.
|
||||
=========== =====
|
||||
|
||||
|
||||
extractor.newgrounds.include
|
||||
----------------------------
|
||||
=========== =====
|
||||
Type ``string`` or ``list`` of ``strings``
|
||||
Default ``"art"``
|
||||
Example * ``"movies,audio"``
|
||||
* ``["movies", "audio"]``
|
||||
Description A (comma-separated) list of subcategories to include
|
||||
when processing a user profile.
|
||||
|
||||
Possible values are
|
||||
``"art"``, ``"audio"``, ``"movies"``.
|
||||
|
||||
You can use ``"all"`` instead of listing all values separately.
|
||||
=========== =====
|
||||
|
||||
|
||||
extractor.oauth.browser
|
||||
-----------------------
|
||||
=========== =====
|
||||
|
||||
@@ -254,6 +254,26 @@ class Extractor():
|
||||
fmt = self.config("date-format", "%Y-%m-%dT%H:%M:%S")
|
||||
return get("date-min", dmin), get("date-max", dmax)
|
||||
|
||||
def _dispatch_extractors(self, extractor_data, default=()):
    """Build the message sequence for the selected sub-extractors.

    'extractor_data' is an iterable of (extractor class, URL) pairs.
    Each pair is keyed by the 'subcategory' attribute of its class.

    The 'include' config value (falling back to 'default') selects
    which pairs to use. It may be the string "all", a comma-separated
    string of subcategory names, or an iterable of names. Names that
    match no known subcategory are skipped.

    Returns an iterator yielding (Message.Version, 1) followed by one
    (Message.Queue, url, {"_extractor": class}) tuple per selected pair.
    """
    extractors = {
        data[0].subcategory: data
        for data in extractor_data
    }

    # A falsy config value (None, "", empty list) selects nothing.
    include = self.config("include", default) or ()
    if isinstance(include, str):
        include = include.strip()
        if include == "all":
            # Iterating the mapping yields every known subcategory name.
            include = extractors
        else:
            # Tolerate whitespace around separators, e.g. "art, audio".
            include = [name.strip() for name in include.split(",")]

    result = [(Message.Version, 1)]
    for category in include:
        if category in extractors:
            extr, url = extractors[category]
            result.append((Message.Queue, url, {"_extractor": extr}))
    return iter(result)
|
||||
|
||||
@classmethod
|
||||
def _get_tests(cls):
|
||||
"""Yield an extractor's test cases as (URL, RESULTS) tuples"""
|
||||
|
||||
@@ -265,46 +265,30 @@ class DeviantartExtractor(Extractor):
|
||||
content.update(download)
|
||||
|
||||
|
||||
class DeviantartUserExtractor(Extractor):
|
||||
class DeviantartUserExtractor(DeviantartExtractor):
|
||||
"""Extractor for an artist's user profile"""
|
||||
category = "deviantart"
|
||||
subcategory = "user"
|
||||
pattern = BASE_PATTERN + r"/?$"
|
||||
test = (
|
||||
("https://www.deviantart.com/shimoda7", {
|
||||
"options": (("include", "gsjf"),),
|
||||
"pattern": r"/shimoda7/(gallery(/scraps)?|posts|favourites)",
|
||||
"pattern": r"/shimoda7/gallery$",
|
||||
}),
|
||||
("https://www.deviantart.com/shimoda7", {
|
||||
"options": (("include", "all"),),
|
||||
"pattern": r"/shimoda7/(gallery(/scraps)?|posts|favourites)$",
|
||||
"count": 4,
|
||||
}),
|
||||
("https://shimoda7.deviantart.com/"),
|
||||
)
|
||||
|
||||
def __init__(self, match):
|
||||
Extractor.__init__(self, match)
|
||||
self.user = match.group(1) or match.group(2)
|
||||
|
||||
incl = self.config("include") or "g"
|
||||
if isinstance(incl, list):
|
||||
incl = "".join(item[0] for item in incl if item)
|
||||
self.include = incl.lower()
|
||||
|
||||
def items(self):
|
||||
base = "https://www.deviantart.com/{}/".format(self.user)
|
||||
incl = self.include
|
||||
data = {}
|
||||
|
||||
if "g" in incl:
|
||||
data["_extractor"] = DeviantartGalleryExtractor
|
||||
yield Message.Queue, base + "gallery", data
|
||||
if "s" in incl:
|
||||
data["_extractor"] = DeviantartScrapsExtractor
|
||||
yield Message.Queue, base + "gallery/scraps", data
|
||||
if "j" in incl:
|
||||
data["_extractor"] = DeviantartJournalExtractor
|
||||
yield Message.Queue, base + "posts", data
|
||||
if "f" in incl:
|
||||
data["_extractor"] = DeviantartFavoriteExtractor
|
||||
yield Message.Queue, base + "favourites", data
|
||||
base = "{}/{}/".format(self.root, self.user)
|
||||
return self._dispatch_extractors((
|
||||
(DeviantartGalleryExtractor , base + "gallery"),
|
||||
(DeviantartScrapsExtractor , base + "gallery/scraps"),
|
||||
(DeviantartJournalExtractor , base + "posts"),
|
||||
(DeviantartFavoriteExtractor, base + "favourites"),
|
||||
), ("gallery",))
|
||||
|
||||
|
||||
class DeviantartGalleryExtractor(DeviantartExtractor):
|
||||
|
||||
@@ -319,7 +319,6 @@ class NewgroundsUserExtractor(NewgroundsExtractor):
|
||||
test = (
|
||||
("https://tomfulp.newgrounds.com", {
|
||||
"pattern": "https://tomfulp.newgrounds.com/art$",
|
||||
"count": 1,
|
||||
}),
|
||||
("https://tomfulp.newgrounds.com", {
|
||||
"options": (("include", "all"),),
|
||||
@@ -329,22 +328,9 @@ class NewgroundsUserExtractor(NewgroundsExtractor):
|
||||
)
|
||||
|
||||
def items(self):
|
||||
data = {}
|
||||
extr_map = {
|
||||
"art": NewgroundsArtExtractor,
|
||||
"audio": NewgroundsAudioExtractor,
|
||||
"movies": NewgroundsMoviesExtractor,
|
||||
}
|
||||
|
||||
include = self.config("include", ("art",)) or ()
|
||||
if include == "all":
|
||||
include = extr_map.keys()
|
||||
elif isinstance(include, str):
|
||||
include = include.split(",")
|
||||
|
||||
yield Message.Version, 1
|
||||
for category in include:
|
||||
if category in extr_map:
|
||||
url = self.user_root + "/" + category
|
||||
data["_extractor"] = extr_map[category]
|
||||
yield Message.Queue, url, data
|
||||
base = self.user_root + "/"
|
||||
return self._dispatch_extractors((
|
||||
(NewgroundsArtExtractor , base + "art"),
|
||||
(NewgroundsAudioExtractor , base + "audio"),
|
||||
(NewgroundsMoviesExtractor, base + "movies"),
|
||||
), ("art",))
|
||||
|
||||
Reference in New Issue
Block a user