"""Extractors for https://gfycat.com/"""

from .common import Extractor, Message
from .. import text
from ..cache import cache


class GfycatUserExtractor(GfycatExtractor):
    """Extractor for gfycat user profiles"""
    subcategory = "user"
    directory_fmt = ("{category}", "{userName}")
    pattern = r"(?:https?://)?gfycat\.com/@([^/?&#]+)"
    test = ("https://gfycat.com/@gretta", {
        "pattern": r"https://giant\.gfycat\.com/[A-Za-z]+\.mp4",
        "count": ">= 100",
    })

    def gfycats(self):
        """Return the gfycats listed by the API for user 'self.key'"""
        # NOTE(review): self.key is presumably the pattern's capture group,
        # assigned by the GfycatExtractor base class -- confirm there.
        return GfycatAPI(self).user(self.key)


class GfycatSearchExtractor(GfycatExtractor):
    """Extractor for gfycat search results"""
    subcategory = "search"
    directory_fmt = ("{category}", "Search", "{search}")
    pattern = r"(?:https?://)?gfycat\.com/gifs/search/([^/?&#]+)"
    test = ("https://gfycat.com/gifs/search/funny+animals", {
        "pattern": r"https://\w+\.gfycat\.com/[A-Za-z]+\.mp4",
        "archive": False,
        "range": "100-300",
        "count": "> 200",
    })

    def metadata(self):
        """Decode the search term from the URL and expose it as 'search'"""
        # Turn the "+" word separators into spaces BEFORE percent-decoding.
        # Doing it afterwards would also rewrite a literal plus sign that
        # was encoded as "%2B" (e.g. "c%2B%2B" would become "c  ").
        self.key = text.unquote(self.key.replace("+", " "))
        return {"search": self.key}

    def gfycats(self):
        """Return the gfycats the API finds for the decoded search term"""
        return GfycatAPI(self).search(self.key)
class GfycatAPI():
    """Small client for the gfycat web API

    API_ROOT and ACCESS_KEY are class attributes, so a subclass can point
    the same request logic at another host by overriding them.
    """
    API_ROOT = "https://api.gfycat.com"
    ACCESS_KEY = "Anr96uuqt9EdamSCwK4txKPjMsf2M95Rfa5FLLhPFucu8H5HTzeutyAa"

    def __init__(self, extractor):
        # The extractor provides request(), 'root' and 'category',
        # all of which are used by the methods below.
        self.extractor = extractor
        self.headers = {}

    def gfycat(self, gfycat_id):
        """Return the "gfyItem" object of a single gfycat"""
        endpoint = "/v1/gfycats/" + gfycat_id
        return self._call(endpoint)["gfyItem"]

    def user(self, user):
        """Yield the gfycats of 'user' (the name is sent lowercased)"""
        endpoint = "/v1/users/{}/gfycats".format(user.lower())
        params = {"count": 100}
        return self._pagination(endpoint, params)

    def search(self, query):
        """Yield the gfycats returned for the search text 'query'"""
        endpoint = "/v1/gfycats/search"
        params = {"search_text": query, "count": 150}
        return self._pagination(endpoint, params)

    @cache(keyarg=1, maxage=3600)
    def _authenticate_impl(self, category):
        """Request a web token and return it as "Bearer <token>"

        'category' selects the login host "weblogin.<category>.com".
        The result is wrapped by cache(keyarg=1, maxage=3600).
        """
        url = "https://weblogin." + category + ".com/oauth/webtoken"
        data = {"access_key": self.ACCESS_KEY}
        headers = {"Referer": self.extractor.root + "/",
                   "Origin" : self.extractor.root}
        response = self.extractor.request(
            url, method="POST", headers=headers, json=data)
        return "Bearer " + response.json()["access_token"]

    def _call(self, endpoint, params=None):
        """Send an authorized request to 'endpoint'; return decoded JSON"""
        # Every endpoint starts with "/". Strip a trailing slash from the
        # root so an API_ROOT ending in "/" cannot produce "//v1/...".
        url = self.API_ROOT.rstrip("/") + endpoint
        self.headers["Authorization"] = self._authenticate_impl(
            self.extractor.category)
        return self.extractor.request(
            url, params=params, headers=self.headers).json()

    def _pagination(self, endpoint, params):
        """Yield the "gfycats" entries of 'endpoint', page after page

        'params' must contain "count"; it is updated in place with the
        "cursor" value needed to request the following page.
        """
        while True:
            data = self._call(endpoint, params)
            gfycats = data["gfycats"]
            yield from gfycats

            # An empty page always ends the listing. A short page ends it
            # only for responses that carry no "found" field.
            if not gfycats or (
                    "found" not in data and len(gfycats) < params["count"]):
                return

            # Without a usable cursor there is no next page to address;
            # stop instead of raising KeyError or re-requesting page one.
            cursor = data.get("cursor")
            if not cursor:
                return
            params["cursor"] = cursor
class RedgifsAPI(GfycatAPI):
    """GfycatAPI variant that talks to the redgifs API host"""
    # No trailing slash here: the endpoints built by GfycatAPI already
    # start with "/", so "https://napi.redgifs.com/" + "/v1/..." would
    # result in a doubled slash in every request URL.
    API_ROOT = "https://napi.redgifs.com"
    ACCESS_KEY = ("dBLwVuGn9eq4dtXLs8WSfpjcYFY7bPQe"
                  "AqGPSFgqeW5B9uzj2cMVhF63pTFF4Rg9")