[gfycat] add 'user' and 'search' extractors

2020-07-16 14:48:31 +02:00
parent 11b744d971
commit cf44571fe0
2 changed files with 96 additions and 57 deletions
--- a/gallery_dl/extractor/gfycat.py
+++ b/gallery_dl/extractor/gfycat.py
@@ -6,9 +6,11 @@
 # it under the terms of the GNU General Public License version 2 as
 # published by the Free Software Foundation.
-"""Extract images from https://gfycat.com/"""
+"""Extractors for https://gfycat.com/"""
 from .common import Extractor, Message
 from .. import text
 from ..cache import cache
 class GfycatExtractor(Extractor):
@@ -47,11 +49,45 @@ class GfycatExtractor(Extractor):
        return ()
 class GfycatUserExtractor(GfycatExtractor):
    """Extractor for gfycat user profiles"""
    subcategory = "user"
    # Target directory layout: "<category>/<userName>".
    # NOTE(review): "{userName}" is presumably a field of every gfycat
    # object returned by the API -- confirm against GfycatExtractor.
    directory_fmt = ("{category}", "{userName}")
    # Matches profile URLs of the form gfycat.com/@<name> (scheme optional)
    # and captures <name>: everything after the "@" up to the next
    # "/", "?", "&" or "#".
    pattern = r"(?:https?://)?gfycat\.com/@([^/?&#]+)"
    # Sample URL plus expectations for its results: a regex for the file
    # URLs and a minimum result count.
    # NOTE(review): presumably consumed by the project's test suite.
    test = ("https://gfycat.com/@gretta", {
        "pattern": r"https://giant\.gfycat\.com/[A-Za-z]+\.mp4",
        "count": ">= 100",
    })
    def gfycats(self):
        # Delegates to the API client, which yields the user's gfycats
        # page by page.
        # NOTE(review): self.key is presumably the group captured by
        # "pattern"; it is assigned outside this class -- confirm.
        return GfycatAPI(self).user(self.key)
 class GfycatSearchExtractor(GfycatExtractor):
    """Extractor for gfycat search results"""
    subcategory = "search"
    # Target directory layout: "<category>/Search/<search term>".
    # NOTE(review): "{search}" is presumably filled from the dict returned
    # by metadata() below -- confirm against GfycatExtractor.
    directory_fmt = ("{category}", "Search", "{search}")
    # Matches gfycat.com/gifs/search/<term> (scheme optional) and captures
    # <term>: everything up to the next "/", "?", "&" or "#".
    pattern = r"(?:https?://)?gfycat\.com/gifs/search/([^/?&#]+)"
    # Sample URL plus expectations for its results.
    # NOTE(review): presumably consumed by the project's test suite.
    test = ("https://gfycat.com/gifs/search/funny+animals", {
        "pattern": r"https://\w+\.gfycat\.com/[A-Za-z]+\.mp4",
        "archive": False,
        "range": "100-300",
        "count": "> 200",
    })
    def metadata(self):
        """Decode the search term from the URL and return it as metadata"""
        # "+" separates words in the URL ("funny+animals"), so it must be
        # turned into a space BEFORE percent-decoding.  Doing it afterwards
        # would also rewrite a literal plus sign that arrived as "%2B"
        # (e.g. "c%2B%2B" has to become "c++", not "c  ").
        self.key = text.unquote(self.key.replace("+", " "))
        return {"search": self.key}
    def gfycats(self):
        """Return an iterable over all gfycats matching the search term"""
        return GfycatAPI(self).search(self.key)
 class GfycatImageExtractor(GfycatExtractor):
    """Extractor for individual images from gfycat.com"""
    subcategory = "image"
    pattern = (r"(?:https?://)?(?:\w+\.)?gfycat\.com"
-               r"/(?:gifs/detail/|\w+/)?([A-Za-z]+)")
+               r"/(?:gifs/detail/|\w+/)?([A-Za-z]{8,})")
    test = (
        ("https://gfycat.com/GrayGenerousCowrie", {
            "url": "e0b5e1d7223108249b15c3c7898dd358dbfae045",
@@ -86,3 +122,54 @@ class GfycatImageExtractor(GfycatExtractor):
    def gfycats(self):
        url = "https://api.gfycat.com/v1/gfycats/" + self.key
        return (self.request(url).json()["gfyItem"],)
 class GfycatAPI():
    """Minimal client for the gfycat JSON API

    All HTTP traffic goes through the request() method of the extractor
    that owns this object, and every API call carries a bearer token
    obtained with ACCESS_KEY.  A subclass can talk to a different service
    by overriding API_ROOT and ACCESS_KEY.
    """
    # Base URL that endpoint paths (starting with "/") are appended to.
    API_ROOT = "https://api.gfycat.com"
    # Key posted to the web-token endpoint in exchange for an access token.
    ACCESS_KEY = "Anr96uuqt9EdamSCwK4txKPjMsf2M95Rfa5FLLhPFucu8H5HTzeutyAa"

    def __init__(self, extractor):
        """Bind the client to 'extractor', which performs the requests"""
        self.extractor = extractor
        # Headers sent with every API call; _call() adds "Authorization".
        self.headers = {}

    def gfycat(self, gfycat_id):
        """Return the 'gfyItem' object of a single gfycat"""
        endpoint = "/v1/gfycats/" + gfycat_id
        return self._call(endpoint)["gfyItem"]

    def user(self, user):
        """Yield all gfycats of 'user' (the name is lower-cased first)"""
        endpoint = "/v1/users/{}/gfycats".format(user.lower())
        params = {"count": 100}
        return self._pagination(endpoint, params)

    def search(self, query):
        """Yield all gfycats found for the search text 'query'"""
        endpoint = "/v1/gfycats/search"
        params = {"search_text": query, "count": 150}
        return self._pagination(endpoint, params)

    # NOTE(review): 'cache' comes from ..cache; keyarg=1 presumably keys
    # the cached token on 'category' and maxage=3600 presumably keeps it
    # for an hour -- confirm against that module.
    @cache(keyarg=1, maxage=3600)
    def _authenticate_impl(self, category):
        """Request an access token and return it as "Bearer <token>"

        The token host is derived from 'category'
        ("https://weblogin.<category>.com/oauth/webtoken").
        """
        url = "https://weblogin." + category + ".com/oauth/webtoken"
        data = {"access_key": self.ACCESS_KEY}
        headers = {"Referer": self.extractor.root + "/",
                   "Origin" : self.extractor.root}
        response = self.extractor.request(
            url, method="POST", headers=headers, json=data)
        return "Bearer " + response.json()["access_token"]

    def _call(self, endpoint, params=None):
        """Send an authorized request to 'endpoint'; return decoded JSON"""
        url = self.API_ROOT + endpoint
        self.headers["Authorization"] = self._authenticate_impl(
            self.extractor.category)
        return self.extractor.request(
            url, params=params, headers=self.headers).json()

    def _pagination(self, endpoint, params):
        """Yield the gfycats of every result page of 'endpoint'

        'params' must contain "count"; it is updated in place with the
        cursor of the next page.  Iteration ends when a page is empty,
        when a response without a "found" field holds fewer items than
        requested, or when the response provides no cursor to continue.
        """
        while True:
            data = self._call(endpoint, params)
            gfycats = data["gfycats"]
            yield from gfycats

            if not gfycats:
                return
            if "found" not in data and len(gfycats) < params["count"]:
                return

            # Without a usable cursor the next request would either fail
            # on the missing key or repeat the same parameters, so stop
            # here instead.
            cursor = data.get("cursor")
            if not cursor:
                return
            params["cursor"] = cursor
--- a/gallery_dl/extractor/redgifs.py
+++ b/gallery_dl/extractor/redgifs.py
@@ -8,8 +8,8 @@
 """Extractors for https://redgifs.com/"""
-from .gfycat import GfycatExtractor
+from .gfycat import GfycatExtractor, GfycatAPI
-from ..cache import cache
+from .. import text
 class RedgifsExtractor(GfycatExtractor):
@@ -44,7 +44,7 @@ class RedgifsSearchExtractor(RedgifsExtractor):
    })
    def metadata(self):
-        self.key = self.key.replace("-", " ")
+        self.key = text.unquote(self.key).replace("-", " ")
        return {"search": self.key}
    def gfycats(self):
@@ -68,55 +68,7 @@ class RedgifsImageExtractor(RedgifsExtractor):
        return (RedgifsAPI(self).gfycat(self.key),)
-class RedgifsAPI():
+class RedgifsAPI(GfycatAPI):
-
+    API_ROOT = "https://napi.redgifs.com/"
-    def __init__(self, extractor):
+    ACCESS_KEY = ("dBLwVuGn9eq4dtXLs8WSfpjcYFY7bPQe"
-        self.extractor = extractor
+                  "AqGPSFgqeW5B9uzj2cMVhF63pTFF4Rg9")
        self.headers = {}
    def gfycat(self, gfycat_id):
        endpoint = "v1/gfycats/" + gfycat_id
        return self._call(endpoint)["gfyItem"]
    def user(self, user):
        endpoint = "v1/users/{}/gfycats".format(user.lower())
        params = {"count": 100}
        return self._pagination(endpoint, params)
    def search(self, query):
        endpoint = "v1/gfycats/search"
        params = {"search_text": query, "count": 150}
        return self._pagination(endpoint, params)
    @cache(maxage=3600)
    def _authenticate_impl(self):
        url = "https://weblogin.redgifs.com/oauth/webtoken"
        headers = {
            "Referer": "https://www.redgifs.com/",
            "Origin" : "https://www.redgifs.com",
        }
        data = {
            "access_key": "dBLwVuGn9eq4dtXLs8WSfpjcYFY7bPQe"
                          "AqGPSFgqeW5B9uzj2cMVhF63pTFF4Rg9",
        }
        response = self.extractor.request(
            url, method="POST", headers=headers, json=data)
        return "Bearer " + response.json()["access_token"]
    def _call(self, endpoint, params=None):
        self.headers["Authorization"] = self._authenticate_impl()
        url = "https://napi.redgifs.com/" + endpoint
        return self.extractor.request(
            url, params=params, headers=self.headers).json()
    def _pagination(self, endpoint, params):
        while True:
            data = self._call(endpoint, params)
            gfycats = data["gfycats"]
            yield from gfycats
            if "found" not in data and len(gfycats) < params["count"] or \
                    not data["gfycats"]:
                return
            params["cursor"] = data["cursor"]