[gfycat] add 'user' and 'search' extractors

This commit is contained in:
Mike Fährmann
2020-07-16 14:48:31 +02:00
parent 11b744d971
commit cf44571fe0
2 changed files with 96 additions and 57 deletions

View File

@@ -6,9 +6,11 @@
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
"""Extract images from https://gfycat.com/"""
"""Extractors for https://gfycat.com/"""
from .common import Extractor, Message
from .. import text
from ..cache import cache
class GfycatExtractor(Extractor):
@@ -47,11 +49,45 @@ class GfycatExtractor(Extractor):
return ()
class GfycatUserExtractor(GfycatExtractor):
    """Extractor for all gfycats uploaded by a single gfycat user"""
    subcategory = "user"
    directory_fmt = ("{category}", "{userName}")
    pattern = r"(?:https?://)?gfycat\.com/@([^/?&#]+)"
    test = (
        "https://gfycat.com/@gretta",
        {
            "pattern": r"https://giant\.gfycat\.com/[A-Za-z]+\.mp4",
            "count": ">= 100",
        },
    )

    def gfycats(self):
        """Return an iterable over the gfycat objects of user 'self.key'"""
        # self.key is presumably the user name captured by 'pattern';
        # it is set outside of this class -- verify against the base class
        api = GfycatAPI(self)
        return api.user(self.key)
class GfycatSearchExtractor(GfycatExtractor):
    """Extractor for gfycat search results"""
    subcategory = "search"
    directory_fmt = ("{category}", "Search", "{search}")
    pattern = r"(?:https?://)?gfycat\.com/gifs/search/([^/?&#]+)"
    test = ("https://gfycat.com/gifs/search/funny+animals", {
        "pattern": r"https://\w+\.gfycat\.com/[A-Za-z]+\.mp4",
        "archive": False,
        "range": "100-300",
        "count": "> 200",
    })

    def metadata(self):
        """Decode the search term from the URL and return it as metadata

        Stores the decoded term back into 'self.key', which gfycats()
        reads afterwards, and returns it under the "search" key used by
        'directory_fmt'.
        """
        # Translate '+' into spaces BEFORE percent-decoding. Doing it the
        # other way around would also turn an encoded literal plus sign
        # ("%2B") into a space and silently alter the query.
        self.key = text.unquote(self.key.replace("+", " "))
        return {"search": self.key}

    def gfycats(self):
        """Return an iterable over all gfycats matching the search term"""
        # NOTE(review): relies on metadata() having decoded self.key
        # beforehand -- call order is determined outside of this class
        return GfycatAPI(self).search(self.key)
class GfycatImageExtractor(GfycatExtractor):
"""Extractor for individual images from gfycat.com"""
subcategory = "image"
pattern = (r"(?:https?://)?(?:\w+\.)?gfycat\.com"
r"/(?:gifs/detail/|\w+/)?([A-Za-z]+)")
r"/(?:gifs/detail/|\w+/)?([A-Za-z]{8,})")
test = (
("https://gfycat.com/GrayGenerousCowrie", {
"url": "e0b5e1d7223108249b15c3c7898dd358dbfae045",
@@ -86,3 +122,54 @@ class GfycatImageExtractor(GfycatExtractor):
def gfycats(self):
    """Fetch the gfycat named by 'self.key' and return it in a 1-tuple"""
    response = self.request("https://api.gfycat.com/v1/gfycats/" + self.key)
    # the API wraps the actual object in a "gfyItem" envelope
    item = response.json()["gfyItem"]
    return (item,)
class GfycatAPI:
    """Small client for the gfycat HTTP API

    All network requests are sent through the extractor object passed to
    the constructor.
    """
    API_ROOT = "https://api.gfycat.com"
    ACCESS_KEY = "Anr96uuqt9EdamSCwK4txKPjMsf2M95Rfa5FLLhPFucu8H5HTzeutyAa"

    def __init__(self, extractor):
        """Bind this API object to 'extractor', which performs requests"""
        self.extractor = extractor
        # header dict reused for every API call;
        # "Authorization" gets (re)filled in _call()
        self.headers = {}

    def gfycat(self, gfycat_id):
        """Return the "gfyItem" object for a single gfycat ID"""
        endpoint = "/v1/gfycats/" + gfycat_id
        return self._call(endpoint)["gfyItem"]

    def user(self, user):
        """Yield all gfycats of 'user' (name is lowercased for the URL)"""
        endpoint = "/v1/users/{}/gfycats".format(user.lower())
        params = {"count": 100}
        return self._pagination(endpoint, params)

    def search(self, query):
        """Yield all gfycats returned for the search text 'query'"""
        endpoint = "/v1/gfycats/search"
        params = {"search_text": query, "count": 150}
        return self._pagination(endpoint, params)

    # Wrapped by the project's 'cache' decorator; 'keyarg=1' selects the
    # 'category' argument -- presumably the cache key, with 'maxage' in
    # seconds. Verify against the cache module.
    @cache(keyarg=1, maxage=3600)
    def _authenticate_impl(self, category):
        """Request a web token and return an 'Authorization' header value

        POSTs ACCESS_KEY to the 'webtoken' endpoint of the given site
        category and returns the resulting "Bearer <access_token>" string.
        """
        url = "https://weblogin." + category + ".com/oauth/webtoken"
        data = {"access_key": self.ACCESS_KEY}
        headers = {"Referer": self.extractor.root + "/",
                   "Origin" : self.extractor.root}
        response = self.extractor.request(
            url, method="POST", headers=headers, json=data)
        return "Bearer " + response.json()["access_token"]

    def _call(self, endpoint, params=None):
        """Send an authorized GET request and return the decoded JSON"""
        url = self.API_ROOT + endpoint
        self.headers["Authorization"] = self._authenticate_impl(
            self.extractor.category)
        return self.extractor.request(
            url, params=params, headers=self.headers).json()

    def _pagination(self, endpoint, params):
        """Yield gfycat objects from every page of a cursor-based endpoint

        'params' must contain the requested page size under "count".
        The dict is copied, so the caller's object is never modified.
        """
        params = dict(params)
        count = params["count"]

        while True:
            data = self._call(endpoint, params)
            gfycats = data["gfycats"]
            yield from gfycats

            # an empty page always ends the iteration
            if not gfycats:
                return
            # A short page marks the end, except for responses carrying a
            # "found" key (presumably search results), which are only
            # terminated by an empty page.
            if "found" not in data and len(gfycats) < count:
                return

            # Without a usable cursor the next request would either fail
            # (missing key) or start over from the first page (empty
            # value) and never terminate.
            cursor = data.get("cursor")
            if not cursor:
                return
            params["cursor"] = cursor