From cf863f60b3039a7fdfd462509dbf898071cace56 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mike=20F=C3=A4hrmann?= Date: Wed, 10 Jun 2020 22:03:52 +0200 Subject: [PATCH] [redgifs] add 'user' and 'search' extractors (closes #724) --- docs/supportedsites.rst | 2 +- gallery_dl/extractor/gfycat.py | 31 ++++++++------ gallery_dl/extractor/redgifs.py | 76 +++++++++++++++++++++++++++++---- 3 files changed, 87 insertions(+), 22 deletions(-) diff --git a/docs/supportedsites.rst b/docs/supportedsites.rst index a6ea50cf..82b9bd42 100644 --- a/docs/supportedsites.rst +++ b/docs/supportedsites.rst @@ -106,7 +106,7 @@ Read Comic Online https://readcomiconline.to/ Comic Issues, Comics Realbooru https://realbooru.com/ Pools, Posts, Tag Searches RebeccaBlackTech https://rbt.asia/ Threads Reddit https://www.reddit.com/ |reddit-C| Optional (OAuth) -RedGIFs https://redgifs.com/ individual Images +RedGIFs https://redgifs.com/ individual Images, Search Results, User Profiles rule #34 https://rule34.paheal.net/ Posts, Tag Searches Rule 34 https://rule34.xxx/ Pools, Posts, Tag Searches Safebooru https://safebooru.org/ Pools, Posts, Tag Searches diff --git a/gallery_dl/extractor/gfycat.py b/gallery_dl/extractor/gfycat.py index 2ebbec88..9cd3b95c 100644 --- a/gallery_dl/extractor/gfycat.py +++ b/gallery_dl/extractor/gfycat.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- -# Copyright 2017-2019 Mike Fährmann +# Copyright 2017-2020 Mike Fährmann # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License version 2 as @@ -20,8 +20,17 @@ class GfycatExtractor(Extractor): def __init__(self, match): Extractor.__init__(self, match) + self.key = match.group(1) self.formats = (self.config("format", "mp4"), "mp4", "webm", "gif") + def items(self): + metadata = self.metadata() + for gfycat in self.gfycats(): + url = self._select_format(gfycat) + gfycat.update(metadata) + yield Message.Directory, gfycat + yield Message.Url, url, gfycat + def _select_format(self, gfyitem): for fmt in self.formats: key = fmt + "Url" @@ -31,9 +40,11 @@ class GfycatExtractor(Extractor): return url return "" - def _get_info(self, gfycat_id): - url = "https://api.gfycat.com/v1/gfycats/" + gfycat_id - return self.request(url).json()["gfyItem"] + def metadata(self): + return {} + + def gfycats(self): + return () class GfycatImageExtractor(GfycatExtractor): @@ -72,12 +83,6 @@ class GfycatImageExtractor(GfycatExtractor): ("https://gfycat.com/ru/UnequaledHastyAnkole"), ) - def __init__(self, match): - GfycatExtractor.__init__(self, match) - self.gfycat_id = match.group(1) - - def items(self): - gfyitem = self._get_info(self.gfycat_id) - yield Message.Version, 1 - yield Message.Directory, gfyitem - yield Message.Url, self._select_format(gfyitem), gfyitem + def gfycats(self): + url = "https://api.gfycat.com/v1/gfycats/" + self.key + return (self.request(url).json()["gfyItem"],) diff --git a/gallery_dl/extractor/redgifs.py b/gallery_dl/extractor/redgifs.py index 7855eab3..dcbbc0d0 100644 --- a/gallery_dl/extractor/redgifs.py +++ b/gallery_dl/extractor/redgifs.py @@ -8,22 +8,60 @@ """Extractors for https://redgifs.com/""" -from .gfycat import GfycatImageExtractor +from .gfycat import GfycatExtractor from ..cache import cache -class RedgifsImageExtractor(GfycatImageExtractor): - """Extractor for individual images from redgifs.com""" +class RedgifsExtractor(GfycatExtractor): + """Base class for redgifs extractors""" category = "redgifs" + root = "https://www.redgifs.com/" + + +class RedgifsUserExtractor(RedgifsExtractor): + """Extractor for redgifs user profiles""" + subcategory = "user" + directory_fmt = ("{category}", "{userName}") + pattern = r"(?:https?://)?(?:www\.)?redgifs\.com/users/([^/?&#]+)" + test = ("https://www.redgifs.com/users/Natalifiction", { + "pattern": r"https://thcf\d+\.redgifs\.com/[A-Za-z]+\.mp4", + "count": ">= 100", + }) + + def gfycats(self): + return RedgifsAPI(self).user(self.key) + + +class RedgifsSearchExtractor(RedgifsExtractor): + """Extractor for redgifs search results""" + subcategory = "search" + directory_fmt = ("{category}", "Search", "{search}") + pattern = r"(?:https?://)?(?:www\.)?redgifs\.com/gifs/browse/([^/?&#]+)" + test = ("https://www.redgifs.com/gifs/browse/jav", { + "pattern": r"https://thcf\d+\.redgifs\.com/[A-Za-z]+\.mp4", + "range": "100-300", + "count": "> 200", + }) + + def metadata(self): + self.key = self.key.replace("-", " ") + return {"search": self.key} + + def gfycats(self): + return RedgifsAPI(self).search(self.key) + + +class RedgifsImageExtractor(RedgifsExtractor): + """Extractor for individual gifs from redgifs.com""" + subcategory = "image" pattern = r"(?:https?://)?(?:www\.)?redgifs\.com/watch/([A-Za-z]+)" test = ("https://redgifs.com/watch/foolishforkedabyssiniancat", { "pattern": r"https://\w+.redgifs.com/FoolishForkedAbyssiniancat.mp4", "content": "f6e03f1df9a2ff2a74092f53ee7580d2fb943533", }) - def _get_info(self, gfycat_id): - api = RedgifsAPI(self) - return api.gfycat(gfycat_id) + def gfycats(self): + return (RedgifsAPI(self).gfycat(self.key),) class RedgifsAPI(): @@ -36,6 +74,16 @@ class RedgifsAPI(): endpoint = "v1/gfycats/" + gfycat_id return self._call(endpoint)["gfyItem"] + def user(self, user): + endpoint = "v1/users/{}/gfycats".format(user.lower()) + params = {"count": 100} + return self._pagination(endpoint, params) + + def search(self, query): + endpoint = "v1/gfycats/search" + params = {"search_text": query, "count": 150} + return self._pagination(endpoint, params) + @cache(maxage=3600) def _authenticate_impl(self): url = "https://weblogin.redgifs.com/oauth/webtoken" @@ -52,7 +100,19 @@ class RedgifsAPI(): url, method="POST", headers=headers, json=data) return "Bearer " + response.json()["access_token"] - def _call(self, endpoint): + def _call(self, endpoint, params=None): self.headers["Authorization"] = self._authenticate_impl() url = "https://napi.redgifs.com/" + endpoint - return self.extractor.request(url, headers=self.headers).json() + return self.extractor.request( + url, params=params, headers=self.headers).json() + + def _pagination(self, endpoint, params): + while True: + data = self._call(endpoint, params) + gfycats = data["gfycats"] + yield from gfycats + + if "found" not in data and len(gfycats) < params["count"] or \ + not data["gfycats"]: + return + params["cursor"] = data["cursor"]