[flickr] add support for flic.kr/p/... URLs

Example: https://flic.kr/p/FPVo9U
2017-06-01 18:14:33 +02:00
parent 93e5d8cba3
commit 72f1c6f87a
2 changed files with 43 additions and 23 deletions
--- a/gallery_dl/extractor/flickr.py
+++ b/gallery_dl/extractor/flickr.py
@@ -9,7 +9,7 @@
 """Extract images from https://www.flickr.com/"""

 from .common import Extractor, Message
-from .. import text, exception
+from .. import text, util, exception


 class FlickrExtractor(Extractor):
@@ -21,28 +21,14 @@ class FlickrExtractor(Extractor):
        Extractor.__init__(self)
        self.api = FlickrAPI(self)
        self.item_id = match.group(1)
-        self.metadata = self.config("metadata", False)
-
-    @staticmethod
-    def _clean(photo):
-        del photo["comments"]
-        del photo["views"]
-
-        photo["title"] = photo["title"]["_content"]
-        photo["tags"] = [t["raw"] for t in photo["tags"]["tag"]]
-
-        if "location" in photo:
-            location = photo["location"]
-            for key, value in location.items():
-                if isinstance(value, dict):
-                    location[key] = value["_content"]


 class FlickrImageExtractor(FlickrExtractor):
    """Extractor for individual images from flickr.com"""
    subcategory = "image"
-    pattern = [r"(?:https?://)?(?:www\.)?flickr\.com/photos/[^/]+/(\d+)",
-               r"(?:https?://)?[^.]+\.staticflickr\.com/(?:\d+/)+(\d+)_"]
+    pattern = [r"(?:https?://)?(?:www\.|m\.)?flickr\.com/photos/[^/]+/(\d+)",
+               r"(?:https?://)?[^.]+\.static\.?flickr\.com/(?:\d+/)+(\d+)_",
+               r"(?:https?://)?flic\.kr/(p)/([A-Za-z1-9]+)"]
    test = [
        ("https://www.flickr.com/photos/departingyyz/16089302239", {
            "url": "7f0887f5953f61c8b79a695cb102ea309c0346b0",
@@ -50,13 +36,23 @@ class FlickrImageExtractor(FlickrExtractor):
            "content": "6aaad7512d335ca93286fe2046e7fe3bb93d808e",
        }),
        ("http://c2.staticflickr.com/2/1475/24531000464_9a7503ae68_b.jpg", {
-            "url": "40f5163488522ca5d918750ed7bd7fcf437982fe",
-        }),
+            "url": "40f5163488522ca5d918750ed7bd7fcf437982fe"}),
+        ("https://farm2.static.flickr.com/1035/1188352415_cb139831d0.jpg", {
+            "url": "ef217b4fdcb148a0cc9eae44b9342d4a65f6d697"}),
+        ("https://flic.kr/p/FPVo9U", {
+            "url": "92c54a00f31040c349cb2abcb1b9abe30cc508ae"}),
        ("https://www.flickr.com/photos/zzz/16089302238", {
-            "exception": exception.NotFoundError,
-        }),
+            "exception": exception.NotFoundError}),
    ]

+    def __init__(self, match):
+        FlickrExtractor.__init__(self, match)
+        if self.item_id == "p":
+            alphabet = ("123456789abcdefghijkmnopqrstu"
+                        "vwxyzABCDEFGHJKLMNPQRSTUVWXYZ")
+            self.item_id = util.bdecode(match.group(2), alphabet)
+        self.metadata = self.config("metadata", False)
+
    def items(self):
        size = self.api.photos_getSizes(self.item_id)[-1]

@@ -74,6 +70,20 @@ class FlickrImageExtractor(FlickrExtractor):
        yield Message.Directory, info
        yield Message.Url, url, info

+    @staticmethod
+    def _clean(photo):
+        del photo["comments"]
+        del photo["views"]
+
+        photo["title"] = photo["title"]["_content"]
+        photo["tags"] = [t["raw"] for t in photo["tags"]["tag"]]
+
+        if "location" in photo:
+            location = photo["location"]
+            for key, value in location.items():
+                if isinstance(value, dict):
+                    location[key] = value["_content"]
+

 class FlickrAlbumExtractor(FlickrExtractor):
    """Extractor for photo albums from flickr.com"""
--- a/gallery_dl/util.py
+++ b/gallery_dl/util.py
@@ -43,7 +43,7 @@ def optimize_range(ranges):

    Examples
        optimize_range([(2,4), (4,6), (5,8)])         -> [(2,8)]
-        optimize_range([(1,1), (2,2), (3,6), (8,9))]) -> [(1,6), (8-9)]
+        optimize_range([(1,1), (2,2), (3,6), (8,9))]) -> [(1,6), (8,9)]
    """
    if len(ranges) <= 1:
        return ranges
@@ -63,6 +63,16 @@ def optimize_range(ranges):
    return result


+def bdecode(data, alphabet="0123456789"):
+    """Decode a base-N encoded string ( N = len(alphabet) )"""
+    num = 0
+    base = len(alphabet)
+    for c in data:
+        num *= base
+        num += alphabet.index(c)
+    return num
+
+
 def code_to_language(code, default="English"):
    """Map an ISO 639-1 language code to its actual name"""
    return codes.get(code.lower(), default)