From dac796879a546fa6bc8aa08b7729506ddde84086 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mike=20F=C3=A4hrmann?= Date: Tue, 9 Aug 2016 14:05:12 +0200 Subject: [PATCH] [imagefap] add extractor --- README.rst | 3 +- gallery_dl/extractor/__init__.py | 1 + gallery_dl/extractor/imagefap.py | 66 ++++++++++++++++++++++++++++++++ 3 files changed, 69 insertions(+), 1 deletion(-) create mode 100644 gallery_dl/extractor/imagefap.py diff --git a/README.rst b/README.rst index 4c90a8ee..f7c4da47 100644 --- a/README.rst +++ b/README.rst @@ -48,7 +48,8 @@ Supported Sites * Japanese: nijie.info, pixiv.net * Western: - deviantart.com, hentai-foundry.com, imgth.com, imgur.com, tumblr.com + deviantart.com, hentai-foundry.com, imagefap.com, imgth.com, imgur.com, + tumblr.com * Futaba Channel-like: 4chan.org, 8ch.net * Image Hosts: diff --git a/gallery_dl/extractor/__init__.py b/gallery_dl/extractor/__init__.py index 2d3e08b4..ca53c0b6 100644 --- a/gallery_dl/extractor/__init__.py +++ b/gallery_dl/extractor/__init__.py @@ -29,6 +29,7 @@ modules = [ "hentaifoundry", "hitomi", "imagebam", + "imagefap", "imagetwist", "imgbox", "imgchili", diff --git a/gallery_dl/extractor/imagefap.py b/gallery_dl/extractor/imagefap.py new file mode 100644 index 00000000..e4bfff11 --- /dev/null +++ b/gallery_dl/extractor/imagefap.py @@ -0,0 +1,66 @@ +# -*- coding: utf-8 -*- + +# Copyright 2016 Mike Fährmann +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License version 2 as +# published by the Free Software Foundation. + +"""Extract images from http://imagefap.com/""" + +from .common import Extractor, Message +from .. 
import text  # completes the `from .. import text` statement begun on the previous line


class ImagefapGalleryExtractor(Extractor):
    """Extractor for image galleries hosted on imagefap.com.

    The numeric gallery id is captured from the URL.  The gallery page
    (requested with ``?view=2``) is downloaded once, and both the gallery
    metadata and the per-image metadata are scraped from that single page.
    """

    category = "imagefap"
    subcategory = "gallery"
    # Format strings; their fields are looked up in the metadata dict that
    # items() yields ("gallery-id", "title", "name", "extension", ...).
    directory_fmt = ["{category}", "{gallery-id} {title}"]
    filename_fmt = "{category}_{gallery-id}_{name}.{extension}"
    # Accepts gallery.php?gid=<id>, gallery/<id> and pictures/<id> URLs,
    # with optional scheme and "www." prefix; group 1 is the gallery id.
    pattern = [(r"(?:https?://)?(?:www\.)?imagefap\.com/"
                r"(?:gallery\.php\?gid=|gallery/|pictures/)(\d+)")]
    test = [("http://www.imagefap.com/gallery/6316217", {
        "url": "d814dc37efcfd3723c30cfe86fd9e51415e7eecf",
        "keyword": "b55344341b1b79f8eb803098d24551bb79cada0f",
        "content": "ead241b083da2e1d01c4c28a5faa1aa32c01700f",
    })]

    def __init__(self, match):
        """Store the gallery id captured by group 1 of ``pattern``."""
        Extractor.__init__(self)
        self.gid = match.group(1)

    def items(self):
        """Yield the extractor messages for this gallery.

        Emits a version message, one directory message carrying the
        gallery metadata, and then one URL message per image.
        """
        # "{filename}" is presumably supplied by text.nameext_from_url()
        # in get_images() -- confirm against that helper.
        imgurl_fmt = ("http://x.imagefapusercontent.com/u/{uploader}/"
                      "{gallery-id}/{image-id}/{filename}")
        url = "http://www.imagefap.com/pictures/" + self.gid + "/?view=2"
        page = self.request(url).text
        data = self.get_job_metadata(page)
        yield Message.Version, 1
        yield Message.Directory, data
        for image in self.get_images(page):
            # The same dict object is updated in place and re-yielded for
            # every image.
            data.update(image)
            yield Message.Url, imgurl_fmt.format(**data), data

    def get_job_metadata(self, page):
        """Collect metadata for extractor-job

        Extracts the "section", "title", "uploader" and "count" fields from
        ``page`` on top of the preset "category" and "gallery-id" values,
        then HTML-unescapes the title.
        """
        # NOTE(review): the markup delimiters (<meta ...>, <title>,
        # </title>, </font>) were restored by hand; without them there is
        # no "title" field and the lookup below fails.  Confirm the exact
        # markers against the live page.
        data = text.extract_all(page, (
            ("section", '<meta name="description" content="', '"'),
            ("title", '<title>Porn pics of ', ' (Page 1)</title>'),
            ("uploader", '>Uploaded by ', '</font>'),
            ("count", ' 1 of ', ' pics"'),
        ), values={"category": self.category, "gallery-id": self.gid})[0]
        data["title"] = text.unescape(data["title"])
        return data

    @staticmethod
    def get_images(page):
        """Collect image-metadata

        Yields one metadata dict per image found in ``page``.  Each dict is
        built by text.nameext_from_url() from the image name and carries
        the "image-id" plus a running number "num" that starts at 1.
        Iteration stops at the first image id that cannot be extracted.
        """
        pos = 0
        num = 0
        while True:
            # NOTE(review): the '<td id="' and '<i>'/'</i>' markers were
            # restored by hand -- confirm against the live page.
            imgid, pos = text.extract(page, '<td id="', '"', pos)
            # Assumes text.extract() returns a falsy first element once the
            # marker is no longer found -- confirm.  Without this guard the
            # loop never terminates.
            if not imgid:
                return
            name, pos = text.extract(page, '<i>', '</i>', pos)
            num += 1
            yield text.nameext_from_url(name, {"image-id": imgid, "num": num})