diff --git a/docs/configuration.rst b/docs/configuration.rst
index 59fa8fc2..f2a3aa35 100644
--- a/docs/configuration.rst
+++ b/docs/configuration.rst
@@ -432,15 +432,17 @@ Description
"isAdult" : "1"
}
- * A ``list`` with up to 3 entries specifying a browser profile.
+ * A ``list`` with up to 4 entries specifying a browser profile.
* The first entry is the browser name
* The optional second entry is a profile name or an absolute path to a profile directory
* The optional third entry is the keyring to retrieve passwords for decrypting cookies from
+ * The optional fourth entry is a (Firefox) container name (``"none"`` for only cookies with no container)
.. code:: json
["firefox"]
+ ["firefox", null, null, "Personal"]
["chromium", "Private", "kwallet"]
diff --git a/docs/supportedsites.md b/docs/supportedsites.md
index fffe3ac6..3c7d6cf2 100644
--- a/docs/supportedsites.md
+++ b/docs/supportedsites.md
@@ -433,12 +433,6 @@ Consider all sites to be NSFW unless otherwise known.
Soundtracks |
|
-
- | Kiss Goddess |
- https://kissgoddess.com/ |
- Galleries, Models |
- |
-
| Kohlchan |
https://kohlchan.net/ |
@@ -447,7 +441,7 @@ Consider all sites to be NSFW unless otherwise known.
| Komikcast |
- https://komikcast.me/ |
+ https://komikcast.site/ |
Chapters, Manga |
|
@@ -1269,12 +1263,6 @@ Consider all sites to be NSFW unless otherwise known.
| FoOlSlide Instances |
-
- | Kirei Cake |
- https://reader.kireicake.com/ |
- Chapters, Manga |
- |
-
| PowerManga |
https://read.powermanga.org/ |
diff --git a/gallery_dl/__init__.py b/gallery_dl/__init__.py
index 3701d6fd..611b2b92 100644
--- a/gallery_dl/__init__.py
+++ b/gallery_dl/__init__.py
@@ -66,7 +66,12 @@ def main():
if args.cookies_from_browser:
browser, _, profile = args.cookies_from_browser.partition(":")
browser, _, keyring = browser.partition("+")
- config.set((), "cookies", (browser, profile, keyring))
+ if profile.startswith(":"):
+ container = profile[1:]
+ profile = None
+ else:
+ profile, _, container = profile.partition("::")
+ config.set((), "cookies", (browser, profile, keyring, container))
for opts in args.options:
config.set(*opts)
diff --git a/gallery_dl/cookies.py b/gallery_dl/cookies.py
index 6f9a92db..ee00bf74 100644
--- a/gallery_dl/cookies.py
+++ b/gallery_dl/cookies.py
@@ -24,7 +24,7 @@ import tempfile
from datetime import datetime, timedelta, timezone
from hashlib import pbkdf2_hmac
from http.cookiejar import Cookie
-from . import aes
+from . import aes, text
SUPPORTED_BROWSERS_CHROMIUM = {
@@ -35,11 +35,10 @@ logger = logging.getLogger("cookies")
def load_cookies(cookiejar, browser_specification):
- browser_name, profile, keyring = \
+ browser_name, profile, keyring, container = \
_parse_browser_specification(*browser_specification)
-
if browser_name == "firefox":
- load_cookies_firefox(cookiejar, profile)
+ load_cookies_firefox(cookiejar, profile, container)
elif browser_name == "safari":
load_cookies_safari(cookiejar, profile)
elif browser_name in SUPPORTED_BROWSERS_CHROMIUM:
@@ -48,12 +47,24 @@ def load_cookies(cookiejar, browser_specification):
raise ValueError("unknown browser '{}'".format(browser_name))
-def load_cookies_firefox(cookiejar, profile=None):
- set_cookie = cookiejar.set_cookie
- with _firefox_cookies_database(profile) as db:
+def load_cookies_firefox(cookiejar, profile=None, container=None):
+ path, container_id = _firefox_cookies_database(profile, container)
+ with DatabaseCopy(path) as db:
+
+ sql = ("SELECT name, value, host, path, isSecure, expiry "
+ "FROM moz_cookies")
+ parameters = ()
+
+ if container_id is False:
+ sql += " WHERE NOT INSTR(originAttributes,'userContextId=')"
+ elif container_id:
+ sql += " WHERE originAttributes LIKE ? OR originAttributes LIKE ?"
+ uid = "%userContextId={}".format(container_id)
+ parameters = (uid, uid + "&%")
+
+ set_cookie = cookiejar.set_cookie
for name, value, domain, path, secure, expires in db.execute(
- "SELECT name, value, host, path, isSecure, expiry "
- "FROM moz_cookies"):
+ sql, parameters):
set_cookie(Cookie(
0, name, value, None, False,
domain, bool(domain), domain.startswith("."),
@@ -79,9 +90,10 @@ def load_cookies_safari(cookiejar, profile=None):
def load_cookies_chrome(cookiejar, browser_name, profile, keyring):
config = _get_chromium_based_browser_settings(browser_name)
+ path = _chrome_cookies_database(profile, config)
+ logger.debug("Extracting cookies from %s", path)
- with _chrome_cookies_database(profile, config) as db:
-
+ with DatabaseCopy(path) as db:
db.text_factory = bytes
decryptor = get_cookie_decryptor(
config["directory"], config["keyring"], keyring=keyring)
@@ -134,8 +146,8 @@ def load_cookies_chrome(cookiejar, browser_name, profile, keyring):
# --------------------------------------------------------------------
# firefox
-def _firefox_cookies_database(profile=None):
- if profile is None:
+def _firefox_cookies_database(profile=None, container=None):
+ if not profile:
search_root = _firefox_browser_directory()
elif _is_path(profile):
search_root = profile
@@ -146,14 +158,45 @@ def _firefox_cookies_database(profile=None):
if path is None:
raise FileNotFoundError("Unable to find Firefox cookies database in "
"{}".format(search_root))
-
logger.debug("Extracting cookies from %s", path)
- return DatabaseCopy(path)
+
+ if container == "none":
+ container_id = False
+ logger.debug("Only loading cookies not belonging to any container")
+
+ elif container:
+ containers_path = os.path.join(
+ os.path.dirname(path), "containers.json")
+
+ try:
+ with open(containers_path) as containers:
+ identities = json.load(containers)["identities"]
+ except OSError:
+ logger.error("Unable to read Firefox container database at %s",
+ containers_path)
+ raise
+ except KeyError:
+ identities = ()
+
+ for context in identities:
+ if container == context.get("name") or container == text.extr(
+ context.get("l10nID", ""), "userContext", ".label"):
+ container_id = context["userContextId"]
+ break
+ else:
+ raise ValueError("Unable to find Firefox container {}".format(
+ container))
+ logger.debug("Only loading cookies from container '%s' (ID %s)",
+ container, container_id)
+ else:
+ container_id = None
+
+ return path, container_id
def _firefox_browser_directory():
if sys.platform in ("win32", "cygwin"):
- return os.path.expandvars(R"%APPDATA%\Mozilla\Firefox\Profiles")
+ return os.path.expandvars(r"%APPDATA%\Mozilla\Firefox\Profiles")
if sys.platform == "darwin":
return os.path.expanduser("~/Library/Application Support/Firefox")
return os.path.expanduser("~/.mozilla/firefox")
@@ -237,7 +280,7 @@ def _safari_parse_cookies_record(data, cookiejar):
cookiejar.set_cookie(Cookie(
0, name, value, None, False,
- domain, bool(domain), domain.startswith('.'),
+ domain, bool(domain), domain.startswith("."),
path, bool(path), is_secure, expiration_date, False,
None, None, {},
))
@@ -265,9 +308,7 @@ def _chrome_cookies_database(profile, config):
if path is None:
raise FileNotFoundError("Unable to find {} cookies database in "
"'{}'".format(config["browser"], search_root))
-
- logger.debug("Extracting cookies from %s", path)
- return DatabaseCopy(path)
+ return path
def _get_chromium_based_browser_settings(browser_name):
@@ -937,11 +978,12 @@ def _is_path(value):
return os.path.sep in value
-def _parse_browser_specification(browser, profile=None, keyring=None):
+def _parse_browser_specification(
+ browser, profile=None, keyring=None, container=None):
if browser not in SUPPORTED_BROWSERS:
raise ValueError("unsupported browser '{}'".format(browser))
if keyring and keyring not in SUPPORTED_KEYRINGS:
raise ValueError("unsupported keyring '{}'".format(keyring))
if profile and _is_path(profile):
profile = os.path.expanduser(profile)
- return browser, profile, keyring
+ return browser, profile, keyring, container
diff --git a/gallery_dl/extractor/35photo.py b/gallery_dl/extractor/35photo.py
index 28acc3d5..f86691d4 100644
--- a/gallery_dl/extractor/35photo.py
+++ b/gallery_dl/extractor/35photo.py
@@ -146,6 +146,7 @@ class _35photoTagExtractor(_35photoExtractor):
test = ("https://35photo.pro/tags/landscape/", {
"range": "1-25",
"count": 25,
+ "archive": False,
})
def __init__(self, match):
diff --git a/gallery_dl/extractor/8chan.py b/gallery_dl/extractor/8chan.py
index 1e020c25..0e128c3a 100644
--- a/gallery_dl/extractor/8chan.py
+++ b/gallery_dl/extractor/8chan.py
@@ -92,8 +92,8 @@ class _8chanThreadExtractor(_8chanExtractor):
"uniquePosters": 9,
"usesCustomCss": True,
"usesCustomJs": False,
- "wsPort": 8880,
- "wssPort": 2087,
+ "?wsPort": 8880,
+ "?wssPort": 2087,
},
}),
("https://8chan.se/vhs/res/4.html"),
diff --git a/gallery_dl/extractor/__init__.py b/gallery_dl/extractor/__init__.py
index d2bbcbb7..3b553c84 100644
--- a/gallery_dl/extractor/__init__.py
+++ b/gallery_dl/extractor/__init__.py
@@ -74,7 +74,6 @@ modules = [
"keenspot",
"kemonoparty",
"khinsider",
- "kissgoddess",
"kohlchan",
"komikcast",
"lightroom",
diff --git a/gallery_dl/extractor/deviantart.py b/gallery_dl/extractor/deviantart.py
index 75785709..be7e7197 100644
--- a/gallery_dl/extractor/deviantart.py
+++ b/gallery_dl/extractor/deviantart.py
@@ -915,20 +915,6 @@ class DeviantartDeviationExtractor(DeviantartExtractor):
DeviantartStashExtractor.pattern),
"count": 2,
}),
- # video
- ("https://www.deviantart.com/chi-u/art/-VIDEO-Brushes-330774593", {
- "pattern": r"https://wixmp-.+wixmp.com/v/mp4/.+\.720p\.\w+.mp4",
- "keyword": {
- "filename": r"re:_video____brushes_\w+_by_chi_u-d5gxnb5",
- "extension": "mp4",
- "target": {
- "duration": 306,
- "filesize": 19367585,
- "quality": "720p",
- "src": str,
- },
- }
- }),
# journal
("https://www.deviantart.com/shimoda7/journal/ARTility-583755752", {
"url": "d34b2c9f873423e665a1b8ced20fcb75951694a3",
diff --git a/gallery_dl/extractor/erome.py b/gallery_dl/extractor/erome.py
index b4dadc7e..ad3f16ba 100644
--- a/gallery_dl/extractor/erome.py
+++ b/gallery_dl/extractor/erome.py
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
-# Copyright 2021 Mike Fährmann
+# Copyright 2021-2022 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -92,16 +92,29 @@ class EromeAlbumExtractor(EromeExtractor):
"""Extractor for albums on erome.com"""
subcategory = "album"
pattern = BASE_PATTERN + r"/a/(\w+)"
- test = ("https://www.erome.com/a/TyFMI7ik", {
- "pattern": r"https://s\d+\.erome\.com/\d+/TyFMI7ik/\w+",
- "count": 9,
- "keyword": {
- "album_id": "TyFMI7ik",
- "num": int,
- "title": "Ryan Ryans",
- "user": "xanub",
- },
- })
+ test = (
+ ("https://www.erome.com/a/NQgdlWvk", {
+ "pattern": r"https://v\d+\.erome\.com/\d+"
+ r"/NQgdlWvk/j7jlzmYB_480p\.mp4",
+ "count": 1,
+ "keyword": {
+ "album_id": "NQgdlWvk",
+ "num": 1,
+ "title": "porn",
+ "user": "yYgWBZw8o8qsMzM",
+ },
+ }),
+ ("https://www.erome.com/a/TdbZ4ogi", {
+ "pattern": r"https://s\d+\.erome\.com/\d+/TdbZ4ogi/\w+",
+ "count": 6,
+ "keyword": {
+ "album_id": "TdbZ4ogi",
+ "num": int,
+ "title": "82e78cfbb461ad87198f927fcb1fda9a1efac9ff.",
+ "user": "yYgWBZw8o8qsMzM",
+ },
+ }),
+ )
def albums(self):
return (self.item,)
@@ -110,7 +123,7 @@ class EromeAlbumExtractor(EromeExtractor):
class EromeUserExtractor(EromeExtractor):
subcategory = "user"
pattern = BASE_PATTERN + r"/(?!a/|search\?)([^/?#]+)"
- test = ("https://www.erome.com/xanub", {
+ test = ("https://www.erome.com/yYgWBZw8o8qsMzM", {
"range": "1-25",
"count": 25,
})
diff --git a/gallery_dl/extractor/exhentai.py b/gallery_dl/extractor/exhentai.py
index a546f684..dccc74e4 100644
--- a/gallery_dl/extractor/exhentai.py
+++ b/gallery_dl/extractor/exhentai.py
@@ -117,9 +117,10 @@ class ExhentaiGalleryExtractor(ExhentaiExtractor):
r"|/s/([\da-f]{10})/(\d+)-(\d+))")
test = (
("https://exhentai.org/g/1200119/d55c44d3d0/", {
+ "options": (("original", False),),
"keyword": {
"cost": int,
- "date": "dt:2018-03-18 20:15:00",
+ "date": "dt:2018-03-18 20:14:00",
"eh_category": "Non-H",
"expunged": False,
"favorites": r"re:^[12]\d$",
@@ -150,7 +151,8 @@ class ExhentaiGalleryExtractor(ExhentaiExtractor):
"uploader": "klorpa",
"width": int,
},
- "content": "e9891a4c017ed0bb734cd1efba5cd03f594d31ff",
+ "content": ("2c68cff8a7ca540a78c36fdbf5fbae0260484f87",
+ "e9891a4c017ed0bb734cd1efba5cd03f594d31ff"),
}),
("https://exhentai.org/g/960461/4f0e369d82/", {
"exception": exception.NotFoundError,
@@ -159,9 +161,11 @@ class ExhentaiGalleryExtractor(ExhentaiExtractor):
"exception": exception.AuthorizationError,
}),
("https://exhentai.org/s/f68367b4c8/1200119-3", {
+ "options": (("original", False),),
"count": 2,
}),
("https://e-hentai.org/s/f68367b4c8/1200119-3", {
+ "options": (("original", False),),
"count": 2,
}),
("https://g.e-hentai.org/g/1200119/d55c44d3d0/"),
@@ -516,7 +520,7 @@ class ExhentaiSearchExtractor(ExhentaiExtractor):
data["gallery_token"] = gallery.group(3)
yield Message.Queue, url + "/", data
- next_url = text.extr(page, 'nexturl = "', '"', None)
+ next_url = text.extr(page, 'nexturl="', '"', None)
if next_url is not None:
if not next_url:
return
diff --git a/gallery_dl/extractor/foolslide.py b/gallery_dl/extractor/foolslide.py
index 81671ecd..2290cc25 100644
--- a/gallery_dl/extractor/foolslide.py
+++ b/gallery_dl/extractor/foolslide.py
@@ -39,10 +39,6 @@ class FoolslideExtractor(BaseExtractor):
BASE_PATTERN = FoolslideExtractor.update({
- "kireicake": {
- "root": "https://reader.kireicake.com",
- "pattern": r"reader\.kireicake\.com",
- },
"powermanga": {
"root": "https://read.powermanga.org",
"pattern": r"read(?:er)?\.powermanga\.org",
@@ -64,10 +60,6 @@ class FoolslideChapterExtractor(FoolslideExtractor):
archive_fmt = "{id}"
pattern = BASE_PATTERN + r"(/read/[^/?#]+/[a-z-]+/\d+/\d+(?:/\d+)?)"
test = (
- ("https://reader.kireicake.com/read/wonderland/en/1/1/", {
- "url": "b2d36bc0bc67e4c461c3a4d6444a2fd339f5d07e",
- "keyword": "9f80947920a325e33aea7f5cd69ea669171903b6",
- }),
(("https://read.powermanga.org"
"/read/one_piece_digital_colour_comics/en/0/75/"), {
"url": "854c5817f8f767e1bccd05fa9d58ffb5a4b09384",
@@ -123,10 +115,6 @@ class FoolslideMangaExtractor(FoolslideExtractor):
categorytransfer = True
pattern = BASE_PATTERN + r"(/series/[^/?#]+)"
test = (
- ("https://reader.kireicake.com/series/wonderland/", {
- "url": "d067b649af1cc88fa8c8b698fde04a10909fd169",
- "keyword": "268f43772fb239888ca5c5f6a4f65f99ffb3eefb",
- }),
(("https://read.powermanga.org"
"/series/one_piece_digital_colour_comics/"), {
"count": ">= 1",
diff --git a/gallery_dl/extractor/gelbooru_v02.py b/gallery_dl/extractor/gelbooru_v02.py
index da87b8f1..facd3dbe 100644
--- a/gallery_dl/extractor/gelbooru_v02.py
+++ b/gallery_dl/extractor/gelbooru_v02.py
@@ -174,7 +174,8 @@ class GelbooruV02TagExtractor(GelbooruV02Extractor):
pattern = BASE_PATTERN + r"/index\.php\?page=post&s=list&tags=([^]+)"
test = (
("https://rule34.xxx/index.php?page=post&s=list&tags=danraku", {
- "content": "5c6ae9ee13e6d4bc9cb8bdce224c84e67fbfa36c",
+ "content": ("5c6ae9ee13e6d4bc9cb8bdce224c84e67fbfa36c",
+ "622e80be3f496672c44aab5c47fbc6941c61bc79"),
"pattern": r"https?://.*rule34\.xxx/images/\d+/[0-9a-f]+\.jpg",
"count": 2,
}),
diff --git a/gallery_dl/extractor/imagehosts.py b/gallery_dl/extractor/imagehosts.py
index 6fcfc555..207562a3 100644
--- a/gallery_dl/extractor/imagehosts.py
+++ b/gallery_dl/extractor/imagehosts.py
@@ -200,7 +200,7 @@ class ImagetwistImageExtractor(ImagehostImageExtractor):
return self.request(self.page_url).cookies
def get_info(self, page):
- url , pos = text.extract(page, 'center;">
", "")),
+ "count": text.parse_int(extr(
+ "total of images in this gallery: ", " ")),
+ "date" : text.parse_datetime(
+ extr("created on ", " by <")
+ .replace("th, ", " ", 1).replace("nd, ", " ", 1)
+ .replace("st, ", " ", 1), "%B %d %Y at %H:%M"),
+ "user" : text.unescape(extr(">", "<")),
+ }
def images(self, page):
- """Yield all image urls for this gallery"""
pnum = 0
+
while True:
thumbs = text.extr(page, '')
for url in text.extract_iter(thumbs, '
' not in page:
return
- pnum += 1
- page = self.request(self.url_base + str(pnum)).text
- def metadata(self, page):
- """Collect metadata for extractor-job"""
- return text.extract_all(page, (
- ("title", '', '
'),
- ("count", 'total of images in this gallery: ', ' '),
- ("date" , 'created on ', ' by <'),
- (None , 'href="/users/', ''),
- ("user" , '>', '<'),
- ), values={"gallery_id": self.gid})[0]
+ pnum += 1
+ url = "{}/gallery/{}/g/page/{}".format(
+ self.root, self.gallery_id, pnum)
+ page = self.request(url).text
diff --git a/gallery_dl/extractor/khinsider.py b/gallery_dl/extractor/khinsider.py
index d5cca1c2..0c3b002f 100644
--- a/gallery_dl/extractor/khinsider.py
+++ b/gallery_dl/extractor/khinsider.py
@@ -65,7 +65,7 @@ class KhinsiderSoundtrackExtractor(AsynchronousMixin, Extractor):
"count": text.parse_int(extr("Number of Files: ", "<")),
"size" : text.parse_bytes(extr("Total Filesize: ", "<")[:-1]),
"date" : extr("Date Added: ", "<"),
- "type" : extr("Album type: ", "<"),
+ "type" : text.remove_html(extr("Album type: ", "")),
}}
def tracks(self, page):
diff --git a/gallery_dl/extractor/kissgoddess.py b/gallery_dl/extractor/kissgoddess.py
deleted file mode 100644
index 4ec685c2..00000000
--- a/gallery_dl/extractor/kissgoddess.py
+++ /dev/null
@@ -1,82 +0,0 @@
-# -*- coding: utf-8 -*-
-
-# Copyright 2022 Mike Fährmann
-#
-# This program is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License version 2 as
-# published by the Free Software Foundation.
-
-"""Extractors for https://kissgoddess.com/"""
-
-from .common import GalleryExtractor, Extractor, Message
-from .. import text, exception
-
-
-class KissgoddessGalleryExtractor(GalleryExtractor):
- """Extractor for image galleries on kissgoddess.com"""
- category = "kissgoddess"
- root = "https://kissgoddess.com"
- pattern = r"(?:https?://)?(?:www\.)?kissgoddess\.com/album/(\d+)"
- test = ("https://kissgoddess.com/album/18285.html", {
- "pattern": r"https://pic\.kissgoddess\.com"
- r"/gallery/16473/18285/s/\d+\.jpg",
- "count": 19,
- "keyword": {
- "gallery_id": 18285,
- "title": "[Young Champion Extra] 2016.02 No.03 菜乃花 安枝瞳 葉月あや",
- },
- })
-
- def __init__(self, match):
- self.gallery_id = match.group(1)
- url = "{}/album/{}.html".format(self.root, self.gallery_id)
- GalleryExtractor.__init__(self, match, url)
-
- def metadata(self, page):
- return {
- "gallery_id": text.parse_int(self.gallery_id),
- "title" : text.extr(
- page, '', "<")[0].rpartition(" | "),
- }
-
- def images(self, page):
- pnum = 1
-
- while page:
- for url in text.extract_iter(page, "
= 7",
- })
-
- def __init__(self, match):
- Extractor.__init__(self, match)
- self.model = match.group(1)
-
- def items(self):
- url = "{}/people/{}.html".format(self.root, self.model)
- page = self.request(url).text
-
- data = {"_extractor": KissgoddessGalleryExtractor}
- for path in text.extract_iter(page, 'thumb">", " – Komikcast<")
+ info = text.extr(page, "", " - Komikcast<")
return self.parse_chapter_string(info)
@staticmethod
@@ -76,12 +76,12 @@ class KomikcastChapterExtractor(KomikcastBase, ChapterExtractor):
class KomikcastMangaExtractor(KomikcastBase, MangaExtractor):
- """Extractor for manga from komikcast.me"""
+ """Extractor for manga from komikcast.site"""
chapterclass = KomikcastChapterExtractor
pattern = BASE_PATTERN + r"(/(?:komik/)?[^/?#]+)/?$"
test = (
- ("https://komikcast.me/komik/090-eko-to-issho/", {
- "url": "08204f0a703ec5272121abcf0632ecacba1e588f",
+ ("https://komikcast.site/komik/090-eko-to-issho/", {
+ "url": "19d3d50d532e84be6280a3d61ff0fd0ca04dd6b4",
"keyword": "837a7e96867344ff59d840771c04c20dc46c0ab1",
}),
("https://komikcast.me/tonari-no-kashiwagi-san/"),
@@ -101,7 +101,7 @@ class KomikcastMangaExtractor(KomikcastBase, MangaExtractor):
@staticmethod
def metadata(page):
"""Return a dict with general metadata"""
- manga , pos = text.extract(page, "" , " – Komikcast<")
+ manga , pos = text.extract(page, "" , " - Komikcast<")
genres, pos = text.extract(
page, 'class="komik_info-content-genre">', "", pos)
author, pos = text.extract(page, ">Author:", "", pos)
diff --git a/gallery_dl/extractor/mangadex.py b/gallery_dl/extractor/mangadex.py
index 0bc35274..dae203e7 100644
--- a/gallery_dl/extractor/mangadex.py
+++ b/gallery_dl/extractor/mangadex.py
@@ -109,7 +109,7 @@ class MangadexChapterExtractor(MangadexExtractor):
}),
# 'externalUrl', but still downloadable (#2503)
("https://mangadex.org/chapter/364728a4-6909-4164-9eea-6b56354f7c78", {
- "count": 39,
+ "count": 0, # 404
}),
)
diff --git a/gallery_dl/extractor/redgifs.py b/gallery_dl/extractor/redgifs.py
index 53e5e790..ad4282c8 100644
--- a/gallery_dl/extractor/redgifs.py
+++ b/gallery_dl/extractor/redgifs.py
@@ -72,7 +72,7 @@ class RedgifsUserExtractor(RedgifsExtractor):
pattern = r"(?:https?://)?(?:www\.)?redgifs\.com/users/([^/?#]+)"
test = ("https://www.redgifs.com/users/Natalifiction", {
"pattern": r"https://\w+\.redgifs\.com/[A-Za-z]+\.mp4",
- "count": ">= 120",
+ "count": ">= 100",
})
def metadata(self):
@@ -89,7 +89,7 @@ class RedgifsSearchExtractor(RedgifsExtractor):
pattern = r"(?:https?://)?(?:www\.)?redgifs\.com/browse/?\?([^#]+)"
test = (
("https://www.redgifs.com/browse?tags=JAV", {
- "pattern": r"https://\w+\.redgifs\.com/[A-Za-z-]+\.mp4",
+ "pattern": r"https://\w+\.redgifs\.com/[A-Za-z-]+\.(mp4|jpg)",
"range": "1-10",
"count": 10,
}),
diff --git a/gallery_dl/extractor/slickpic.py b/gallery_dl/extractor/slickpic.py
index ae4e2e8a..3727c0b0 100644
--- a/gallery_dl/extractor/slickpic.py
+++ b/gallery_dl/extractor/slickpic.py
@@ -43,7 +43,8 @@ class SlickpicAlbumExtractor(SlickpicExtractor):
}),
("https://mattcrandall.slickpic.com/albums/LamborghiniMurcielago/", {
"range": "34",
- "content": ("52b5a310587de1048030ab13a912f6a3a9cc7dab",
+ "content": ("276eb2c902187bb177ae8013e310e1d6641fba9a",
+ "52b5a310587de1048030ab13a912f6a3a9cc7dab",
"cec6630e659dc72db1ee1a9a6f3b525189261988",
"6f81e1e74c6cd6db36844e7211eef8e7cd30055d",
"22e83645fc242bc3584eca7ec982c8a53a4d8a44"),
diff --git a/gallery_dl/extractor/smugmug.py b/gallery_dl/extractor/smugmug.py
index 2264fe48..713d4c41 100644
--- a/gallery_dl/extractor/smugmug.py
+++ b/gallery_dl/extractor/smugmug.py
@@ -117,7 +117,7 @@ class SmugmugImageExtractor(SmugmugExtractor):
# video
("https://tstravels.smugmug.com/Dailies/Daily-Dose-2015/i-39JFNzB", {
"url": "04d0ab1ff829ca7d78f5acb5548953df08e9a5ee",
- "keyword": "4cef98133ace511adc874c9d9abac5817ba0d856",
+ "keyword": "2b545184592c282b365fcbb7df6ca7952b8a3173",
}),
)
diff --git a/gallery_dl/extractor/twibooru.py b/gallery_dl/extractor/twibooru.py
index f010f926..30bf2f15 100644
--- a/gallery_dl/extractor/twibooru.py
+++ b/gallery_dl/extractor/twibooru.py
@@ -83,7 +83,7 @@ class TwibooruPostExtractor(TwibooruExtractor):
"tag_ids": list,
"tags": list,
"thumbnails_generated": True,
- "updated_at": "2022-09-21T14:31:50.441Z",
+ "updated_at": "2022-11-27T00:34:50.483Z",
"upvotes": int,
"view_url": "https://cdn.twibooru.org/img/2020/7/8/1/full.png",
"width": 576,
diff --git a/gallery_dl/extractor/twitter.py b/gallery_dl/extractor/twitter.py
index 22d4a6ec..d0411acf 100644
--- a/gallery_dl/extractor/twitter.py
+++ b/gallery_dl/extractor/twitter.py
@@ -633,7 +633,7 @@ class TwitterEventExtractor(TwitterExtractor):
pattern = BASE_PATTERN + r"/i/events/(\d+)"
test = ("https://twitter.com/i/events/1484669206993903616", {
"range": "1-20",
- "count": ">5",
+ "count": ">=1",
})
def metadata(self):
@@ -759,7 +759,7 @@ class TwitterTweetExtractor(TwitterExtractor):
# retweet with missing media entities (#1555)
("https://twitter.com/morino_ya/status/1392763691599237121", {
"options": (("retweets", True),),
- "count": 4,
+ "count": 0, # private
}),
# deleted quote tweet (#2225)
("https://twitter.com/i/web/status/1460044411165888515", {
@@ -782,7 +782,7 @@ class TwitterTweetExtractor(TwitterExtractor):
# '?format=...&name=...'-style URLs
("https://twitter.com/poco_dandy/status/1150646424461176832", {
"options": (("cards", True),),
- "pattern": r"https://pbs.twimg.com/card_img/157\d+/\w+"
+ "pattern": r"https://pbs.twimg.com/card_img/157\d+/[\w-]+"
r"\?format=(jpg|png)&name=orig$",
"range": "1-2",
}),
diff --git a/gallery_dl/extractor/unsplash.py b/gallery_dl/extractor/unsplash.py
index 8bea18c7..b298c27e 100644
--- a/gallery_dl/extractor/unsplash.py
+++ b/gallery_dl/extractor/unsplash.py
@@ -78,11 +78,11 @@ class UnsplashImageExtractor(UnsplashExtractor):
pattern = BASE_PATTERN + r"/photos/([^/?#]+)"
test = ("https://unsplash.com/photos/lsoogGC_5dg", {
"pattern": r"https://images\.unsplash\.com/photo-1586348943529-"
- r"beaae6c28db9\?ixid=\w+&ixlib=rb-1.2.1",
+ r"beaae6c28db9\?ixid=\w+&ixlib=rb-4.0.3",
"keyword": {
"alt_description": "re:silhouette of trees near body of water ",
"blur_hash": "LZP4uQS4jboe%#o0WCa}2doJNaaz",
- "categories": list,
+ "? categories": list,
"color": "#f3c08c",
"created_at": "2020-04-08T12:29:42Z",
"date": "dt:2020-04-08 12:29:42",
@@ -108,9 +108,8 @@ class UnsplashImageExtractor(UnsplashExtractor):
"name": "Beaver Dam, WI 53916, USA",
"position": {
"latitude": 43.457769,
- "longitude": -88.837329
+ "longitude": -88.837329,
},
- "title": "Beaver Dam, WI 53916, USA"
},
"promoted_at": "2020-04-08T15:12:03Z",
"sponsorship": None,
@@ -149,7 +148,7 @@ class UnsplashUserExtractor(UnsplashExtractor):
pattern = BASE_PATTERN + r"/@(\w+)/?$"
test = ("https://unsplash.com/@davehoefler", {
"pattern": r"https://images\.unsplash\.com/(photo-\d+-\w+"
- r"|reserve/[^/?#]+)\?ixid=\w+&ixlib=rb-1\.2\.1$",
+ r"|reserve/[^/?#]+)\?ixid=\w+&ixlib=rb-4\.0\.3$",
"range": "1-30",
"count": 30,
})
@@ -166,7 +165,7 @@ class UnsplashFavoriteExtractor(UnsplashExtractor):
pattern = BASE_PATTERN + r"/@(\w+)/likes"
test = ("https://unsplash.com/@davehoefler/likes", {
"pattern": r"https://images\.unsplash\.com/(photo-\d+-\w+"
- r"|reserve/[^/?#]+)\?ixid=\w+&ixlib=rb-1\.2\.1$",
+ r"|reserve/[^/?#]+)\?ixid=\w+&ixlib=rb-4\.0\.3$",
"range": "1-30",
"count": 30,
})
@@ -184,7 +183,7 @@ class UnsplashCollectionExtractor(UnsplashExtractor):
test = (
("https://unsplash.com/collections/3178572/winter", {
"pattern": r"https://images\.unsplash\.com/(photo-\d+-\w+"
- r"|reserve/[^/?#]+)\?ixid=\w+&ixlib=rb-1\.2\.1$",
+ r"|reserve/[^/?#]+)\?ixid=\w+&ixlib=rb-4\.0\.3$",
"keyword": {"collection_id": "3178572",
"collection_title": "winter"},
"range": "1-30",
@@ -212,8 +211,9 @@ class UnsplashSearchExtractor(UnsplashExtractor):
subcategory = "search"
pattern = BASE_PATTERN + r"/s/photos/([^/?#]+)(?:\?([^#]+))?"
test = ("https://unsplash.com/s/photos/hair-style", {
- "pattern": r"https://images\.unsplash\.com/((flagged/)?photo-\d+-\w+"
- r"|reserve/[^/?#]+)\?ixid=\w+&ixlib=rb-1\.2\.1$",
+ "pattern": r"https://(images|plus)\.unsplash\.com"
+ r"/((flagged/|premium_)?photo-\d+-\w+"
+ r"|reserve/[^/?#]+)\?ixid=\w+&ixlib=rb-4\.0\.3$",
"range": "1-30",
"count": 30,
})
diff --git a/gallery_dl/extractor/warosu.py b/gallery_dl/extractor/warosu.py
index 677680fa..bdedfcbc 100644
--- a/gallery_dl/extractor/warosu.py
+++ b/gallery_dl/extractor/warosu.py
@@ -1,21 +1,22 @@
# -*- coding: utf-8 -*-
-# Copyright 2017-2019 Mike Fährmann
+# Copyright 2017-2022 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
-"""Extract images from https://warosu.org/"""
+"""Extractors for https://warosu.org/"""
from .common import Extractor, Message
from .. import text
class WarosuThreadExtractor(Extractor):
- """Extractor for images from threads on warosu.org"""
+ """Extractor for threads on warosu.org"""
category = "warosu"
subcategory = "thread"
+ root = "https://warosu.org"
directory_fmt = ("{category}", "{board}", "{thread} - {title}")
filename_fmt = "{tim}-{filename}.{extension}"
archive_fmt = "{board}_{thread}_{tim}"
@@ -31,7 +32,6 @@ class WarosuThreadExtractor(Extractor):
"content": "d48df0a701e6599312bfff8674f4aa5d4fb8db1c",
}),
)
- root = "https://warosu.org"
def __init__(self, match):
Extractor.__init__(self, match)
@@ -40,12 +40,12 @@ class WarosuThreadExtractor(Extractor):
def items(self):
url = "{}/{}/thread/{}".format(self.root, self.board, self.thread)
page = self.request(url).text
- data = self.get_metadata(page)
+ data = self.metadata(page)
posts = self.posts(page)
if not data["title"]:
- title = text.remove_html(posts[0]["com"])
- data["title"] = text.unescape(title)[:50]
+ data["title"] = text.unescape(text.remove_html(
+ posts[0]["com"]))[:50]
yield Message.Directory, data
for post in posts:
@@ -55,25 +55,24 @@ class WarosuThreadExtractor(Extractor):
post.update(data)
yield Message.Url, post["image"], post
- def get_metadata(self, page):
- """Collect metadata for extractor-job"""
+ def metadata(self, page):
boardname = text.extr(page, "", "")
title = text.extr(page, 'filetitle" itemprop="name">', '<')
return {
- "board": self.board,
+ "board" : self.board,
"board_name": boardname.rpartition(" - ")[2],
- "thread": self.thread,
- "title": title,
+ "thread" : self.thread,
+ "title" : title,
}
def posts(self, page):
- """Build a list of all post-objects"""
+ """Build a list of all post objects"""
page = text.extr(page, '', '
')
needle = ''
return [self.parse(post) for post in page.split(needle)]
def parse(self, post):
- """Build post-object by extracting data from an HTML post"""
+ """Build post object by extracting data from an HTML post"""
data = self._extract_post(post)
if "File:" in post:
self._extract_image(post, data)
@@ -84,24 +83,23 @@ class WarosuThreadExtractor(Extractor):
@staticmethod
def _extract_post(post):
- data = text.extract_all(post, (
- ("no" , 'id="p', '"'),
- ("name", '', ''),
- ("time", ''),
- ("now" , '', '<'),
- ("com" , '', '
'),
- ))[0]
- data["com"] = text.unescape(text.remove_html(data["com"].strip()))
- return data
+ extr = text.extract_from(post)
+ return {
+ "no" : extr('id="p', '"'),
+ "name": extr('', ""),
+ "time": extr(''),
+ "now" : extr("", "<"),
+ "com" : text.unescape(text.remove_html(extr(
+ '', '
'
+ ).strip())),
+ }
@staticmethod
def _extract_image(post, data):
- text.extract_all(post, (
- ("fsize" , 'File: ', ', '),
- ("w" , '', 'x'),
- ("h" , '', ', '),
- ("filename", '', '<'),
- ("image" , '
\nFile: ", ", ")
+ data["w"] = extr("", "x")
+ data["h"] = extr("", ", ")
+ data["filename"] = text.unquote(extr("", "<").rpartition(".")[0])
+ extr("
", "")
+ data["image"] = "https:" + extr(' "f_o_o_b_a_r" (if "f" is "f o o b a r")
"""
- def __init__(self, format_string, default=None, fmt=format):
+ def __init__(self, format_string, default=NONE, fmt=format):
self.default = default
self.format = fmt
self.result = []
@@ -193,7 +195,7 @@ class StringFormatter():
class TemplateFormatter(StringFormatter):
"""Read format_string from file"""
- def __init__(self, path, default=None, fmt=format):
+ def __init__(self, path, default=NONE, fmt=format):
with open(util.expand_path(path)) as fp:
format_string = fp.read()
StringFormatter.__init__(self, format_string, default, fmt)
@@ -202,14 +204,14 @@ class TemplateFormatter(StringFormatter):
class ExpressionFormatter():
"""Generate text by evaluating a Python expression"""
- def __init__(self, expression, default=None, fmt=None):
+ def __init__(self, expression, default=NONE, fmt=None):
self.format_map = util.compile_expression(expression)
class ModuleFormatter():
"""Generate text by calling an external function"""
- def __init__(self, function_spec, default=None, fmt=None):
+ def __init__(self, function_spec, default=NONE, fmt=None):
module_name, _, function_name = function_spec.partition(":")
module = __import__(module_name)
self.format_map = getattr(module, function_name)
@@ -218,7 +220,7 @@ class ModuleFormatter():
class FStringFormatter():
"""Generate text by evaluaring an f-string literal"""
- def __init__(self, fstring, default=None, fmt=None):
+ def __init__(self, fstring, default=NONE, fmt=None):
self.format_map = util.compile_expression("f'''" + fstring + "'''")
diff --git a/gallery_dl/option.py b/gallery_dl/option.py
index 4d9a3587..91e9169c 100644
--- a/gallery_dl/option.py
+++ b/gallery_dl/option.py
@@ -142,10 +142,12 @@ def build_parser():
)
general.add_argument(
"--cookies-from-browser",
- dest="cookies_from_browser", metavar="BROWSER[+KEYRING][:PROFILE]",
+ dest="cookies_from_browser",
+ metavar="BROWSER[+KEYRING][:PROFILE][::CONTAINER]",
help=("Name of the browser to load cookies from, "
- "with optional keyring name prefixed with '+' and "
- "profile prefixed with ':'"),
+ "with optional keyring name prefixed with '+', "
+ "profile prefixed with ':', and "
+ "container prefixed with '::' ('none' for no container)"),
)
output = parser.add_argument_group("Output Options")
diff --git a/gallery_dl/path.py b/gallery_dl/path.py
index 7d599ee2..3b360e99 100644
--- a/gallery_dl/path.py
+++ b/gallery_dl/path.py
@@ -29,6 +29,8 @@ class PathFormat():
def __init__(self, extractor):
config = extractor.config
kwdefault = config("keywords-default")
+ if kwdefault is None:
+ kwdefault = util.NONE
filename_fmt = config("filename")
try:
@@ -212,14 +214,19 @@ class PathFormat():
def fix_extension(self, _=None):
"""Fix filenames without a given filename extension"""
- if not self.extension:
- self.kwdict["extension"] = self.prefix + self.extension_map("", "")
- self.build_path()
- if self.path[-1] == ".":
- self.path = self.path[:-1]
- self.temppath = self.realpath = self.realpath[:-1]
- elif not self.temppath:
+ try:
+ if not self.extension:
+ self.kwdict["extension"] = \
+ self.prefix + self.extension_map("", "")
+ self.build_path()
+ if self.path[-1] == ".":
+ self.path = self.path[:-1]
+ self.temppath = self.realpath = self.realpath[:-1]
+ elif not self.temppath:
+ self.build_path()
+ except Exception:
self.path = self.directory + "?"
+ self.realpath = self.temppath = self.realdirectory + "?"
return True
def build_filename(self, kwdict):
diff --git a/gallery_dl/util.py b/gallery_dl/util.py
index 8ce1fb40..23d5bc8e 100644
--- a/gallery_dl/util.py
+++ b/gallery_dl/util.py
@@ -528,8 +528,8 @@ def parse_inputfile(file, log):
yield line
-class UniversalNone():
- """None-style object that supports more operations than None itself"""
+class CustomNone():
+ """None-style type that supports more operations than regular None"""
__slots__ = ()
def __getattribute__(self, _):
@@ -538,10 +538,28 @@ class UniversalNone():
def __getitem__(self, _):
return self
+ def __iter__(self):
+ return self
+
+ def __call__(self, *args, **kwargs):
+ return self
+
+ @staticmethod
+ def __next__():
+ raise StopIteration
+
@staticmethod
def __bool__():
return False
+ @staticmethod
+ def __len__():
+ return 0
+
+ @staticmethod
+ def __format__(_):
+ return "None"
+
@staticmethod
def __str__():
return "None"
@@ -549,7 +567,7 @@ class UniversalNone():
__repr__ = __str__
-NONE = UniversalNone()
+NONE = CustomNone()
EPOCH = datetime.datetime(1970, 1, 1)
SECOND = datetime.timedelta(0, 1)
WINDOWS = (os.name == "nt")
diff --git a/gallery_dl/version.py b/gallery_dl/version.py
index d2890098..6975192a 100644
--- a/gallery_dl/version.py
+++ b/gallery_dl/version.py
@@ -6,4 +6,4 @@
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
-__version__ = "1.24.1"
+__version__ = "1.24.2-dev"
diff --git a/scripts/supportedsites.py b/scripts/supportedsites.py
index d8106095..8cff63c3 100755
--- a/scripts/supportedsites.py
+++ b/scripts/supportedsites.py
@@ -60,8 +60,6 @@ CATEGORY_MAP = {
"joyreactor" : "JoyReactor",
"kabeuchi" : "かべうち",
"kemonoparty" : "Kemono",
- "kireicake" : "Kirei Cake",
- "kissgoddess" : "Kiss Goddess",
"lineblog" : "LINE BLOG",
"livedoor" : "livedoor Blog",
"omgmiamiswimwear": "Omg Miami Swimwear",
diff --git a/test/test_util.py b/test/test_util.py
index 2921ea23..4b8f9ae4 100644
--- a/test/test_util.py
+++ b/test/test_util.py
@@ -618,10 +618,21 @@ class TestOther(unittest.TestCase):
obj = util.NONE
self.assertFalse(obj)
+ self.assertEqual(len(obj), 0)
self.assertEqual(str(obj), str(None))
self.assertEqual(repr(obj), repr(None))
+ self.assertEqual(format(obj), str(None))
+ self.assertEqual(format(obj, "%F"), str(None))
self.assertIs(obj.attr, obj)
self.assertIs(obj["key"], obj)
+ self.assertIs(obj(), obj)
+ self.assertIs(obj(1, "a"), obj)
+ self.assertIs(obj(foo="bar"), obj)
+
+ i = 0
+ for _ in obj:
+ i += 1
+ self.assertEqual(i, 0)
class TestExtractor():