From 86f0597c951d999b1989504ddcf99f2c39de75d1 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Mike=20F=C3=A4hrmann?= <mike_faehrmann@web.de>
Date: Mon, 5 Dec 2022 19:28:50 +0100
Subject: [PATCH 01/13] [kissgoddess] remove module

site does not host albums anymore
---
 docs/supportedsites.md              |  6 ---
 gallery_dl/extractor/__init__.py    |  1 -
 gallery_dl/extractor/kissgoddess.py | 82 -----------------------------
 gallery_dl/version.py               |  2 +-
 scripts/supportedsites.py           |  1 -
 5 files changed, 1 insertion(+), 91 deletions(-)
 delete mode 100644 gallery_dl/extractor/kissgoddess.py
diff --git a/docs/supportedsites.md b/docs/supportedsites.md
index fffe3ac6..439fcd30 100644
--- a/docs/supportedsites.md
+++ b/docs/supportedsites.md
@@ -433,12 +433,6 @@ Consider all sites to be NSFW unless otherwise known.
     <td>Soundtracks</td>
     <td></td>
 </tr>
-<tr>
-    <td>Kiss Goddess</td>
-    <td>https://kissgoddess.com/</td>
-    <td>Galleries, Models</td>
-    <td></td>
-</tr>
 <tr>
     <td>Kohlchan</td>
     <td>https://kohlchan.net/</td>
diff --git a/gallery_dl/extractor/__init__.py b/gallery_dl/extractor/__init__.py
index d2bbcbb7..3b553c84 100644
--- a/gallery_dl/extractor/__init__.py
+++ b/gallery_dl/extractor/__init__.py
@@ -74,7 +74,6 @@ modules = [
     "keenspot",
     "kemonoparty",
     "khinsider",
-    "kissgoddess",
     "kohlchan",
     "komikcast",
     "lightroom",
diff --git a/gallery_dl/extractor/kissgoddess.py b/gallery_dl/extractor/kissgoddess.py
deleted file mode 100644
index 4ec685c2..00000000
--- a/gallery_dl/extractor/kissgoddess.py
+++ /dev/null
@@ -1,82 +0,0 @@
-# -*- coding: utf-8 -*-
-
-# Copyright 2022 Mike Fährmann
-#
-# This program is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License version 2 as
-# published by the Free Software Foundation.
-
-"""Extractors for https://kissgoddess.com/"""
-
-from .common import GalleryExtractor, Extractor, Message
-from .. import text, exception
-
-
-class KissgoddessGalleryExtractor(GalleryExtractor):
-    """Extractor for image galleries on kissgoddess.com"""
-    category = "kissgoddess"
-    root = "https://kissgoddess.com"
-    pattern = r"(?:https?://)?(?:www\.)?kissgoddess\.com/album/(\d+)"
-    test = ("https://kissgoddess.com/album/18285.html", {
-        "pattern": r"https://pic\.kissgoddess\.com"
-                   r"/gallery/16473/18285/s/\d+\.jpg",
-        "count": 19,
-        "keyword": {
-            "gallery_id": 18285,
-            "title": "[Young Champion Extra] 2016.02 No.03 菜乃花 安枝瞳 葉月あや",
-        },
-    })
-
-    def __init__(self, match):
-        self.gallery_id = match.group(1)
-        url = "{}/album/{}.html".format(self.root, self.gallery_id)
-        GalleryExtractor.__init__(self, match, url)
-
-    def metadata(self, page):
-        return {
-            "gallery_id": text.parse_int(self.gallery_id),
-            "title"     : text.extr(
-                page, '<title>', "<")[0].rpartition(" | "),
-        }
-
-    def images(self, page):
-        pnum = 1
-
-        while page:
-            for url in text.extract_iter(page, "<img src='", "'"):
-                yield url, None
-            for url in text.extract_iter(page, "<img data-original='", "'"):
-                yield url, None
-
-            pnum += 1
-            url = "{}/album/{}_{}.html".format(
-                self.root, self.gallery_id, pnum)
-            try:
-                page = self.request(url).text
-            except exception.HttpError:
-                return
-
-
-class KissgoddessModelExtractor(Extractor):
-    """Extractor for all galleries of a model on kissgoddess.com"""
-    category = "kissgoddess"
-    subcategory = "model"
-    root = "https://kissgoddess.com"
-    pattern = r"(?:https?://)?(?:www\.)?kissgoddess\.com/people/([^./?#]+)"
-    test = ("https://kissgoddess.com/people/aya-hazuki.html", {
-        "pattern": KissgoddessGalleryExtractor.pattern,
-        "count": ">= 7",
-    })
-
-    def __init__(self, match):
-        Extractor.__init__(self, match)
-        self.model = match.group(1)
-
-    def items(self):
-        url = "{}/people/{}.html".format(self.root, self.model)
-        page = self.request(url).text
-
-        data = {"_extractor": KissgoddessGalleryExtractor}
-        for path in text.extract_iter(page, 'thumb"><a href="/album/', '"'):
-            url = self.root + "/album/" + path
-            yield Message.Queue, url, data
diff --git a/gallery_dl/version.py b/gallery_dl/version.py
index d2890098..6975192a 100644
--- a/gallery_dl/version.py
+++ b/gallery_dl/version.py
@@ -6,4 +6,4 @@
 # it under the terms of the GNU General Public License version 2 as
 # published by the Free Software Foundation.
 
-__version__ = "1.24.1"
+__version__ = "1.24.2-dev"
diff --git a/scripts/supportedsites.py b/scripts/supportedsites.py
index d8106095..edd65546 100755
--- a/scripts/supportedsites.py
+++ b/scripts/supportedsites.py
@@ -61,7 +61,6 @@ CATEGORY_MAP = {
     "kabeuchi"       : "かべうち",
     "kemonoparty"    : "Kemono",
     "kireicake"      : "Kirei Cake",
-    "kissgoddess"    : "Kiss Goddess",
     "lineblog"       : "LINE BLOG",
     "livedoor"       : "livedoor Blog",
     "omgmiamiswimwear": "Omg Miami Swimwear",

From a42ba25ca19978c69a0ff5b265ea23988e7bf792 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Mike=20F=C3=A4hrmann?= <mike_faehrmann@web.de>
Date: Mon, 5 Dec 2022 19:38:31 +0100
Subject: [PATCH 02/13] [foolslide] remove 'kireicake'

site redirects to (unclaimed) mangadex group
---
 docs/supportedsites.md            |  6 ------
 gallery_dl/extractor/foolslide.py | 12 ------------
 scripts/supportedsites.py         |  1 -
 3 files changed, 19 deletions(-)

diff --git a/docs/supportedsites.md b/docs/supportedsites.md
index 439fcd30..a0aded23 100644
--- a/docs/supportedsites.md
+++ b/docs/supportedsites.md
@@ -1263,12 +1263,6 @@ Consider all sites to be NSFW unless otherwise known.
 <tr>
     <td colspan="4"><strong>FoOlSlide Instances</strong></td>
 </tr>
-<tr>
-    <td>Kirei Cake</td>
-    <td>https://reader.kireicake.com/</td>
-    <td>Chapters, Manga</td>
-    <td></td>
-</tr>
 <tr>
     <td>PowerManga</td>
     <td>https://read.powermanga.org/</td>
diff --git a/gallery_dl/extractor/foolslide.py b/gallery_dl/extractor/foolslide.py
index 81671ecd..2290cc25 100644
--- a/gallery_dl/extractor/foolslide.py
+++ b/gallery_dl/extractor/foolslide.py
@@ -39,10 +39,6 @@ class FoolslideExtractor(BaseExtractor):
 
 
 BASE_PATTERN = FoolslideExtractor.update({
-    "kireicake": {
-        "root": "https://reader.kireicake.com",
-        "pattern": r"reader\.kireicake\.com",
-    },
     "powermanga": {
         "root": "https://read.powermanga.org",
         "pattern": r"read(?:er)?\.powermanga\.org",
@@ -64,10 +60,6 @@ class FoolslideChapterExtractor(FoolslideExtractor):
     archive_fmt = "{id}"
     pattern = BASE_PATTERN + r"(/read/[^/?#]+/[a-z-]+/\d+/\d+(?:/\d+)?)"
     test = (
-        ("https://reader.kireicake.com/read/wonderland/en/1/1/", {
-            "url": "b2d36bc0bc67e4c461c3a4d6444a2fd339f5d07e",
-            "keyword": "9f80947920a325e33aea7f5cd69ea669171903b6",
-        }),
         (("https://read.powermanga.org"
           "/read/one_piece_digital_colour_comics/en/0/75/"), {
             "url": "854c5817f8f767e1bccd05fa9d58ffb5a4b09384",
@@ -123,10 +115,6 @@ class FoolslideMangaExtractor(FoolslideExtractor):
     categorytransfer = True
     pattern = BASE_PATTERN + r"(/series/[^/?#]+)"
     test = (
-        ("https://reader.kireicake.com/series/wonderland/", {
-            "url": "d067b649af1cc88fa8c8b698fde04a10909fd169",
-            "keyword": "268f43772fb239888ca5c5f6a4f65f99ffb3eefb",
-        }),
         (("https://read.powermanga.org"
           "/series/one_piece_digital_colour_comics/"), {
             "count": ">= 1",
diff --git a/scripts/supportedsites.py b/scripts/supportedsites.py
index edd65546..8cff63c3 100755
--- a/scripts/supportedsites.py
+++ b/scripts/supportedsites.py
@@ -60,7 +60,6 @@ CATEGORY_MAP = {
     "joyreactor"     : "JoyReactor",
     "kabeuchi"       : "かべうち",
     "kemonoparty"    : "Kemono",
-    "kireicake"      : "Kirei Cake",
     "lineblog"       : "LINE BLOG",
     "livedoor"       : "livedoor Blog",
     "omgmiamiswimwear": "Omg Miami Swimwear",

From 5f57a27ba6dc99b418d52b97cadbf64e66605584 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Mike=20F=C3=A4hrmann?= <mike_faehrmann@web.de>
Date: Mon, 5 Dec 2022 22:15:59 +0100
Subject: [PATCH 03/13] [imagetwist] fix extraction

---
 gallery_dl/extractor/imagehosts.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gallery_dl/extractor/imagehosts.py b/gallery_dl/extractor/imagehosts.py
index 6fcfc555..207562a3 100644
--- a/gallery_dl/extractor/imagehosts.py
+++ b/gallery_dl/extractor/imagehosts.py
@@ -200,7 +200,7 @@ class ImagetwistImageExtractor(ImagehostImageExtractor):
         return self.request(self.page_url).cookies
 
     def get_info(self, page):
-        url     , pos = text.extract(page, 'center;"><img src="', '"')
+        url     , pos = text.extract(page, '<img src="', '"')
         filename, pos = text.extract(page, ' alt="', '"', pos)
         return url, filename
 

From 4a3a1f4c87febe5a92ece61e6e7982c59b98313d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Mike=20F=C3=A4hrmann?= <mike_faehrmann@web.de>
Date: Mon, 5 Dec 2022 22:36:49 +0100
Subject: [PATCH 04/13] [komikcast] update domain and fix extraction

---
 docs/supportedsites.md            |  2 +-
 gallery_dl/extractor/komikcast.py | 24 ++++++++++++------------
 2 files changed, 13 insertions(+), 13 deletions(-)

diff --git a/docs/supportedsites.md b/docs/supportedsites.md
index a0aded23..3c7d6cf2 100644
--- a/docs/supportedsites.md
+++ b/docs/supportedsites.md
@@ -441,7 +441,7 @@ Consider all sites to be NSFW unless otherwise known.
 </tr>
 <tr>
     <td>Komikcast</td>
-    <td>https://komikcast.me/</td>
+    <td>https://komikcast.site/</td>
     <td>Chapters, Manga</td>
     <td></td>
 </tr>
diff --git a/gallery_dl/extractor/komikcast.py b/gallery_dl/extractor/komikcast.py
index a9eebf40..04373c4b 100644
--- a/gallery_dl/extractor/komikcast.py
+++ b/gallery_dl/extractor/komikcast.py
@@ -6,19 +6,19 @@
 # it under the terms of the GNU General Public License version 2 as
 # published by the Free Software Foundation.
 
-"""Extractors for https://komikcast.me/"""
+"""Extractors for https://komikcast.site/"""
 
 from .common import ChapterExtractor, MangaExtractor
 from .. import text
 import re
 
-BASE_PATTERN = r"(?:https?://)?(?:www\.)?komikcast\.(?:me|com)"
+BASE_PATTERN = r"(?:https?://)?(?:www\.)?komikcast\.(?:site|me|com)"
 
 
 class KomikcastBase():
     """Base class for komikcast extractors"""
     category = "komikcast"
-    root = "https://komikcast.me"
+    root = "https://komikcast.site"
 
     @staticmethod
     def parse_chapter_string(chapter_string, data=None):
@@ -46,23 +46,23 @@ class KomikcastBase():
 
 
 class KomikcastChapterExtractor(KomikcastBase, ChapterExtractor):
-    """Extractor for manga-chapters from komikcast.me"""
+    """Extractor for manga-chapters from komikcast.site"""
     pattern = BASE_PATTERN + r"(/chapter/[^/?#]+/)"
     test = (
-        (("https://komikcast.me/chapter"
+        (("https://komikcast.site/chapter"
           "/apotheosis-chapter-02-2-bahasa-indonesia/"), {
-            "url": "74eca5c9b27b896816497f9b2d847f2a1fcfc209",
+            "url": "f6b43fbc027697749b3ea1c14931c83f878d7936",
             "keyword": "f3938e1aff9ad1f302f52447e9781b21f6da26d4",
         }),
         (("https://komikcast.me/chapter"
           "/soul-land-ii-chapter-300-1-bahasa-indonesia/"), {
-            "url": "243a5250e210b40d17217e83b7547cefea5638bd",
+            "url": "efd00a9bd95461272d51990d7bc54b79ff3ff2e6",
             "keyword": "cb646cfed3d45105bd645ab38b2e9f7d8c436436",
         }),
     )
 
     def metadata(self, page):
-        info = text.extr(page, "<title>", " – Komikcast<")
+        info = text.extr(page, "<title>", " - Komikcast<")
         return self.parse_chapter_string(info)
 
     @staticmethod
@@ -76,12 +76,12 @@ class KomikcastChapterExtractor(KomikcastBase, ChapterExtractor):
 
 
 class KomikcastMangaExtractor(KomikcastBase, MangaExtractor):
-    """Extractor for manga from komikcast.me"""
+    """Extractor for manga from komikcast.site"""
     chapterclass = KomikcastChapterExtractor
     pattern = BASE_PATTERN + r"(/(?:komik/)?[^/?#]+)/?$"
     test = (
-        ("https://komikcast.me/komik/090-eko-to-issho/", {
-            "url": "08204f0a703ec5272121abcf0632ecacba1e588f",
+        ("https://komikcast.site/komik/090-eko-to-issho/", {
+            "url": "19d3d50d532e84be6280a3d61ff0fd0ca04dd6b4",
             "keyword": "837a7e96867344ff59d840771c04c20dc46c0ab1",
         }),
         ("https://komikcast.me/tonari-no-kashiwagi-san/"),
@@ -101,7 +101,7 @@ class KomikcastMangaExtractor(KomikcastBase, MangaExtractor):
     @staticmethod
     def metadata(page):
         """Return a dict with general metadata"""
-        manga , pos = text.extract(page, "<title>" , " – Komikcast<")
+        manga , pos = text.extract(page, "<title>" , " - Komikcast<")
         genres, pos = text.extract(
             page, 'class="komik_info-content-genre">', "</span>", pos)
         author, pos = text.extract(page, ">Author:", "</span>", pos)

From 6afb3cc766f7da7624c5aa0c2040b133b287f0bb Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Mike=20F=C3=A4hrmann?= <mike_faehrmann@web.de>
Date: Tue, 6 Dec 2022 16:25:59 +0100
Subject: [PATCH 05/13] restore paths for archived files (#3362)

---
 gallery_dl/path.py | 19 ++++++++++++-------
 1 file changed, 12 insertions(+), 7 deletions(-)

diff --git a/gallery_dl/path.py b/gallery_dl/path.py
index 7d599ee2..77a33277 100644
--- a/gallery_dl/path.py
+++ b/gallery_dl/path.py
@@ -212,14 +212,19 @@ class PathFormat():
 
     def fix_extension(self, _=None):
         """Fix filenames without a given filename extension"""
-        if not self.extension:
-            self.kwdict["extension"] = self.prefix + self.extension_map("", "")
-            self.build_path()
-            if self.path[-1] == ".":
-                self.path = self.path[:-1]
-                self.temppath = self.realpath = self.realpath[:-1]
-        elif not self.temppath:
+        try:
+            if not self.extension:
+                self.kwdict["extension"] = \
+                    self.prefix + self.extension_map("", "")
+                self.build_path()
+                if self.path[-1] == ".":
+                    self.path = self.path[:-1]
+                    self.temppath = self.realpath = self.realpath[:-1]
+            elif not self.temppath:
+                self.build_path()
+        except Exception:
             self.path = self.directory + "?"
+            self.realpath = self.temppath = self.realdirectory + "?"
         return True
 
     def build_filename(self, kwdict):

From 43c211f1a7cb2870e9f2a4a5a6584b499b8c66b3 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Mike=20F=C3=A4hrmann?= <mike_faehrmann@web.de>
Date: Tue, 6 Dec 2022 18:44:46 +0100
Subject: [PATCH 06/13] extend util.UniversalNone and rename it to CustomNone

---
 gallery_dl/util.py | 24 +++++++++++++++++++++---
 test/test_util.py  | 11 +++++++++++
 2 files changed, 32 insertions(+), 3 deletions(-)

diff --git a/gallery_dl/util.py b/gallery_dl/util.py
index 8ce1fb40..23d5bc8e 100644
--- a/gallery_dl/util.py
+++ b/gallery_dl/util.py
@@ -528,8 +528,8 @@ def parse_inputfile(file, log):
                 yield line
 
 
-class UniversalNone():
-    """None-style object that supports more operations than None itself"""
+class CustomNone():
+    """None-style type that supports more operations than regular None"""
     __slots__ = ()
 
     def __getattribute__(self, _):
@@ -538,10 +538,28 @@ class UniversalNone():
     def __getitem__(self, _):
         return self
 
+    def __iter__(self):
+        return self
+
+    def __call__(self, *args, **kwargs):
+        return self
+
+    @staticmethod
+    def __next__():
+        raise StopIteration
+
     @staticmethod
     def __bool__():
         return False
 
+    @staticmethod
+    def __len__():
+        return 0
+
+    @staticmethod
+    def __format__(_):
+        return "None"
+
     @staticmethod
     def __str__():
         return "None"
@@ -549,7 +567,7 @@ class UniversalNone():
     __repr__ = __str__
 
 
-NONE = UniversalNone()
+NONE = CustomNone()
 EPOCH = datetime.datetime(1970, 1, 1)
 SECOND = datetime.timedelta(0, 1)
 WINDOWS = (os.name == "nt")
diff --git a/test/test_util.py b/test/test_util.py
index 2921ea23..4b8f9ae4 100644
--- a/test/test_util.py
+++ b/test/test_util.py
@@ -618,10 +618,21 @@ class TestOther(unittest.TestCase):
         obj = util.NONE
 
         self.assertFalse(obj)
+        self.assertEqual(len(obj), 0)
         self.assertEqual(str(obj), str(None))
         self.assertEqual(repr(obj), repr(None))
+        self.assertEqual(format(obj), str(None))
+        self.assertEqual(format(obj, "%F"), str(None))
         self.assertIs(obj.attr, obj)
         self.assertIs(obj["key"], obj)
+        self.assertIs(obj(), obj)
+        self.assertIs(obj(1, "a"), obj)
+        self.assertIs(obj(foo="bar"), obj)
+
+        i = 0
+        for _ in obj:
+            i += 1
+        self.assertEqual(i, 0)
 
 
 class TestExtractor():

From ca4742200b90965897bc7d4ea073116e03c4dd6e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Mike=20F=C3=A4hrmann?= <mike_faehrmann@web.de>
Date: Tue, 6 Dec 2022 22:26:46 +0100
Subject: [PATCH 07/13] use util.NONE as 'keywords-default' default value

---
 gallery_dl/formatter.py | 14 ++++++++------
 gallery_dl/path.py      |  2 ++
 2 files changed, 10 insertions(+), 6 deletions(-)

diff --git a/gallery_dl/formatter.py b/gallery_dl/formatter.py
index ca05fa5a..5fe7f32a 100644
--- a/gallery_dl/formatter.py
+++ b/gallery_dl/formatter.py
@@ -18,8 +18,10 @@ import operator
 import functools
 from . import text, util
 
+NONE = util.NONE
 
-def parse(format_string, default=None, fmt=format):
+
+def parse(format_string, default=NONE, fmt=format):
     key = format_string, default, fmt
 
     try:
@@ -88,7 +90,7 @@ class StringFormatter():
         Example: {f:R /_/} -> "f_o_o_b_a_r" (if "f" is "f o o b a r")
     """
 
-    def __init__(self, format_string, default=None, fmt=format):
+    def __init__(self, format_string, default=NONE, fmt=format):
         self.default = default
         self.format = fmt
         self.result = []
@@ -193,7 +195,7 @@ class StringFormatter():
 class TemplateFormatter(StringFormatter):
     """Read format_string from file"""
 
-    def __init__(self, path, default=None, fmt=format):
+    def __init__(self, path, default=NONE, fmt=format):
         with open(util.expand_path(path)) as fp:
             format_string = fp.read()
         StringFormatter.__init__(self, format_string, default, fmt)
@@ -202,14 +204,14 @@ class TemplateFormatter(StringFormatter):
 class ExpressionFormatter():
     """Generate text by evaluating a Python expression"""
 
-    def __init__(self, expression, default=None, fmt=None):
+    def __init__(self, expression, default=NONE, fmt=None):
         self.format_map = util.compile_expression(expression)
 
 
 class ModuleFormatter():
     """Generate text by calling an external function"""
 
-    def __init__(self, function_spec, default=None, fmt=None):
+    def __init__(self, function_spec, default=NONE, fmt=None):
         module_name, _, function_name = function_spec.partition(":")
         module = __import__(module_name)
         self.format_map = getattr(module, function_name)
@@ -218,7 +220,7 @@ class ModuleFormatter():
 class FStringFormatter():
     """Generate text by evaluaring an f-string literal"""
 
-    def __init__(self, fstring, default=None, fmt=None):
+    def __init__(self, fstring, default=NONE, fmt=None):
         self.format_map = util.compile_expression("f'''" + fstring + "'''")
 
 
diff --git a/gallery_dl/path.py b/gallery_dl/path.py
index 77a33277..3b360e99 100644
--- a/gallery_dl/path.py
+++ b/gallery_dl/path.py
@@ -29,6 +29,8 @@ class PathFormat():
     def __init__(self, extractor):
         config = extractor.config
         kwdefault = config("keywords-default")
+        if kwdefault is None:
+            kwdefault = util.NONE
 
         filename_fmt = config("filename")
         try:

From 202c1210d5ceefe4a32071278901d28080133700 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Mike=20F=C3=A4hrmann?= <mike_faehrmann@web.de>
Date: Tue, 6 Dec 2022 22:39:13 +0100
Subject: [PATCH 08/13] [exhentai] fix pagination

---
 gallery_dl/extractor/exhentai.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gallery_dl/extractor/exhentai.py b/gallery_dl/extractor/exhentai.py
index a546f684..01375d81 100644
--- a/gallery_dl/extractor/exhentai.py
+++ b/gallery_dl/extractor/exhentai.py
@@ -516,7 +516,7 @@ class ExhentaiSearchExtractor(ExhentaiExtractor):
                 data["gallery_token"] = gallery.group(3)
                 yield Message.Queue, url + "/", data
 
-            next_url = text.extr(page, 'nexturl = "', '"', None)
+            next_url = text.extr(page, 'nexturl="', '"', None)
             if next_url is not None:
                 if not next_url:
                     return

From 79e52f3539d397ad19ba6c9fced45fa6f47305b0 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Mike=20F=C3=A4hrmann?= <mike_faehrmann@web.de>
Date: Wed, 7 Dec 2022 00:17:15 +0100
Subject: [PATCH 09/13] [imgth] rewrite

- inherit from GalleryExtractor
- fix image URLs
- better metadata
---
 gallery_dl/extractor/imgth.py | 83 ++++++++++++++++++++---------------
 1 file changed, 48 insertions(+), 35 deletions(-)

diff --git a/gallery_dl/extractor/imgth.py b/gallery_dl/extractor/imgth.py
index 7e4cce4e..9ae22a91 100644
--- a/gallery_dl/extractor/imgth.py
+++ b/gallery_dl/extractor/imgth.py
@@ -1,60 +1,73 @@
 # -*- coding: utf-8 -*-
 
-# Copyright 2015-2019 Mike Fährmann
+# Copyright 2015-2022 Mike Fährmann
 #
 # This program is free software; you can redistribute it and/or modify
 # it under the terms of the GNU General Public License version 2 as
 # published by the Free Software Foundation.
 
-"""Extract images from https://imgth.com/"""
+"""Extractors for https://imgth.com/"""
 
-from .common import Extractor, Message
+from .common import GalleryExtractor
 from .. import text
 
 
-class ImgthGalleryExtractor(Extractor):
+class ImgthGalleryExtractor(GalleryExtractor):
     """Extractor for image galleries from imgth.com"""
     category = "imgth"
-    subcategory = "gallery"
-    directory_fmt = ("{category}", "{gallery_id} {title}")
-    filename_fmt = "{category}_{gallery_id}_{num:>03}.{extension}"
-    archive_fmt = "{gallery_id}_{num}"
-    pattern = r"(?:https?://)?imgth\.com/gallery/(\d+)"
-    test = ("http://imgth.com/gallery/37/wallpaper-anime", {
-        "url": "4ae1d281ca2b48952cf5cca57e9914402ad72748",
-        "keyword": "6f8c00d6849ea89d1a028764675ec1fe9dbd87e2",
-    })
+    root = "https://imgth.com"
+    pattern = r"(?:https?://)?(?:www\.)?imgth\.com/gallery/(\d+)"
+    test = (
+        ("https://imgth.com/gallery/37/wallpaper-anime", {
+            "url": "4ae1d281ca2b48952cf5cca57e9914402ad72748",
+            "pattern": r"https://imgth\.com/images/2009/11/25"
+                       r"/wallpaper-anime_\w+\.jpg",
+            "keyword": {
+                "count": 12,
+                "date": "dt:2009-11-25 18:21:00",
+                "extension": "jpg",
+                "filename": r"re:wallpaper-anime_\w+",
+                "gallery_id": 37,
+                "num": int,
+                "title": "Wallpaper anime",
+                "user": "celebrities",
+            },
+        }),
+        ("https://www.imgth.com/gallery/37/wallpaper-anime"),
+    )
 
     def __init__(self, match):
-        Extractor.__init__(self, match)
-        self.gid = match.group(1)
-        self.url_base = "https://imgth.com/gallery/" + self.gid + "/g/page/"
+        self.gallery_id = gid = match.group(1)
+        url = "{}/gallery/{}/g/".format(self.root, gid)
+        GalleryExtractor.__init__(self, match, url)
 
-    def items(self):
-        page = self.request(self.url_base + "0").text
-        data = self.metadata(page)
-        yield Message.Directory, data
-        for data["num"], url in enumerate(self.images(page), 1):
-            yield Message.Url, url, text.nameext_from_url(url, data)
+    def metadata(self, page):
+        extr = text.extract_from(page)
+        return {
+            "gallery_id": text.parse_int(self.gallery_id),
+            "title": text.unescape(extr("<h1>", "</h1>")),
+            "count": text.parse_int(extr(
+                "total of images in this gallery: ", " ")),
+            "date" : text.parse_datetime(
+                extr("created on ", " by <")
+                .replace("th, ", " ", 1).replace("nd, ", " ", 1)
+                .replace("st, ", " ", 1), "%B %d %Y at %H:%M"),
+            "user" : text.unescape(extr(">", "<")),
+        }
 
     def images(self, page):
-        """Yield all image urls for this gallery"""
         pnum = 0
+
         while True:
             thumbs = text.extr(page, '<ul class="thumbnails">', '</ul>')
             for url in text.extract_iter(thumbs, '<img src="', '"'):
-                yield "https://imgth.com/images" + url[24:]
+                path = url.partition("/thumbs/")[2]
+                yield ("{}/images/{}".format(self.root, path), None)
+
             if '<li class="next">' not in page:
                 return
-            pnum += 1
-            page = self.request(self.url_base + str(pnum)).text
 
-    def metadata(self, page):
-        """Collect metadata for extractor-job"""
-        return text.extract_all(page, (
-            ("title", '<h1>', '</h1>'),
-            ("count", 'total of images in this gallery: ', ' '),
-            ("date" , 'created on ', ' by <'),
-            (None   , 'href="/users/', ''),
-            ("user" , '>', '<'),
-        ), values={"gallery_id": self.gid})[0]
+            pnum += 1
+            url = "{}/gallery/{}/g/page/{}".format(
+                self.root, self.gallery_id, pnum)
+            page = self.request(url).text

From 1c25cc7a3e4e9094ca6d333e8a559522dd222a91 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Mike=20F=C3=A4hrmann?= <mike_faehrmann@web.de>
Date: Wed, 7 Dec 2022 21:23:45 +0100
Subject: [PATCH 10/13] [warosu] fix and update

---
 gallery_dl/extractor/warosu.py | 62 ++++++++++++++++------------------
 1 file changed, 30 insertions(+), 32 deletions(-)

diff --git a/gallery_dl/extractor/warosu.py b/gallery_dl/extractor/warosu.py
index 677680fa..bdedfcbc 100644
--- a/gallery_dl/extractor/warosu.py
+++ b/gallery_dl/extractor/warosu.py
@@ -1,21 +1,22 @@
 # -*- coding: utf-8 -*-
 
-# Copyright 2017-2019 Mike Fährmann
+# Copyright 2017-2022 Mike Fährmann
 #
 # This program is free software; you can redistribute it and/or modify
 # it under the terms of the GNU General Public License version 2 as
 # published by the Free Software Foundation.
 
-"""Extract images from https://warosu.org/"""
+"""Extractors for https://warosu.org/"""
 
 from .common import Extractor, Message
 from .. import text
 
 
 class WarosuThreadExtractor(Extractor):
-    """Extractor for images from threads on warosu.org"""
+    """Extractor for threads on warosu.org"""
     category = "warosu"
     subcategory = "thread"
+    root = "https://warosu.org"
     directory_fmt = ("{category}", "{board}", "{thread} - {title}")
     filename_fmt = "{tim}-{filename}.{extension}"
     archive_fmt = "{board}_{thread}_{tim}"
@@ -31,7 +32,6 @@ class WarosuThreadExtractor(Extractor):
             "content": "d48df0a701e6599312bfff8674f4aa5d4fb8db1c",
         }),
     )
-    root = "https://warosu.org"
 
     def __init__(self, match):
         Extractor.__init__(self, match)
@@ -40,12 +40,12 @@ class WarosuThreadExtractor(Extractor):
     def items(self):
         url = "{}/{}/thread/{}".format(self.root, self.board, self.thread)
         page = self.request(url).text
-        data = self.get_metadata(page)
+        data = self.metadata(page)
         posts = self.posts(page)
 
         if not data["title"]:
-            title = text.remove_html(posts[0]["com"])
-            data["title"] = text.unescape(title)[:50]
+            data["title"] = text.unescape(text.remove_html(
+                posts[0]["com"]))[:50]
 
         yield Message.Directory, data
         for post in posts:
@@ -55,25 +55,24 @@ class WarosuThreadExtractor(Extractor):
                 post.update(data)
                 yield Message.Url, post["image"], post
 
-    def get_metadata(self, page):
-        """Collect metadata for extractor-job"""
+    def metadata(self, page):
         boardname = text.extr(page, "<title>", "</title>")
         title = text.extr(page, 'filetitle" itemprop="name">', '<')
         return {
-            "board": self.board,
+            "board"     : self.board,
             "board_name": boardname.rpartition(" - ")[2],
-            "thread": self.thread,
-            "title": title,
+            "thread"    : self.thread,
+            "title"     : title,
         }
 
     def posts(self, page):
-        """Build a list of all post-objects"""
+        """Build a list of all post objects"""
         page = text.extr(page, '<div class="content">', '<table>')
         needle = '<table itemscope itemtype="http://schema.org/Comment">'
         return [self.parse(post) for post in page.split(needle)]
 
     def parse(self, post):
-        """Build post-object by extracting data from an HTML post"""
+        """Build post object by extracting data from an HTML post"""
         data = self._extract_post(post)
         if "<span>File:" in post:
             self._extract_image(post, data)
@@ -84,24 +83,23 @@ class WarosuThreadExtractor(Extractor):
 
     @staticmethod
     def _extract_post(post):
-        data = text.extract_all(post, (
-            ("no"  , 'id="p', '"'),
-            ("name", '<span itemprop="name">', '</span>'),
-            ("time", '<span class="posttime" title="', '000">'),
-            ("now" , '', '<'),
-            ("com" , '<blockquote><p itemprop="text">', '</p></blockquote>'),
-        ))[0]
-        data["com"] = text.unescape(text.remove_html(data["com"].strip()))
-        return data
+        extr = text.extract_from(post)
+        return {
+            "no"  : extr('id="p', '"'),
+            "name": extr('<span itemprop="name">', "</span>"),
+            "time": extr('<span class="posttime" title="', '000">'),
+            "now" : extr("", "<"),
+            "com" : text.unescape(text.remove_html(extr(
+                '<blockquote><p itemprop="text">', '</p></blockquote>'
+            ).strip())),
+        }
 
     @staticmethod
     def _extract_image(post, data):
-        text.extract_all(post, (
-            ("fsize"   , '<span>File: ', ', '),
-            ("w"       , '', 'x'),
-            ("h"       , '', ', '),
-            ("filename", '', '<'),
-            ("image"   , '<br />\n<a href="', '"'),
-        ), 0, data)
-        data["filename"] = text.unquote(data["filename"].rpartition(".")[0])
-        data["image"] = "https:" + data["image"]
+        extr = text.extract_from(post)
+        data["fsize"] = extr("<span>File: ", ", ")
+        data["w"] = extr("", "x")
+        data["h"] = extr("", ", ")
+        data["filename"] = text.unquote(extr("", "<").rpartition(".")[0])
+        extr("<br />", "")
+        data["image"] = "https:" + extr('<a href="', '"')

From 989ec9fc790085278b43cae55f746357c35fbd39 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Mike=20F=C3=A4hrmann?= <mike_faehrmann@web.de>
Date: Wed, 7 Dec 2022 21:36:34 +0100
Subject: [PATCH 11/13] [khinsider] fix metadata extraction

---
 gallery_dl/extractor/khinsider.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gallery_dl/extractor/khinsider.py b/gallery_dl/extractor/khinsider.py
index d5cca1c2..0c3b002f 100644
--- a/gallery_dl/extractor/khinsider.py
+++ b/gallery_dl/extractor/khinsider.py
@@ -65,7 +65,7 @@ class KhinsiderSoundtrackExtractor(AsynchronousMixin, Extractor):
             "count": text.parse_int(extr("Number of Files: <b>", "<")),
             "size" : text.parse_bytes(extr("Total Filesize: <b>", "<")[:-1]),
             "date" : extr("Date Added: <b>", "<"),
-            "type" : extr("Album type: <b>", "<"),
+            "type" : text.remove_html(extr("Album type: <b>", "</b>")),
         }}
 
     def tracks(self, page):

From cd931e1139b2146375eb722bd6fa505e8a71b09c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Mike=20F=C3=A4hrmann?= <mike_faehrmann@web.de>
Date: Thu, 8 Dec 2022 18:58:29 +0100
Subject: [PATCH 12/13] update extractor test results

---
 gallery_dl/extractor/35photo.py      |  1 +
 gallery_dl/extractor/8chan.py        |  4 +--
 gallery_dl/extractor/deviantart.py   | 14 -----------
 gallery_dl/extractor/erome.py        | 37 +++++++++++++++++++---------
 gallery_dl/extractor/exhentai.py     |  8 ++++--
 gallery_dl/extractor/gelbooru_v02.py |  3 ++-
 gallery_dl/extractor/mangadex.py     |  2 +-
 gallery_dl/extractor/redgifs.py      |  4 +--
 gallery_dl/extractor/slickpic.py     |  3 ++-
 gallery_dl/extractor/smugmug.py      |  2 +-
 gallery_dl/extractor/twibooru.py     |  2 +-
 gallery_dl/extractor/twitter.py      |  6 ++---
 gallery_dl/extractor/unsplash.py     | 18 +++++++-------
 gallery_dl/extractor/webtoons.py     |  1 +
 14 files changed, 56 insertions(+), 49 deletions(-)

diff --git a/gallery_dl/extractor/35photo.py b/gallery_dl/extractor/35photo.py
index 28acc3d5..f86691d4 100644
--- a/gallery_dl/extractor/35photo.py
+++ b/gallery_dl/extractor/35photo.py
@@ -146,6 +146,7 @@ class _35photoTagExtractor(_35photoExtractor):
     test = ("https://35photo.pro/tags/landscape/", {
         "range": "1-25",
         "count": 25,
+        "archive": False,
     })
 
     def __init__(self, match):
diff --git a/gallery_dl/extractor/8chan.py b/gallery_dl/extractor/8chan.py
index 1e020c25..0e128c3a 100644
--- a/gallery_dl/extractor/8chan.py
+++ b/gallery_dl/extractor/8chan.py
@@ -92,8 +92,8 @@ class _8chanThreadExtractor(_8chanExtractor):
                 "uniquePosters": 9,
                 "usesCustomCss": True,
                 "usesCustomJs": False,
-                "wsPort": 8880,
-                "wssPort": 2087,
+                "?wsPort": 8880,
+                "?wssPort": 2087,
             },
         }),
         ("https://8chan.se/vhs/res/4.html"),
diff --git a/gallery_dl/extractor/deviantart.py b/gallery_dl/extractor/deviantart.py
index 45beddf3..df59be4a 100644
--- a/gallery_dl/extractor/deviantart.py
+++ b/gallery_dl/extractor/deviantart.py
@@ -896,20 +896,6 @@ class DeviantartDeviationExtractor(DeviantartExtractor):
             "range": "2-",
             "count": 4,
         }),
-        # video
-        ("https://www.deviantart.com/chi-u/art/-VIDEO-Brushes-330774593", {
-            "pattern": r"https://wixmp-.+wixmp.com/v/mp4/.+\.720p\.\w+.mp4",
-            "keyword": {
-                "filename": r"re:_video____brushes_\w+_by_chi_u-d5gxnb5",
-                "extension": "mp4",
-                "target": {
-                    "duration": 306,
-                    "filesize": 19367585,
-                    "quality": "720p",
-                    "src": str,
-                },
-            }
-        }),
         # journal
         ("https://www.deviantart.com/shimoda7/journal/ARTility-583755752", {
             "url": "d34b2c9f873423e665a1b8ced20fcb75951694a3",
diff --git a/gallery_dl/extractor/erome.py b/gallery_dl/extractor/erome.py
index b4dadc7e..ad3f16ba 100644
--- a/gallery_dl/extractor/erome.py
+++ b/gallery_dl/extractor/erome.py
@@ -1,6 +1,6 @@
 # -*- coding: utf-8 -*-
 
-# Copyright 2021 Mike Fährmann
+# Copyright 2021-2022 Mike Fährmann
 #
 # This program is free software; you can redistribute it and/or modify
 # it under the terms of the GNU General Public License version 2 as
@@ -92,16 +92,29 @@ class EromeAlbumExtractor(EromeExtractor):
     """Extractor for albums on erome.com"""
     subcategory = "album"
     pattern = BASE_PATTERN + r"/a/(\w+)"
-    test = ("https://www.erome.com/a/TyFMI7ik", {
-        "pattern": r"https://s\d+\.erome\.com/\d+/TyFMI7ik/\w+",
-        "count": 9,
-        "keyword": {
-            "album_id": "TyFMI7ik",
-            "num": int,
-            "title": "Ryan Ryans",
-            "user": "xanub",
-        },
-    })
+    test = (
+        ("https://www.erome.com/a/NQgdlWvk", {
+            "pattern": r"https://v\d+\.erome\.com/\d+"
+                       r"/NQgdlWvk/j7jlzmYB_480p\.mp4",
+            "count": 1,
+            "keyword": {
+                "album_id": "NQgdlWvk",
+                "num": 1,
+                "title": "porn",
+                "user": "yYgWBZw8o8qsMzM",
+            },
+        }),
+        ("https://www.erome.com/a/TdbZ4ogi", {
+            "pattern": r"https://s\d+\.erome\.com/\d+/TdbZ4ogi/\w+",
+            "count": 6,
+            "keyword": {
+                "album_id": "TdbZ4ogi",
+                "num": int,
+                "title": "82e78cfbb461ad87198f927fcb1fda9a1efac9ff.",
+                "user": "yYgWBZw8o8qsMzM",
+            },
+        }),
+    )
 
     def albums(self):
         return (self.item,)
@@ -110,7 +123,7 @@ class EromeAlbumExtractor(EromeExtractor):
 class EromeUserExtractor(EromeExtractor):
     subcategory = "user"
     pattern = BASE_PATTERN + r"/(?!a/|search\?)([^/?#]+)"
-    test = ("https://www.erome.com/xanub", {
+    test = ("https://www.erome.com/yYgWBZw8o8qsMzM", {
         "range": "1-25",
         "count": 25,
     })
diff --git a/gallery_dl/extractor/exhentai.py b/gallery_dl/extractor/exhentai.py
index 01375d81..dccc74e4 100644
--- a/gallery_dl/extractor/exhentai.py
+++ b/gallery_dl/extractor/exhentai.py
@@ -117,9 +117,10 @@ class ExhentaiGalleryExtractor(ExhentaiExtractor):
                r"|/s/([\da-f]{10})/(\d+)-(\d+))")
     test = (
         ("https://exhentai.org/g/1200119/d55c44d3d0/", {
+            "options": (("original", False),),
             "keyword": {
                 "cost": int,
-                "date": "dt:2018-03-18 20:15:00",
+                "date": "dt:2018-03-18 20:14:00",
                 "eh_category": "Non-H",
                 "expunged": False,
                 "favorites": r"re:^[12]\d$",
@@ -150,7 +151,8 @@ class ExhentaiGalleryExtractor(ExhentaiExtractor):
                 "uploader": "klorpa",
                 "width": int,
             },
-            "content": "e9891a4c017ed0bb734cd1efba5cd03f594d31ff",
+            "content": ("2c68cff8a7ca540a78c36fdbf5fbae0260484f87",
+                        "e9891a4c017ed0bb734cd1efba5cd03f594d31ff"),
         }),
         ("https://exhentai.org/g/960461/4f0e369d82/", {
             "exception": exception.NotFoundError,
@@ -159,9 +161,11 @@ class ExhentaiGalleryExtractor(ExhentaiExtractor):
             "exception": exception.AuthorizationError,
         }),
         ("https://exhentai.org/s/f68367b4c8/1200119-3", {
+            "options": (("original", False),),
             "count": 2,
         }),
         ("https://e-hentai.org/s/f68367b4c8/1200119-3", {
+            "options": (("original", False),),
             "count": 2,
         }),
         ("https://g.e-hentai.org/g/1200119/d55c44d3d0/"),
diff --git a/gallery_dl/extractor/gelbooru_v02.py b/gallery_dl/extractor/gelbooru_v02.py
index da87b8f1..facd3dbe 100644
--- a/gallery_dl/extractor/gelbooru_v02.py
+++ b/gallery_dl/extractor/gelbooru_v02.py
@@ -174,7 +174,8 @@ class GelbooruV02TagExtractor(GelbooruV02Extractor):
     pattern = BASE_PATTERN + r"/index\.php\?page=post&s=list&tags=([^&#]+)"
     test = (
         ("https://rule34.xxx/index.php?page=post&s=list&tags=danraku", {
-            "content": "5c6ae9ee13e6d4bc9cb8bdce224c84e67fbfa36c",
+            "content": ("5c6ae9ee13e6d4bc9cb8bdce224c84e67fbfa36c",
+                        "622e80be3f496672c44aab5c47fbc6941c61bc79"),
             "pattern": r"https?://.*rule34\.xxx/images/\d+/[0-9a-f]+\.jpg",
             "count": 2,
         }),
diff --git a/gallery_dl/extractor/mangadex.py b/gallery_dl/extractor/mangadex.py
index 0bc35274..dae203e7 100644
--- a/gallery_dl/extractor/mangadex.py
+++ b/gallery_dl/extractor/mangadex.py
@@ -109,7 +109,7 @@ class MangadexChapterExtractor(MangadexExtractor):
         }),
         # 'externalUrl', but still downloadable (#2503)
         ("https://mangadex.org/chapter/364728a4-6909-4164-9eea-6b56354f7c78", {
-            "count": 39,
+            "count": 0,  # 404
         }),
     )
 
diff --git a/gallery_dl/extractor/redgifs.py b/gallery_dl/extractor/redgifs.py
index 53e5e790..ad4282c8 100644
--- a/gallery_dl/extractor/redgifs.py
+++ b/gallery_dl/extractor/redgifs.py
@@ -72,7 +72,7 @@ class RedgifsUserExtractor(RedgifsExtractor):
     pattern = r"(?:https?://)?(?:www\.)?redgifs\.com/users/([^/?#]+)"
     test = ("https://www.redgifs.com/users/Natalifiction", {
         "pattern": r"https://\w+\.redgifs\.com/[A-Za-z]+\.mp4",
-        "count": ">= 120",
+        "count": ">= 100",
     })
 
     def metadata(self):
@@ -89,7 +89,7 @@ class RedgifsSearchExtractor(RedgifsExtractor):
     pattern = r"(?:https?://)?(?:www\.)?redgifs\.com/browse/?\?([^#]+)"
     test = (
         ("https://www.redgifs.com/browse?tags=JAV", {
-            "pattern": r"https://\w+\.redgifs\.com/[A-Za-z-]+\.mp4",
+            "pattern": r"https://\w+\.redgifs\.com/[A-Za-z-]+\.(mp4|jpg)",
             "range": "1-10",
             "count": 10,
         }),
diff --git a/gallery_dl/extractor/slickpic.py b/gallery_dl/extractor/slickpic.py
index ae4e2e8a..3727c0b0 100644
--- a/gallery_dl/extractor/slickpic.py
+++ b/gallery_dl/extractor/slickpic.py
@@ -43,7 +43,8 @@ class SlickpicAlbumExtractor(SlickpicExtractor):
         }),
         ("https://mattcrandall.slickpic.com/albums/LamborghiniMurcielago/", {
             "range": "34",
-            "content": ("52b5a310587de1048030ab13a912f6a3a9cc7dab",
+            "content": ("276eb2c902187bb177ae8013e310e1d6641fba9a",
+                        "52b5a310587de1048030ab13a912f6a3a9cc7dab",
                         "cec6630e659dc72db1ee1a9a6f3b525189261988",
                         "6f81e1e74c6cd6db36844e7211eef8e7cd30055d",
                         "22e83645fc242bc3584eca7ec982c8a53a4d8a44"),
diff --git a/gallery_dl/extractor/smugmug.py b/gallery_dl/extractor/smugmug.py
index 2264fe48..713d4c41 100644
--- a/gallery_dl/extractor/smugmug.py
+++ b/gallery_dl/extractor/smugmug.py
@@ -117,7 +117,7 @@ class SmugmugImageExtractor(SmugmugExtractor):
         # video
         ("https://tstravels.smugmug.com/Dailies/Daily-Dose-2015/i-39JFNzB", {
             "url": "04d0ab1ff829ca7d78f5acb5548953df08e9a5ee",
-            "keyword": "4cef98133ace511adc874c9d9abac5817ba0d856",
+            "keyword": "2b545184592c282b365fcbb7df6ca7952b8a3173",
         }),
     )
 
diff --git a/gallery_dl/extractor/twibooru.py b/gallery_dl/extractor/twibooru.py
index f010f926..30bf2f15 100644
--- a/gallery_dl/extractor/twibooru.py
+++ b/gallery_dl/extractor/twibooru.py
@@ -83,7 +83,7 @@ class TwibooruPostExtractor(TwibooruExtractor):
             "tag_ids": list,
             "tags": list,
             "thumbnails_generated": True,
-            "updated_at": "2022-09-21T14:31:50.441Z",
+            "updated_at": "2022-11-27T00:34:50.483Z",
             "upvotes": int,
             "view_url": "https://cdn.twibooru.org/img/2020/7/8/1/full.png",
             "width": 576,
diff --git a/gallery_dl/extractor/twitter.py b/gallery_dl/extractor/twitter.py
index 22d4a6ec..d0411acf 100644
--- a/gallery_dl/extractor/twitter.py
+++ b/gallery_dl/extractor/twitter.py
@@ -633,7 +633,7 @@ class TwitterEventExtractor(TwitterExtractor):
     pattern = BASE_PATTERN + r"/i/events/(\d+)"
     test = ("https://twitter.com/i/events/1484669206993903616", {
         "range": "1-20",
-        "count": ">5",
+        "count": ">=1",
     })
 
     def metadata(self):
@@ -759,7 +759,7 @@ class TwitterTweetExtractor(TwitterExtractor):
         # retweet with missing media entities (#1555)
         ("https://twitter.com/morino_ya/status/1392763691599237121", {
             "options": (("retweets", True),),
-            "count": 4,
+            "count": 0,  # private
         }),
         # deleted quote tweet (#2225)
         ("https://twitter.com/i/web/status/1460044411165888515", {
@@ -782,7 +782,7 @@ class TwitterTweetExtractor(TwitterExtractor):
         # '?format=...&name=...'-style URLs
         ("https://twitter.com/poco_dandy/status/1150646424461176832", {
             "options": (("cards", True),),
-            "pattern": r"https://pbs.twimg.com/card_img/157\d+/\w+"
+            "pattern": r"https://pbs.twimg.com/card_img/157\d+/[\w-]+"
                        r"\?format=(jpg|png)&name=orig$",
             "range": "1-2",
         }),
diff --git a/gallery_dl/extractor/unsplash.py b/gallery_dl/extractor/unsplash.py
index 8bea18c7..b298c27e 100644
--- a/gallery_dl/extractor/unsplash.py
+++ b/gallery_dl/extractor/unsplash.py
@@ -78,11 +78,11 @@ class UnsplashImageExtractor(UnsplashExtractor):
     pattern = BASE_PATTERN + r"/photos/([^/?#]+)"
     test = ("https://unsplash.com/photos/lsoogGC_5dg", {
         "pattern": r"https://images\.unsplash\.com/photo-1586348943529-"
-                   r"beaae6c28db9\?ixid=\w+&ixlib=rb-1.2.1",
+                   r"beaae6c28db9\?ixid=\w+&ixlib=rb-4.0.3",
         "keyword": {
             "alt_description": "re:silhouette of trees near body of water ",
             "blur_hash": "LZP4uQS4jboe%#o0WCa}2doJNaaz",
-            "categories": list,
+            "?categories": list,
             "color": "#f3c08c",
             "created_at": "2020-04-08T12:29:42Z",
             "date": "dt:2020-04-08 12:29:42",
@@ -108,9 +108,8 @@ class UnsplashImageExtractor(UnsplashExtractor):
                 "name": "Beaver Dam, WI 53916, USA",
                 "position": {
                     "latitude": 43.457769,
-                    "longitude": -88.837329
+                    "longitude": -88.837329,
                 },
-                "title": "Beaver Dam, WI 53916, USA"
             },
             "promoted_at": "2020-04-08T15:12:03Z",
             "sponsorship": None,
@@ -149,7 +148,7 @@ class UnsplashUserExtractor(UnsplashExtractor):
     pattern = BASE_PATTERN + r"/@(\w+)/?$"
     test = ("https://unsplash.com/@davehoefler", {
         "pattern": r"https://images\.unsplash\.com/(photo-\d+-\w+"
-                   r"|reserve/[^/?#]+)\?ixid=\w+&ixlib=rb-1\.2\.1$",
+                   r"|reserve/[^/?#]+)\?ixid=\w+&ixlib=rb-4\.0\.3$",
         "range": "1-30",
         "count": 30,
     })
@@ -166,7 +165,7 @@ class UnsplashFavoriteExtractor(UnsplashExtractor):
     pattern = BASE_PATTERN + r"/@(\w+)/likes"
     test = ("https://unsplash.com/@davehoefler/likes", {
         "pattern": r"https://images\.unsplash\.com/(photo-\d+-\w+"
-                   r"|reserve/[^/?#]+)\?ixid=\w+&ixlib=rb-1\.2\.1$",
+                   r"|reserve/[^/?#]+)\?ixid=\w+&ixlib=rb-4\.0\.3$",
         "range": "1-30",
         "count": 30,
     })
@@ -184,7 +183,7 @@ class UnsplashCollectionExtractor(UnsplashExtractor):
     test = (
         ("https://unsplash.com/collections/3178572/winter", {
             "pattern": r"https://images\.unsplash\.com/(photo-\d+-\w+"
-                       r"|reserve/[^/?#]+)\?ixid=\w+&ixlib=rb-1\.2\.1$",
+                       r"|reserve/[^/?#]+)\?ixid=\w+&ixlib=rb-4\.0\.3$",
             "keyword": {"collection_id": "3178572",
                         "collection_title": "winter"},
             "range": "1-30",
@@ -212,8 +211,9 @@ class UnsplashSearchExtractor(UnsplashExtractor):
     subcategory = "search"
     pattern = BASE_PATTERN + r"/s/photos/([^/?#]+)(?:\?([^#]+))?"
     test = ("https://unsplash.com/s/photos/hair-style", {
-        "pattern": r"https://images\.unsplash\.com/((flagged/)?photo-\d+-\w+"
-                   r"|reserve/[^/?#]+)\?ixid=\w+&ixlib=rb-1\.2\.1$",
+        "pattern": r"https://(images|plus)\.unsplash\.com"
+                   r"/((flagged/|premium_)?photo-\d+-\w+"
+                   r"|reserve/[^/?#]+)\?ixid=\w+&ixlib=rb-4\.0\.3$",
         "range": "1-30",
         "count": 30,
     })
diff --git a/gallery_dl/extractor/webtoons.py b/gallery_dl/extractor/webtoons.py
index 8a22fcb7..21f7c21e 100644
--- a/gallery_dl/extractor/webtoons.py
+++ b/gallery_dl/extractor/webtoons.py
@@ -57,6 +57,7 @@ class WebtoonsEpisodeExtractor(WebtoonsBase, GalleryExtractor):
         }),
         (("https://www.webtoons.com/en/challenge/punderworld"
           "/happy-earth-day-/viewer?title_no=312584&episode_no=40"), {
+            "exception": exception.NotFoundError,
             "keyword": {
                 "comic": "punderworld",
                 "description": str,

From dfe7b23579092223356f8aa0a861f7d2a5600e93 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Mike=20F=C3=A4hrmann?= <mike_faehrmann@web.de>
Date: Fri, 9 Dec 2022 19:43:55 +0100
Subject: [PATCH 13/13] support Firefox containers for --cookies-from-browser
 (#3346)

---
 docs/configuration.rst |  4 +-
 gallery_dl/__init__.py |  7 +++-
 gallery_dl/cookies.py  | 86 +++++++++++++++++++++++++++++++-----------
 gallery_dl/option.py   |  8 ++--
 4 files changed, 78 insertions(+), 27 deletions(-)

diff --git a/docs/configuration.rst b/docs/configuration.rst
index 59fa8fc2..f2a3aa35 100644
--- a/docs/configuration.rst
+++ b/docs/configuration.rst
@@ -432,15 +432,17 @@ Description
             "isAdult"    : "1"
         }
 
-    * A ``list`` with up to 3 entries specifying a browser profile.
+    * A ``list`` with up to 4 entries specifying a browser profile.
 
       * The first entry is the browser name
       * The optional second entry is a profile name or an absolute path to a profile directory
       * The optional third entry is the keyring to retrieve passwords for decrypting cookies from
+      * The optional fourth entry is a (Firefox) container name (``"none"`` for only cookies with no container)
 
       .. code:: json
 
         ["firefox"]
+        ["firefox", null, null, "Personal"]
         ["chromium", "Private", "kwallet"]
 
 
diff --git a/gallery_dl/__init__.py b/gallery_dl/__init__.py
index 3701d6fd..611b2b92 100644
--- a/gallery_dl/__init__.py
+++ b/gallery_dl/__init__.py
@@ -66,7 +66,12 @@ def main():
         if args.cookies_from_browser:
             browser, _, profile = args.cookies_from_browser.partition(":")
             browser, _, keyring = browser.partition("+")
-            config.set((), "cookies", (browser, profile, keyring))
+            if profile.startswith(":"):
+                container = profile[1:]
+                profile = None
+            else:
+                profile, _, container = profile.partition("::")
+            config.set((), "cookies", (browser, profile, keyring, container))
         for opts in args.options:
             config.set(*opts)
 
diff --git a/gallery_dl/cookies.py b/gallery_dl/cookies.py
index 6f9a92db..ee00bf74 100644
--- a/gallery_dl/cookies.py
+++ b/gallery_dl/cookies.py
@@ -24,7 +24,7 @@ import tempfile
 from datetime import datetime, timedelta, timezone
 from hashlib import pbkdf2_hmac
 from http.cookiejar import Cookie
-from . import aes
+from . import aes, text
 
 
 SUPPORTED_BROWSERS_CHROMIUM = {
@@ -35,11 +35,10 @@ logger = logging.getLogger("cookies")
 
 
 def load_cookies(cookiejar, browser_specification):
-    browser_name, profile, keyring = \
+    browser_name, profile, keyring, container = \
         _parse_browser_specification(*browser_specification)
-
     if browser_name == "firefox":
-        load_cookies_firefox(cookiejar, profile)
+        load_cookies_firefox(cookiejar, profile, container)
     elif browser_name == "safari":
         load_cookies_safari(cookiejar, profile)
     elif browser_name in SUPPORTED_BROWSERS_CHROMIUM:
@@ -48,12 +47,24 @@ def load_cookies(cookiejar, browser_specification):
         raise ValueError("unknown browser '{}'".format(browser_name))
 
 
-def load_cookies_firefox(cookiejar, profile=None):
-    set_cookie = cookiejar.set_cookie
-    with _firefox_cookies_database(profile) as db:
+def load_cookies_firefox(cookiejar, profile=None, container=None):
+    path, container_id = _firefox_cookies_database(profile, container)
+    with DatabaseCopy(path) as db:
+
+        sql = ("SELECT name, value, host, path, isSecure, expiry "
+               "FROM moz_cookies")
+        parameters = ()
+
+        if container_id is False:
+            sql += " WHERE NOT INSTR(originAttributes,'userContextId=')"
+        elif container_id:
+            sql += " WHERE originAttributes LIKE ? OR originAttributes LIKE ?"
+            uid = "%userContextId={}".format(container_id)
+            parameters = (uid, uid + "&%")
+
+        set_cookie = cookiejar.set_cookie
         for name, value, domain, path, secure, expires in db.execute(
-                "SELECT name, value, host, path, isSecure, expiry  "
-                "FROM moz_cookies"):
+                sql, parameters):
             set_cookie(Cookie(
                 0, name, value, None, False,
                 domain, bool(domain), domain.startswith("."),
@@ -79,9 +90,10 @@ def load_cookies_safari(cookiejar, profile=None):
 
 def load_cookies_chrome(cookiejar, browser_name, profile, keyring):
     config = _get_chromium_based_browser_settings(browser_name)
+    path = _chrome_cookies_database(profile, config)
+    logger.debug("Extracting cookies from %s", path)
 
-    with _chrome_cookies_database(profile, config) as db:
-
+    with DatabaseCopy(path) as db:
         db.text_factory = bytes
         decryptor = get_cookie_decryptor(
             config["directory"], config["keyring"], keyring=keyring)
@@ -134,8 +146,8 @@ def load_cookies_chrome(cookiejar, browser_name, profile, keyring):
 # --------------------------------------------------------------------
 # firefox
 
-def _firefox_cookies_database(profile=None):
-    if profile is None:
+def _firefox_cookies_database(profile=None, container=None):
+    if not profile:
         search_root = _firefox_browser_directory()
     elif _is_path(profile):
         search_root = profile
@@ -146,14 +158,45 @@ def _firefox_cookies_database(profile=None):
     if path is None:
         raise FileNotFoundError("Unable to find Firefox cookies database in "
                                 "{}".format(search_root))
-
     logger.debug("Extracting cookies from %s", path)
-    return DatabaseCopy(path)
+
+    if container == "none":
+        container_id = False
+        logger.debug("Only loading cookies not belonging to any container")
+
+    elif container:
+        containers_path = os.path.join(
+            os.path.dirname(path), "containers.json")
+
+        try:
+            with open(containers_path) as containers:
+                identities = json.load(containers)["identities"]
+        except OSError:
+            logger.error("Unable to read Firefox container database at %s",
+                         containers_path)
+            raise
+        except KeyError:
+            identities = ()
+
+        for context in identities:
+            if container == context.get("name") or container == text.extr(
+                    context.get("l10nID", ""), "userContext", ".label"):
+                container_id = context["userContextId"]
+                break
+        else:
+            raise ValueError("Unable to find Firefox container {}".format(
+                container))
+        logger.debug("Only loading cookies from container '%s' (ID %s)",
+                     container, container_id)
+    else:
+        container_id = None
+
+    return path, container_id
 
 
 def _firefox_browser_directory():
     if sys.platform in ("win32", "cygwin"):
-        return os.path.expandvars(R"%APPDATA%\Mozilla\Firefox\Profiles")
+        return os.path.expandvars(r"%APPDATA%\Mozilla\Firefox\Profiles")
     if sys.platform == "darwin":
         return os.path.expanduser("~/Library/Application Support/Firefox")
     return os.path.expanduser("~/.mozilla/firefox")
@@ -237,7 +280,7 @@ def _safari_parse_cookies_record(data, cookiejar):
 
     cookiejar.set_cookie(Cookie(
         0, name, value, None, False,
-        domain, bool(domain), domain.startswith('.'),
+        domain, bool(domain), domain.startswith("."),
         path, bool(path), is_secure, expiration_date, False,
         None, None, {},
     ))
@@ -265,9 +308,7 @@ def _chrome_cookies_database(profile, config):
     if path is None:
         raise FileNotFoundError("Unable to find {} cookies database in "
                                 "'{}'".format(config["browser"], search_root))
-
-    logger.debug("Extracting cookies from %s", path)
-    return DatabaseCopy(path)
+    return path
 
 
 def _get_chromium_based_browser_settings(browser_name):
@@ -937,11 +978,12 @@ def _is_path(value):
     return os.path.sep in value
 
 
-def _parse_browser_specification(browser, profile=None, keyring=None):
+def _parse_browser_specification(
+        browser, profile=None, keyring=None, container=None):
     if browser not in SUPPORTED_BROWSERS:
         raise ValueError("unsupported browser '{}'".format(browser))
     if keyring and keyring not in SUPPORTED_KEYRINGS:
         raise ValueError("unsupported keyring '{}'".format(keyring))
     if profile and _is_path(profile):
         profile = os.path.expanduser(profile)
-    return browser, profile, keyring
+    return browser, profile, keyring, container
diff --git a/gallery_dl/option.py b/gallery_dl/option.py
index 4d9a3587..91e9169c 100644
--- a/gallery_dl/option.py
+++ b/gallery_dl/option.py
@@ -142,10 +142,12 @@ def build_parser():
     )
     general.add_argument(
         "--cookies-from-browser",
-        dest="cookies_from_browser", metavar="BROWSER[+KEYRING][:PROFILE]",
+        dest="cookies_from_browser",
+        metavar="BROWSER[+KEYRING][:PROFILE][::CONTAINER]",
         help=("Name of the browser to load cookies from, "
-              "with optional keyring name prefixed with '+' and "
-              "profile prefixed with ':'"),
+              "with optional keyring name prefixed with '+', "
+              "profile prefixed with ':', and "
+              "container prefixed with '::' ('none' for no container)"),
     )
 
     output = parser.add_argument_group("Output Options")