[kemono] extend 'duplicates' option (#7696)
allow selecting which file type(s) ('file'/'attachment'/'inline')
are exempt from being discarded as duplicates
This commit is contained in:
@@ -693,9 +693,7 @@ Description
|
||||
specified after a ``:`` (``windows``, ``linux``, or ``macos``).
|
||||
|
||||
Note:
|
||||
This option overrides
|
||||
`user-agent <extractor.*.user-agent_>`__
|
||||
and sets custom
|
||||
This option sets custom
|
||||
`headers <extractor.*.headers_>`__
|
||||
and
|
||||
`ciphers <extractor.*.ciphers_>`__
|
||||
@@ -3205,14 +3203,24 @@ Description
|
||||
extractor.kemono.duplicates
|
||||
---------------------------
|
||||
Type
|
||||
``bool``
|
||||
* ``bool``
|
||||
* ``string``
|
||||
* ``list`` of ``strings``
|
||||
Default
|
||||
``false``
|
||||
Example
|
||||
* ``"attachment,inline"``
|
||||
* ``["file", "attachment"]``
|
||||
Description
|
||||
Controls how to handle duplicate files in a post.
|
||||
|
||||
* ``true``: Download duplicates
|
||||
* ``false``: Ignore duplicates
|
||||
``true``
|
||||
Download duplicates
|
||||
``false``
|
||||
Ignore duplicates
|
||||
any ``list`` or ``string``
|
||||
| Download a duplicate file if its ``type`` is in the given ``list`` or comma-separated ``string``
|
||||
| Ignore it otherwise
|
||||
|
||||
|
||||
extractor.kemono.dms
|
||||
|
||||
@@ -56,12 +56,21 @@ class KemonoExtractor(Extractor):
|
||||
announcements = True if self.config("announcements") else None
|
||||
archives = True if self.config("archives") else False
|
||||
comments = True if self.config("comments") else False
|
||||
duplicates = True if self.config("duplicates") else False
|
||||
dms = True if self.config("dms") else None
|
||||
max_posts = self.config("max-posts")
|
||||
creator_info = {} if self.config("metadata", True) else None
|
||||
exts_archive = util.EXTS_ARCHIVE
|
||||
|
||||
if duplicates := self.config("duplicates"):
|
||||
if isinstance(duplicates, str):
|
||||
duplicates = set(duplicates.split(","))
|
||||
elif isinstance(duplicates, (list, tuple)):
|
||||
duplicates = set(duplicates)
|
||||
else:
|
||||
duplicates = {"file", "attachment", "inline"}
|
||||
else:
|
||||
duplicates = ()
|
||||
|
||||
# prevent files from being sent with gzip compression
|
||||
headers = {"Accept-Encoding": "identity"}
|
||||
|
||||
@@ -123,14 +132,13 @@ class KemonoExtractor(Extractor):
|
||||
if "\\" in url:
|
||||
file["path"] = url = url.replace("\\", "/")
|
||||
|
||||
match = find_hash(url)
|
||||
if match:
|
||||
if match := find_hash(url):
|
||||
file["hash"] = hash = match[1]
|
||||
if not duplicates:
|
||||
if hash in hashes:
|
||||
self.log.debug("Skipping %s (duplicate)", url)
|
||||
continue
|
||||
hashes.add(hash)
|
||||
if file["type"] not in duplicates and hash in hashes:
|
||||
self.log.debug("Skipping %s %s (duplicate)",
|
||||
file["type"], url)
|
||||
continue
|
||||
hashes.add(hash)
|
||||
else:
|
||||
file["hash"] = hash = ""
|
||||
|
||||
@@ -150,7 +158,7 @@ class KemonoExtractor(Extractor):
|
||||
file["type"] = "archive"
|
||||
if archives:
|
||||
try:
|
||||
data = self.api.file(file["hash"])
|
||||
data = self.api.file(hash)
|
||||
data.update(file)
|
||||
post_archives.append(data)
|
||||
except Exception as exc:
|
||||
|
||||
Reference in New Issue
Block a user