[util] use functions for predicates

more lightweight and faster than classes
This commit is contained in:
Mike Fährmann
2026-01-18 20:21:46 +01:00
parent 90cd23e279
commit c23beee57c
4 changed files with 129 additions and 138 deletions

View File

@@ -36,7 +36,7 @@ class TiktokExtractor(Extractor):
self.cover = self.config("covers", False)
self.range = self.config("tiktok-range") or ""
self.range_predicate = util.RangePredicate(self.range)
self.range_predicate = util.predicate_range(self.range)
def items(self):
for tiktok_url in self.posts():

View File

@@ -274,32 +274,28 @@ class Job():
self.pred_post = self._prepare_predicates("post", False)
self.pred_queue = self._prepare_predicates("chapter", False)
def _prepare_predicates(self, target, skip=True):
def _prepare_predicates(self, target, skip):
predicates = []
extr = self.extractor
if self.extractor.config(f"{target}-unique"):
predicates.append(util.UniquePredicate())
if extr.config(target + "-unique"):
predicates.append(util.predicate_unique())
if pfilter := self.extractor.config(f"{target}-filter"):
if pfilter := extr.config(target + "-filter"):
try:
pred = util.FilterPredicate(pfilter, target)
predicates.append(util.predicate_filter(pfilter, target))
except (SyntaxError, ValueError, TypeError) as exc:
self.extractor.log.warning(exc)
else:
predicates.append(pred)
extr.log.warning(exc)
if prange := self.extractor.config(f"{target}-range"):
if prange := extr.config(target + "-range"):
try:
pred = util.RangePredicate(prange)
skip = extr.skip if skip and not pfilter else None
predicates.append(util.predicate_range(prange, skip))
except ValueError as exc:
self.extractor.log.warning(
extr.log.warning(
"invalid %s range: %s", target, exc)
else:
if skip and pred.lower > 1 and not pfilter:
pred.index += self.extractor.skip(pred.lower - 1)
predicates.append(pred)
return util.build_predicate(predicates)
return util.predicate_build(predicates)
def get_logger(self, name):
return self._wrap_logger(logging.getLogger(name))

View File

@@ -957,115 +957,110 @@ def build_proxy_map(proxies, log=None):
return proxies
def build_predicate(predicates):
def predicate_build(predicates):
if not predicates:
return true
elif len(predicates) == 1:
if len(predicates) == 1:
return predicates[0]
return functools.partial(chain_predicates, predicates)
def chain(url, kwdict):
for pred in predicates:
if not pred(url, kwdict):
return False
return True
return chain
def chain_predicates(predicates, url, kwdict):
    """Return True only if every predicate accepts the given item

    Predicates are called in order as 'pred(url, kwdict)';
    evaluation stops at the first one returning a falsy value.
    """
    return all(pred(url, kwdict) for pred in predicates)
class RangePredicate():
    """Predicate; True if the current index is in the given range(s)

    Every call advances an internal 1-based counter ('index') and tests
    it against all parsed ranges. Once the counter exceeds 'upper',
    StopExtraction is raised.
    """

    def __init__(self, rangespec):
        self.index = 0
        self.ranges = parsed = self._parse(rangespec)

        if not parsed:
            # nothing to match: the first call raises StopExtraction
            self.lower = self.upper = 0
            return

        # technically wrong, but good enough for now
        # and evaluating min/max for a large range is slow
        self.lower = min(r.start for r in parsed)
        self.upper = max(r.stop for r in parsed) - 1

    def __call__(self, _url, _kwdict):
        current = self.index + 1
        self.index = current

        if current > self.upper:
            raise exception.StopExtraction()
        return any(current in r for r in self.ranges)

    def _parse(self, rangespec):
        """Parse an integer range string and return the resulting ranges

        'rangespec' may be a comma-separated string, a single integer,
        or an iterable of group strings. Empty groups are ignored.

        Examples:
            _parse("-2,4,6-8,10-")
                -> [range(1, 3), range(4, 5), range(6, 9),
                    range(10, sys.maxsize)]
            _parse(" - 3 , 4- 4, 2-6")
                -> [range(1, 4), range(4, 5), range(2, 7)]
            _parse("1:2,4:8:2")
                -> [range(1, 2), range(4, 8, 2)]
        """
        if isinstance(rangespec, str):
            groups = rangespec.split(",")
        elif isinstance(rangespec, int):
            groups = (str(rangespec),)
        else:
            groups = rangespec

        result = []
        for group in groups:
            if not group:
                continue

            if ":" in group:
                # slice notation 'start:stop:step' with exclusive stop
                first, _, rest = group.partition(":")
                last, _, stride = rest.partition(":")
                first = int(first) if first.strip() else 1
                last = int(last) if last.strip() else sys.maxsize
                stride = int(stride) if stride.strip() else 1
                result.append(range(first, last, stride))
            elif "-" in group:
                # 'start-stop' with inclusive stop
                first, _, last = group.partition("-")
                first = int(first) if first.strip() else 1
                last = int(last) + 1 if last.strip() else sys.maxsize
                result.append(range(first, last))
            else:
                # single index
                first = int(group)
                result.append(range(first, first + 1))
        return result
class UniquePredicate():
def predicate_unique():
"""Predicate; True if given URL has not been encountered before"""
def __init__(self):
self.urls = set()
def __call__(self, url, _):
def _pred(url, _):
if url.startswith("text:"):
return True
if url not in self.urls:
self.urls.add(url)
if url not in urls:
urls.add(url)
return True
return False
urls = set()
return _pred
class FilterPredicate():
def predicate_filter(expr, target="image"):
"""Predicate; True if evaluating the given expression returns True"""
def __init__(self, expr, target="image"):
name = f"<{target} filter>"
self.expr = compile_filter(expr, name)
def __call__(self, _, kwdict):
def _pred(_, kwdict):
try:
return self.expr(kwdict)
return expr(kwdict)
except exception.GalleryDLException:
raise
except Exception as exc:
raise exception.FilterError(exc)
expr = compile_filter(expr, f"<{target} filter>")
return _pred
def predicate_range(ranges, skip=None):
    """Predicate; True if the current index is in the given range(s)

    Arguments:
        ranges: range specification accepted by predicate_range_parse()
        skip: optional callable 'skip(num)' that is asked to skip the
            first 'num' items and returns how many it actually skipped.
            It is only called when the lowest range start is above 1.

    Returns a function 'pred(url, kwdict)' that counts its calls
    (1-based) and returns True when the current count lies in one of
    the ranges. It raises StopExtraction once the count has reached the
    highest range end; for an empty specification this happens on the
    very first call.
    """
    parsed = predicate_range_parse(ranges)

    if parsed:
        # 'stop - 1' can overshoot the last real member of a stepped
        # range (e.g. range(4, 8, 2)), which only delays StopExtraction;
        # evaluating the exact min/max for a large range is slow
        upper = max(r.stop for r in parsed) - 1
        lower = min(r.start for r in parsed)

        if skip is None or lower <= 1:
            index = 0
        else:
            # only the 'lower - 1' items in front of the first requested
            # one may be skipped, so that the next item gets index 'lower'
            index = skip(lower - 1)
    else:
        index = upper = 0

    def _pred(_url, _kwdict):
        nonlocal index

        if index >= upper:
            raise exception.StopExtraction()
        index += 1

        for rng in parsed:
            if index in rng:
                return True
        return False

    return _pred


def predicate_range_parse(rangespec):
    """Parse an integer range string and return the resulting ranges

    'rangespec' may be a comma-separated string, a single integer,
    or an iterable of group strings. Empty groups are ignored.
    Raises ValueError for groups that are not valid integers.

    Examples:
        predicate_range_parse("-2,4,6-8,10-")
            -> [range(1, 3), range(4, 5), range(6, 9),
                range(10, sys.maxsize)]
        predicate_range_parse(" - 3 , 4- 4, 2-6")
            -> [range(1, 4), range(4, 5), range(2, 7)]
        predicate_range_parse("1:2,4:8:2")
            -> [range(1, 2), range(4, 8, 2)]
    """
    ranges = []

    if isinstance(rangespec, str):
        rangespec = rangespec.split(",")
    elif isinstance(rangespec, int):
        rangespec = (str(rangespec),)

    for group in rangespec:
        if not group:
            continue

        elif ":" in group:
            # slice notation 'start:stop:step' with exclusive stop
            start, _, stop = group.partition(":")
            stop, _, step = stop.partition(":")
            ranges.append(range(
                int(start) if start.strip() else 1,
                int(stop) if stop.strip() else sys.maxsize,
                int(step) if step.strip() else 1,
            ))

        elif "-" in group:
            # 'start-stop' with inclusive stop
            start, _, stop = group.partition("-")
            ranges.append(range(
                int(start) if start.strip() else 1,
                int(stop) + 1 if stop.strip() else sys.maxsize,
            ))

        else:
            # single index
            start = int(group)
            ranges.append(range(start, start+1))

    return ranges