[bellazon] add 'num_internal' & 'num_external' metadata fields (#8415)

This commit is contained in:
Mike Fährmann
2025-10-15 17:25:10 +02:00
parent c794600821
commit b336049fac
2 changed files with 8 additions and 3 deletions

View File

@@ -47,7 +47,7 @@ class BellazonExtractor(Extractor):
post["count"] = data["count"] = len(urls)
yield Message.Directory, data
data["num"] = 0
data["num"] = data["num_internal"] = data["num_external"] = 0
for info, url, url_img in urls:
url = text.unescape(url or url_img)
@@ -59,6 +59,7 @@ class BellazonExtractor(Extractor):
):
continue
data["num"] += 1
data["num_internal"] += 1
if not (alt := text.extr(info, ' alt="', '"')) or (
alt.startswith("post-") and "_thumb." in alt):
name = url
@@ -80,6 +81,8 @@ class BellazonExtractor(Extractor):
yield Message.Url, url, dc
else:
data["num"] += 1
data["num_external"] += 1
yield Message.Queue, url, data
def _pagination(self, base, pnum=None):

View File

@@ -23,7 +23,9 @@ __tests__ = (
"filename" : str,
"extension": "jpg",
"count" : 5,
"num" : range(1, 5),
"num" : range(1, 5),
"num_internal": range(1, 5),
"num_external": 0,
"post" : {
"author_id" : "72476",
"author_slug": "shepherd",
@@ -134,7 +136,7 @@ __tests__ = (
"extension": "mp4",
"filename" : r"re:^\d+$",
"id" : r"re:6361\d\d\d",
"num" : range(2, 11),
"num" : range(2, 12),
"post" : {
"author_id" : "101807",
"author_slug": "rogerdanish",