replace text.rextract() with text.rextr()

This commit is contained in:
Mike Fährmann
2025-05-23 18:28:58 +02:00
parent fe39b7d8c8
commit b81fc5c124
15 changed files with 23 additions and 25 deletions

View File

@@ -108,7 +108,7 @@ class AryionExtractor(Extractor):
pos = page.find("Next >>")
if pos < 0:
return
url = self.root + text.rextract(page, "href='", "'", pos)[0]
url = self.root + text.rextr(page, "href='", "'", pos)
def _parse_post(self, post_id):
url = "{}/g4/data.php?id={}".format(self.root, post_id)

View File

@@ -68,7 +68,7 @@ class CheveretoImageExtractor(CheveretoExtractor):
extr('url: "', '"'))
if not url or url.endswith("/loading.svg"):
pos = page.find(" download=")
url = text.rextract(page, 'href="', '"', pos)[0]
url = text.rextr(page, 'href="', '"', pos)
if not url.startswith("https://"):
url = util.decrypt_xor(
url, b"seltilovessimpcity@simpcityhatesscrapers",

View File

@@ -2083,8 +2083,7 @@ class DeviantartEclipseAPI():
pos = page.find('\\"name\\":\\"watching\\"')
if pos < 0:
raise exception.NotFoundError("'watching' module ID")
module_id = text.rextract(
page, '\\"id\\":', ',', pos)[0].strip('" ')
module_id = text.rextr(page, '\\"id\\":', ',', pos).strip('" ')
self._fetch_csrf_token(page)
return gruser_id, module_id

View File

@@ -188,7 +188,7 @@ class FuraffinityExtractor(Extractor):
pos = page.find('type="submit">Next</button>')
if pos >= 0:
path = text.rextract(page, '<form action="', '"', pos)[0]
path = text.rextr(page, '<form action="', '"', pos)
continue
path = text.extr(page, 'right" href="', '"')
@@ -358,7 +358,7 @@ class FuraffinityFollowingExtractor(FuraffinityExtractor):
for path in text.extract_iter(page, '<a href="', '"'):
yield Message.Queue, self.root + path, data
path = text.rextract(page, 'action="', '"')[0]
path = text.rextr(page, 'action="', '"')
if url.endswith(path):
return
url = self.root + path

View File

@@ -87,4 +87,4 @@ class FuskatorSearchExtractor(Extractor):
pages = text.extr(page, 'class="pages"><span>', '>&gt;&gt;<')
if not pages:
return
url = self.root + text.rextract(pages, 'href="', '"')[0]
url = self.root + text.rextr(pages, 'href="', '"')

View File

@@ -78,7 +78,7 @@ class ImagebamGalleryExtractor(ImagebamExtractor):
paths += findall(page)
pos = page.find('rel="next" aria-label="Next')
if pos > 0:
url = text.rextract(page, 'href="', '"', pos)[0]
url = text.rextr(page, 'href="', '"', pos)
if url:
page = self.request(url).text
continue

View File

@@ -38,7 +38,7 @@ class ImhentaiExtractor(BaseExtractor):
yield Message.Queue, base + gallery_id, data
prev = gallery_id
href = text.rextract(page, "class='page-link' href='", "'")[0]
href = text.rextr(page, "class='page-link' href='", "'")
if not href or href == "#":
return
if href[0] == "/":

View File

@@ -86,7 +86,7 @@ class KeenspotComicExtractor(Extractor):
pos = page.find('id="first_day1"')
if pos >= 0:
self._next = self._next_id
return text.rextract(page, 'href="', '"', pos)[0]
return text.rextr(page, 'href="', '"', pos)
pos = page.find('>FIRST PAGE<')
if pos >= 0:
@@ -95,7 +95,7 @@ class KeenspotComicExtractor(Extractor):
self._image = '<div id="comic">'
else:
self._next = self._next_id
return text.rextract(page, 'href="', '"', pos)[0]
return text.rextr(page, 'href="', '"', pos)
pos = page.find('<div id="kscomicpart"')
if pos >= 0:
@@ -106,13 +106,13 @@ class KeenspotComicExtractor(Extractor):
if pos >= 0:
self._image = '</header>'
self._needle = 'class="navarchive"'
return text.rextract(page, 'href="', '"', pos)[0]
return text.rextr(page, 'href="', '"', pos)
pos = page.find('id="flip_FirstDay"') # flipside
if pos >= 0:
self._image = 'class="flip_Pages ksc"'
self._needle = 'id="flip_ArcButton"'
return text.rextract(page, 'href="', '"', pos)[0]
return text.rextr(page, 'href="', '"', pos)
self.log.error("Unrecognized page layout")
return None
@@ -128,7 +128,7 @@ class KeenspotComicExtractor(Extractor):
@staticmethod
def _next_id(page):
pos = page.find('id="next_')
return text.rextract(page, 'href="', '"', pos)[0] if pos >= 0 else None
return text.rextr(page, 'href="', '"', pos) if pos >= 0 else None
@staticmethod
def _next_lastblood(page):
@@ -138,5 +138,5 @@ class KeenspotComicExtractor(Extractor):
@staticmethod
def _next_brawl(page):
pos = page.index("comic-nav-next")
url = text.rextract(page, 'href="', '"', pos)[0]
url = text.rextr(page, 'href="', '"', pos)
return None if "?random" in url else url

View File

@@ -94,7 +94,7 @@ class SexcomExtractor(Extractor):
if info:
try:
path, _ = text.rextract(
path = text.rextr(
info, "src: '", "'", info.index("label: 'HD'"))
except ValueError:
path = text.extr(info, "src: '", "'")

View File

@@ -45,8 +45,7 @@ class SubscribestarExtractor(Extractor):
if "<html><body>" in content:
data["content"] = content = text.extr(
content, "<body>", "</body>")
data["title"] = text.unescape(
text.rextract(content, "<h1>", "</h1>")[0] or "")
data["title"] = text.unescape(text.rextr(content, "<h1>", "</h1>"))
yield Message.Directory, data
for num, item in enumerate(media, 1):

View File

@@ -106,7 +106,7 @@ class TapasEpisodeExtractor(TapasExtractor):
yield Message.Url, url, text.nameext_from_url(url, episode)
def _extract_series(self, html):
series_id = text.rextract(html, 'data-series-id="', '"')[0]
series_id = text.rextr(html, 'data-series-id="', '"')
try:
return self._cache[series_id]
except KeyError:

View File

@@ -32,8 +32,8 @@ class WallpapercaveImageExtractor(Extractor):
if path is None:
try:
path = text.rextract(
page, 'href="', '"', page.index('id="tdownload"'))[0]
path = text.rextr(
page, 'href="', '"', page.index('id="tdownload"'), None)
except Exception:
pass
else:
@@ -44,7 +44,7 @@ class WallpapercaveImageExtractor(Extractor):
if path is None:
for wp in text.extract_iter(
page, 'class="wallpaper" id="wp', '</picture>'):
path = text.rextract(wp, ' src="', '"')[0]
path = text.rextr(wp, ' src="', '"')
if path:
image = text.nameext_from_url(path)
yield Message.Directory, image

View File

@@ -200,5 +200,5 @@ class WeasylFavoriteExtractor(WeasylExtractor):
pos = page.index('">Next (', pos)
except ValueError:
return
path = text.unescape(text.rextract(page, 'href="', '"', pos)[0])
path = text.unescape(text.rextr(page, 'href="', '"', pos))
params = None

View File

@@ -70,7 +70,7 @@ class XvideosGalleryExtractor(XvideosBase, GalleryExtractor):
return
while len(results) % 500 == 0:
path = text.rextract(page, ' href="', '"', page.find(">Next</"))[0]
path = text.rextr(page, ' href="', '"', page.find(">Next</"))
if not path:
break
page = self.request(self.root + path).text

View File

@@ -56,7 +56,7 @@ class ZzupGalleryExtractor(GalleryExtractor):
pos = page.find("glyphicon-arrow-right")
if pos < 0:
break
path = text.rextract(page, ' href="', '"', pos)[0]
path = text.rextr(page, ' href="', '"', pos)
page = self.request(text.urljoin(self.gallery_url, path)).text
return results