replace standard library 're' uses with 'util.re()'
This commit is contained in:
@@ -9,10 +9,8 @@
|
|||||||
"""Extractors for https://agn.ph/"""
|
"""Extractors for https://agn.ph/"""
|
||||||
|
|
||||||
from . import booru
|
from . import booru
|
||||||
from .. import text
|
from .. import text, util
|
||||||
|
|
||||||
import collections
|
import collections
|
||||||
import re
|
|
||||||
|
|
||||||
BASE_PATTERN = r"(?:https?://)?agn\.ph"
|
BASE_PATTERN = r"(?:https?://)?agn\.ph"
|
||||||
|
|
||||||
@@ -72,7 +70,7 @@ class AgnphExtractor(booru.BooruExtractor):
|
|||||||
return
|
return
|
||||||
|
|
||||||
tags = collections.defaultdict(list)
|
tags = collections.defaultdict(list)
|
||||||
pattern = re.compile(r'class="(.)typetag">([^<]+)')
|
pattern = util.re(r'class="(.)typetag">([^<]+)')
|
||||||
for tag_type, tag_name in pattern.findall(tag_container):
|
for tag_type, tag_name in pattern.findall(tag_container):
|
||||||
tags[tag_type].append(text.unquote(tag_name).replace(" ", "_"))
|
tags[tag_type].append(text.unquote(tag_name).replace(" ", "_"))
|
||||||
for key, value in tags.items():
|
for key, value in tags.items():
|
||||||
|
|||||||
@@ -8,7 +8,6 @@
|
|||||||
|
|
||||||
from .common import Extractor, Message
|
from .common import Extractor, Message
|
||||||
from .. import text, util, exception
|
from .. import text, util, exception
|
||||||
import re
|
|
||||||
|
|
||||||
BASE_PATTERN = r"(?:https?://)?(?:www\.)?arca\.live"
|
BASE_PATTERN = r"(?:https?://)?(?:www\.)?arca\.live"
|
||||||
|
|
||||||
@@ -65,8 +64,8 @@ class ArcalivePostExtractor(ArcaliveExtractor):
|
|||||||
def _extract_files(self, post):
|
def _extract_files(self, post):
|
||||||
files = []
|
files = []
|
||||||
|
|
||||||
for video, media in self._extract_media(post["content"]):
|
for video, media in util.re(r"<(?:img|vide(o)) ([^>]+)").findall(
|
||||||
|
post["content"]):
|
||||||
if not self.emoticons and 'class="arca-emoticon"' in media:
|
if not self.emoticons and 'class="arca-emoticon"' in media:
|
||||||
continue
|
continue
|
||||||
|
|
||||||
@@ -113,11 +112,6 @@ class ArcalivePostExtractor(ArcaliveExtractor):
|
|||||||
|
|
||||||
return files
|
return files
|
||||||
|
|
||||||
def _extract_media(self, content):
|
|
||||||
ArcalivePostExtractor._extract_media = extr = re.compile(
|
|
||||||
r"<(?:img|vide(o)) ([^>]+)").findall
|
|
||||||
return extr(content)
|
|
||||||
|
|
||||||
|
|
||||||
class ArcaliveBoardExtractor(ArcaliveExtractor):
|
class ArcaliveBoardExtractor(ArcaliveExtractor):
|
||||||
"""Extractor for an arca.live board's posts"""
|
"""Extractor for an arca.live board's posts"""
|
||||||
|
|||||||
@@ -7,8 +7,7 @@
|
|||||||
"""Extractors for https://bato.to/"""
|
"""Extractors for https://bato.to/"""
|
||||||
|
|
||||||
from .common import Extractor, ChapterExtractor, MangaExtractor
|
from .common import Extractor, ChapterExtractor, MangaExtractor
|
||||||
from .. import text, exception
|
from .. import text, util, exception
|
||||||
import re
|
|
||||||
|
|
||||||
BASE_PATTERN = (r"(?:https?://)?("
|
BASE_PATTERN = (r"(?:https?://)?("
|
||||||
r"(?:ba|d|f|h|j|m|w)to\.to|"
|
r"(?:ba|d|f|h|j|m|w)to\.to|"
|
||||||
@@ -104,9 +103,9 @@ class BatotoChapterExtractor(BatotoBase, ChapterExtractor):
|
|||||||
info = text.remove_html(extr('link-hover">', "</"))
|
info = text.remove_html(extr('link-hover">', "</"))
|
||||||
info = text.unescape(info)
|
info = text.unescape(info)
|
||||||
|
|
||||||
match = re.match(
|
match = util.re(
|
||||||
r"(?i)(?:(?:Volume|S(?:eason)?)\s*(\d+)\s+)?"
|
r"(?i)(?:(?:Volume|S(?:eason)?)\s*(\d+)\s+)?"
|
||||||
r"(?:Chapter|Episode)\s*(\d+)([\w.]*)", info)
|
r"(?:Chapter|Episode)\s*(\d+)([\w.]*)").match(info)
|
||||||
if match:
|
if match:
|
||||||
volume, chapter, minor = match.groups()
|
volume, chapter, minor = match.groups()
|
||||||
else:
|
else:
|
||||||
|
|||||||
@@ -10,7 +10,6 @@
|
|||||||
|
|
||||||
from .common import BaseExtractor, Message
|
from .common import BaseExtractor, Message
|
||||||
from .. import text, util
|
from .. import text, util
|
||||||
import re
|
|
||||||
|
|
||||||
|
|
||||||
class BloggerExtractor(BaseExtractor):
|
class BloggerExtractor(BaseExtractor):
|
||||||
@@ -33,13 +32,13 @@ class BloggerExtractor(BaseExtractor):
|
|||||||
blog["date"] = text.parse_datetime(blog["published"])
|
blog["date"] = text.parse_datetime(blog["published"])
|
||||||
del blog["selfLink"]
|
del blog["selfLink"]
|
||||||
|
|
||||||
sub = re.compile(r"(/|=)(?:[sw]\d+|w\d+-h\d+)(?=/|$)").sub
|
sub = util.re(r"(/|=)(?:[sw]\d+|w\d+-h\d+)(?=/|$)").sub
|
||||||
findall_image = re.compile(
|
findall_image = util.re(
|
||||||
r'src="(https?://(?:'
|
r'src="(https?://(?:'
|
||||||
r'blogger\.googleusercontent\.com/img|'
|
r'blogger\.googleusercontent\.com/img|'
|
||||||
r'lh\d+(?:-\w+)?\.googleusercontent\.com|'
|
r'lh\d+(?:-\w+)?\.googleusercontent\.com|'
|
||||||
r'\d+\.bp\.blogspot\.com)/[^"]+)').findall
|
r'\d+\.bp\.blogspot\.com)/[^"]+)').findall
|
||||||
findall_video = re.compile(
|
findall_video = util.re(
|
||||||
r'src="(https?://www\.blogger\.com/video\.g\?token=[^"]+)').findall
|
r'src="(https?://www\.blogger\.com/video\.g\?token=[^"]+)').findall
|
||||||
metadata = self.metadata()
|
metadata = self.metadata()
|
||||||
|
|
||||||
|
|||||||
@@ -15,7 +15,6 @@ import collections
|
|||||||
import mimetypes
|
import mimetypes
|
||||||
import binascii
|
import binascii
|
||||||
import time
|
import time
|
||||||
import re
|
|
||||||
|
|
||||||
BASE_PATTERN = (
|
BASE_PATTERN = (
|
||||||
r"(?:https?://)?(?:"
|
r"(?:https?://)?(?:"
|
||||||
@@ -66,10 +65,13 @@ class DeviantartExtractor(Extractor):
|
|||||||
if self.quality:
|
if self.quality:
|
||||||
if self.quality == "png":
|
if self.quality == "png":
|
||||||
self.quality = "-fullview.png?"
|
self.quality = "-fullview.png?"
|
||||||
self.quality_sub = re.compile(r"-fullview\.[a-z0-9]+\?").sub
|
self.quality_sub = util.re(r"-fullview\.[a-z0-9]+\?").sub
|
||||||
else:
|
else:
|
||||||
self.quality = ",q_{}".format(self.quality)
|
self.quality = ",q_{}".format(self.quality)
|
||||||
self.quality_sub = re.compile(r",q_\d+").sub
|
self.quality_sub = util.re(r",q_\d+").sub
|
||||||
|
|
||||||
|
if self.intermediary:
|
||||||
|
self.intermediary_subn = util.re(r"(/f/[^/]+/[^/]+)/v\d+/.*").subn
|
||||||
|
|
||||||
if isinstance(self.original, str) and \
|
if isinstance(self.original, str) and \
|
||||||
self.original.lower().startswith("image"):
|
self.original.lower().startswith("image"):
|
||||||
@@ -271,7 +273,7 @@ class DeviantartExtractor(Extractor):
|
|||||||
)
|
)
|
||||||
|
|
||||||
# filename metadata
|
# filename metadata
|
||||||
sub = re.compile(r"\W").sub
|
sub = util.re(r"\W").sub
|
||||||
deviation["filename"] = "".join((
|
deviation["filename"] = "".join((
|
||||||
sub("_", deviation["title"].lower()), "_by_",
|
sub("_", deviation["title"].lower()), "_by_",
|
||||||
sub("_", deviation["author"]["username"].lower()), "-d",
|
sub("_", deviation["author"]["username"].lower()), "-d",
|
||||||
@@ -666,8 +668,7 @@ x2="45.4107524%" y2="71.4898596%" id="app-root-3">\
|
|||||||
if content["src"].startswith("https://images-wixmp-"):
|
if content["src"].startswith("https://images-wixmp-"):
|
||||||
if self.intermediary and deviation["index"] <= 790677560:
|
if self.intermediary and deviation["index"] <= 790677560:
|
||||||
# https://github.com/r888888888/danbooru/issues/4069
|
# https://github.com/r888888888/danbooru/issues/4069
|
||||||
intermediary, count = re.subn(
|
intermediary, count = self.intermediary_subn(
|
||||||
r"(/f/[^/]+/[^/]+)/v\d+/.*",
|
|
||||||
r"/intermediary\1", content["src"], 1)
|
r"/intermediary\1", content["src"], 1)
|
||||||
if count:
|
if count:
|
||||||
deviation["is_original"] = False
|
deviation["is_original"] = False
|
||||||
@@ -682,8 +683,8 @@ x2="45.4107524%" y2="71.4898596%" id="app-root-3">\
|
|||||||
@staticmethod
|
@staticmethod
|
||||||
def _find_folder(folders, name, uuid):
|
def _find_folder(folders, name, uuid):
|
||||||
if uuid.isdecimal():
|
if uuid.isdecimal():
|
||||||
match = re.compile(name.replace(
|
match = util.re(
|
||||||
"-", r"[^a-z0-9]+") + "$", re.IGNORECASE).match
|
"(?i)" + name.replace("-", "[^a-z0-9]+") + "$").match
|
||||||
for folder in folders:
|
for folder in folders:
|
||||||
if match(folder["name"]):
|
if match(folder["name"]):
|
||||||
return folder
|
return folder
|
||||||
|
|||||||
@@ -10,7 +10,6 @@
|
|||||||
|
|
||||||
from .common import ChapterExtractor, MangaExtractor, Extractor, Message
|
from .common import ChapterExtractor, MangaExtractor, Extractor, Message
|
||||||
from .. import text, util
|
from .. import text, util
|
||||||
import re
|
|
||||||
|
|
||||||
BASE_PATTERN = r"(?:https?://)?(?:www\.)?dynasty-scans\.com"
|
BASE_PATTERN = r"(?:https?://)?(?:www\.)?dynasty-scans\.com"
|
||||||
|
|
||||||
@@ -47,12 +46,11 @@ class DynastyscansChapterExtractor(DynastyscansBase, ChapterExtractor):
|
|||||||
|
|
||||||
def metadata(self, page):
|
def metadata(self, page):
|
||||||
extr = text.extract_from(page)
|
extr = text.extract_from(page)
|
||||||
match = re.match(
|
match = util.re(
|
||||||
(r"(?:<a[^>]*>)?([^<]+)(?:</a>)?" # manga name
|
r"(?:<a[^>]*>)?([^<]+)(?:</a>)?" # manga name
|
||||||
r"(?: ch(\d+)([^:<]*))?" # chapter info
|
r"(?: ch(\d+)([^:<]*))?" # chapter info
|
||||||
r"(?:: (.+))?"), # title
|
r"(?:: (.+))?" # title
|
||||||
extr("<h3 id='chapter-title'><b>", "</b>"),
|
).match(extr("<h3 id='chapter-title'><b>", "</b>"))
|
||||||
)
|
|
||||||
author = extr(" by ", "</a>")
|
author = extr(" by ", "</a>")
|
||||||
group = extr('"icon-print"></i> ', '</span>')
|
group = extr('"icon-print"></i> ', '</span>')
|
||||||
|
|
||||||
|
|||||||
@@ -7,8 +7,7 @@
|
|||||||
"""Extractors for https://everia.club"""
|
"""Extractors for https://everia.club"""
|
||||||
|
|
||||||
from .common import Extractor, Message
|
from .common import Extractor, Message
|
||||||
from .. import text
|
from .. import text, util
|
||||||
import re
|
|
||||||
|
|
||||||
BASE_PATTERN = r"(?:https?://)?everia\.club"
|
BASE_PATTERN = r"(?:https?://)?everia\.club"
|
||||||
|
|
||||||
@@ -26,7 +25,7 @@ class EveriaExtractor(Extractor):
|
|||||||
return self._pagination(self.groups[0])
|
return self._pagination(self.groups[0])
|
||||||
|
|
||||||
def _pagination(self, path, params=None, pnum=1):
|
def _pagination(self, path, params=None, pnum=1):
|
||||||
find_posts = re.compile(r'thumbnail">\s*<a href="([^"]+)').findall
|
find_posts = util.re(r'thumbnail">\s*<a href="([^"]+)').findall
|
||||||
|
|
||||||
while True:
|
while True:
|
||||||
if pnum == 1:
|
if pnum == 1:
|
||||||
@@ -53,7 +52,7 @@ class EveriaPostExtractor(EveriaExtractor):
|
|||||||
url = self.root + self.groups[0] + "/"
|
url = self.root + self.groups[0] + "/"
|
||||||
page = self.request(url).text
|
page = self.request(url).text
|
||||||
content = text.extr(page, 'itemprop="text">', "<h3")
|
content = text.extr(page, 'itemprop="text">', "<h3")
|
||||||
urls = re.findall(r'img.*?src="([^"]+)', content)
|
urls = util.re(r'img.*?src="([^"]+)').findall(content)
|
||||||
|
|
||||||
data = {
|
data = {
|
||||||
"title": text.unescape(
|
"title": text.unescape(
|
||||||
|
|||||||
@@ -9,7 +9,6 @@
|
|||||||
from .common import Extractor, Message
|
from .common import Extractor, Message
|
||||||
from .. import text, util
|
from .. import text, util
|
||||||
from ..cache import memcache
|
from ..cache import memcache
|
||||||
import re
|
|
||||||
|
|
||||||
BASE_PATTERN = r"(?:https?://)?(?:www\.)?fanbox\.cc"
|
BASE_PATTERN = r"(?:https?://)?(?:www\.)?fanbox\.cc"
|
||||||
USER_PATTERN = (
|
USER_PATTERN = (
|
||||||
@@ -211,7 +210,7 @@ class FanboxExtractor(Extractor):
|
|||||||
num = 0
|
num = 0
|
||||||
cover_image = post.get("coverImageUrl")
|
cover_image = post.get("coverImageUrl")
|
||||||
if cover_image:
|
if cover_image:
|
||||||
cover_image = re.sub("/c/[0-9a-z_]+", "", cover_image)
|
cover_image = util.re("/c/[0-9a-z_]+").sub("", cover_image)
|
||||||
final_post = post.copy()
|
final_post = post.copy()
|
||||||
final_post["isCoverImage"] = True
|
final_post["isCoverImage"] = True
|
||||||
final_post["fileUrl"] = cover_image
|
final_post["fileUrl"] = cover_image
|
||||||
|
|||||||
@@ -10,9 +10,7 @@
|
|||||||
|
|
||||||
from . import booru
|
from . import booru
|
||||||
from .. import text, util, exception
|
from .. import text, util, exception
|
||||||
|
|
||||||
import collections
|
import collections
|
||||||
import re
|
|
||||||
|
|
||||||
|
|
||||||
class GelbooruV02Extractor(booru.BooruExtractor):
|
class GelbooruV02Extractor(booru.BooruExtractor):
|
||||||
@@ -77,7 +75,7 @@ class GelbooruV02Extractor(booru.BooruExtractor):
|
|||||||
params["pid"] = self.page_start * self.per_page
|
params["pid"] = self.page_start * self.per_page
|
||||||
|
|
||||||
data = {}
|
data = {}
|
||||||
find_ids = re.compile(r"\sid=\"p(\d+)").findall
|
find_ids = util.re(r"\sid=\"p(\d+)").findall
|
||||||
|
|
||||||
while True:
|
while True:
|
||||||
page = self.request(url, params=params).text
|
page = self.request(url, params=params).text
|
||||||
@@ -108,8 +106,7 @@ class GelbooruV02Extractor(booru.BooruExtractor):
|
|||||||
return
|
return
|
||||||
|
|
||||||
tags = collections.defaultdict(list)
|
tags = collections.defaultdict(list)
|
||||||
pattern = re.compile(
|
pattern = util.re(r"(?s)tag-type-([^\"' ]+).*?[?;]tags=([^\"'&]+)")
|
||||||
r"tag-type-([^\"' ]+).*?[?;]tags=([^\"'&]+)", re.S)
|
|
||||||
for tag_type, tag_name in pattern.findall(tag_container):
|
for tag_type, tag_name in pattern.findall(tag_container):
|
||||||
tags[tag_type].append(text.unescape(text.unquote(tag_name)))
|
tags[tag_type].append(text.unescape(text.unquote(tag_name)))
|
||||||
for key, value in tags.items():
|
for key, value in tags.items():
|
||||||
|
|||||||
@@ -7,9 +7,8 @@
|
|||||||
"""Generic information extractor"""
|
"""Generic information extractor"""
|
||||||
|
|
||||||
from .common import Extractor, Message
|
from .common import Extractor, Message
|
||||||
from .. import config, text
|
from .. import config, text, util
|
||||||
import os.path
|
import os.path
|
||||||
import re
|
|
||||||
|
|
||||||
|
|
||||||
class GenericExtractor(Extractor):
|
class GenericExtractor(Extractor):
|
||||||
@@ -172,8 +171,8 @@ class GenericExtractor(Extractor):
|
|||||||
r"(?:[^\"'<>\s]*)?" # optional query and fragment
|
r"(?:[^\"'<>\s]*)?" # optional query and fragment
|
||||||
)
|
)
|
||||||
|
|
||||||
imageurls_src = re.findall(imageurl_pattern_src, page)
|
imageurls_src = util.re(imageurl_pattern_src).findall(page)
|
||||||
imageurls_ext = re.findall(imageurl_pattern_ext, page)
|
imageurls_ext = util.re(imageurl_pattern_ext).findall(page)
|
||||||
imageurls = imageurls_src + imageurls_ext
|
imageurls = imageurls_src + imageurls_ext
|
||||||
|
|
||||||
# Resolve relative urls
|
# Resolve relative urls
|
||||||
@@ -182,8 +181,8 @@ class GenericExtractor(Extractor):
|
|||||||
# by prepending a suitable base url.
|
# by prepending a suitable base url.
|
||||||
#
|
#
|
||||||
# If the page contains a <base> element, use it as base url
|
# If the page contains a <base> element, use it as base url
|
||||||
basematch = re.search(
|
basematch = util.re(
|
||||||
r"(?i)(?:<base\s.*?href=[\"']?)(?P<url>[^\"' >]+)", page)
|
r"(?i)(?:<base\s.*?href=[\"']?)(?P<url>[^\"' >]+)").search(page)
|
||||||
if basematch:
|
if basematch:
|
||||||
self.baseurl = basematch.group('url').rstrip('/')
|
self.baseurl = basematch.group('url').rstrip('/')
|
||||||
# Otherwise, extract the base url from self.url
|
# Otherwise, extract the base url from self.url
|
||||||
|
|||||||
@@ -6,9 +6,8 @@
|
|||||||
|
|
||||||
"""Extractors for https://hatenablog.com"""
|
"""Extractors for https://hatenablog.com"""
|
||||||
|
|
||||||
import re
|
|
||||||
from .common import Extractor, Message
|
from .common import Extractor, Message
|
||||||
from .. import text
|
from .. import text, util
|
||||||
|
|
||||||
|
|
||||||
BASE_PATTERN = (
|
BASE_PATTERN = (
|
||||||
@@ -31,7 +30,7 @@ class HatenablogExtractor(Extractor):
|
|||||||
self.domain = match.group(1) or match.group(2)
|
self.domain = match.group(1) or match.group(2)
|
||||||
|
|
||||||
def _init(self):
|
def _init(self):
|
||||||
self._find_img = re.compile(r'<img +([^>]+)').finditer
|
self._find_img = util.re(r'<img +([^>]+)').finditer
|
||||||
|
|
||||||
def _handle_article(self, article: str):
|
def _handle_article(self, article: str):
|
||||||
extr = text.extract_from(article)
|
extr = text.extract_from(article)
|
||||||
@@ -74,7 +73,7 @@ class HatenablogEntriesExtractor(HatenablogExtractor):
|
|||||||
|
|
||||||
def _init(self):
|
def _init(self):
|
||||||
HatenablogExtractor._init(self)
|
HatenablogExtractor._init(self)
|
||||||
self._find_pager_url = re.compile(
|
self._find_pager_url = util.re(
|
||||||
r' class="pager-next">\s*<a href="([^"]+)').search
|
r' class="pager-next">\s*<a href="([^"]+)').search
|
||||||
|
|
||||||
def items(self):
|
def items(self):
|
||||||
|
|||||||
@@ -10,7 +10,6 @@
|
|||||||
|
|
||||||
from .common import ChapterExtractor, MangaExtractor
|
from .common import ChapterExtractor, MangaExtractor
|
||||||
from .. import text, util
|
from .. import text, util
|
||||||
import re
|
|
||||||
|
|
||||||
|
|
||||||
class Hentai2readBase():
|
class Hentai2readBase():
|
||||||
@@ -31,8 +30,9 @@ class Hentai2readChapterExtractor(Hentai2readBase, ChapterExtractor):
|
|||||||
chapter_id, pos = text.extract(page, 'data-cid="', '"', pos)
|
chapter_id, pos = text.extract(page, 'data-cid="', '"', pos)
|
||||||
chapter, sep, minor = self.groups[1].partition(".")
|
chapter, sep, minor = self.groups[1].partition(".")
|
||||||
|
|
||||||
match = re.match(r"Reading (.+) \(([^)]+)\) Hentai(?: by (.*))? - "
|
match = util.re(
|
||||||
r"([^:]+): (.+) . Page 1 ", title)
|
r"Reading (.+) \(([^)]+)\) Hentai(?: by (.*))? - "
|
||||||
|
r"([^:]+): (.+) . Page 1 ").match(title)
|
||||||
if match:
|
if match:
|
||||||
manga, type, author, _, title = match.groups()
|
manga, type, author, _, title = match.groups()
|
||||||
else:
|
else:
|
||||||
|
|||||||
@@ -10,7 +10,6 @@
|
|||||||
|
|
||||||
from .common import ChapterExtractor, MangaExtractor
|
from .common import ChapterExtractor, MangaExtractor
|
||||||
from .. import text, util
|
from .. import text, util
|
||||||
import re
|
|
||||||
|
|
||||||
|
|
||||||
class HentaihereBase():
|
class HentaihereBase():
|
||||||
@@ -34,8 +33,9 @@ class HentaihereChapterExtractor(HentaihereBase, ChapterExtractor):
|
|||||||
title = text.extr(page, "<title>", "</title>")
|
title = text.extr(page, "<title>", "</title>")
|
||||||
chapter_id = text.extr(page, 'report/C', '"')
|
chapter_id = text.extr(page, 'report/C', '"')
|
||||||
chapter, sep, minor = self.chapter.partition(".")
|
chapter, sep, minor = self.chapter.partition(".")
|
||||||
pattern = r"Page 1 \| (.+) \(([^)]+)\) - Chapter \d+: (.+) by (.+) at "
|
match = util.re(
|
||||||
match = re.match(pattern, title)
|
r"Page 1 \| (.+) \(([^)]+)\) - Chapter \d+: (.+) by "
|
||||||
|
r"(.+) at ").match(title)
|
||||||
return {
|
return {
|
||||||
"manga": match.group(1),
|
"manga": match.group(1),
|
||||||
"manga_id": text.parse_int(self.manga_id),
|
"manga_id": text.parse_int(self.manga_id),
|
||||||
|
|||||||
@@ -9,9 +9,8 @@
|
|||||||
"""Extractors for https://hiperdex.com/"""
|
"""Extractors for https://hiperdex.com/"""
|
||||||
|
|
||||||
from .common import ChapterExtractor, MangaExtractor
|
from .common import ChapterExtractor, MangaExtractor
|
||||||
from .. import text
|
from .. import text, util
|
||||||
from ..cache import memcache
|
from ..cache import memcache
|
||||||
import re
|
|
||||||
|
|
||||||
BASE_PATTERN = (r"((?:https?://)?(?:www\.)?"
|
BASE_PATTERN = (r"((?:https?://)?(?:www\.)?"
|
||||||
r"(?:1st)?hiper(?:dex|toon)\d?\.(?:com|net|info|top))")
|
r"(?:1st)?hiper(?:dex|toon)\d?\.(?:com|net|info|top))")
|
||||||
@@ -80,10 +79,10 @@ class HiperdexChapterExtractor(HiperdexBase, ChapterExtractor):
|
|||||||
return self.chapter_data(self.chapter)
|
return self.chapter_data(self.chapter)
|
||||||
|
|
||||||
def images(self, page):
|
def images(self, page):
|
||||||
|
pattern = util.re(r'id="image-\d+"\s+(?:data-)?src="([^"]+)')
|
||||||
return [
|
return [
|
||||||
(url.strip(), None)
|
(url.strip(), None)
|
||||||
for url in re.findall(
|
for url in pattern.findall(page)
|
||||||
r'id="image-\d+"\s+(?:data-)?src="([^"]+)', page)
|
|
||||||
]
|
]
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -13,7 +13,6 @@ from .nozomi import decode_nozomi
|
|||||||
from ..cache import memcache
|
from ..cache import memcache
|
||||||
from .. import text, util
|
from .. import text, util
|
||||||
import string
|
import string
|
||||||
import re
|
|
||||||
|
|
||||||
|
|
||||||
class HitomiExtractor(Extractor):
|
class HitomiExtractor(Extractor):
|
||||||
@@ -257,8 +256,8 @@ def _parse_gg(extr):
|
|||||||
m = {}
|
m = {}
|
||||||
|
|
||||||
keys = []
|
keys = []
|
||||||
for match in re.finditer(
|
for match in util.re_compile(
|
||||||
r"case\s+(\d+):(?:\s*o\s*=\s*(\d+))?", page):
|
r"case\s+(\d+):(?:\s*o\s*=\s*(\d+))?").finditer(page):
|
||||||
key, value = match.groups()
|
key, value = match.groups()
|
||||||
keys.append(int(key))
|
keys.append(int(key))
|
||||||
|
|
||||||
@@ -268,11 +267,11 @@ def _parse_gg(extr):
|
|||||||
m[key] = value
|
m[key] = value
|
||||||
keys.clear()
|
keys.clear()
|
||||||
|
|
||||||
for match in re.finditer(
|
for match in util.re_compile(
|
||||||
r"if\s+\(g\s*===?\s*(\d+)\)[\s{]*o\s*=\s*(\d+)", page):
|
r"if\s+\(g\s*===?\s*(\d+)\)[\s{]*o\s*=\s*(\d+)").finditer(page):
|
||||||
m[int(match.group(1))] = int(match.group(2))
|
m[int(match.group(1))] = int(match.group(2))
|
||||||
|
|
||||||
d = re.search(r"(?:var\s|default:)\s*o\s*=\s*(\d+)", page)
|
d = util.re_compile(r"(?:var\s|default:)\s*o\s*=\s*(\d+)").search(page)
|
||||||
b = re.search(r"b:\s*[\"'](.+)[\"']", page)
|
b = util.re_compile(r"b:\s*[\"'](.+)[\"']").search(page)
|
||||||
|
|
||||||
return m, b.group(1).strip("/"), int(d.group(1)) if d else 0
|
return m, b.group(1).strip("/"), int(d.group(1)) if d else 0
|
||||||
|
|||||||
@@ -9,8 +9,7 @@
|
|||||||
"""Extractors for https://www.imagebam.com/"""
|
"""Extractors for https://www.imagebam.com/"""
|
||||||
|
|
||||||
from .common import Extractor, Message
|
from .common import Extractor, Message
|
||||||
from .. import text
|
from .. import text, util
|
||||||
import re
|
|
||||||
|
|
||||||
|
|
||||||
class ImagebamExtractor(Extractor):
|
class ImagebamExtractor(Extractor):
|
||||||
@@ -70,9 +69,8 @@ class ImagebamGalleryExtractor(ImagebamExtractor):
|
|||||||
page, 'id="gallery-name">', '<').strip())}
|
page, 'id="gallery-name">', '<').strip())}
|
||||||
|
|
||||||
def images(self, page):
|
def images(self, page):
|
||||||
findall = re.compile(r'<a href="https://www\.imagebam\.com'
|
findall = util.re(r'<a href="https://www\.imagebam\.com'
|
||||||
r'(/(?:image/|view/M)[a-zA-Z0-9]+)').findall
|
r'(/(?:image/|view/M)[a-zA-Z0-9]+)').findall
|
||||||
|
|
||||||
paths = []
|
paths = []
|
||||||
while True:
|
while True:
|
||||||
paths += findall(page)
|
paths += findall(page)
|
||||||
|
|||||||
@@ -9,8 +9,7 @@
|
|||||||
"""Extractors for https://imgbox.com/"""
|
"""Extractors for https://imgbox.com/"""
|
||||||
|
|
||||||
from .common import Extractor, Message, AsynchronousMixin
|
from .common import Extractor, Message, AsynchronousMixin
|
||||||
from .. import text, exception
|
from .. import text, util, exception
|
||||||
import re
|
|
||||||
|
|
||||||
|
|
||||||
class ImgboxExtractor(Extractor):
|
class ImgboxExtractor(Extractor):
|
||||||
@@ -74,7 +73,8 @@ class ImgboxGalleryExtractor(AsynchronousMixin, ImgboxExtractor):
|
|||||||
page = self.request(self.root + "/g/" + self.gallery_key).text
|
page = self.request(self.root + "/g/" + self.gallery_key).text
|
||||||
if "The specified gallery could not be found." in page:
|
if "The specified gallery could not be found." in page:
|
||||||
raise exception.NotFoundError("gallery")
|
raise exception.NotFoundError("gallery")
|
||||||
self.image_keys = re.findall(r'<a href="/([^"]+)"><img alt="', page)
|
self.image_keys = util.re(
|
||||||
|
r'<a href="/([^"]+)"><img alt="').findall(page)
|
||||||
|
|
||||||
title = text.extr(page, "<h1>", "</h1>")
|
title = text.extr(page, "<h1>", "</h1>")
|
||||||
title, _, count = title.rpartition(" - ")
|
title, _, count = title.rpartition(" - ")
|
||||||
|
|||||||
@@ -14,7 +14,6 @@ from .. import text, util, exception
|
|||||||
from ..cache import cache, memcache
|
from ..cache import cache, memcache
|
||||||
import itertools
|
import itertools
|
||||||
import binascii
|
import binascii
|
||||||
import re
|
|
||||||
|
|
||||||
BASE_PATTERN = r"(?:https?://)?(?:www\.)?instagram\.com"
|
BASE_PATTERN = r"(?:https?://)?(?:www\.)?instagram\.com"
|
||||||
USER_PATTERN = BASE_PATTERN + r"/(?!(?:p|tv|reel|explore|stories)/)([^/?#]+)"
|
USER_PATTERN = BASE_PATTERN + r"/(?!(?:p|tv|reel|explore|stories)/)([^/?#]+)"
|
||||||
@@ -39,7 +38,7 @@ class InstagramExtractor(Extractor):
|
|||||||
def _init(self):
|
def _init(self):
|
||||||
self.www_claim = "0"
|
self.www_claim = "0"
|
||||||
self.csrf_token = util.generate_token()
|
self.csrf_token = util.generate_token()
|
||||||
self._find_tags = re.compile(r"#\w+").findall
|
self._find_tags = util.re(r"#\w+").findall
|
||||||
self._logged_in = True
|
self._logged_in = True
|
||||||
self._cursor = None
|
self._cursor = None
|
||||||
self._user = None
|
self._user = None
|
||||||
|
|||||||
@@ -13,7 +13,6 @@ from .. import text, util, exception
|
|||||||
from ..cache import cache, memcache
|
from ..cache import cache, memcache
|
||||||
import itertools
|
import itertools
|
||||||
import json
|
import json
|
||||||
import re
|
|
||||||
|
|
||||||
BASE_PATTERN = r"(?:https?://)?(?:www\.|beta\.)?(kemono|coomer)\.(su|party)"
|
BASE_PATTERN = r"(?:https?://)?(?:www\.|beta\.)?(kemono|coomer)\.(su|party)"
|
||||||
USER_PATTERN = BASE_PATTERN + r"/([^/?#]+)/user/([^/?#]+)"
|
USER_PATTERN = BASE_PATTERN + r"/([^/?#]+)/user/([^/?#]+)"
|
||||||
@@ -44,7 +43,7 @@ class KemonoExtractor(Extractor):
|
|||||||
order = self.config("order-revisions")
|
order = self.config("order-revisions")
|
||||||
self.revisions_reverse = order[0] in ("r", "a") if order else False
|
self.revisions_reverse = order[0] in ("r", "a") if order else False
|
||||||
|
|
||||||
self._find_inline = re.compile(
|
self._find_inline = util.re(
|
||||||
r'src="(?:https?://(?:kemono|coomer)\.su)?(/inline/[^"]+'
|
r'src="(?:https?://(?:kemono|coomer)\.su)?(/inline/[^"]+'
|
||||||
r'|/[0-9a-f]{2}/[0-9a-f]{2}/[0-9a-f]{64}\.[^"]+)').findall
|
r'|/[0-9a-f]{2}/[0-9a-f]{2}/[0-9a-f]{64}\.[^"]+)').findall
|
||||||
self._json_dumps = json.JSONEncoder(
|
self._json_dumps = json.JSONEncoder(
|
||||||
@@ -52,7 +51,7 @@ class KemonoExtractor(Extractor):
|
|||||||
sort_keys=True, separators=(",", ":")).encode
|
sort_keys=True, separators=(",", ":")).encode
|
||||||
|
|
||||||
def items(self):
|
def items(self):
|
||||||
find_hash = re.compile(HASH_PATTERN).match
|
find_hash = util.re(HASH_PATTERN).match
|
||||||
generators = self._build_file_generators(self.config("files"))
|
generators = self._build_file_generators(self.config("files"))
|
||||||
announcements = True if self.config("announcements") else None
|
announcements = True if self.config("announcements") else None
|
||||||
archives = True if self.config("archives") else False
|
archives = True if self.config("archives") else False
|
||||||
@@ -409,10 +408,10 @@ class KemonoDiscordExtractor(KemonoExtractor):
|
|||||||
"parent_id" : channel["parent_channel_id"],
|
"parent_id" : channel["parent_channel_id"],
|
||||||
}
|
}
|
||||||
|
|
||||||
find_inline = re.compile(
|
find_inline = util.re(
|
||||||
r"https?://(?:cdn\.discordapp.com|media\.discordapp\.net)"
|
r"https?://(?:cdn\.discordapp.com|media\.discordapp\.net)"
|
||||||
r"(/[A-Za-z0-9-._~:/?#\[\]@!$&'()*+,;%=]+)").findall
|
r"(/[A-Za-z0-9-._~:/?#\[\]@!$&'()*+,;%=]+)").findall
|
||||||
find_hash = re.compile(HASH_PATTERN).match
|
find_hash = util.re(HASH_PATTERN).match
|
||||||
|
|
||||||
posts = self.api.discord_channel(channel_id)
|
posts = self.api.discord_channel(channel_id)
|
||||||
max_posts = self.config("max-posts")
|
max_posts = self.config("max-posts")
|
||||||
|
|||||||
@@ -9,8 +9,7 @@
|
|||||||
"""Extractors for https://komikcast.la/"""
|
"""Extractors for https://komikcast.la/"""
|
||||||
|
|
||||||
from .common import ChapterExtractor, MangaExtractor
|
from .common import ChapterExtractor, MangaExtractor
|
||||||
from .. import text
|
from .. import text, util
|
||||||
import re
|
|
||||||
|
|
||||||
BASE_PATTERN = (r"(?:https?://)?(?:www\.)?"
|
BASE_PATTERN = (r"(?:https?://)?(?:www\.)?"
|
||||||
r"komikcast\.(?:la|cz|lol|site|mo?e|com)")
|
r"komikcast\.(?:la|cz|lol|site|mo?e|com)")
|
||||||
@@ -24,13 +23,11 @@ class KomikcastBase():
|
|||||||
@staticmethod
|
@staticmethod
|
||||||
def parse_chapter_string(chapter_string, data=None):
|
def parse_chapter_string(chapter_string, data=None):
|
||||||
"""Parse 'chapter_string' value and add its info to 'data'"""
|
"""Parse 'chapter_string' value and add its info to 'data'"""
|
||||||
if not data:
|
if data is None:
|
||||||
data = {}
|
data = {}
|
||||||
|
|
||||||
match = re.match(
|
pattern = util.re(r"(?:(.*) Chapter )?0*(\d+)([^ ]*)(?: (?:- )?(.+))?")
|
||||||
r"(?:(.*) Chapter )?0*(\d+)([^ ]*)(?: (?:- )?(.+))?",
|
match = pattern.match(text.unescape(chapter_string))
|
||||||
text.unescape(chapter_string),
|
|
||||||
)
|
|
||||||
manga, chapter, data["chapter_minor"], title = match.groups()
|
manga, chapter, data["chapter_minor"], title = match.groups()
|
||||||
|
|
||||||
if manga:
|
if manga:
|
||||||
@@ -59,9 +56,10 @@ class KomikcastChapterExtractor(KomikcastBase, ChapterExtractor):
|
|||||||
def images(page):
|
def images(page):
|
||||||
readerarea = text.extr(
|
readerarea = text.extr(
|
||||||
page, '<div class="main-reading-area', '</div')
|
page, '<div class="main-reading-area', '</div')
|
||||||
|
pattern = util.re(r"<img[^>]* src=[\"']([^\"']+)")
|
||||||
return [
|
return [
|
||||||
(text.unescape(url), None)
|
(text.unescape(url), None)
|
||||||
for url in re.findall(r"<img[^>]* src=[\"']([^\"']+)", readerarea)
|
for url in pattern.findall(readerarea)
|
||||||
]
|
]
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -9,8 +9,7 @@
|
|||||||
"""Extractors for https://www.mangahere.cc/"""
|
"""Extractors for https://www.mangahere.cc/"""
|
||||||
|
|
||||||
from .common import ChapterExtractor, MangaExtractor
|
from .common import ChapterExtractor, MangaExtractor
|
||||||
from .. import text
|
from .. import text, util
|
||||||
import re
|
|
||||||
|
|
||||||
|
|
||||||
class MangahereBase():
|
class MangahereBase():
|
||||||
@@ -104,8 +103,8 @@ class MangahereMangaExtractor(MangahereBase, MangaExtractor):
|
|||||||
info, pos = text.extract(page, 'class="title3">', '<', pos)
|
info, pos = text.extract(page, 'class="title3">', '<', pos)
|
||||||
date, pos = text.extract(page, 'class="title2">', '<', pos)
|
date, pos = text.extract(page, 'class="title2">', '<', pos)
|
||||||
|
|
||||||
match = re.match(
|
match = util.re(
|
||||||
r"(?:Vol\.0*(\d+) )?Ch\.0*(\d+)(\S*)(?: - (.*))?", info)
|
r"(?:Vol\.0*(\d+) )?Ch\.0*(\d+)(\S*)(?: - (.*))?").match(info)
|
||||||
if match:
|
if match:
|
||||||
volume, chapter, minor, title = match.groups()
|
volume, chapter, minor, title = match.groups()
|
||||||
else:
|
else:
|
||||||
|
|||||||
@@ -11,7 +11,6 @@
|
|||||||
from .common import ChapterExtractor, Extractor, Message
|
from .common import ChapterExtractor, Extractor, Message
|
||||||
from .. import text, util, exception
|
from .. import text, util, exception
|
||||||
from ..cache import memcache
|
from ..cache import memcache
|
||||||
import re
|
|
||||||
|
|
||||||
BASE_PATTERN = (r"(?:https?://)?(?:www\.)?(?:"
|
BASE_PATTERN = (r"(?:https?://)?(?:www\.)?(?:"
|
||||||
r"(?:manga|comic|read)park\.(?:com|net|org|me|io|to)|"
|
r"(?:manga|comic|read)park\.(?:com|net|org|me|io|to)|"
|
||||||
@@ -22,17 +21,14 @@ BASE_PATTERN = (r"(?:https?://)?(?:www\.)?(?:"
|
|||||||
class MangaparkBase():
|
class MangaparkBase():
|
||||||
"""Base class for mangapark extractors"""
|
"""Base class for mangapark extractors"""
|
||||||
category = "mangapark"
|
category = "mangapark"
|
||||||
_match_title = None
|
|
||||||
|
|
||||||
def _parse_chapter_title(self, title):
|
def _parse_chapter_title(self, title):
|
||||||
if not self._match_title:
|
match = util.re(
|
||||||
MangaparkBase._match_title = re.compile(
|
r"(?i)"
|
||||||
r"(?i)"
|
r"(?:vol(?:\.|ume)?\s*(\d+)\s*)?"
|
||||||
r"(?:vol(?:\.|ume)?\s*(\d+)\s*)?"
|
r"ch(?:\.|apter)?\s*(\d+)([^\s:]*)"
|
||||||
r"ch(?:\.|apter)?\s*(\d+)([^\s:]*)"
|
r"(?:\s*:\s*(.*))?"
|
||||||
r"(?:\s*:\s*(.*))?"
|
).match(title)
|
||||||
).match
|
|
||||||
match = self._match_title(title)
|
|
||||||
return match.groups() if match else (0, 0, "", "")
|
return match.groups() if match else (0, 0, "", "")
|
||||||
|
|
||||||
@memcache(keyarg=1)
|
@memcache(keyarg=1)
|
||||||
|
|||||||
@@ -7,8 +7,7 @@
|
|||||||
"""Extractors for https://mangaread.org/"""
|
"""Extractors for https://mangaread.org/"""
|
||||||
|
|
||||||
from .common import ChapterExtractor, MangaExtractor
|
from .common import ChapterExtractor, MangaExtractor
|
||||||
from .. import text, exception
|
from .. import text, util, exception
|
||||||
import re
|
|
||||||
|
|
||||||
|
|
||||||
class MangareadBase():
|
class MangareadBase():
|
||||||
@@ -18,9 +17,9 @@ class MangareadBase():
|
|||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def parse_chapter_string(chapter_string, data):
|
def parse_chapter_string(chapter_string, data):
|
||||||
match = re.match(
|
match = util.re(
|
||||||
r"(?:(.+)\s*-\s*)?[Cc]hapter\s*(\d+)(\.\d+)?(?:\s*-\s*(.+))?",
|
r"(?:(.+)\s*-\s*)?[Cc]hapter\s*(\d+)(\.\d+)?(?:\s*-\s*(.+))?"
|
||||||
text.unescape(chapter_string).strip())
|
).match(text.unescape(chapter_string).strip())
|
||||||
manga, chapter, minor, title = match.groups()
|
manga, chapter, minor, title = match.groups()
|
||||||
manga = manga.strip() if manga else ""
|
manga = manga.strip() if manga else ""
|
||||||
data["manga"] = data.pop("manga", manga)
|
data["manga"] = data.pop("manga", manga)
|
||||||
|
|||||||
@@ -9,11 +9,9 @@
|
|||||||
"""Extractors for Moebooru based sites"""
|
"""Extractors for Moebooru based sites"""
|
||||||
|
|
||||||
from .booru import BooruExtractor
|
from .booru import BooruExtractor
|
||||||
from .. import text
|
from .. import text, util
|
||||||
|
|
||||||
import collections
|
import collections
|
||||||
import datetime
|
import datetime
|
||||||
import re
|
|
||||||
|
|
||||||
|
|
||||||
class MoebooruExtractor(BooruExtractor):
|
class MoebooruExtractor(BooruExtractor):
|
||||||
@@ -36,7 +34,7 @@ class MoebooruExtractor(BooruExtractor):
|
|||||||
return
|
return
|
||||||
|
|
||||||
tags = collections.defaultdict(list)
|
tags = collections.defaultdict(list)
|
||||||
pattern = re.compile(r"tag-type-([^\"' ]+).*?[?;]tags=([^\"'+]+)")
|
pattern = util.re(r"tag-type-([^\"' ]+).*?[?;]tags=([^\"'+]+)")
|
||||||
for tag_type, tag_name in pattern.findall(tag_container):
|
for tag_type, tag_name in pattern.findall(tag_container):
|
||||||
tags[tag_type].append(text.unquote(tag_name))
|
tags[tag_type].append(text.unquote(tag_name))
|
||||||
for key, value in tags.items():
|
for key, value in tags.items():
|
||||||
|
|||||||
@@ -12,7 +12,6 @@ from .common import Extractor, Message, Dispatch
|
|||||||
from .. import text, util, exception
|
from .. import text, util, exception
|
||||||
from ..cache import cache
|
from ..cache import cache
|
||||||
import itertools
|
import itertools
|
||||||
import re
|
|
||||||
|
|
||||||
BASE_PATTERN = r"(?:https?://)?(?:www\.)?newgrounds\.com"
|
BASE_PATTERN = r"(?:https?://)?(?:www\.)?newgrounds\.com"
|
||||||
USER_PATTERN = r"(?:https?://)?([\w-]+)\.newgrounds\.com"
|
USER_PATTERN = r"(?:https?://)?([\w-]+)\.newgrounds\.com"
|
||||||
@@ -35,7 +34,7 @@ class NewgroundsExtractor(Extractor):
|
|||||||
self.user_root = "https://{}.newgrounds.com".format(self.user)
|
self.user_root = "https://{}.newgrounds.com".format(self.user)
|
||||||
|
|
||||||
def _init(self):
|
def _init(self):
|
||||||
self._extract_comment_urls = re.compile(
|
self._extract_comment_urls = util.re(
|
||||||
r'(?:<img |data-smartload-)src="([^"]+)').findall
|
r'(?:<img |data-smartload-)src="([^"]+)').findall
|
||||||
self.flash = self.config("flash", True)
|
self.flash = self.config("flash", True)
|
||||||
|
|
||||||
@@ -322,7 +321,7 @@ class NewgroundsExtractor(Extractor):
|
|||||||
|
|
||||||
def _video_formats(self, sources):
|
def _video_formats(self, sources):
|
||||||
src = sources["360p"][0]["src"]
|
src = sources["360p"][0]["src"]
|
||||||
sub = re.compile(r"\.360p\.\w+").sub
|
sub = util.re(r"\.360p\.\w+").sub
|
||||||
|
|
||||||
for fmt in self.format:
|
for fmt in self.format:
|
||||||
try:
|
try:
|
||||||
|
|||||||
@@ -10,8 +10,7 @@
|
|||||||
|
|
||||||
from .common import Extractor, Message
|
from .common import Extractor, Message
|
||||||
from ..cache import cache
|
from ..cache import cache
|
||||||
from .. import text, exception
|
from .. import text, util, exception
|
||||||
import re
|
|
||||||
|
|
||||||
BASE_PATTERN = r"(?:https?://)?www\.pillowfort\.social"
|
BASE_PATTERN = r"(?:https?://)?www\.pillowfort\.social"
|
||||||
|
|
||||||
@@ -37,8 +36,8 @@ class PillowfortExtractor(Extractor):
|
|||||||
external = self.config("external", False)
|
external = self.config("external", False)
|
||||||
|
|
||||||
if inline:
|
if inline:
|
||||||
inline = re.compile(r'src="(https://img\d+\.pillowfort\.social'
|
inline = util.re(r'src="(https://img\d+\.pillowfort\.social'
|
||||||
r'/posts/[^"]+)').findall
|
r'/posts/[^"]+)').findall
|
||||||
|
|
||||||
for post in self.posts():
|
for post in self.posts():
|
||||||
if "original_post" in post and not reblogs:
|
if "original_post" in post and not reblogs:
|
||||||
|
|||||||
@@ -14,7 +14,6 @@ from ..cache import cache, memcache
|
|||||||
from datetime import datetime, timedelta
|
from datetime import datetime, timedelta
|
||||||
import itertools
|
import itertools
|
||||||
import hashlib
|
import hashlib
|
||||||
import re
|
|
||||||
|
|
||||||
BASE_PATTERN = r"(?:https?://)?(?:www\.|touch\.)?ph?ixiv\.net"
|
BASE_PATTERN = r"(?:https?://)?(?:www\.|touch\.)?ph?ixiv\.net"
|
||||||
USER_PATTERN = BASE_PATTERN + r"/(?:en/)?users/(\d+)"
|
USER_PATTERN = BASE_PATTERN + r"/(?:en/)?users/(\d+)"
|
||||||
@@ -45,7 +44,7 @@ class PixivExtractor(Extractor):
|
|||||||
self.meta_captions = self.config("captions")
|
self.meta_captions = self.config("captions")
|
||||||
|
|
||||||
if self.meta_captions:
|
if self.meta_captions:
|
||||||
self.meta_captions_sub = re.compile(
|
self.meta_captions_sub = util.re(
|
||||||
r'<a href="/jump\.php\?([^"]+)').sub
|
r'<a href="/jump\.php\?([^"]+)').sub
|
||||||
|
|
||||||
def items(self):
|
def items(self):
|
||||||
|
|||||||
@@ -11,7 +11,6 @@
|
|||||||
from .common import Extractor, Message
|
from .common import Extractor, Message
|
||||||
from .. import text, util, exception
|
from .. import text, util, exception
|
||||||
import datetime
|
import datetime
|
||||||
import re
|
|
||||||
|
|
||||||
|
|
||||||
class PlurkExtractor(Extractor):
|
class PlurkExtractor(Extractor):
|
||||||
@@ -64,7 +63,8 @@ class PlurkExtractor(Extractor):
|
|||||||
def _load(data):
|
def _load(data):
|
||||||
if not data:
|
if not data:
|
||||||
raise exception.NotFoundError("user")
|
raise exception.NotFoundError("user")
|
||||||
return util.json_loads(re.sub(r"new Date\(([^)]+)\)", r"\1", data))
|
return util.json_loads(
|
||||||
|
util.re(r"new Date\(([^)]+)\)").sub(r"\1", data))
|
||||||
|
|
||||||
|
|
||||||
class PlurkTimelineExtractor(PlurkExtractor):
|
class PlurkTimelineExtractor(PlurkExtractor):
|
||||||
|
|||||||
@@ -6,9 +6,8 @@
|
|||||||
|
|
||||||
"""Extractors for Postmill instances"""
|
"""Extractors for Postmill instances"""
|
||||||
|
|
||||||
import re
|
|
||||||
from .common import BaseExtractor, Message
|
from .common import BaseExtractor, Message
|
||||||
from .. import text, exception
|
from .. import text, util, exception
|
||||||
|
|
||||||
|
|
||||||
class PostmillExtractor(BaseExtractor):
|
class PostmillExtractor(BaseExtractor):
|
||||||
@@ -21,8 +20,8 @@ class PostmillExtractor(BaseExtractor):
|
|||||||
def _init(self):
|
def _init(self):
|
||||||
self.instance = self.root.partition("://")[2]
|
self.instance = self.root.partition("://")[2]
|
||||||
self.save_link_post_body = self.config("save-link-post-body", False)
|
self.save_link_post_body = self.config("save-link-post-body", False)
|
||||||
self._search_canonical_url = re.compile(r"/f/([\w\d_]+)/(\d+)/").search
|
self._search_canonical_url = util.re(r"/f/([\w\d_]+)/(\d+)/").search
|
||||||
self._search_image_tag = re.compile(
|
self._search_image_tag = util.re(
|
||||||
r'<a href="[^"]+"\n +class="submission__image-link"').search
|
r'<a href="[^"]+"\n +class="submission__image-link"').search
|
||||||
|
|
||||||
def items(self):
|
def items(self):
|
||||||
|
|||||||
@@ -11,7 +11,6 @@
|
|||||||
from . import booru
|
from . import booru
|
||||||
from .. import text, util
|
from .. import text, util
|
||||||
import collections
|
import collections
|
||||||
import re
|
|
||||||
|
|
||||||
BASE_PATTERN = r"(?:https?://)?realbooru\.com"
|
BASE_PATTERN = r"(?:https?://)?realbooru\.com"
|
||||||
|
|
||||||
@@ -72,8 +71,7 @@ class RealbooruExtractor(booru.BooruExtractor):
|
|||||||
page = post["_html"]
|
page = post["_html"]
|
||||||
tag_container = text.extr(page, 'id="tagLink"', '</div>')
|
tag_container = text.extr(page, 'id="tagLink"', '</div>')
|
||||||
tags = collections.defaultdict(list)
|
tags = collections.defaultdict(list)
|
||||||
pattern = re.compile(
|
pattern = util.re(r'<a class="(?:tag-type-)?([^"]+).*?;tags=([^"&]+)')
|
||||||
r'<a class="(?:tag-type-)?([^"]+).*?;tags=([^"&]+)')
|
|
||||||
for tag_type, tag_name in pattern.findall(tag_container):
|
for tag_type, tag_name in pattern.findall(tag_container):
|
||||||
tags[tag_type].append(text.unescape(text.unquote(tag_name)))
|
tags[tag_type].append(text.unescape(text.unquote(tag_name)))
|
||||||
for key, value in tags.items():
|
for key, value in tags.items():
|
||||||
|
|||||||
@@ -9,8 +9,7 @@
|
|||||||
"""Recursive extractor"""
|
"""Recursive extractor"""
|
||||||
|
|
||||||
from .common import Extractor, Message
|
from .common import Extractor, Message
|
||||||
from .. import text
|
from .. import text, util
|
||||||
import re
|
|
||||||
|
|
||||||
|
|
||||||
class RecursiveExtractor(Extractor):
|
class RecursiveExtractor(Extractor):
|
||||||
@@ -28,5 +27,5 @@ class RecursiveExtractor(Extractor):
|
|||||||
else:
|
else:
|
||||||
page = self.request(text.ensure_http_scheme(url)).text
|
page = self.request(text.ensure_http_scheme(url)).text
|
||||||
|
|
||||||
for match in re.finditer(r"https?://[^\s\"']+", page):
|
for match in util.re(r"https?://[^\s\"']+").finditer(page):
|
||||||
yield Message.Queue, match.group(0), {}
|
yield Message.Queue, match.group(0), {}
|
||||||
|
|||||||
@@ -9,9 +9,8 @@
|
|||||||
"""Extractors for https://rule34.us/"""
|
"""Extractors for https://rule34.us/"""
|
||||||
|
|
||||||
from .booru import BooruExtractor
|
from .booru import BooruExtractor
|
||||||
from .. import text
|
from .. import text, util
|
||||||
import collections
|
import collections
|
||||||
import re
|
|
||||||
|
|
||||||
|
|
||||||
class Rule34usExtractor(BooruExtractor):
|
class Rule34usExtractor(BooruExtractor):
|
||||||
@@ -20,7 +19,7 @@ class Rule34usExtractor(BooruExtractor):
|
|||||||
per_page = 42
|
per_page = 42
|
||||||
|
|
||||||
def _init(self):
|
def _init(self):
|
||||||
self._find_tags = re.compile(
|
self._find_tags = util.re(
|
||||||
r'<li class="([^-"]+)-tag"[^>]*><a href="[^;"]+;q=([^"]+)').findall
|
r'<li class="([^-"]+)-tag"[^>]*><a href="[^;"]+;q=([^"]+)').findall
|
||||||
|
|
||||||
def _parse_post(self, post_id):
|
def _parse_post(self, post_id):
|
||||||
|
|||||||
@@ -13,7 +13,6 @@ from .common import Message
|
|||||||
from .. import text, util, exception
|
from .. import text, util, exception
|
||||||
from ..cache import cache
|
from ..cache import cache
|
||||||
import collections
|
import collections
|
||||||
import re
|
|
||||||
|
|
||||||
BASE_PATTERN = r"(?:https?://)?" \
|
BASE_PATTERN = r"(?:https?://)?" \
|
||||||
r"(?:(?:chan|www|beta|black|white)\.sankakucomplex\.com|sankaku\.app)" \
|
r"(?:(?:chan|www|beta|black|white)\.sankakucomplex\.com|sankaku\.app)" \
|
||||||
@@ -48,7 +47,7 @@ class SankakuExtractor(BooruExtractor):
|
|||||||
self.api = SankakuAPI(self)
|
self.api = SankakuAPI(self)
|
||||||
if self.config("tags") == "extended":
|
if self.config("tags") == "extended":
|
||||||
self._tags = self._tags_extended
|
self._tags = self._tags_extended
|
||||||
self._tags_findall = re.compile(
|
self._tags_findall = util.re(
|
||||||
r"tag-type-([^\"' ]+).*?\?tags=([^\"'&]+)").findall
|
r"tag-type-([^\"' ]+).*?\?tags=([^\"'&]+)").findall
|
||||||
|
|
||||||
def _file_url(self, post):
|
def _file_url(self, post):
|
||||||
@@ -130,11 +129,11 @@ class SankakuTagExtractor(SankakuExtractor):
|
|||||||
|
|
||||||
if "date:" in self.tags:
|
if "date:" in self.tags:
|
||||||
# rewrite 'date:' tags (#1790)
|
# rewrite 'date:' tags (#1790)
|
||||||
self.tags = re.sub(
|
self.tags = util.re(
|
||||||
r"date:(\d\d)[.-](\d\d)[.-](\d\d\d\d)(?!T)",
|
r"date:(\d\d)[.-](\d\d)[.-](\d\d\d\d)(?!T)").sub(
|
||||||
r"date:\3-\2-\1T00:00", self.tags)
|
r"date:\3-\2-\1T00:00", self.tags)
|
||||||
self.tags = re.sub(
|
self.tags = util.re(
|
||||||
r"date:(\d\d\d\d)[.-](\d\d)[.-](\d\d)(?!T)",
|
r"date:(\d\d\d\d)[.-](\d\d)[.-](\d\d)(?!T)").sub(
|
||||||
r"date:\1-\2-\3T00:00", self.tags)
|
r"date:\1-\2-\3T00:00", self.tags)
|
||||||
|
|
||||||
def metadata(self):
|
def metadata(self):
|
||||||
|
|||||||
@@ -10,7 +10,6 @@
|
|||||||
|
|
||||||
from .common import Extractor, Message
|
from .common import Extractor, Message
|
||||||
from .. import text, util
|
from .. import text, util
|
||||||
import re
|
|
||||||
|
|
||||||
|
|
||||||
class SankakucomplexExtractor(Extractor):
|
class SankakucomplexExtractor(Extractor):
|
||||||
@@ -66,7 +65,7 @@ class SankakucomplexArticleExtractor(SankakucomplexExtractor):
|
|||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def _extract_images(content):
|
def _extract_images(content):
|
||||||
orig_sub = re.compile(r"-\d+x\d+\.").sub
|
orig_sub = util.re(r"-\d+x\d+\.").sub
|
||||||
return [
|
return [
|
||||||
orig_sub(".", url) for url in
|
orig_sub(".", url) for url in
|
||||||
util.unique(text.extract_iter(content, 'data-lazy-src="', '"'))
|
util.unique(text.extract_iter(content, 'data-lazy-src="', '"'))
|
||||||
@@ -74,13 +73,13 @@ class SankakucomplexArticleExtractor(SankakucomplexExtractor):
|
|||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def _extract_videos(content):
|
def _extract_videos(content):
|
||||||
return re.findall(r"<source [^>]*src=[\"']([^\"']+)", content)
|
return util.re(r"<source [^>]*src=[\"']([^\"']+)").findall(content)
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def _extract_embeds(content):
|
def _extract_embeds(content):
|
||||||
return [
|
return [
|
||||||
"ytdl:" + url for url in
|
"ytdl:" + url for url in
|
||||||
re.findall(r"<iframe [^>]*src=[\"']([^\"']+)", content)
|
util.re(r"<iframe [^>]*src=[\"']([^\"']+)").findall(content)
|
||||||
]
|
]
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -9,8 +9,7 @@
|
|||||||
"""Extractors for https://speakerdeck.com/"""
|
"""Extractors for https://speakerdeck.com/"""
|
||||||
|
|
||||||
from .common import GalleryExtractor
|
from .common import GalleryExtractor
|
||||||
from .. import text
|
from .. import text, util
|
||||||
import re
|
|
||||||
|
|
||||||
|
|
||||||
class SpeakerdeckPresentationExtractor(GalleryExtractor):
|
class SpeakerdeckPresentationExtractor(GalleryExtractor):
|
||||||
@@ -48,7 +47,8 @@ class SpeakerdeckPresentationExtractor(GalleryExtractor):
|
|||||||
|
|
||||||
def images(self, _):
|
def images(self, _):
|
||||||
url = "{}/player/{}".format(self.root, self.presentation_id)
|
url = "{}/player/{}".format(self.root, self.presentation_id)
|
||||||
page = re.sub(r"\s+", " ", self.request(url).text)
|
page = self.request(url).text
|
||||||
|
page = util.re(r"\s+").sub(" ", page)
|
||||||
return [
|
return [
|
||||||
(url, None)
|
(url, None)
|
||||||
for url in text.extract_iter(page, 'js-sd-slide" data-url="', '"')
|
for url in text.extract_iter(page, 'js-sd-slide" data-url="', '"')
|
||||||
|
|||||||
@@ -11,7 +11,6 @@
|
|||||||
from .common import Extractor, Message
|
from .common import Extractor, Message
|
||||||
from .. import text, util, exception
|
from .. import text, util, exception
|
||||||
from ..cache import cache
|
from ..cache import cache
|
||||||
import re
|
|
||||||
|
|
||||||
BASE_PATTERN = r"(?:https?://)?(?:www\.)?subscribestar\.(com|adult)"
|
BASE_PATTERN = r"(?:https?://)?(?:www\.)?subscribestar\.(com|adult)"
|
||||||
|
|
||||||
@@ -158,8 +157,8 @@ class SubscribestarExtractor(Extractor):
|
|||||||
attachments = text.extr(
|
attachments = text.extr(
|
||||||
html, 'class="uploads-docs"', 'class="post-edit_form"')
|
html, 'class="uploads-docs"', 'class="post-edit_form"')
|
||||||
if attachments:
|
if attachments:
|
||||||
for att in re.split(
|
for att in util.re(r'class="doc_preview[" ]').split(
|
||||||
r'class="doc_preview[" ]', attachments)[1:]:
|
attachments)[1:]:
|
||||||
media.append({
|
media.append({
|
||||||
"id" : text.parse_int(text.extr(
|
"id" : text.parse_int(text.extr(
|
||||||
att, 'data-upload-id="', '"')),
|
att, 'data-upload-id="', '"')),
|
||||||
@@ -172,8 +171,8 @@ class SubscribestarExtractor(Extractor):
|
|||||||
audios = text.extr(
|
audios = text.extr(
|
||||||
html, 'class="uploads-audios"', 'class="post-edit_form"')
|
html, 'class="uploads-audios"', 'class="post-edit_form"')
|
||||||
if audios:
|
if audios:
|
||||||
for audio in re.split(
|
for audio in util.re(r'class="audio_preview-data[" ]').split(
|
||||||
r'class="audio_preview-data[" ]', audios)[1:]:
|
audios)[1:]:
|
||||||
media.append({
|
media.append({
|
||||||
"id" : text.parse_int(text.extr(
|
"id" : text.parse_int(text.extr(
|
||||||
audio, 'data-upload-id="', '"')),
|
audio, 'data-upload-id="', '"')),
|
||||||
|
|||||||
@@ -11,7 +11,6 @@
|
|||||||
from .common import Extractor, Message
|
from .common import Extractor, Message
|
||||||
from .. import text, util, oauth, exception
|
from .. import text, util, oauth, exception
|
||||||
from datetime import datetime, date, timedelta
|
from datetime import datetime, date, timedelta
|
||||||
import re
|
|
||||||
|
|
||||||
|
|
||||||
BASE_PATTERN = (
|
BASE_PATTERN = (
|
||||||
@@ -66,16 +65,16 @@ class TumblrExtractor(Extractor):
|
|||||||
blog = None
|
blog = None
|
||||||
|
|
||||||
# pre-compile regular expressions
|
# pre-compile regular expressions
|
||||||
self._sub_video = re.compile(
|
self._sub_video = util.re(
|
||||||
r"https?://((?:vt|vtt|ve)(?:\.media)?\.tumblr\.com"
|
r"https?://((?:vt|vtt|ve)(?:\.media)?\.tumblr\.com"
|
||||||
r"/tumblr_[^_]+)_\d+\.([0-9a-z]+)").sub
|
r"/tumblr_[^_]+)_\d+\.([0-9a-z]+)").sub
|
||||||
if self.inline:
|
if self.inline:
|
||||||
self._sub_image = re.compile(
|
self._sub_image = util.re(
|
||||||
r"https?://(\d+\.media\.tumblr\.com(?:/[0-9a-f]+)?"
|
r"https?://(\d+\.media\.tumblr\.com(?:/[0-9a-f]+)?"
|
||||||
r"/tumblr(?:_inline)?_[^_]+)_\d+\.([0-9a-z]+)").sub
|
r"/tumblr(?:_inline)?_[^_]+)_\d+\.([0-9a-z]+)").sub
|
||||||
self._subn_orig_image = re.compile(r"/s\d+x\d+/").subn
|
self._subn_orig_image = util.re(r"/s\d+x\d+/").subn
|
||||||
_findall_image = re.compile('<img src="([^"]+)"').findall
|
_findall_image = util.re('<img src="([^"]+)"').findall
|
||||||
_findall_video = re.compile('<source src="([^"]+)"').findall
|
_findall_video = util.re('<source src="([^"]+)"').findall
|
||||||
|
|
||||||
for post in self.posts():
|
for post in self.posts():
|
||||||
if self.date_min > post["timestamp"]:
|
if self.date_min > post["timestamp"]:
|
||||||
|
|||||||
@@ -9,8 +9,7 @@
|
|||||||
"""Extractors for https://vk.com/"""
|
"""Extractors for https://vk.com/"""
|
||||||
|
|
||||||
from .common import Extractor, Message
|
from .common import Extractor, Message
|
||||||
from .. import text, exception
|
from .. import text, util, exception
|
||||||
import re
|
|
||||||
|
|
||||||
BASE_PATTERN = r"(?:https://)?(?:www\.|m\.)?vk\.com"
|
BASE_PATTERN = r"(?:https://)?(?:www\.|m\.)?vk\.com"
|
||||||
|
|
||||||
@@ -32,7 +31,7 @@ class VkExtractor(Extractor):
|
|||||||
return num
|
return num
|
||||||
|
|
||||||
def items(self):
|
def items(self):
|
||||||
sub = re.compile(r"/imp[fg]/").sub
|
sub = util.re(r"/imp[fg]/").sub
|
||||||
sizes = "wzyxrqpo"
|
sizes = "wzyxrqpo"
|
||||||
|
|
||||||
data = self.metadata()
|
data = self.metadata()
|
||||||
|
|||||||
@@ -12,7 +12,6 @@ from .booru import BooruExtractor
|
|||||||
from ..cache import cache
|
from ..cache import cache
|
||||||
from .. import text, util, exception
|
from .. import text, util, exception
|
||||||
import collections
|
import collections
|
||||||
import re
|
|
||||||
|
|
||||||
BASE_PATTERN = r"(?:https?://)?(?:www\.)?zerochan\.net"
|
BASE_PATTERN = r"(?:https?://)?(?:www\.)?zerochan\.net"
|
||||||
|
|
||||||
@@ -127,7 +126,7 @@ class ZerochanExtractor(BooruExtractor):
|
|||||||
return data
|
return data
|
||||||
|
|
||||||
def _parse_json(self, txt):
|
def _parse_json(self, txt):
|
||||||
txt = re.sub(r"[\x00-\x1f\x7f]", "", txt)
|
txt = util.re(r"[\x00-\x1f\x7f]").sub("", txt)
|
||||||
main, _, tags = txt.partition('tags": [')
|
main, _, tags = txt.partition('tags": [')
|
||||||
|
|
||||||
item = {}
|
item = {}
|
||||||
|
|||||||
@@ -73,7 +73,7 @@ __tests__ = (
|
|||||||
"views" : int,
|
"views" : int,
|
||||||
"favorites" : int,
|
"favorites" : int,
|
||||||
"comments" : int,
|
"comments" : int,
|
||||||
"_mtime" : "Sat, 16 Feb 2019 19:30:34 GMT",
|
"_http_lastmodified": "Sat, 16 Feb 2019 19:30:34 GMT",
|
||||||
},
|
},
|
||||||
|
|
||||||
{
|
{
|
||||||
|
|||||||
@@ -10,7 +10,6 @@ from gallery_dl import exception
|
|||||||
__tests__ = (
|
__tests__ = (
|
||||||
{
|
{
|
||||||
"#url" : "https://bato.to/title/86408-i-shall-master-this-family-official/1681030-ch_8",
|
"#url" : "https://bato.to/title/86408-i-shall-master-this-family-official/1681030-ch_8",
|
||||||
"#category": ("", "batoto", "chapter"),
|
|
||||||
"#class" : batoto.BatotoChapterExtractor,
|
"#class" : batoto.BatotoChapterExtractor,
|
||||||
"#count" : 66,
|
"#count" : 66,
|
||||||
|
|
||||||
@@ -33,7 +32,6 @@ __tests__ = (
|
|||||||
{
|
{
|
||||||
"#url" : "https://bato.to/title/104929-86-eighty-six-official/1943513-vol_1-ch_5",
|
"#url" : "https://bato.to/title/104929-86-eighty-six-official/1943513-vol_1-ch_5",
|
||||||
"#comment" : "volume (vol) in url",
|
"#comment" : "volume (vol) in url",
|
||||||
"#category": ("", "batoto", "chapter"),
|
|
||||||
"#class" : batoto.BatotoChapterExtractor,
|
"#class" : batoto.BatotoChapterExtractor,
|
||||||
"#count" : 7,
|
"#count" : 7,
|
||||||
|
|
||||||
@@ -46,7 +44,6 @@ __tests__ = (
|
|||||||
{
|
{
|
||||||
"#url" : "https://mto.to/chapter/2584460",
|
"#url" : "https://mto.to/chapter/2584460",
|
||||||
"#comment" : "'-' in manga title (#5200)",
|
"#comment" : "'-' in manga title (#5200)",
|
||||||
"#category": ("", "batoto", "chapter"),
|
|
||||||
"#class" : batoto.BatotoChapterExtractor,
|
"#class" : batoto.BatotoChapterExtractor,
|
||||||
|
|
||||||
"chapter" : 9,
|
"chapter" : 9,
|
||||||
@@ -64,7 +61,6 @@ __tests__ = (
|
|||||||
{
|
{
|
||||||
"#url" : "https://bato.to/title/90710-new-suitor-for-the-abandoned-wife/2089747-ch_76",
|
"#url" : "https://bato.to/title/90710-new-suitor-for-the-abandoned-wife/2089747-ch_76",
|
||||||
"#comment" : "duplicate info in chapter_minor / title (#5988)",
|
"#comment" : "duplicate info in chapter_minor / title (#5988)",
|
||||||
"#category": ("", "batoto", "chapter"),
|
|
||||||
"#class" : batoto.BatotoChapterExtractor,
|
"#class" : batoto.BatotoChapterExtractor,
|
||||||
|
|
||||||
"chapter" : 76,
|
"chapter" : 76,
|
||||||
@@ -76,7 +72,6 @@ __tests__ = (
|
|||||||
|
|
||||||
{
|
{
|
||||||
"#url" : "https://bato.to/title/115494-today-with-you/2631897-ch_38",
|
"#url" : "https://bato.to/title/115494-today-with-you/2631897-ch_38",
|
||||||
"#category": ("", "batoto", "chapter"),
|
|
||||||
"#class" : batoto.BatotoChapterExtractor,
|
"#class" : batoto.BatotoChapterExtractor,
|
||||||
|
|
||||||
"chapter" : 37,
|
"chapter" : 37,
|
||||||
@@ -94,20 +89,17 @@ __tests__ = (
|
|||||||
|
|
||||||
{
|
{
|
||||||
"#url" : "https://bato.to/title/86408/1681030",
|
"#url" : "https://bato.to/title/86408/1681030",
|
||||||
"#category": ("", "batoto", "chapter"),
|
|
||||||
"#class" : batoto.BatotoChapterExtractor,
|
"#class" : batoto.BatotoChapterExtractor,
|
||||||
},
|
},
|
||||||
|
|
||||||
{
|
{
|
||||||
"#url" : "https://bato.to/chapter/1681030",
|
"#url" : "https://bato.to/chapter/1681030",
|
||||||
"#comment" : "v2 URL",
|
"#comment" : "v2 URL",
|
||||||
"#category": ("", "batoto", "chapter"),
|
|
||||||
"#class" : batoto.BatotoChapterExtractor,
|
"#class" : batoto.BatotoChapterExtractor,
|
||||||
},
|
},
|
||||||
|
|
||||||
{
|
{
|
||||||
"#url" : "https://bato.to/title/113742-futsutsuka-na-akujo-de-wa-gozaimasu-ga-suuguu-chouso-torikae-den-official",
|
"#url" : "https://bato.to/title/113742-futsutsuka-na-akujo-de-wa-gozaimasu-ga-suuguu-chouso-torikae-den-official",
|
||||||
"#category": ("", "batoto", "manga"),
|
|
||||||
"#class" : batoto.BatotoMangaExtractor,
|
"#class" : batoto.BatotoMangaExtractor,
|
||||||
"#options" : {"domain": "xbato.org"},
|
"#options" : {"domain": "xbato.org"},
|
||||||
"#count" : ">= 21",
|
"#count" : ">= 21",
|
||||||
@@ -122,7 +114,6 @@ __tests__ = (
|
|||||||
{
|
{
|
||||||
"#url" : "https://bato.to/title/104929-86-eighty-six-official",
|
"#url" : "https://bato.to/title/104929-86-eighty-six-official",
|
||||||
"#comment" : "Manga with number in name",
|
"#comment" : "Manga with number in name",
|
||||||
"#category": ("", "batoto", "manga"),
|
|
||||||
"#class" : batoto.BatotoMangaExtractor,
|
"#class" : batoto.BatotoMangaExtractor,
|
||||||
"#count" : ">= 18",
|
"#count" : ">= 18",
|
||||||
|
|
||||||
@@ -132,7 +123,6 @@ __tests__ = (
|
|||||||
{
|
{
|
||||||
"#url" : "https://bato.to/title/140046-the-grand-duke-s-fox-princess-mgchan",
|
"#url" : "https://bato.to/title/140046-the-grand-duke-s-fox-princess-mgchan",
|
||||||
"#comment" : "Non-English translation (Indonesian)",
|
"#comment" : "Non-English translation (Indonesian)",
|
||||||
"#category": ("", "batoto", "manga"),
|
|
||||||
"#class" : batoto.BatotoMangaExtractor,
|
"#class" : batoto.BatotoMangaExtractor,
|
||||||
"#count" : ">= 29",
|
"#count" : ">= 29",
|
||||||
|
|
||||||
@@ -142,149 +132,130 @@ __tests__ = (
|
|||||||
{
|
{
|
||||||
"#url" : "https://bato.to/title/134270-removed",
|
"#url" : "https://bato.to/title/134270-removed",
|
||||||
"#comment" : "Deleted/removed manga",
|
"#comment" : "Deleted/removed manga",
|
||||||
"#category": ("", "batoto", "manga"),
|
|
||||||
"#class" : batoto.BatotoMangaExtractor,
|
"#class" : batoto.BatotoMangaExtractor,
|
||||||
"#exception": exception.StopExtraction,
|
"#exception": exception.StopExtraction,
|
||||||
},
|
},
|
||||||
|
|
||||||
{
|
{
|
||||||
"#url" : "https://bato.to/title/86408-i-shall-master-this-family-official",
|
"#url" : "https://bato.to/title/86408-i-shall-master-this-family-official",
|
||||||
"#category": ("", "batoto", "manga"),
|
|
||||||
"#class" : batoto.BatotoMangaExtractor,
|
"#class" : batoto.BatotoMangaExtractor,
|
||||||
},
|
},
|
||||||
|
|
||||||
{
|
{
|
||||||
"#url" : "https://bato.to/series/86408/i-shall-master-this-family-official",
|
"#url" : "https://bato.to/series/86408/i-shall-master-this-family-official",
|
||||||
"#comment" : "v2 URL",
|
"#comment" : "v2 URL",
|
||||||
"#category": ("", "batoto", "manga"),
|
|
||||||
"#class" : batoto.BatotoMangaExtractor,
|
"#class" : batoto.BatotoMangaExtractor,
|
||||||
},
|
},
|
||||||
|
|
||||||
{
|
{
|
||||||
"#url" : "https://dto.to/title/86408/1681030",
|
"#url" : "https://dto.to/title/86408/1681030",
|
||||||
"#category": ("", "batoto", "chapter"),
|
"#class" : batoto.BatotoChapterExtractor,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"#url" : "https://fto.to/title/86408/1681030",
|
||||||
"#class" : batoto.BatotoChapterExtractor,
|
"#class" : batoto.BatotoChapterExtractor,
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"#url" : "https://hto.to/title/86408/1681030",
|
"#url" : "https://hto.to/title/86408/1681030",
|
||||||
"#category": ("", "batoto", "chapter"),
|
"#class" : batoto.BatotoChapterExtractor,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"#url" : "https://jto.to/title/86408/1681030",
|
||||||
"#class" : batoto.BatotoChapterExtractor,
|
"#class" : batoto.BatotoChapterExtractor,
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"#url" : "https://mto.to/title/86408/1681030",
|
"#url" : "https://mto.to/title/86408/1681030",
|
||||||
"#category": ("", "batoto", "chapter"),
|
|
||||||
"#class" : batoto.BatotoChapterExtractor,
|
"#class" : batoto.BatotoChapterExtractor,
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"#url" : "https://wto.to/title/86408/1681030",
|
"#url" : "https://wto.to/title/86408/1681030",
|
||||||
"#category": ("", "batoto", "chapter"),
|
|
||||||
"#class" : batoto.BatotoChapterExtractor,
|
"#class" : batoto.BatotoChapterExtractor,
|
||||||
},
|
},
|
||||||
|
|
||||||
{
|
{
|
||||||
"#url" : "https://mangatoto.com/title/86408/1681030",
|
"#url" : "https://mangatoto.com/title/86408/1681030",
|
||||||
"#category": ("", "batoto", "chapter"),
|
|
||||||
"#class" : batoto.BatotoChapterExtractor,
|
"#class" : batoto.BatotoChapterExtractor,
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"#url" : "https://mangatoto.net/title/86408/1681030",
|
"#url" : "https://mangatoto.net/title/86408/1681030",
|
||||||
"#category": ("", "batoto", "chapter"),
|
|
||||||
"#class" : batoto.BatotoChapterExtractor,
|
"#class" : batoto.BatotoChapterExtractor,
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"#url" : "https://mangatoto.org/title/86408/1681030",
|
"#url" : "https://mangatoto.org/title/86408/1681030",
|
||||||
"#category": ("", "batoto", "chapter"),
|
|
||||||
"#class" : batoto.BatotoChapterExtractor,
|
"#class" : batoto.BatotoChapterExtractor,
|
||||||
},
|
},
|
||||||
|
|
||||||
{
|
{
|
||||||
"#url" : "https://batocomic.com/title/86408/1681030",
|
"#url" : "https://batocomic.com/title/86408/1681030",
|
||||||
"#category": ("", "batoto", "chapter"),
|
|
||||||
"#class" : batoto.BatotoChapterExtractor,
|
"#class" : batoto.BatotoChapterExtractor,
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"#url" : "https://batocomic.net/title/86408/1681030",
|
"#url" : "https://batocomic.net/title/86408/1681030",
|
||||||
"#category": ("", "batoto", "chapter"),
|
|
||||||
"#class" : batoto.BatotoChapterExtractor,
|
"#class" : batoto.BatotoChapterExtractor,
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"#url" : "https://batocomic.org/title/86408/1681030",
|
"#url" : "https://batocomic.org/title/86408/1681030",
|
||||||
"#category": ("", "batoto", "chapter"),
|
|
||||||
"#class" : batoto.BatotoChapterExtractor,
|
"#class" : batoto.BatotoChapterExtractor,
|
||||||
},
|
},
|
||||||
|
|
||||||
{
|
{
|
||||||
"#url" : "https://readtoto.com/title/86408/1681030",
|
"#url" : "https://readtoto.com/title/86408/1681030",
|
||||||
"#category": ("", "batoto", "chapter"),
|
|
||||||
"#class" : batoto.BatotoChapterExtractor,
|
"#class" : batoto.BatotoChapterExtractor,
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"#url" : "https://readtoto.net/title/86408/1681030",
|
"#url" : "https://readtoto.net/title/86408/1681030",
|
||||||
"#category": ("", "batoto", "chapter"),
|
|
||||||
"#class" : batoto.BatotoChapterExtractor,
|
"#class" : batoto.BatotoChapterExtractor,
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"#url" : "https://readtoto.org/title/86408/1681030",
|
"#url" : "https://readtoto.org/title/86408/1681030",
|
||||||
"#category": ("", "batoto", "chapter"),
|
|
||||||
"#class" : batoto.BatotoChapterExtractor,
|
"#class" : batoto.BatotoChapterExtractor,
|
||||||
},
|
},
|
||||||
|
|
||||||
{
|
{
|
||||||
"#url" : "https://xbato.com/title/86408/1681030",
|
"#url" : "https://xbato.com/title/86408/1681030",
|
||||||
"#category": ("", "batoto", "chapter"),
|
|
||||||
"#class" : batoto.BatotoChapterExtractor,
|
"#class" : batoto.BatotoChapterExtractor,
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"#url" : "https://xbato.net/title/86408/1681030",
|
"#url" : "https://xbato.net/title/86408/1681030",
|
||||||
"#category": ("", "batoto", "chapter"),
|
|
||||||
"#class" : batoto.BatotoChapterExtractor,
|
"#class" : batoto.BatotoChapterExtractor,
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"#url" : "https://xbato.org/title/86408/1681030",
|
"#url" : "https://xbato.org/title/86408/1681030",
|
||||||
"#category": ("", "batoto", "chapter"),
|
|
||||||
"#class" : batoto.BatotoChapterExtractor,
|
"#class" : batoto.BatotoChapterExtractor,
|
||||||
},
|
},
|
||||||
|
|
||||||
{
|
{
|
||||||
"#url" : "https://zbato.com/title/86408/1681030",
|
"#url" : "https://zbato.com/title/86408/1681030",
|
||||||
"#category": ("", "batoto", "chapter"),
|
|
||||||
"#class" : batoto.BatotoChapterExtractor,
|
"#class" : batoto.BatotoChapterExtractor,
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"#url" : "https://zbato.net/title/86408/1681030",
|
"#url" : "https://zbato.net/title/86408/1681030",
|
||||||
"#category": ("", "batoto", "chapter"),
|
|
||||||
"#class" : batoto.BatotoChapterExtractor,
|
"#class" : batoto.BatotoChapterExtractor,
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"#url" : "https://zbato.org/title/86408/1681030",
|
"#url" : "https://zbato.org/title/86408/1681030",
|
||||||
"#category": ("", "batoto", "chapter"),
|
|
||||||
"#class" : batoto.BatotoChapterExtractor,
|
"#class" : batoto.BatotoChapterExtractor,
|
||||||
},
|
},
|
||||||
|
|
||||||
{
|
{
|
||||||
"#url" : "https://comiko.net/title/86408/1681030",
|
"#url" : "https://comiko.net/title/86408/1681030",
|
||||||
"#category": ("", "batoto", "chapter"),
|
|
||||||
"#class" : batoto.BatotoChapterExtractor,
|
"#class" : batoto.BatotoChapterExtractor,
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"#url" : "https://comiko.org/title/86408/1681030",
|
"#url" : "https://comiko.org/title/86408/1681030",
|
||||||
"#category": ("", "batoto", "chapter"),
|
|
||||||
"#class" : batoto.BatotoChapterExtractor,
|
"#class" : batoto.BatotoChapterExtractor,
|
||||||
},
|
},
|
||||||
|
|
||||||
{
|
{
|
||||||
"#url" : "https://batotoo.com/title/86408/1681030",
|
"#url" : "https://batotoo.com/title/86408/1681030",
|
||||||
"#category": ("", "batoto", "chapter"),
|
|
||||||
"#class" : batoto.BatotoChapterExtractor,
|
"#class" : batoto.BatotoChapterExtractor,
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"#url" : "https://batotwo.com/title/86408/1681030",
|
"#url" : "https://batotwo.com/title/86408/1681030",
|
||||||
"#category": ("", "batoto", "chapter"),
|
|
||||||
"#class" : batoto.BatotoChapterExtractor,
|
"#class" : batoto.BatotoChapterExtractor,
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"#url" : "https://battwo.com/title/86408/1681030",
|
"#url" : "https://battwo.com/title/86408/1681030",
|
||||||
"#category": ("", "batoto", "chapter"),
|
|
||||||
"#class" : batoto.BatotoChapterExtractor,
|
"#class" : batoto.BatotoChapterExtractor,
|
||||||
},
|
},
|
||||||
|
|
||||||
|
|||||||
@@ -92,7 +92,7 @@ __tests__ = (
|
|||||||
"extension" : "avif",
|
"extension" : "avif",
|
||||||
"filename" : str,
|
"filename" : str,
|
||||||
"gallery_id": 1615823,
|
"gallery_id": 1615823,
|
||||||
"group" : [],
|
"group" : ["mofumofuen"],
|
||||||
"lang" : "ja",
|
"lang" : "ja",
|
||||||
"language" : "Japanese",
|
"language" : "Japanese",
|
||||||
"num" : range(1, 22),
|
"num" : range(1, 22),
|
||||||
|
|||||||
@@ -116,7 +116,7 @@ __tests__ = (
|
|||||||
"#url" : "https://www.mangaread.org/manga/doesnotexist",
|
"#url" : "https://www.mangaread.org/manga/doesnotexist",
|
||||||
"#category": ("", "mangaread", "manga"),
|
"#category": ("", "mangaread", "manga"),
|
||||||
"#class" : mangaread.MangareadMangaExtractor,
|
"#class" : mangaread.MangareadMangaExtractor,
|
||||||
"#exception": exception.HttpError,
|
"#exception": exception.NotFoundError,
|
||||||
},
|
},
|
||||||
|
|
||||||
)
|
)
|
||||||
|
|||||||
@@ -20,9 +20,9 @@ __tests__ = (
|
|||||||
"#category": ("booru", "realbooru", "pool"),
|
"#category": ("booru", "realbooru", "pool"),
|
||||||
"#class" : realbooru.RealbooruPoolExtractor,
|
"#class" : realbooru.RealbooruPoolExtractor,
|
||||||
"#urls" : (
|
"#urls" : (
|
||||||
"https://realbooru.com//images/bf/d6/bfd682f338691e5254de796040fcba21.mp4",
|
"https://video-cdn.realbooru.com//images/bf/d6/bfd682f338691e5254de796040fcba21.mp4",
|
||||||
"https://realbooru.com//images/cb/7d/cb7d921673ba99f688031ac554777695.mp4",
|
"https://video-cdn.realbooru.com//images/cb/7d/cb7d921673ba99f688031ac554777695.mp4",
|
||||||
"https://realbooru.com//images/9e/14/9e140edc1cb2e4cc734ba5bdc4870955.mp4",
|
"https://video-cdn.realbooru.com//images/9e/14/9e140edc1cb2e4cc734ba5bdc4870955.mp4",
|
||||||
),
|
),
|
||||||
},
|
},
|
||||||
|
|
||||||
|
|||||||
@@ -13,7 +13,7 @@ __tests__ = (
|
|||||||
"#url" : "https://sankaku.app/?tags=bonocho",
|
"#url" : "https://sankaku.app/?tags=bonocho",
|
||||||
"#category": ("booru", "sankaku", "tag"),
|
"#category": ("booru", "sankaku", "tag"),
|
||||||
"#class" : sankaku.SankakuTagExtractor,
|
"#class" : sankaku.SankakuTagExtractor,
|
||||||
"#pattern" : r"https://s\.sankakucomplex\.com/data/[^/]{2}/[^/]{2}/[0-9a-f]{32}\.\w+\?e=\d+&(expires=\d+&)?m=[^&#]+",
|
"#pattern" : r"https://s\.sankakucomplex\.com/o/[^/]{2}/[^/]{2}/[0-9a-f]{32}\.\w+\?e=\d+&(expires=\d+&)?m=[^&#]+",
|
||||||
"#count" : 5,
|
"#count" : 5,
|
||||||
},
|
},
|
||||||
|
|
||||||
@@ -461,7 +461,7 @@ __tests__ = (
|
|||||||
"#comment" : "legacy post URL",
|
"#comment" : "legacy post URL",
|
||||||
"#category": ("booru", "sankaku", "post"),
|
"#category": ("booru", "sankaku", "post"),
|
||||||
"#class" : sankaku.SankakuPostExtractor,
|
"#class" : sankaku.SankakuPostExtractor,
|
||||||
"#pattern" : r"https://s\.sankakucomplex\.com/data/ac/8e/ac8e3b92ea328ce9cf7211e69c905bf9\.jpg\?e=.+",
|
"#pattern" : r"https://s\.sankakucomplex\.com/o/ac/8e/ac8e3b92ea328ce9cf7211e69c905bf9\.jpg\?e=.+",
|
||||||
|
|
||||||
# "id": 360451,
|
# "id": 360451,
|
||||||
"id": "y0abGlDOr2o",
|
"id": "y0abGlDOr2o",
|
||||||
@@ -473,7 +473,7 @@ __tests__ = (
|
|||||||
"#category": ("booru", "sankaku", "post"),
|
"#category": ("booru", "sankaku", "post"),
|
||||||
"#class" : sankaku.SankakuPostExtractor,
|
"#class" : sankaku.SankakuPostExtractor,
|
||||||
"#auth" : True,
|
"#auth" : True,
|
||||||
"#pattern" : r"https://s\.sankakucomplex\.com/data/13/3c/133cda3bfde249c504284493903fb985\.jpg",
|
"#pattern" : r"https://s\.sankakucomplex\.com/o/13/3c/133cda3bfde249c504284493903fb985\.jpg",
|
||||||
|
|
||||||
"md5": "133cda3bfde249c504284493903fb985",
|
"md5": "133cda3bfde249c504284493903fb985",
|
||||||
},
|
},
|
||||||
@@ -506,7 +506,7 @@ __tests__ = (
|
|||||||
"#comment" : "md5 hexdigest instead of ID (#3952)",
|
"#comment" : "md5 hexdigest instead of ID (#3952)",
|
||||||
"#category": ("booru", "sankaku", "post"),
|
"#category": ("booru", "sankaku", "post"),
|
||||||
"#class" : sankaku.SankakuPostExtractor,
|
"#class" : sankaku.SankakuPostExtractor,
|
||||||
"#pattern" : r"https://s\.sankakucomplex\.com/data/f8/ba/f8ba89043078f0e4be2d9c46550b840a\.jpg",
|
"#pattern" : r"https://s\.sankakucomplex\.com/o/f8/ba/f8ba89043078f0e4be2d9c46550b840a\.jpg",
|
||||||
"#count" : 1,
|
"#count" : 1,
|
||||||
|
|
||||||
# "id" : 33195194,
|
# "id" : 33195194,
|
||||||
@@ -519,7 +519,7 @@ __tests__ = (
|
|||||||
"#comment" : "/posts/ instead of /post/show/ (#4688)",
|
"#comment" : "/posts/ instead of /post/show/ (#4688)",
|
||||||
"#category": ("booru", "sankaku", "post"),
|
"#category": ("booru", "sankaku", "post"),
|
||||||
"#class" : sankaku.SankakuPostExtractor,
|
"#class" : sankaku.SankakuPostExtractor,
|
||||||
"#pattern" : r"https://s\.sankakucomplex\.com/data/f8/ba/f8ba89043078f0e4be2d9c46550b840a\.jpg",
|
"#pattern" : r"https://s\.sankakucomplex\.com/o/f8/ba/f8ba89043078f0e4be2d9c46550b840a\.jpg",
|
||||||
"#count" : 1,
|
"#count" : 1,
|
||||||
|
|
||||||
# "id" : 33195194,
|
# "id" : 33195194,
|
||||||
|
|||||||
Reference in New Issue
Block a user