[cleanup] Misc (#10807)
Closes #10751, Closes #10769, Closes #10791 Authored by: bashonly, Codenade, pzhlkj6612, seproDev, coletdjnz, grqz, Grub4K Co-authored-by: Codenade <amadeus.dorian04@gmail.com> Co-authored-by: Mozi <29089388+pzhlkj6612@users.noreply.github.com> Co-authored-by: sepro <4618135+seproDev@users.noreply.github.com> Co-authored-by: coletdjnz <coletdjnz@protonmail.com> Co-authored-by: N/Ame <173015200+grqz@users.noreply.github.com> Co-authored-by: Simon Sawicki <contact@grub4k.xyz>
This commit is contained in:
@@ -508,7 +508,7 @@ class FFmpegFD(ExternalFD):
|
||||
env = None
|
||||
proxy = self.params.get('proxy')
|
||||
if proxy:
|
||||
if not re.match(r'^[\da-zA-Z]+://', proxy):
|
||||
if not re.match(r'[\da-zA-Z]+://', proxy):
|
||||
proxy = f'http://{proxy}'
|
||||
|
||||
if proxy.startswith('socks'):
|
||||
@@ -559,7 +559,7 @@ class FFmpegFD(ExternalFD):
|
||||
|
||||
selected_formats = info_dict.get('requested_formats') or [info_dict]
|
||||
for i, fmt in enumerate(selected_formats):
|
||||
is_http = re.match(r'^https?://', fmt['url'])
|
||||
is_http = re.match(r'https?://', fmt['url'])
|
||||
cookies = self.ydl.cookiejar.get_cookies_for_url(fmt['url']) if is_http else []
|
||||
if cookies:
|
||||
args.extend(['-cookies', ''.join(
|
||||
|
||||
@@ -4,7 +4,7 @@ from .common import InfoExtractor
|
||||
|
||||
|
||||
class AcademicEarthCourseIE(InfoExtractor):
|
||||
_VALID_URL = r'^https?://(?:www\.)?academicearth\.org/playlists/(?P<id>[^?#/]+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?academicearth\.org/playlists/(?P<id>[^?#/]+)'
|
||||
IE_NAME = 'AcademicEarth:Course'
|
||||
_TEST = {
|
||||
'url': 'http://academicearth.org/playlists/laws-of-nature/',
|
||||
|
||||
@@ -231,7 +231,7 @@ class ARDIE(InfoExtractor):
|
||||
|
||||
class ARDBetaMediathekIE(InfoExtractor):
|
||||
IE_NAME = 'ARDMediathek'
|
||||
_VALID_URL = r'''(?x)https://
|
||||
_VALID_URL = r'''(?x)https?://
|
||||
(?:(?:beta|www)\.)?ardmediathek\.de/
|
||||
(?:[^/]+/)?
|
||||
(?:player|live|video)/
|
||||
@@ -470,7 +470,7 @@ class ARDBetaMediathekIE(InfoExtractor):
|
||||
|
||||
|
||||
class ARDMediathekCollectionIE(InfoExtractor):
|
||||
_VALID_URL = r'''(?x)https://
|
||||
_VALID_URL = r'''(?x)https?://
|
||||
(?:(?:beta|www)\.)?ardmediathek\.de/
|
||||
(?:[^/?#]+/)?
|
||||
(?P<playlist>sendung|serie|sammlung)/
|
||||
|
||||
@@ -3,7 +3,7 @@ from ..utils import float_or_none, int_or_none, make_archive_id, traverse_obj
|
||||
|
||||
|
||||
class CallinIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?callin\.com/(episode)/(?P<id>[-a-zA-Z]+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?callin\.com/episode/(?P<id>[-a-zA-Z]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.callin.com/episode/the-title-ix-regime-and-the-long-march-through-EBfXYSrsjc',
|
||||
'info_dict': {
|
||||
|
||||
@@ -2077,7 +2077,7 @@ class InfoExtractor:
|
||||
has_drm = HlsFD._has_drm(m3u8_doc)
|
||||
|
||||
def format_url(url):
|
||||
return url if re.match(r'^https?://', url) else urllib.parse.urljoin(m3u8_url, url)
|
||||
return url if re.match(r'https?://', url) else urllib.parse.urljoin(m3u8_url, url)
|
||||
|
||||
if self.get_param('hls_split_discontinuity', False):
|
||||
def _extract_m3u8_playlist_indices(manifest_url=None, m3u8_doc=None):
|
||||
@@ -2812,11 +2812,11 @@ class InfoExtractor:
|
||||
base_url_e = element.find(_add_ns('BaseURL'))
|
||||
if try_call(lambda: base_url_e.text) is not None:
|
||||
base_url = base_url_e.text + base_url
|
||||
if re.match(r'^https?://', base_url):
|
||||
if re.match(r'https?://', base_url):
|
||||
break
|
||||
if mpd_base_url and base_url.startswith('/'):
|
||||
base_url = urllib.parse.urljoin(mpd_base_url, base_url)
|
||||
elif mpd_base_url and not re.match(r'^https?://', base_url):
|
||||
elif mpd_base_url and not re.match(r'https?://', base_url):
|
||||
if not mpd_base_url.endswith('/'):
|
||||
mpd_base_url += '/'
|
||||
base_url = mpd_base_url + base_url
|
||||
@@ -2906,7 +2906,7 @@ class InfoExtractor:
|
||||
}
|
||||
|
||||
def location_key(location):
|
||||
return 'url' if re.match(r'^https?://', location) else 'path'
|
||||
return 'url' if re.match(r'https?://', location) else 'path'
|
||||
|
||||
if 'segment_urls' not in representation_ms_info and 'media' in representation_ms_info:
|
||||
|
||||
|
||||
@@ -14,7 +14,7 @@ from ..utils import (
|
||||
|
||||
|
||||
class FC2IE(InfoExtractor):
|
||||
_VALID_URL = r'^(?:https?://video\.fc2\.com/(?:[^/]+/)*content/|fc2:)(?P<id>[^/]+)'
|
||||
_VALID_URL = r'(?:https?://video\.fc2\.com/(?:[^/]+/)*content/|fc2:)(?P<id>[^/]+)'
|
||||
IE_NAME = 'fc2'
|
||||
_NETRC_MACHINE = 'fc2'
|
||||
_TESTS = [{
|
||||
|
||||
@@ -2340,7 +2340,7 @@ class GenericIE(InfoExtractor):
|
||||
default_search = 'fixup_error'
|
||||
|
||||
if default_search in ('auto', 'auto_warning', 'fixup_error'):
|
||||
if re.match(r'^[^\s/]+\.[^\s/]+/', url):
|
||||
if re.match(r'[^\s/]+\.[^\s/]+/', url):
|
||||
self.report_warning('The url doesn\'t specify the protocol, trying with http')
|
||||
return self.url_result('http://' + url)
|
||||
elif default_search != 'fixup_error':
|
||||
@@ -2400,7 +2400,7 @@ class GenericIE(InfoExtractor):
|
||||
|
||||
# Check for direct link to a video
|
||||
content_type = full_response.headers.get('Content-Type', '').lower()
|
||||
m = re.match(r'^(?P<type>audio|video|application(?=/(?:ogg$|(?:vnd\.apple\.|x-)?mpegurl)))/(?P<format_id>[^;\s]+)', content_type)
|
||||
m = re.match(r'(?P<type>audio|video|application(?=/(?:ogg$|(?:vnd\.apple\.|x-)?mpegurl)))/(?P<format_id>[^;\s]+)', content_type)
|
||||
if m:
|
||||
self.report_detected('direct video link')
|
||||
headers = filter_dict({'Referer': smuggled_data.get('referer')})
|
||||
|
||||
@@ -52,7 +52,7 @@ class GetCourseRuIE(InfoExtractor):
|
||||
_BASE_URL_RE = rf'https?://(?:(?!player02\.)[^.]+\.getcourse\.(?:ru|io)|{"|".join(map(re.escape, _DOMAINS))})'
|
||||
_VALID_URL = [
|
||||
rf'{_BASE_URL_RE}/(?!pl/|teach/)(?P<id>[^?#]+)',
|
||||
rf'{_BASE_URL_RE}/(:?pl/)?teach/control/lesson/view\?(?:[^#]+&)?id=(?P<id>\d+)',
|
||||
rf'{_BASE_URL_RE}/(?:pl/)?teach/control/lesson/view\?(?:[^#]+&)?id=(?P<id>\d+)',
|
||||
]
|
||||
_TESTS = [{
|
||||
'url': 'http://academymel.online/3video_1',
|
||||
|
||||
@@ -7,7 +7,7 @@ from ..utils import (
|
||||
|
||||
|
||||
class GolemIE(InfoExtractor):
|
||||
_VALID_URL = r'^https?://video\.golem\.de/.+?/(?P<id>.+?)/'
|
||||
_VALID_URL = r'https?://video\.golem\.de/.+?/(?P<id>.+?)/'
|
||||
_TEST = {
|
||||
'url': 'http://video.golem.de/handy/14095/iphone-6-und-6-plus-test.html',
|
||||
'md5': 'c1a2c0a3c863319651c7c992c5ee29bf',
|
||||
|
||||
@@ -13,7 +13,7 @@ from ..utils import (
|
||||
|
||||
class HRFernsehenIE(InfoExtractor):
|
||||
IE_NAME = 'hrfernsehen'
|
||||
_VALID_URL = r'^https?://www\.(?:hr-fernsehen|hessenschau)\.de/.*,video-(?P<id>[0-9]{6})\.html'
|
||||
_VALID_URL = r'https?://www\.(?:hr-fernsehen|hessenschau)\.de/.*,video-(?P<id>[0-9]{6})\.html'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.hessenschau.de/tv-sendung/hessenschau-vom-26082020,video-130546.html',
|
||||
'md5': '5c4e0ba94677c516a2f65a84110fc536',
|
||||
|
||||
@@ -194,11 +194,14 @@ class ShugiinItvVodIE(ShugiinItvBaseIE):
|
||||
|
||||
|
||||
class SangiinInstructionIE(InfoExtractor):
|
||||
_VALID_URL = r'^https?://www\.webtv\.sangiin\.go\.jp/webtv/index\.php'
|
||||
_VALID_URL = r'https?://www\.webtv\.sangiin\.go\.jp/webtv/index\.php'
|
||||
IE_DESC = False # this shouldn't be listed as a supported site
|
||||
|
||||
def _real_extract(self, url):
|
||||
raise ExtractorError('Copy the link from the botton below the video description or player, and use the link to download. If there are no button in the frame, get the URL of the frame showing the video.', expected=True)
|
||||
raise ExtractorError(
|
||||
'Copy the link from the button below the video description/player '
|
||||
'and use that link to download. If there is no button in the frame, '
|
||||
'get the URL of the frame showing the video.', expected=True)
|
||||
|
||||
|
||||
class SangiinIE(InfoExtractor):
|
||||
|
||||
@@ -22,7 +22,7 @@ class KalturaIE(InfoExtractor):
|
||||
(?:
|
||||
kaltura:(?P<partner_id>\w+):(?P<id>\w+)(?::(?P<player_type>\w+))?|
|
||||
https?://
|
||||
(:?(?:www|cdnapi(?:sec)?)\.)?kaltura\.com(?::\d+)?/
|
||||
(?:(?:www|cdnapi(?:sec)?)\.)?kaltura\.com(?::\d+)?/
|
||||
(?:
|
||||
(?:
|
||||
# flash player
|
||||
|
||||
@@ -126,7 +126,7 @@ class MailRuIE(InfoExtractor):
|
||||
video_data = None
|
||||
|
||||
# fix meta_url if missing the host address
|
||||
if re.match(r'^\/\+\/', meta_url):
|
||||
if re.match(r'\/\+\/', meta_url):
|
||||
meta_url = urljoin('https://my.mail.ru', meta_url)
|
||||
|
||||
if meta_url:
|
||||
|
||||
@@ -16,7 +16,7 @@ from ..utils import (
|
||||
|
||||
|
||||
class MGTVIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:w(?:ww)?\.)?mgtv\.com/(v|b)/(?:[^/]+/)*(?P<id>\d+)\.html'
|
||||
_VALID_URL = r'https?://(?:w(?:ww)?\.)?mgtv\.com/[bv]/(?:[^/]+/)*(?P<id>\d+)\.html'
|
||||
IE_DESC = '芒果TV'
|
||||
IE_NAME = 'MangoTV'
|
||||
|
||||
|
||||
@@ -65,7 +65,7 @@ class TechTVMITIE(InfoExtractor):
|
||||
|
||||
class OCWMITIE(InfoExtractor):
|
||||
IE_NAME = 'ocw.mit.edu'
|
||||
_VALID_URL = r'^https?://ocw\.mit\.edu/courses/(?P<topic>[a-z0-9\-]+)'
|
||||
_VALID_URL = r'https?://ocw\.mit\.edu/courses/(?P<topic>[a-z0-9\-]+)'
|
||||
_BASE_URL = 'http://ocw.mit.edu/'
|
||||
|
||||
_TESTS = [
|
||||
|
||||
@@ -10,7 +10,7 @@ from ..utils import (
|
||||
|
||||
|
||||
class NZOnScreenIE(InfoExtractor):
|
||||
_VALID_URL = r'^https?://www\.nzonscreen\.com/title/(?P<id>[^/?#]+)'
|
||||
_VALID_URL = r'https?://www\.nzonscreen\.com/title/(?P<id>[^/?#]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.nzonscreen.com/title/shoop-shoop-diddy-wop-cumma-cumma-wang-dang-1982',
|
||||
'info_dict': {
|
||||
|
||||
@@ -628,8 +628,7 @@ class PornHubPagedPlaylistBaseIE(PornHubPlaylistBaseIE):
|
||||
page_entries = self._extract_entries(webpage, host)
|
||||
if not page_entries:
|
||||
break
|
||||
for e in page_entries:
|
||||
yield e
|
||||
yield from page_entries
|
||||
if not self._has_more(webpage):
|
||||
break
|
||||
|
||||
|
||||
@@ -16,7 +16,7 @@ from ..utils import (
|
||||
|
||||
|
||||
class RadioFranceIE(InfoExtractor):
|
||||
_VALID_URL = r'^https?://maison\.radiofrance\.fr/radiovisions/(?P<id>[^?#]+)'
|
||||
_VALID_URL = r'https?://maison\.radiofrance\.fr/radiovisions/(?P<id>[^?#]+)'
|
||||
IE_NAME = 'radiofrance'
|
||||
|
||||
_TEST = {
|
||||
|
||||
@@ -6,7 +6,7 @@ from ..utils import (
|
||||
|
||||
|
||||
class ReverbNationIE(InfoExtractor):
|
||||
_VALID_URL = r'^https?://(?:www\.)?reverbnation\.com/.*?/song/(?P<id>\d+).*?$'
|
||||
_VALID_URL = r'https?://(?:www\.)?reverbnation\.com/.*?/song/(?P<id>\d+).*?$'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.reverbnation.com/alkilados/song/16965047-mona-lisa',
|
||||
'md5': 'c0aaf339bcee189495fdf5a8c8ba8645',
|
||||
|
||||
@@ -8,7 +8,7 @@ from ..utils import (
|
||||
|
||||
|
||||
class Tele13IE(InfoExtractor):
|
||||
_VALID_URL = r'^https?://(?:www\.)?t13\.cl/videos(?:/[^/]+)+/(?P<id>[\w-]+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?t13\.cl/videos(?:/[^/]+)+/(?P<id>[\w-]+)'
|
||||
_TESTS = [
|
||||
{
|
||||
'url': 'http://www.t13.cl/videos/actualidad/el-circulo-de-hierro-de-michelle-bachelet-en-su-regreso-a-la-moneda',
|
||||
|
||||
@@ -270,7 +270,7 @@ class TwitCastingLiveIE(InfoExtractor):
|
||||
|
||||
|
||||
class TwitCastingUserIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:[^/?#]+\.)?twitcasting\.tv/(?P<id>[^/?#]+)/(:?show|archive)/?(?:[#?]|$)'
|
||||
_VALID_URL = r'https?://(?:[^/?#]+\.)?twitcasting\.tv/(?P<id>[^/?#]+)/(?:show|archive)/?(?:[#?]|$)'
|
||||
_TESTS = [{
|
||||
'url': 'https://twitcasting.tv/natsuiromatsuri/archive/',
|
||||
'info_dict': {
|
||||
|
||||
@@ -90,7 +90,7 @@ class ViuIE(ViuBaseIE):
|
||||
formats, subtitles = self._extract_m3u8_formats_and_subtitles(m3u8_url, video_id, 'mp4')
|
||||
|
||||
for key, value in video_data.items():
|
||||
mobj = re.match(r'^subtitle_(?P<lang>[^_]+)_(?P<ext>(vtt|srt))', key)
|
||||
mobj = re.match(r'subtitle_(?P<lang>[^_]+)_(?P<ext>(vtt|srt))', key)
|
||||
if not mobj:
|
||||
continue
|
||||
subtitles.setdefault(mobj.group('lang'), []).append({
|
||||
|
||||
@@ -21,7 +21,7 @@ class XimalayaBaseIE(InfoExtractor):
|
||||
class XimalayaIE(XimalayaBaseIE):
|
||||
IE_NAME = 'ximalaya'
|
||||
IE_DESC = '喜马拉雅FM'
|
||||
_VALID_URL = r'https?://(?:www\.|m\.)?ximalaya\.com/(:?(?P<uid>\d+)/)?sound/(?P<id>[0-9]+)'
|
||||
_VALID_URL = r'https?://(?:www\.|m\.)?ximalaya\.com/(?:(?P<uid>\d+)/)?sound/(?P<id>[0-9]+)'
|
||||
_TESTS = [
|
||||
{
|
||||
'url': 'http://www.ximalaya.com/sound/47740352/',
|
||||
|
||||
@@ -33,8 +33,8 @@ if not websockets:
|
||||
import websockets.version
|
||||
|
||||
websockets_version = tuple(map(int_or_none, websockets.version.version.split('.')))
|
||||
if websockets_version < (12, 0):
|
||||
raise ImportError('Only websockets>=12.0 is supported')
|
||||
if websockets_version < (13, 0):
|
||||
raise ImportError('Only websockets>=13.0 is supported')
|
||||
|
||||
import websockets.sync.client
|
||||
from websockets.uri import parse_uri
|
||||
|
||||
@@ -647,16 +647,16 @@ def create_parser():
|
||||
'You can also simply specify a field to match if the field is present, '
|
||||
'use "!field" to check if the field is not present, and "&" to check multiple conditions. '
|
||||
'Use a "\\" to escape "&" or quotes if needed. If used multiple times, '
|
||||
'the filter matches if at least one of the conditions is met. E.g. --match-filter '
|
||||
'!is_live --match-filter "like_count>?100 & description~=\'(?i)\\bcats \\& dogs\\b\'" '
|
||||
'the filter matches if at least one of the conditions is met. E.g. --match-filters '
|
||||
'!is_live --match-filters "like_count>?100 & description~=\'(?i)\\bcats \\& dogs\\b\'" '
|
||||
'matches only videos that are not live OR those that have a like count more than 100 '
|
||||
'(or the like field is not available) and also has a description '
|
||||
'that contains the phrase "cats & dogs" (caseless). '
|
||||
'Use "--match-filter -" to interactively ask whether to download each video'))
|
||||
'Use "--match-filters -" to interactively ask whether to download each video'))
|
||||
selection.add_option(
|
||||
'--no-match-filters',
|
||||
dest='match_filter', action='store_const', const=None,
|
||||
help='Do not use any --match-filter (default)')
|
||||
help='Do not use any --match-filters (default)')
|
||||
selection.add_option(
|
||||
'--break-match-filters',
|
||||
metavar='FILTER', dest='breaking_match_filter', action='append',
|
||||
@@ -704,7 +704,7 @@ def create_parser():
|
||||
selection.add_option(
|
||||
'--break-per-input',
|
||||
action='store_true', dest='break_per_url', default=False,
|
||||
help='Alters --max-downloads, --break-on-existing, --break-match-filter, and autonumber to reset per input URL')
|
||||
help='Alters --max-downloads, --break-on-existing, --break-match-filters, and autonumber to reset per input URL')
|
||||
selection.add_option(
|
||||
'--no-break-per-input',
|
||||
action='store_false', dest='break_per_url',
|
||||
|
||||
@@ -33,7 +33,7 @@ class SponsorBlockPP(FFmpegPostProcessor):
|
||||
def __init__(self, downloader, categories=None, api='https://sponsor.ajay.app'):
|
||||
FFmpegPostProcessor.__init__(self, downloader)
|
||||
self._categories = tuple(categories or self.CATEGORIES.keys())
|
||||
self._API_URL = api if re.match('^https?://', api) else 'https://' + api
|
||||
self._API_URL = api if re.match('https?://', api) else 'https://' + api
|
||||
|
||||
def run(self, info):
|
||||
extractor = info['extractor_key']
|
||||
|
||||
@@ -1954,7 +1954,7 @@ def urljoin(base, path):
|
||||
path = path.decode()
|
||||
if not isinstance(path, str) or not path:
|
||||
return None
|
||||
if re.match(r'^(?:[a-zA-Z][a-zA-Z0-9+-.]*:)?//', path):
|
||||
if re.match(r'(?:[a-zA-Z][a-zA-Z0-9+-.]*:)?//', path):
|
||||
return path
|
||||
if isinstance(base, bytes):
|
||||
base = base.decode()
|
||||
@@ -2007,7 +2007,7 @@ def url_or_none(url):
|
||||
if not url or not isinstance(url, str):
|
||||
return None
|
||||
url = url.strip()
|
||||
return url if re.match(r'^(?:(?:https?|rt(?:m(?:pt?[es]?|fp)|sp[su]?)|mms|ftps?):)?//', url) else None
|
||||
return url if re.match(r'(?:(?:https?|rt(?:m(?:pt?[es]?|fp)|sp[su]?)|mms|ftps?):)?//', url) else None
|
||||
|
||||
|
||||
def strftime_or_none(timestamp, date_format='%Y%m%d', default=None):
|
||||
@@ -3113,7 +3113,7 @@ def is_html(first_bytes):
|
||||
while first_bytes.startswith(bom):
|
||||
encoding, first_bytes = enc, first_bytes[len(bom):]
|
||||
|
||||
return re.match(r'^\s*<', first_bytes.decode(encoding, 'replace'))
|
||||
return re.match(r'\s*<', first_bytes.decode(encoding, 'replace'))
|
||||
|
||||
|
||||
def determine_protocol(info_dict):
|
||||
|
||||
Reference in New Issue
Block a user