[cleanup] Misc (#10807)

Closes #10751, Closes #10769, Closes #10791
Authored by: bashonly, Codenade, pzhlkj6612, seproDev, coletdjnz, grqz, Grub4K

Co-authored-by: Codenade <amadeus.dorian04@gmail.com>
Co-authored-by: Mozi <29089388+pzhlkj6612@users.noreply.github.com>
Co-authored-by: sepro <4618135+seproDev@users.noreply.github.com>
Co-authored-by: coletdjnz <coletdjnz@protonmail.com>
Co-authored-by: N/Ame <173015200+grqz@users.noreply.github.com>
Co-authored-by: Simon Sawicki <contact@grub4k.xyz>
2024-09-27 17:46:22 -05:00
parent cca534cd9e
commit c6387abc1a
31 changed files with 63 additions and 61 deletions
--- a/yt_dlp/downloader/external.py
+++ b/yt_dlp/downloader/external.py
@@ -508,7 +508,7 @@ class FFmpegFD(ExternalFD):
        env = None
        proxy = self.params.get('proxy')
        if proxy:
-            if not re.match(r'^[\da-zA-Z]+://', proxy):
+            if not re.match(r'[\da-zA-Z]+://', proxy):
                proxy = f'http://{proxy}'

            if proxy.startswith('socks'):
@@ -559,7 +559,7 @@ class FFmpegFD(ExternalFD):

        selected_formats = info_dict.get('requested_formats') or [info_dict]
        for i, fmt in enumerate(selected_formats):
-            is_http = re.match(r'^https?://', fmt['url'])
+            is_http = re.match(r'https?://', fmt['url'])
            cookies = self.ydl.cookiejar.get_cookies_for_url(fmt['url']) if is_http else []
            if cookies:
                args.extend(['-cookies', ''.join(
--- a/yt_dlp/extractor/academicearth.py
+++ b/yt_dlp/extractor/academicearth.py
@@ -4,7 +4,7 @@ from .common import InfoExtractor


 class AcademicEarthCourseIE(InfoExtractor):
-    _VALID_URL = r'^https?://(?:www\.)?academicearth\.org/playlists/(?P<id>[^?#/]+)'
+    _VALID_URL = r'https?://(?:www\.)?academicearth\.org/playlists/(?P<id>[^?#/]+)'
    IE_NAME = 'AcademicEarth:Course'
    _TEST = {
        'url': 'http://academicearth.org/playlists/laws-of-nature/',
--- a/yt_dlp/extractor/ard.py
+++ b/yt_dlp/extractor/ard.py
@@ -231,7 +231,7 @@ class ARDIE(InfoExtractor):

 class ARDBetaMediathekIE(InfoExtractor):
    IE_NAME = 'ARDMediathek'
-    _VALID_URL = r'''(?x)https://
+    _VALID_URL = r'''(?x)https?://
        (?:(?:beta|www)\.)?ardmediathek\.de/
        (?:[^/]+/)?
        (?:player|live|video)/
@@ -470,7 +470,7 @@ class ARDBetaMediathekIE(InfoExtractor):


 class ARDMediathekCollectionIE(InfoExtractor):
-    _VALID_URL = r'''(?x)https://
+    _VALID_URL = r'''(?x)https?://
        (?:(?:beta|www)\.)?ardmediathek\.de/
        (?:[^/?#]+/)?
        (?P<playlist>sendung|serie|sammlung)/
--- a/yt_dlp/extractor/callin.py
+++ b/yt_dlp/extractor/callin.py
@@ -3,7 +3,7 @@ from ..utils import float_or_none, int_or_none, make_archive_id, traverse_obj


 class CallinIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?callin\.com/(episode)/(?P<id>[-a-zA-Z]+)'
+    _VALID_URL = r'https?://(?:www\.)?callin\.com/episode/(?P<id>[-a-zA-Z]+)'
    _TESTS = [{
        'url': 'https://www.callin.com/episode/the-title-ix-regime-and-the-long-march-through-EBfXYSrsjc',
        'info_dict': {
--- a/yt_dlp/extractor/common.py
+++ b/yt_dlp/extractor/common.py
@@ -2077,7 +2077,7 @@ class InfoExtractor:
        has_drm = HlsFD._has_drm(m3u8_doc)

        def format_url(url):
-            return url if re.match(r'^https?://', url) else urllib.parse.urljoin(m3u8_url, url)
+            return url if re.match(r'https?://', url) else urllib.parse.urljoin(m3u8_url, url)

        if self.get_param('hls_split_discontinuity', False):
            def _extract_m3u8_playlist_indices(manifest_url=None, m3u8_doc=None):
@@ -2812,11 +2812,11 @@ class InfoExtractor:
                        base_url_e = element.find(_add_ns('BaseURL'))
                        if try_call(lambda: base_url_e.text) is not None:
                            base_url = base_url_e.text + base_url
-                            if re.match(r'^https?://', base_url):
+                            if re.match(r'https?://', base_url):
                                break
                    if mpd_base_url and base_url.startswith('/'):
                        base_url = urllib.parse.urljoin(mpd_base_url, base_url)
-                    elif mpd_base_url and not re.match(r'^https?://', base_url):
+                    elif mpd_base_url and not re.match(r'https?://', base_url):
                        if not mpd_base_url.endswith('/'):
                            mpd_base_url += '/'
                        base_url = mpd_base_url + base_url
@@ -2906,7 +2906,7 @@ class InfoExtractor:
                        }

                    def location_key(location):
-                        return 'url' if re.match(r'^https?://', location) else 'path'
+                        return 'url' if re.match(r'https?://', location) else 'path'

                    if 'segment_urls' not in representation_ms_info and 'media' in representation_ms_info:

--- a/yt_dlp/extractor/fc2.py
+++ b/yt_dlp/extractor/fc2.py
@@ -14,7 +14,7 @@ from ..utils import (


 class FC2IE(InfoExtractor):
-    _VALID_URL = r'^(?:https?://video\.fc2\.com/(?:[^/]+/)*content/|fc2:)(?P<id>[^/]+)'
+    _VALID_URL = r'(?:https?://video\.fc2\.com/(?:[^/]+/)*content/|fc2:)(?P<id>[^/]+)'
    IE_NAME = 'fc2'
    _NETRC_MACHINE = 'fc2'
    _TESTS = [{
--- a/yt_dlp/extractor/generic.py
+++ b/yt_dlp/extractor/generic.py
@@ -2340,7 +2340,7 @@ class GenericIE(InfoExtractor):
                default_search = 'fixup_error'

            if default_search in ('auto', 'auto_warning', 'fixup_error'):
-                if re.match(r'^[^\s/]+\.[^\s/]+/', url):
+                if re.match(r'[^\s/]+\.[^\s/]+/', url):
                    self.report_warning('The url doesn\'t specify the protocol, trying with http')
                    return self.url_result('http://' + url)
                elif default_search != 'fixup_error':
@@ -2400,7 +2400,7 @@ class GenericIE(InfoExtractor):

        # Check for direct link to a video
        content_type = full_response.headers.get('Content-Type', '').lower()
-        m = re.match(r'^(?P<type>audio|video|application(?=/(?:ogg$|(?:vnd\.apple\.|x-)?mpegurl)))/(?P<format_id>[^;\s]+)', content_type)
+        m = re.match(r'(?P<type>audio|video|application(?=/(?:ogg$|(?:vnd\.apple\.|x-)?mpegurl)))/(?P<format_id>[^;\s]+)', content_type)
        if m:
            self.report_detected('direct video link')
            headers = filter_dict({'Referer': smuggled_data.get('referer')})
--- a/yt_dlp/extractor/getcourseru.py
+++ b/yt_dlp/extractor/getcourseru.py
@@ -52,7 +52,7 @@ class GetCourseRuIE(InfoExtractor):
    _BASE_URL_RE = rf'https?://(?:(?!player02\.)[^.]+\.getcourse\.(?:ru|io)|{"|".join(map(re.escape, _DOMAINS))})'
    _VALID_URL = [
        rf'{_BASE_URL_RE}/(?!pl/|teach/)(?P<id>[^?#]+)',
-        rf'{_BASE_URL_RE}/(:?pl/)?teach/control/lesson/view\?(?:[^#]+&)?id=(?P<id>\d+)',
+        rf'{_BASE_URL_RE}/(?:pl/)?teach/control/lesson/view\?(?:[^#]+&)?id=(?P<id>\d+)',
    ]
    _TESTS = [{
        'url': 'http://academymel.online/3video_1',
--- a/yt_dlp/extractor/golem.py
+++ b/yt_dlp/extractor/golem.py
@@ -7,7 +7,7 @@ from ..utils import (


 class GolemIE(InfoExtractor):
-    _VALID_URL = r'^https?://video\.golem\.de/.+?/(?P<id>.+?)/'
+    _VALID_URL = r'https?://video\.golem\.de/.+?/(?P<id>.+?)/'
    _TEST = {
        'url': 'http://video.golem.de/handy/14095/iphone-6-und-6-plus-test.html',
        'md5': 'c1a2c0a3c863319651c7c992c5ee29bf',
--- a/yt_dlp/extractor/hrfensehen.py
+++ b/yt_dlp/extractor/hrfensehen.py
@@ -13,7 +13,7 @@ from ..utils import (

 class HRFernsehenIE(InfoExtractor):
    IE_NAME = 'hrfernsehen'
-    _VALID_URL = r'^https?://www\.(?:hr-fernsehen|hessenschau)\.de/.*,video-(?P<id>[0-9]{6})\.html'
+    _VALID_URL = r'https?://www\.(?:hr-fernsehen|hessenschau)\.de/.*,video-(?P<id>[0-9]{6})\.html'
    _TESTS = [{
        'url': 'https://www.hessenschau.de/tv-sendung/hessenschau-vom-26082020,video-130546.html',
        'md5': '5c4e0ba94677c516a2f65a84110fc536',
--- a/yt_dlp/extractor/japandiet.py
+++ b/yt_dlp/extractor/japandiet.py
@@ -194,11 +194,14 @@ class ShugiinItvVodIE(ShugiinItvBaseIE):


 class SangiinInstructionIE(InfoExtractor):
-    _VALID_URL = r'^https?://www\.webtv\.sangiin\.go\.jp/webtv/index\.php'
+    _VALID_URL = r'https?://www\.webtv\.sangiin\.go\.jp/webtv/index\.php'
    IE_DESC = False  # this shouldn't be listed as a supported site

    def _real_extract(self, url):
-        raise ExtractorError('Copy the link from the botton below the video description or player, and use the link to download. If there are no button in the frame, get the URL of the frame showing the video.', expected=True)
+        raise ExtractorError(
+            'Copy the link from the button below the video description/player '
+            'and use that link to download. If there is no button in the frame, '
+            'get the URL of the frame showing the video.', expected=True)


 class SangiinIE(InfoExtractor):
--- a/yt_dlp/extractor/kaltura.py
+++ b/yt_dlp/extractor/kaltura.py
@@ -22,7 +22,7 @@ class KalturaIE(InfoExtractor):
                (?:
                    kaltura:(?P<partner_id>\w+):(?P<id>\w+)(?::(?P<player_type>\w+))?|
                    https?://
-                        (:?(?:www|cdnapi(?:sec)?)\.)?kaltura\.com(?::\d+)?/
+                        (?:(?:www|cdnapi(?:sec)?)\.)?kaltura\.com(?::\d+)?/
                        (?:
                            (?:
                                # flash player
--- a/yt_dlp/extractor/mailru.py
+++ b/yt_dlp/extractor/mailru.py
@@ -126,7 +126,7 @@ class MailRuIE(InfoExtractor):
        video_data = None

        # fix meta_url if missing the host address
-        if re.match(r'^\/\+\/', meta_url):
+        if re.match(r'\/\+\/', meta_url):
            meta_url = urljoin('https://my.mail.ru', meta_url)

        if meta_url:
--- a/yt_dlp/extractor/mgtv.py
+++ b/yt_dlp/extractor/mgtv.py
@@ -16,7 +16,7 @@ from ..utils import (


 class MGTVIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:w(?:ww)?\.)?mgtv\.com/(v|b)/(?:[^/]+/)*(?P<id>\d+)\.html'
+    _VALID_URL = r'https?://(?:w(?:ww)?\.)?mgtv\.com/[bv]/(?:[^/]+/)*(?P<id>\d+)\.html'
    IE_DESC = '芒果TV'
    IE_NAME = 'MangoTV'

--- a/yt_dlp/extractor/mit.py
+++ b/yt_dlp/extractor/mit.py
@@ -65,7 +65,7 @@ class TechTVMITIE(InfoExtractor):

 class OCWMITIE(InfoExtractor):
    IE_NAME = 'ocw.mit.edu'
-    _VALID_URL = r'^https?://ocw\.mit\.edu/courses/(?P<topic>[a-z0-9\-]+)'
+    _VALID_URL = r'https?://ocw\.mit\.edu/courses/(?P<topic>[a-z0-9\-]+)'
    _BASE_URL = 'http://ocw.mit.edu/'

    _TESTS = [
--- a/yt_dlp/extractor/nzonscreen.py
+++ b/yt_dlp/extractor/nzonscreen.py
@@ -10,7 +10,7 @@ from ..utils import (


 class NZOnScreenIE(InfoExtractor):
-    _VALID_URL = r'^https?://www\.nzonscreen\.com/title/(?P<id>[^/?#]+)'
+    _VALID_URL = r'https?://www\.nzonscreen\.com/title/(?P<id>[^/?#]+)'
    _TESTS = [{
        'url': 'https://www.nzonscreen.com/title/shoop-shoop-diddy-wop-cumma-cumma-wang-dang-1982',
        'info_dict': {
--- a/yt_dlp/extractor/pornhub.py
+++ b/yt_dlp/extractor/pornhub.py
@@ -628,8 +628,7 @@ class PornHubPagedPlaylistBaseIE(PornHubPlaylistBaseIE):
            page_entries = self._extract_entries(webpage, host)
            if not page_entries:
                break
-            for e in page_entries:
-                yield e
+            yield from page_entries
            if not self._has_more(webpage):
                break

--- a/yt_dlp/extractor/radiofrance.py
+++ b/yt_dlp/extractor/radiofrance.py
@@ -16,7 +16,7 @@ from ..utils import (


 class RadioFranceIE(InfoExtractor):
-    _VALID_URL = r'^https?://maison\.radiofrance\.fr/radiovisions/(?P<id>[^?#]+)'
+    _VALID_URL = r'https?://maison\.radiofrance\.fr/radiovisions/(?P<id>[^?#]+)'
    IE_NAME = 'radiofrance'

    _TEST = {
--- a/yt_dlp/extractor/reverbnation.py
+++ b/yt_dlp/extractor/reverbnation.py
@@ -6,7 +6,7 @@ from ..utils import (


 class ReverbNationIE(InfoExtractor):
-    _VALID_URL = r'^https?://(?:www\.)?reverbnation\.com/.*?/song/(?P<id>\d+).*?$'
+    _VALID_URL = r'https?://(?:www\.)?reverbnation\.com/.*?/song/(?P<id>\d+).*?$'
    _TESTS = [{
        'url': 'http://www.reverbnation.com/alkilados/song/16965047-mona-lisa',
        'md5': 'c0aaf339bcee189495fdf5a8c8ba8645',
--- a/yt_dlp/extractor/tele13.py
+++ b/yt_dlp/extractor/tele13.py
@@ -8,7 +8,7 @@ from ..utils import (


 class Tele13IE(InfoExtractor):
-    _VALID_URL = r'^https?://(?:www\.)?t13\.cl/videos(?:/[^/]+)+/(?P<id>[\w-]+)'
+    _VALID_URL = r'https?://(?:www\.)?t13\.cl/videos(?:/[^/]+)+/(?P<id>[\w-]+)'
    _TESTS = [
        {
            'url': 'http://www.t13.cl/videos/actualidad/el-circulo-de-hierro-de-michelle-bachelet-en-su-regreso-a-la-moneda',
--- a/yt_dlp/extractor/twitcasting.py
+++ b/yt_dlp/extractor/twitcasting.py
@@ -270,7 +270,7 @@ class TwitCastingLiveIE(InfoExtractor):


 class TwitCastingUserIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:[^/?#]+\.)?twitcasting\.tv/(?P<id>[^/?#]+)/(:?show|archive)/?(?:[#?]|$)'
+    _VALID_URL = r'https?://(?:[^/?#]+\.)?twitcasting\.tv/(?P<id>[^/?#]+)/(?:show|archive)/?(?:[#?]|$)'
    _TESTS = [{
        'url': 'https://twitcasting.tv/natsuiromatsuri/archive/',
        'info_dict': {
--- a/yt_dlp/extractor/viu.py
+++ b/yt_dlp/extractor/viu.py
@@ -90,7 +90,7 @@ class ViuIE(ViuBaseIE):
        formats, subtitles = self._extract_m3u8_formats_and_subtitles(m3u8_url, video_id, 'mp4')

        for key, value in video_data.items():
-            mobj = re.match(r'^subtitle_(?P<lang>[^_]+)_(?P<ext>(vtt|srt))', key)
+            mobj = re.match(r'subtitle_(?P<lang>[^_]+)_(?P<ext>(vtt|srt))', key)
            if not mobj:
                continue
            subtitles.setdefault(mobj.group('lang'), []).append({
--- a/yt_dlp/extractor/ximalaya.py
+++ b/yt_dlp/extractor/ximalaya.py
@@ -21,7 +21,7 @@ class XimalayaBaseIE(InfoExtractor):
 class XimalayaIE(XimalayaBaseIE):
    IE_NAME = 'ximalaya'
    IE_DESC = '喜马拉雅FM'
-    _VALID_URL = r'https?://(?:www\.|m\.)?ximalaya\.com/(:?(?P<uid>\d+)/)?sound/(?P<id>[0-9]+)'
+    _VALID_URL = r'https?://(?:www\.|m\.)?ximalaya\.com/(?:(?P<uid>\d+)/)?sound/(?P<id>[0-9]+)'
    _TESTS = [
        {
            'url': 'http://www.ximalaya.com/sound/47740352/',
--- a/yt_dlp/networking/_websockets.py
+++ b/yt_dlp/networking/_websockets.py
@@ -33,8 +33,8 @@ if not websockets:
 import websockets.version

 websockets_version = tuple(map(int_or_none, websockets.version.version.split('.')))
-if websockets_version < (12, 0):
-    raise ImportError('Only websockets>=12.0 is supported')
+if websockets_version < (13, 0):
+    raise ImportError('Only websockets>=13.0 is supported')

 import websockets.sync.client
 from websockets.uri import parse_uri
--- a/yt_dlp/options.py
+++ b/yt_dlp/options.py
@@ -647,16 +647,16 @@ def create_parser():
            'You can also simply specify a field to match if the field is present, '
            'use "!field" to check if the field is not present, and "&" to check multiple conditions. '
            'Use a "\\" to escape "&" or quotes if needed. If used multiple times, '
-            'the filter matches if at least one of the conditions is met. E.g. --match-filter '
-            '!is_live --match-filter "like_count>?100 & description~=\'(?i)\\bcats \\& dogs\\b\'" '
+            'the filter matches if at least one of the conditions is met. E.g. --match-filters '
+            '!is_live --match-filters "like_count>?100 & description~=\'(?i)\\bcats \\& dogs\\b\'" '
            'matches only videos that are not live OR those that have a like count more than 100 '
            '(or the like field is not available) and also has a description '
            'that contains the phrase "cats & dogs" (caseless). '
-            'Use "--match-filter -" to interactively ask whether to download each video'))
+            'Use "--match-filters -" to interactively ask whether to download each video'))
    selection.add_option(
        '--no-match-filters',
        dest='match_filter', action='store_const', const=None,
-        help='Do not use any --match-filter (default)')
+        help='Do not use any --match-filters (default)')
    selection.add_option(
        '--break-match-filters',
        metavar='FILTER', dest='breaking_match_filter', action='append',
@@ -704,7 +704,7 @@ def create_parser():
    selection.add_option(
        '--break-per-input',
        action='store_true', dest='break_per_url', default=False,
-        help='Alters --max-downloads, --break-on-existing, --break-match-filter, and autonumber to reset per input URL')
+        help='Alters --max-downloads, --break-on-existing, --break-match-filters, and autonumber to reset per input URL')
    selection.add_option(
        '--no-break-per-input',
        action='store_false', dest='break_per_url',
--- a/yt_dlp/postprocessor/sponsorblock.py
+++ b/yt_dlp/postprocessor/sponsorblock.py
@@ -33,7 +33,7 @@ class SponsorBlockPP(FFmpegPostProcessor):
    def __init__(self, downloader, categories=None, api='https://sponsor.ajay.app'):
        FFmpegPostProcessor.__init__(self, downloader)
        self._categories = tuple(categories or self.CATEGORIES.keys())
-        self._API_URL = api if re.match('^https?://', api) else 'https://' + api
+        self._API_URL = api if re.match('https?://', api) else 'https://' + api

    def run(self, info):
        extractor = info['extractor_key']
--- a/yt_dlp/utils/_utils.py
+++ b/yt_dlp/utils/_utils.py
@@ -1954,7 +1954,7 @@ def urljoin(base, path):
        path = path.decode()
    if not isinstance(path, str) or not path:
        return None
-    if re.match(r'^(?:[a-zA-Z][a-zA-Z0-9+-.]*:)?//', path):
+    if re.match(r'(?:[a-zA-Z][a-zA-Z0-9+-.]*:)?//', path):
        return path
    if isinstance(base, bytes):
        base = base.decode()
@@ -2007,7 +2007,7 @@ def url_or_none(url):
    if not url or not isinstance(url, str):
        return None
    url = url.strip()
-    return url if re.match(r'^(?:(?:https?|rt(?:m(?:pt?[es]?|fp)|sp[su]?)|mms|ftps?):)?//', url) else None
+    return url if re.match(r'(?:(?:https?|rt(?:m(?:pt?[es]?|fp)|sp[su]?)|mms|ftps?):)?//', url) else None


 def strftime_or_none(timestamp, date_format='%Y%m%d', default=None):
@@ -3113,7 +3113,7 @@ def is_html(first_bytes):
        while first_bytes.startswith(bom):
            encoding, first_bytes = enc, first_bytes[len(bom):]

-    return re.match(r'^\s*<', first_bytes.decode(encoding, 'replace'))
+    return re.match(r'\s*<', first_bytes.decode(encoding, 'replace'))


 def determine_protocol(info_dict):