Fix some regexes (escape literal dots in patterns)
This commit is contained in:
		| @@ -131,7 +131,7 @@ class AENetworksIE(AENetworksBaseIE): | ||||
|              r'data-media-url=(["\'])(?P<url>(?:(?!\1).)+?)\1'], | ||||
|             webpage, 'video url', group='url') | ||||
|         theplatform_metadata = self._download_theplatform_metadata(self._search_regex( | ||||
|             r'https?://link.theplatform.com/s/([^?]+)', media_url, 'theplatform_path'), video_id) | ||||
|             r'https?://link\.theplatform\.com/s/([^?]+)', media_url, 'theplatform_path'), video_id) | ||||
|         info = self._parse_theplatform_metadata(theplatform_metadata) | ||||
|         if theplatform_metadata.get('AETN$isBehindWall'): | ||||
|             requestor_id = self._DOMAIN_TO_REQUESTOR_ID[domain] | ||||
|   | ||||
| @@ -117,7 +117,7 @@ class AppleTrailersIE(InfoExtractor): | ||||
|                             continue | ||||
|                         formats.append({ | ||||
|                             'format_id': '%s-%s' % (version, size), | ||||
|                             'url': re.sub(r'_(\d+p.mov)', r'_h\1', src), | ||||
|                             'url': re.sub(r'_(\d+p\.mov)', r'_h\1', src), | ||||
|                             'width': int_or_none(size_data.get('width')), | ||||
|                             'height': int_or_none(size_data.get('height')), | ||||
|                             'language': version[:2], | ||||
| @@ -179,7 +179,7 @@ class AppleTrailersIE(InfoExtractor): | ||||
|             formats = [] | ||||
|             for format in settings['metadata']['sizes']: | ||||
|                 # The src is a file pointing to the real video file | ||||
|                 format_url = re.sub(r'_(\d*p.mov)', r'_h\1', format['src']) | ||||
|                 format_url = re.sub(r'_(\d*p\.mov)', r'_h\1', format['src']) | ||||
|                 formats.append({ | ||||
|                     'url': format_url, | ||||
|                     'format': format['type'], | ||||
|   | ||||
| @@ -195,7 +195,7 @@ class ARDMediathekIE(InfoExtractor): | ||||
|  | ||||
|         title = self._html_search_regex( | ||||
|             [r'<h1(?:\s+class="boxTopHeadline")?>(.*?)</h1>', | ||||
|              r'<meta name="dcterms.title" content="(.*?)"/>', | ||||
|              r'<meta name="dcterms\.title" content="(.*?)"/>', | ||||
|              r'<h4 class="headline">(.*?)</h4>'], | ||||
|             webpage, 'title') | ||||
|         description = self._html_search_meta( | ||||
|   | ||||
| @@ -386,7 +386,7 @@ class BBCCoUkIE(InfoExtractor): | ||||
|                             m3u8_id=format_id, fatal=False)) | ||||
|                         if re.search(self._USP_RE, href): | ||||
|                             usp_formats = self._extract_m3u8_formats( | ||||
|                                 re.sub(self._USP_RE, r'/\1.ism/\1.m3u8', href), | ||||
|                                 re.sub(self._USP_RE, r'/\1\.ism/\1\.m3u8', href), | ||||
|                                 programme_id, ext='mp4', entry_protocol='m3u8_native', | ||||
|                                 m3u8_id=format_id, fatal=False) | ||||
|                             for f in usp_formats: | ||||
|   | ||||
| @@ -235,7 +235,7 @@ class DailymotionIE(DailymotionBaseInfoExtractor): | ||||
|  | ||||
|         # vevo embed | ||||
|         vevo_id = self._search_regex( | ||||
|             r'<link rel="video_src" href="[^"]*?vevo.com[^"]*?video=(?P<id>[\w]*)', | ||||
|             r'<link rel="video_src" href="[^"]*?vevo\.com[^"]*?video=(?P<id>[\w]*)', | ||||
|             webpage, 'vevo embed', default=None) | ||||
|         if vevo_id: | ||||
|             return self.url_result('vevo:%s' % vevo_id, 'Vevo') | ||||
|   | ||||
| @@ -19,7 +19,7 @@ class DeezerPlaylistIE(InfoExtractor): | ||||
|             'id': '176747451', | ||||
|             'title': 'Best!', | ||||
|             'uploader': 'Anonymous', | ||||
|             'thumbnail': r're:^https?://cdn-images.deezer.com/images/cover/.*\.jpg$', | ||||
|             'thumbnail': r're:^https?://cdn-images\.deezer\.com/images/cover/.*\.jpg$', | ||||
|         }, | ||||
|         'playlist_count': 30, | ||||
|         'skip': 'Only available in .de', | ||||
|   | ||||
| @@ -27,7 +27,7 @@ class FreespeechIE(InfoExtractor): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         title = mobj.group('title') | ||||
|         webpage = self._download_webpage(url, title) | ||||
|         info_json = self._search_regex(r'jQuery.extend\(Drupal.settings, ({.*?})\);', webpage, 'info') | ||||
|         info_json = self._search_regex(r'jQuery\.extend\(Drupal\.settings, ({.*?})\);', webpage, 'info') | ||||
|         info = json.loads(info_json) | ||||
|  | ||||
|         return { | ||||
|   | ||||
| @@ -2206,7 +2206,7 @@ class GenericIE(InfoExtractor): | ||||
|         # And then there are the jokers who advertise that they use RTA, | ||||
|         # but actually don't. | ||||
|         AGE_LIMIT_MARKERS = [ | ||||
|             r'Proudly Labeled <a href="http://www.rtalabel.org/" title="Restricted to Adults">RTA</a>', | ||||
|             r'Proudly Labeled <a href="http://www\.rtalabel\.org/" title="Restricted to Adults">RTA</a>', | ||||
|         ] | ||||
|         if any(re.search(marker, webpage) for marker in AGE_LIMIT_MARKERS): | ||||
|             age_limit = 18 | ||||
|   | ||||
| @@ -61,7 +61,7 @@ class GooglePlusIE(InfoExtractor): | ||||
|             'width': int(width), | ||||
|             'height': int(height), | ||||
|         } for width, height, video_url in re.findall( | ||||
|             r'\d+,(\d+),(\d+),"(https?://[^.]+\.googleusercontent.com.*?)"', webpage)] | ||||
|             r'\d+,(\d+),(\d+),"(https?://[^.]+\.googleusercontent\.com.*?)"', webpage)] | ||||
|         self._sort_formats(formats) | ||||
|  | ||||
|         return { | ||||
|   | ||||
| @@ -173,7 +173,7 @@ class HRTiIE(HRTiBaseIE): | ||||
|  | ||||
|  | ||||
| class HRTiPlaylistIE(HRTiBaseIE): | ||||
|     _VALID_URL = r'https?://hrti.hrt.hr/#/video/list/category/(?P<id>[0-9]+)/(?P<display_id>[^/]+)?' | ||||
|     _VALID_URL = r'https?://hrti\.hrt\.hr/#/video/list/category/(?P<id>[0-9]+)/(?P<display_id>[^/]+)?' | ||||
|     _TESTS = [{ | ||||
|         'url': 'https://hrti.hrt.hr/#/video/list/category/212/ekumena', | ||||
|         'info_dict': { | ||||
|   | ||||
| @@ -203,7 +203,7 @@ class PCMagIE(IGNIE): | ||||
|     _VALID_URL = r'https?://(?:www\.)?pcmag\.com/(?P<type>videos|article2)(/.+)?/(?P<name_or_id>.+)' | ||||
|     IE_NAME = 'pcmag' | ||||
|  | ||||
|     _EMBED_RE = r'iframe.setAttribute\("src",\s*__util.objToUrlString\("http://widgets\.ign\.com/video/embed/content.html?[^"]*url=([^"]+)["&]' | ||||
|     _EMBED_RE = r'iframe\.setAttribute\("src",\s*__util.objToUrlString\("http://widgets\.ign\.com/video/embed/content\.html?[^"]*url=([^"]+)["&]' | ||||
|  | ||||
|     _TESTS = [{ | ||||
|         'url': 'http://www.pcmag.com/videos/2015/01/06/010615-whats-new-now-is-gogo-snooping-on-your-data', | ||||
|   | ||||
| @@ -69,9 +69,9 @@ class InfoQIE(BokeCCBaseIE): | ||||
|         }] | ||||
|  | ||||
|     def _extract_cookies(self, webpage): | ||||
|         policy = self._search_regex(r'InfoQConstants.scp\s*=\s*\'([^\']+)\'', webpage, 'policy') | ||||
|         signature = self._search_regex(r'InfoQConstants.scs\s*=\s*\'([^\']+)\'', webpage, 'signature') | ||||
|         key_pair_id = self._search_regex(r'InfoQConstants.sck\s*=\s*\'([^\']+)\'', webpage, 'key-pair-id') | ||||
|         policy = self._search_regex(r'InfoQConstants\.scp\s*=\s*\'([^\']+)\'', webpage, 'policy') | ||||
|         signature = self._search_regex(r'InfoQConstants\.scs\s*=\s*\'([^\']+)\'', webpage, 'signature') | ||||
|         key_pair_id = self._search_regex(r'InfoQConstants\.sck\s*=\s*\'([^\']+)\'', webpage, 'key-pair-id') | ||||
|         return 'CloudFront-Policy=%s; CloudFront-Signature=%s; CloudFront-Key-Pair-Id=%s' % ( | ||||
|             policy, signature, key_pair_id) | ||||
|  | ||||
|   | ||||
| @@ -30,7 +30,7 @@ class JeuxVideoIE(InfoExtractor): | ||||
|         webpage = self._download_webpage(url, title) | ||||
|         title = self._html_search_meta('name', webpage) or self._og_search_title(webpage) | ||||
|         config_url = self._html_search_regex( | ||||
|             r'data-src(?:set-video)?="(/contenu/medias/video.php.*?)"', | ||||
|             r'data-src(?:set-video)?="(/contenu/medias/video\.php.*?)"', | ||||
|             webpage, 'config URL') | ||||
|         config_url = 'http://www.jeuxvideo.com' + config_url | ||||
|  | ||||
|   | ||||
| @@ -338,7 +338,7 @@ class LivestreamOriginalIE(InfoExtractor): | ||||
|                 info = { | ||||
|                     'title': self._og_search_title(webpage), | ||||
|                     'description': self._og_search_description(webpage), | ||||
|                     'thumbnail': self._search_regex(r'channelLogo.src\s*=\s*"([^"]+)"', webpage, 'thumbnail', None), | ||||
|                     'thumbnail': self._search_regex(r'channelLogo\.src\s*=\s*"([^"]+)"', webpage, 'thumbnail', None), | ||||
|                 } | ||||
|             video_data = self._download_json(stream_url, content_id) | ||||
|             is_live = video_data.get('isLive') | ||||
|   | ||||
| @@ -5,7 +5,7 @@ from .common import InfoExtractor | ||||
|  | ||||
|  | ||||
| class MakerTVIE(InfoExtractor): | ||||
|     _VALID_URL = r'https?://(?:(?:www\.)?maker\.tv/(?:[^/]+/)*video|makerplayer.com/embed/maker)/(?P<id>[a-zA-Z0-9]{12})' | ||||
|     _VALID_URL = r'https?://(?:(?:www\.)?maker\.tv/(?:[^/]+/)*video|makerplayer\.com/embed/maker)/(?P<id>[a-zA-Z0-9]{12})' | ||||
|     _TEST = { | ||||
|         'url': 'http://www.maker.tv/video/Fh3QgymL9gsc', | ||||
|         'md5': 'ca237a53a8eb20b6dc5bd60564d4ab3e', | ||||
|   | ||||
| @@ -22,7 +22,7 @@ class MangomoloBaseIE(InfoExtractor): | ||||
|  | ||||
|         format_url = self._html_search_regex( | ||||
|             [ | ||||
|                 r'file\s*:\s*"(https?://[^"]+?/playlist.m3u8)', | ||||
|                 r'file\s*:\s*"(https?://[^"]+?/playlist\.m3u8)', | ||||
|                 r'<a[^>]+href="(rtsp://[^"]+)"' | ||||
|             ], webpage, 'format url') | ||||
|         formats = self._extract_wowza_formats( | ||||
|   | ||||
| @@ -11,7 +11,7 @@ from ..utils import ( | ||||
|  | ||||
| class MeipaiIE(InfoExtractor): | ||||
|     IE_DESC = '美拍' | ||||
|     _VALID_URL = r'https?://(?:www\.)?meipai.com/media/(?P<id>[0-9]+)' | ||||
|     _VALID_URL = r'https?://(?:www\.)?meipai\.com/media/(?P<id>[0-9]+)' | ||||
|     _TESTS = [{ | ||||
|         # regular uploaded video | ||||
|         'url': 'http://www.meipai.com/media/531697625', | ||||
|   | ||||
| @@ -258,7 +258,7 @@ class MTVServicesInfoExtractor(InfoExtractor): | ||||
|  | ||||
|         if mgid is None or ':' not in mgid: | ||||
|             mgid = self._search_regex( | ||||
|                 [r'data-mgid="(.*?)"', r'swfobject.embedSWF\(".*?(mgid:.*?)"'], | ||||
|                 [r'data-mgid="(.*?)"', r'swfobject\.embedSWF\(".*?(mgid:.*?)"'], | ||||
|                 webpage, 'mgid', default=None) | ||||
|  | ||||
|         if not mgid: | ||||
|   | ||||
| @@ -160,7 +160,7 @@ class MyVideoIE(InfoExtractor): | ||||
|         else: | ||||
|             video_playpath = '' | ||||
|  | ||||
|         video_swfobj = self._search_regex(r'swfobject.embedSWF\(\'(.+?)\'', webpage, 'swfobj') | ||||
|         video_swfobj = self._search_regex(r'swfobject\.embedSWF\(\'(.+?)\'', webpage, 'swfobj') | ||||
|         video_swfobj = compat_urllib_parse_unquote(video_swfobj) | ||||
|  | ||||
|         video_title = self._html_search_regex("<h1(?: class='globalHd')?>(.*?)</h1>", | ||||
|   | ||||
| @@ -111,7 +111,7 @@ class NationalGeographicIE(ThePlatformIE, AdobePassIE): | ||||
|         release_url = self._search_regex( | ||||
|             r'video_auth_playlist_url\s*=\s*"([^"]+)"', | ||||
|             webpage, 'release url') | ||||
|         theplatform_path = self._search_regex(r'https?://link.theplatform.com/s/([^?]+)', release_url, 'theplatform path') | ||||
|         theplatform_path = self._search_regex(r'https?://link\.theplatform\.com/s/([^?]+)', release_url, 'theplatform path') | ||||
|         video_id = theplatform_path.split('/')[-1] | ||||
|         query = { | ||||
|             'mbr': 'true', | ||||
|   | ||||
| @@ -43,7 +43,7 @@ class NaverIE(InfoExtractor): | ||||
|         video_id = self._match_id(url) | ||||
|         webpage = self._download_webpage(url, video_id) | ||||
|  | ||||
|         m_id = re.search(r'var rmcPlayer = new nhn.rmcnmv.RMCVideoPlayer\("(.+?)", "(.+?)"', | ||||
|         m_id = re.search(r'var rmcPlayer = new nhn\.rmcnmv\.RMCVideoPlayer\("(.+?)", "(.+?)"', | ||||
|                          webpage) | ||||
|         if m_id is None: | ||||
|             error = self._html_search_regex( | ||||
|   | ||||
| @@ -469,7 +469,7 @@ class SchoolTVIE(NPODataMidEmbedIE): | ||||
|  | ||||
| class HetKlokhuisIE(NPODataMidEmbedIE): | ||||
|     IE_NAME = 'hetklokhuis' | ||||
|     _VALID_URL = r'https?://(?:www\.)?hetklokhuis.nl/[^/]+/\d+/(?P<id>[^/?#&]+)' | ||||
|     _VALID_URL = r'https?://(?:www\.)?hetklokhuis\.nl/[^/]+/\d+/(?P<id>[^/?#&]+)' | ||||
|  | ||||
|     _TEST = { | ||||
|         'url': 'http://hetklokhuis.nl/tv-uitzending/3471/Zwaartekrachtsgolven', | ||||
|   | ||||
| @@ -25,7 +25,7 @@ class RUHDIE(InfoExtractor): | ||||
|         video_url = self._html_search_regex( | ||||
|             r'<param name="src" value="([^"]+)"', webpage, 'video url') | ||||
|         title = self._html_search_regex( | ||||
|             r'<title>([^<]+)   RUHD.ru - Видео Высокого качества №1 в России!</title>', | ||||
|             r'<title>([^<]+)   RUHD\.ru - Видео Высокого качества №1 в России!</title>', | ||||
|             webpage, 'title') | ||||
|         description = self._html_search_regex( | ||||
|             r'(?s)<div id="longdesc">(.+?)<span id="showlink">', | ||||
|   | ||||
| @@ -66,7 +66,7 @@ class StanfordOpenClassroomIE(InfoExtractor): | ||||
|                 r'(?s)<description>([^<]+)</description>', | ||||
|                 coursepage, 'description', fatal=False) | ||||
|  | ||||
|             links = orderedSet(re.findall(r'<a href="(VideoPage.php\?[^"]+)">', coursepage)) | ||||
|             links = orderedSet(re.findall(r'<a href="(VideoPage\.php\?[^"]+)">', coursepage)) | ||||
|             info['entries'] = [self.url_result( | ||||
|                 'http://openclassroom.stanford.edu/MainFolder/%s' % unescapeHTML(l) | ||||
|             ) for l in links] | ||||
| @@ -84,7 +84,7 @@ class StanfordOpenClassroomIE(InfoExtractor): | ||||
|             rootpage = self._download_webpage(rootURL, info['id'], | ||||
|                                               errnote='Unable to download course info page') | ||||
|  | ||||
|             links = orderedSet(re.findall(r'<a href="(CoursePage.php\?[^"]+)">', rootpage)) | ||||
|             links = orderedSet(re.findall(r'<a href="(CoursePage\.php\?[^"]+)">', rootpage)) | ||||
|             info['entries'] = [self.url_result( | ||||
|                 'http://openclassroom.stanford.edu/MainFolder/%s' % unescapeHTML(l) | ||||
|             ) for l in links] | ||||
|   | ||||
| @@ -216,7 +216,7 @@ class ThePlatformIE(ThePlatformBaseIE, AdobePassIE): | ||||
|         def hex_to_bytes(hex): | ||||
|             return binascii.a2b_hex(hex.encode('ascii')) | ||||
|  | ||||
|         relative_path = re.match(r'https?://link.theplatform.com/s/([^?]+)', url).group(1) | ||||
|         relative_path = re.match(r'https?://link\.theplatform\.com/s/([^?]+)', url).group(1) | ||||
|         clear_text = hex_to_bytes(flags + expiration_date + str_to_hex(relative_path)) | ||||
|         checksum = hmac.new(sig_key.encode('ascii'), clear_text, hashlib.sha1).hexdigest() | ||||
|         sig = flags + expiration_date + checksum + str_to_hex(sig_secret) | ||||
|   | ||||
| @@ -57,10 +57,10 @@ class ThisAVIE(InfoExtractor): | ||||
|                 info_dict = self._extract_jwplayer_data( | ||||
|                     webpage, video_id, require_title=False) | ||||
|         uploader = self._html_search_regex( | ||||
|             r': <a href="http://www.thisav.com/user/[0-9]+/(?:[^"]+)">([^<]+)</a>', | ||||
|             r': <a href="http://www\.thisav\.com/user/[0-9]+/(?:[^"]+)">([^<]+)</a>', | ||||
|             webpage, 'uploader name', fatal=False) | ||||
|         uploader_id = self._html_search_regex( | ||||
|             r': <a href="http://www.thisav.com/user/[0-9]+/([^"]+)">(?:[^<]+)</a>', | ||||
|             r': <a href="http://www\.thisav\.com/user/[0-9]+/([^"]+)">(?:[^<]+)</a>', | ||||
|             webpage, 'uploader id', fatal=False) | ||||
|  | ||||
|         info_dict.update({ | ||||
|   | ||||
| @@ -174,7 +174,7 @@ class TwitterCardIE(TwitterBaseIE): | ||||
|         webpage = self._download_webpage(url, video_id) | ||||
|  | ||||
|         iframe_url = self._html_search_regex( | ||||
|             r'<iframe[^>]+src="((?:https?:)?//(?:www.youtube.com/embed/[^"]+|(?:www\.)?vine\.co/v/\w+/card))"', | ||||
|             r'<iframe[^>]+src="((?:https?:)?//(?:www\.youtube\.com/embed/[^"]+|(?:www\.)?vine\.co/v/\w+/card))"', | ||||
|             webpage, 'video iframe', default=None) | ||||
|         if iframe_url: | ||||
|             return self.url_result(iframe_url) | ||||
|   | ||||
| @@ -198,7 +198,7 @@ class ViceShowIE(InfoExtractor): | ||||
|  | ||||
| class ViceArticleIE(InfoExtractor): | ||||
|     IE_NAME = 'vice:article' | ||||
|     _VALID_URL = r'https://www.vice.com/[^/]+/article/(?P<id>[^?#]+)' | ||||
|     _VALID_URL = r'https://www\.vice\.com/[^/]+/article/(?P<id>[^?#]+)' | ||||
|  | ||||
|     _TESTS = [{ | ||||
|         'url': 'https://www.vice.com/en_us/article/on-set-with-the-woman-making-mormon-porn-in-utah', | ||||
|   | ||||
| @@ -26,7 +26,7 @@ class VideoPremiumIE(InfoExtractor): | ||||
|         webpage_url = 'http://videopremium.tv/' + video_id | ||||
|         webpage = self._download_webpage(webpage_url, video_id) | ||||
|  | ||||
|         if re.match(r'^<html><head><script[^>]*>window.location\s*=', webpage): | ||||
|         if re.match(r'^<html><head><script[^>]*>window\.location\s*=', webpage): | ||||
|             # Download again, we need a cookie | ||||
|             webpage = self._download_webpage( | ||||
|                 webpage_url, video_id, | ||||
|   | ||||
| @@ -1683,7 +1683,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): | ||||
|         video_uploader_id = None | ||||
|         video_uploader_url = None | ||||
|         mobj = re.search( | ||||
|             r'<link itemprop="url" href="(?P<uploader_url>https?://www.youtube.com/(?:user|channel)/(?P<uploader_id>[^"]+))">', | ||||
|             r'<link itemprop="url" href="(?P<uploader_url>https?://www\.youtube\.com/(?:user|channel)/(?P<uploader_id>[^"]+))">', | ||||
|             video_webpage) | ||||
|         if mobj is not None: | ||||
|             video_uploader_id = mobj.group('uploader_id') | ||||
|   | ||||
		Reference in New Issue
	
	Block a user
	 Sergey M․
					Sergey M․