[nova:embed] Fix extraction (closes #24700)
This commit is contained in:
		| @@ -6,6 +6,7 @@ import re | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     clean_html, | ||||
|     determine_ext, | ||||
|     int_or_none, | ||||
|     js_to_json, | ||||
|     qualities, | ||||
| @@ -33,42 +34,76 @@ class NovaEmbedIE(InfoExtractor): | ||||
|  | ||||
|         webpage = self._download_webpage(url, video_id) | ||||
|  | ||||
|         bitrates = self._parse_json( | ||||
|             self._search_regex( | ||||
|                 r'(?s)(?:src|bitrates)\s*=\s*({.+?})\s*;', webpage, 'formats'), | ||||
|             video_id, transform_source=js_to_json) | ||||
|  | ||||
|         QUALITIES = ('lq', 'mq', 'hq', 'hd') | ||||
|         quality_key = qualities(QUALITIES) | ||||
|  | ||||
|         duration = None | ||||
|         formats = [] | ||||
|         for format_id, format_list in bitrates.items(): | ||||
|             if not isinstance(format_list, list): | ||||
|                 format_list = [format_list] | ||||
|             for format_url in format_list: | ||||
|                 format_url = url_or_none(format_url) | ||||
|                 if not format_url: | ||||
|                     continue | ||||
|                 if format_id == 'hls': | ||||
|                     formats.extend(self._extract_m3u8_formats( | ||||
|                         format_url, video_id, ext='mp4', | ||||
|                         entry_protocol='m3u8_native', m3u8_id='hls', | ||||
|                         fatal=False)) | ||||
|                     continue | ||||
|                 f = { | ||||
|                     'url': format_url, | ||||
|                 } | ||||
|                 f_id = format_id | ||||
|                 for quality in QUALITIES: | ||||
|                     if '%s.mp4' % quality in format_url: | ||||
|                         f_id += '-%s' % quality | ||||
|                         f.update({ | ||||
|                             'quality': quality_key(quality), | ||||
|                             'format_note': quality.upper(), | ||||
|  | ||||
|         player = self._parse_json( | ||||
|             self._search_regex( | ||||
|                 r'Player\.init\s*\([^,]+,\s*({.+?})\s*,\s*{.+?}\s*\)\s*;', | ||||
|                 webpage, 'player', default='{}'), video_id, fatal=False) | ||||
|         if player: | ||||
|             for format_id, format_list in player['tracks'].items(): | ||||
|                 if not isinstance(format_list, list): | ||||
|                     format_list = [format_list] | ||||
|                 for format_dict in format_list: | ||||
|                     if not isinstance(format_dict, dict): | ||||
|                         continue | ||||
|                     format_url = url_or_none(format_dict.get('src')) | ||||
|                     format_type = format_dict.get('type') | ||||
|                     ext = determine_ext(format_url) | ||||
|                     if (format_type == 'application/x-mpegURL' | ||||
|                             or format_id == 'HLS' or ext == 'm3u8'): | ||||
|                         formats.extend(self._extract_m3u8_formats( | ||||
|                             format_url, video_id, 'mp4', | ||||
|                             entry_protocol='m3u8_native', m3u8_id='hls', | ||||
|                             fatal=False)) | ||||
|                     elif (format_type == 'application/dash+xml' | ||||
|                           or format_id == 'DASH' or ext == 'mpd'): | ||||
|                         formats.extend(self._extract_mpd_formats( | ||||
|                             format_url, video_id, mpd_id='dash', fatal=False)) | ||||
|                     else: | ||||
|                         formats.append({ | ||||
|                             'url': format_url, | ||||
|                         }) | ||||
|                         break | ||||
|                 f['format_id'] = f_id | ||||
|                 formats.append(f) | ||||
|             duration = int_or_none(player.get('duration')) | ||||
|         else: | ||||
|             # Old path, no longer used by the site as of 08.04.2020 | ||||
|             bitrates = self._parse_json( | ||||
|                 self._search_regex( | ||||
|                     r'(?s)(?:src|bitrates)\s*=\s*({.+?})\s*;', webpage, 'formats'), | ||||
|                 video_id, transform_source=js_to_json) | ||||
|  | ||||
|             QUALITIES = ('lq', 'mq', 'hq', 'hd') | ||||
|             quality_key = qualities(QUALITIES) | ||||
|  | ||||
|             for format_id, format_list in bitrates.items(): | ||||
|                 if not isinstance(format_list, list): | ||||
|                     format_list = [format_list] | ||||
|                 for format_url in format_list: | ||||
|                     format_url = url_or_none(format_url) | ||||
|                     if not format_url: | ||||
|                         continue | ||||
|                     if format_id == 'hls': | ||||
|                         formats.extend(self._extract_m3u8_formats( | ||||
|                             format_url, video_id, ext='mp4', | ||||
|                             entry_protocol='m3u8_native', m3u8_id='hls', | ||||
|                             fatal=False)) | ||||
|                         continue | ||||
|                     f = { | ||||
|                         'url': format_url, | ||||
|                     } | ||||
|                     f_id = format_id | ||||
|                     for quality in QUALITIES: | ||||
|                         if '%s.mp4' % quality in format_url: | ||||
|                             f_id += '-%s' % quality | ||||
|                             f.update({ | ||||
|                                 'quality': quality_key(quality), | ||||
|                                 'format_note': quality.upper(), | ||||
|                             }) | ||||
|                             break | ||||
|                     f['format_id'] = f_id | ||||
|                     formats.append(f) | ||||
|  | ||||
|         self._sort_formats(formats) | ||||
|  | ||||
|         title = self._og_search_title( | ||||
| @@ -81,7 +116,8 @@ class NovaEmbedIE(InfoExtractor): | ||||
|             r'poster\s*:\s*(["\'])(?P<value>(?:(?!\1).)+)\1', webpage, | ||||
|             'thumbnail', fatal=False, group='value') | ||||
|         duration = int_or_none(self._search_regex( | ||||
|             r'videoDuration\s*:\s*(\d+)', webpage, 'duration', fatal=False)) | ||||
|             r'videoDuration\s*:\s*(\d+)', webpage, 'duration', | ||||
|             default=duration)) | ||||
|  | ||||
|         return { | ||||
|             'id': video_id, | ||||
|   | ||||
		Reference in New Issue
	
	Block a user
	 Sergey M․
					Sergey M․