[youtube] Adapt to new automatic captions rendition (closes #13467)
This commit is contained in:
		| @@ -1269,37 +1269,57 @@ class YoutubeIE(YoutubeBaseInfoExtractor): | |||||||
|                     sub_lang_list[sub_lang] = sub_formats |                     sub_lang_list[sub_lang] = sub_formats | ||||||
|                 return sub_lang_list |                 return sub_lang_list | ||||||
|  |  | ||||||
|             # Some videos don't provide ttsurl but rather caption_tracks and |             def make_captions(sub_url, sub_langs): | ||||||
|             # caption_translation_languages (e.g. 20LmZk1hakA) |                 parsed_sub_url = compat_urllib_parse_urlparse(sub_url) | ||||||
|             caption_tracks = args['caption_tracks'] |                 caption_qs = compat_parse_qs(parsed_sub_url.query) | ||||||
|             caption_translation_languages = args['caption_translation_languages'] |                 captions = {} | ||||||
|             caption_url = compat_parse_qs(caption_tracks.split(',')[0])['u'][0] |                 for sub_lang in sub_langs: | ||||||
|             parsed_caption_url = compat_urllib_parse_urlparse(caption_url) |  | ||||||
|             caption_qs = compat_parse_qs(parsed_caption_url.query) |  | ||||||
|  |  | ||||||
|             sub_lang_list = {} |  | ||||||
|             for lang in caption_translation_languages.split(','): |  | ||||||
|                 lang_qs = compat_parse_qs(compat_urllib_parse_unquote_plus(lang)) |  | ||||||
|                 sub_lang = lang_qs.get('lc', [None])[0] |  | ||||||
|                 if not sub_lang: |  | ||||||
|                     continue |  | ||||||
|                     sub_formats = [] |                     sub_formats = [] | ||||||
|                     for ext in self._SUBTITLE_FORMATS: |                     for ext in self._SUBTITLE_FORMATS: | ||||||
|                         caption_qs.update({ |                         caption_qs.update({ | ||||||
|                             'tlang': [sub_lang], |                             'tlang': [sub_lang], | ||||||
|                             'fmt': [ext], |                             'fmt': [ext], | ||||||
|                         }) |                         }) | ||||||
|                     sub_url = compat_urlparse.urlunparse(parsed_caption_url._replace( |                         sub_url = compat_urlparse.urlunparse(parsed_sub_url._replace( | ||||||
|                             query=compat_urllib_parse_urlencode(caption_qs, True))) |                             query=compat_urllib_parse_urlencode(caption_qs, True))) | ||||||
|                         sub_formats.append({ |                         sub_formats.append({ | ||||||
|                             'url': sub_url, |                             'url': sub_url, | ||||||
|                             'ext': ext, |                             'ext': ext, | ||||||
|                         }) |                         }) | ||||||
|                 sub_lang_list[sub_lang] = sub_formats |                     captions[sub_lang] = sub_formats | ||||||
|             return sub_lang_list |                 return captions | ||||||
|  |  | ||||||
|  |             # New captions format as of 22.06.2017 | ||||||
|  |             player_response = args.get('player_response') | ||||||
|  |             if player_response and isinstance(player_response, compat_str): | ||||||
|  |                 player_response = self._parse_json( | ||||||
|  |                     player_response, video_id, fatal=False) | ||||||
|  |                 if player_response: | ||||||
|  |                     renderer = player_response['captions']['playerCaptionsTracklistRenderer'] | ||||||
|  |                     base_url = renderer['captionTracks'][0]['baseUrl'] | ||||||
|  |                     sub_lang_list = [] | ||||||
|  |                     for lang in renderer['translationLanguages']: | ||||||
|  |                         lang_code = lang.get('languageCode') | ||||||
|  |                         if lang_code: | ||||||
|  |                             sub_lang_list.append(lang_code) | ||||||
|  |                     return make_captions(base_url, sub_lang_list) | ||||||
|  |  | ||||||
|  |             # Some videos don't provide ttsurl but rather caption_tracks and | ||||||
|  |             # caption_translation_languages (e.g. 20LmZk1hakA) | ||||||
|  |             # Not used anymore as of 22.06.2017 | ||||||
|  |             caption_tracks = args['caption_tracks'] | ||||||
|  |             caption_translation_languages = args['caption_translation_languages'] | ||||||
|  |             caption_url = compat_parse_qs(caption_tracks.split(',')[0])['u'][0] | ||||||
|  |             sub_lang_list = [] | ||||||
|  |             for lang in caption_translation_languages.split(','): | ||||||
|  |                 lang_qs = compat_parse_qs(compat_urllib_parse_unquote_plus(lang)) | ||||||
|  |                 sub_lang = lang_qs.get('lc', [None])[0] | ||||||
|  |                 if sub_lang: | ||||||
|  |                     sub_lang_list.append(sub_lang) | ||||||
|  |             return make_captions(caption_url, sub_lang_list) | ||||||
|         # An extractor error can be raised by the download process if there are |         # An extractor error can be raised by the download process if there are | ||||||
|         # no automatic captions but there are subtitles |         # no automatic captions but there are subtitles | ||||||
|         except (KeyError, ExtractorError): |         except (KeyError, IndexError, ExtractorError): | ||||||
|             self._downloader.report_warning(err_msg) |             self._downloader.report_warning(err_msg) | ||||||
|             return {} |             return {} | ||||||
|  |  | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user
	 Sergey M․
					Sergey M․