[yahoo] Process query result embedded in webpage
This commit is contained in:
		| @@ -144,6 +144,20 @@ class YahooIE(InfoExtractor): | ||||
|         }, { | ||||
|             'url': 'https://tw.news.yahoo.com/-100120367.html', | ||||
|             'only_matching': True, | ||||
|         }, { | ||||
|             # The query result is embedded in the webpage, but an explicit request to the video API fails due to geo restriction | ||||
|             'url': 'https://screen.yahoo.com/community/communitary-community-episode-1-ladders-154501237.html', | ||||
|             'info_dict': { | ||||
|                 'id': '1f32853c-a271-3eef-8cb6-f6d6872cb504', | ||||
|                 'ext': 'mp4', | ||||
|                 'title': 'Communitary - Community Episode 1: Ladders', | ||||
|                 'description': 'md5:8fc39608213295748e1e289807838c97', | ||||
|                 'duration': 1646, | ||||
|             }, | ||||
|             'params': { | ||||
|                 # m3u8 download | ||||
|                 'skip_download': True, | ||||
|             }, | ||||
|         } | ||||
|     ] | ||||
|  | ||||
| @@ -171,6 +185,19 @@ class YahooIE(InfoExtractor): | ||||
|         if nbc_sports_url: | ||||
|             return self.url_result(nbc_sports_url, 'NBCSportsVPlayer') | ||||
|  | ||||
|         # The query result is often embedded in the webpage as JSON. Explicit requests | ||||
|         # to the video API sometimes fail because of geo restriction, so it is | ||||
|         # reasonable to use the embedded query result whenever it is present. | ||||
|         config_json = self._search_regex( | ||||
|             r'window\.Af\.bootstrap\[[^\]]+\]\s*=\s*({.*?"applet_type"\s*:\s*"td-applet-videoplayer".*?});(?:</script>|$)', | ||||
|             webpage, 'videoplayer applet', default=None) | ||||
|         if config_json: | ||||
|             config = self._parse_json(config_json, display_id, fatal=False) | ||||
|             if config: | ||||
|                 sapi = config.get('models', {}).get('applet_model', {}).get('data', {}).get('sapi') | ||||
|                 if sapi: | ||||
|                     return self._extract_info(display_id, sapi, webpage) | ||||
|  | ||||
|         items_json = self._search_regex( | ||||
|             r'mediaItems: ({.*?})$', webpage, 'items', flags=re.MULTILINE, | ||||
|             default=None) | ||||
| @@ -190,22 +217,10 @@ class YahooIE(InfoExtractor): | ||||
|             video_id = info['id'] | ||||
|         return self._get_info(video_id, display_id, webpage) | ||||
|  | ||||
|     def _get_info(self, video_id, display_id, webpage): | ||||
|         region = self._search_regex( | ||||
|             r'\\?"region\\?"\s*:\s*\\?"([^"]+?)\\?"', | ||||
|             webpage, 'region', fatal=False, default='US') | ||||
|         data = compat_urllib_parse.urlencode({ | ||||
|             'protocol': 'http', | ||||
|             'region': region, | ||||
|         }) | ||||
|         query_url = ( | ||||
|             'https://video.media.yql.yahoo.com/v1/video/sapi/streams/' | ||||
|             '{id}?{data}'.format(id=video_id, data=data)) | ||||
|         query_result = self._download_json( | ||||
|             query_url, display_id, 'Downloading video info') | ||||
|  | ||||
|         info = query_result['query']['results']['mediaObj'][0] | ||||
|     def _extract_info(self, display_id, query, webpage): | ||||
|         info = query['query']['results']['mediaObj'][0] | ||||
|         meta = info.get('meta') | ||||
|         video_id = info.get('id') | ||||
|  | ||||
|         if not meta: | ||||
|             msg = info['status'].get('msg') | ||||
| @@ -231,6 +246,9 @@ class YahooIE(InfoExtractor): | ||||
|                     'ext': 'flv', | ||||
|                 }) | ||||
|             else: | ||||
|                 if s.get('format') == 'm3u8_playlist': | ||||
|                     format_info['protocol'] = 'm3u8_native' | ||||
|                     format_info['ext'] = 'mp4' | ||||
|                 format_url = compat_urlparse.urljoin(host, path) | ||||
|                 format_info['url'] = format_url | ||||
|             formats.append(format_info) | ||||
| @@ -264,6 +282,21 @@ class YahooIE(InfoExtractor): | ||||
|             'subtitles': subtitles, | ||||
|         } | ||||
|  | ||||
|     def _get_info(self, video_id, display_id, webpage): | ||||
|         # Fetch the query result for video_id from the video API and hand it to | ||||
|         # _extract_info, whose return value is returned unchanged. | ||||
|         # The region is read from the webpage (the quotes around the key and value | ||||
|         # may be backslash-escaped); 'US' is used when no region is found. | ||||
|         region = self._search_regex( | ||||
|             r'\\?"region\\?"\s*:\s*\\?"([^"]+?)\\?"', | ||||
|             webpage, 'region', fatal=False, default='US') | ||||
|         # Query-string parameters sent with the API request. | ||||
|         data = compat_urllib_parse.urlencode({ | ||||
|             'protocol': 'http', | ||||
|             'region': region, | ||||
|         }) | ||||
|         # The video id goes into the URL path; the encoded parameters follow as the query string. | ||||
|         query_url = ( | ||||
|             'https://video.media.yql.yahoo.com/v1/video/sapi/streams/' | ||||
|             '{id}?{data}'.format(id=video_id, data=data)) | ||||
|         # display_id is passed along with the request; the note text is 'Downloading video info'. | ||||
|         query_result = self._download_json( | ||||
|             query_url, display_id, 'Downloading video info') | ||||
|         # The downloaded JSON is processed the same way as a query result embedded in the webpage. | ||||
|         return self._extract_info(display_id, query_result, webpage) | ||||
|  | ||||
|  | ||||
| class YahooSearchIE(SearchInfoExtractor): | ||||
|     IE_DESC = 'Yahoo screen search' | ||||
|   | ||||
		Reference in New Issue
	
	Block a user
	 Sergey M․
					Sergey M․