[itv] Make SOAP request non-fatal and extract metadata from a webpage (closes #16780)
This commit is contained in:
		| @@ -18,6 +18,7 @@ from ..utils import ( | |||||||
|     xpath_element, |     xpath_element, | ||||||
|     xpath_text, |     xpath_text, | ||||||
|     int_or_none, |     int_or_none, | ||||||
|  |     merge_dicts, | ||||||
|     parse_duration, |     parse_duration, | ||||||
|     smuggle_url, |     smuggle_url, | ||||||
|     ExtractorError, |     ExtractorError, | ||||||
| @@ -129,64 +130,65 @@ class ITVIE(InfoExtractor): | |||||||
|  |  | ||||||
|         resp_env = self._download_xml( |         resp_env = self._download_xml( | ||||||
|             params['data-playlist-url'], video_id, |             params['data-playlist-url'], video_id, | ||||||
|             headers=headers, data=etree.tostring(req_env)) |             headers=headers, data=etree.tostring(req_env), fatal=False) | ||||||
|         playlist = xpath_element(resp_env, './/Playlist') |         if resp_env: | ||||||
|         if playlist is None: |             playlist = xpath_element(resp_env, './/Playlist') | ||||||
|             fault_code = xpath_text(resp_env, './/faultcode') |             if playlist is None: | ||||||
|             fault_string = xpath_text(resp_env, './/faultstring') |                 fault_code = xpath_text(resp_env, './/faultcode') | ||||||
|             if fault_code == 'InvalidGeoRegion': |                 fault_string = xpath_text(resp_env, './/faultstring') | ||||||
|                 self.raise_geo_restricted( |                 if fault_code == 'InvalidGeoRegion': | ||||||
|                     msg=fault_string, countries=self._GEO_COUNTRIES) |                     self.raise_geo_restricted( | ||||||
|             elif fault_code not in ( |                         msg=fault_string, countries=self._GEO_COUNTRIES) | ||||||
|                     'InvalidEntity', 'InvalidVodcrid', 'ContentUnavailable'): |                 elif fault_code not in ( | ||||||
|                 raise ExtractorError( |                         'InvalidEntity', 'InvalidVodcrid', 'ContentUnavailable'): | ||||||
|                     '%s said: %s' % (self.IE_NAME, fault_string), expected=True) |                     raise ExtractorError( | ||||||
|             info.update({ |                         '%s said: %s' % (self.IE_NAME, fault_string), expected=True) | ||||||
|                 'title': self._og_search_title(webpage), |                 info.update({ | ||||||
|                 'episode_title': params.get('data-video-episode'), |                     'title': self._og_search_title(webpage), | ||||||
|                 'series': params.get('data-video-title'), |                     'episode_title': params.get('data-video-episode'), | ||||||
|             }) |                     'series': params.get('data-video-title'), | ||||||
|         else: |                 }) | ||||||
|             title = xpath_text(playlist, 'EpisodeTitle', default=None) |             else: | ||||||
|             info.update({ |                 title = xpath_text(playlist, 'EpisodeTitle', default=None) | ||||||
|                 'title': title, |                 info.update({ | ||||||
|                 'episode_title': title, |                     'title': title, | ||||||
|                 'episode_number': int_or_none(xpath_text(playlist, 'EpisodeNumber')), |                     'episode_title': title, | ||||||
|                 'series': xpath_text(playlist, 'ProgrammeTitle'), |                     'episode_number': int_or_none(xpath_text(playlist, 'EpisodeNumber')), | ||||||
|                 'duration': parse_duration(xpath_text(playlist, 'Duration')), |                     'series': xpath_text(playlist, 'ProgrammeTitle'), | ||||||
|             }) |                     'duration': parse_duration(xpath_text(playlist, 'Duration')), | ||||||
|             video_element = xpath_element(playlist, 'VideoEntries/Video', fatal=True) |                 }) | ||||||
|             media_files = xpath_element(video_element, 'MediaFiles', fatal=True) |                 video_element = xpath_element(playlist, 'VideoEntries/Video', fatal=True) | ||||||
|             rtmp_url = media_files.attrib['base'] |                 media_files = xpath_element(video_element, 'MediaFiles', fatal=True) | ||||||
|  |                 rtmp_url = media_files.attrib['base'] | ||||||
|  |  | ||||||
|             for media_file in media_files.findall('MediaFile'): |                 for media_file in media_files.findall('MediaFile'): | ||||||
|                 play_path = xpath_text(media_file, 'URL') |                     play_path = xpath_text(media_file, 'URL') | ||||||
|                 if not play_path: |                     if not play_path: | ||||||
|                     continue |                         continue | ||||||
|                 tbr = int_or_none(media_file.get('bitrate'), 1000) |                     tbr = int_or_none(media_file.get('bitrate'), 1000) | ||||||
|                 f = { |                     f = { | ||||||
|                     'format_id': 'rtmp' + ('-%d' % tbr if tbr else ''), |                         'format_id': 'rtmp' + ('-%d' % tbr if tbr else ''), | ||||||
|                     'play_path': play_path, |                         'play_path': play_path, | ||||||
|                     # Providing this swfVfy allows to avoid truncated downloads |                         # Providing this swfVfy allows to avoid truncated downloads | ||||||
|                     'player_url': 'http://www.itv.com/mercury/Mercury_VideoPlayer.swf', |                         'player_url': 'http://www.itv.com/mercury/Mercury_VideoPlayer.swf', | ||||||
|                     'page_url': url, |                         'page_url': url, | ||||||
|                     'tbr': tbr, |                         'tbr': tbr, | ||||||
|                     'ext': 'flv', |                         'ext': 'flv', | ||||||
|                 } |                     } | ||||||
|                 app = self._search_regex( |                     app = self._search_regex( | ||||||
|                     'rtmpe?://[^/]+/(.+)$', rtmp_url, 'app', default=None) |                         'rtmpe?://[^/]+/(.+)$', rtmp_url, 'app', default=None) | ||||||
|                 if app: |                     if app: | ||||||
|                     f.update({ |                         f.update({ | ||||||
|                         'url': rtmp_url.split('?', 1)[0], |                             'url': rtmp_url.split('?', 1)[0], | ||||||
|                         'app': app, |                             'app': app, | ||||||
|                     }) |                         }) | ||||||
|                 else: |                     else: | ||||||
|                     f['url'] = rtmp_url |                         f['url'] = rtmp_url | ||||||
|                 formats.append(f) |                     formats.append(f) | ||||||
|  |  | ||||||
|             for caption_url in video_element.findall('ClosedCaptioningURIs/URL'): |                 for caption_url in video_element.findall('ClosedCaptioningURIs/URL'): | ||||||
|                 if caption_url.text: |                     if caption_url.text: | ||||||
|                     extract_subtitle(caption_url.text) |                         extract_subtitle(caption_url.text) | ||||||
|  |  | ||||||
|         ios_playlist_url = params.get('data-video-playlist') or params.get('data-video-id') |         ios_playlist_url = params.get('data-video-playlist') or params.get('data-video-id') | ||||||
|         hmac = params.get('data-video-hmac') |         hmac = params.get('data-video-hmac') | ||||||
| @@ -261,7 +263,17 @@ class ITVIE(InfoExtractor): | |||||||
|             'formats': formats, |             'formats': formats, | ||||||
|             'subtitles': subtitles, |             'subtitles': subtitles, | ||||||
|         }) |         }) | ||||||
|         return info |  | ||||||
|  |         webpage_info = self._search_json_ld(webpage, video_id, default={}) | ||||||
|  |         if not webpage_info.get('title'): | ||||||
|  |             webpage_info['title'] = self._html_search_regex( | ||||||
|  |                 r'(?s)<h\d+[^>]+\bclass=["\'][^>]*episode-title["\'][^>]*>([^<]+)<', | ||||||
|  |                 webpage, 'title', default=None) or self._og_search_title( | ||||||
|  |                 webpage, default=None) or self._html_search_meta( | ||||||
|  |                 'twitter:title', webpage, 'title', | ||||||
|  |                 default=None) or webpage_info['episode'] | ||||||
|  |  | ||||||
|  |         return merge_dicts(info, webpage_info) | ||||||
|  |  | ||||||
|  |  | ||||||
| class ITVBTCCIE(InfoExtractor): | class ITVBTCCIE(InfoExtractor): | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user
	 Sergey M․
					Sergey M․