[seeker] remove Revision3 extractors and fix extraction
This commit is contained in:
		| @@ -932,10 +932,6 @@ from .rentv import ( | ||||
| from .restudy import RestudyIE | ||||
| from .reuters import ReutersIE | ||||
| from .reverbnation import ReverbNationIE | ||||
| from .revision3 import ( | ||||
|     Revision3EmbedIE, | ||||
|     Revision3IE, | ||||
| ) | ||||
| from .rice import RICEIE | ||||
| from .rmcdecouverte import RMCDecouverteIE | ||||
| from .ro220 import Ro220IE | ||||
|   | ||||
| @@ -1,170 +0,0 @@ | ||||
| # coding: utf-8 | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..compat import compat_str | ||||
| from ..utils import ( | ||||
|     int_or_none, | ||||
|     parse_iso8601, | ||||
|     unescapeHTML, | ||||
|     qualities, | ||||
| ) | ||||
|  | ||||
|  | ||||
class Revision3EmbedIE(InfoExtractor):
    """Extractor for Revision3-style embedded players.

    Matches ``revision3:[<playlist_type>:]<id>`` pseudo-URLs and
    ``/player/embed?videoId=<id>`` URLs on the revision3, animalist and
    seekernetwork hosts, and resolves them through ``getPlaylist.json``.
    """

    IE_NAME = 'revision3:embed'
    _VALID_URL = r'(?:revision3:(?:(?P<playlist_type>[^:]+):)?|https?://(?:(?:(?:www|embed)\.)?(?:revision3|animalist)|(?:(?:api|embed)\.)?seekernetwork)\.com/player/embed\?videoId=)(?P<playlist_id>\d+)'
    _TEST = {
        'url': 'http://api.seekernetwork.com/player/embed?videoId=67558',
        'md5': '83bcd157cab89ad7318dd7b8c9cf1306',
        'info_dict': {
            'id': '67558',
            'ext': 'mp4',
            'title': 'The Pros & Cons Of Zoos',
            'description': 'Zoos are often depicted as a terrible place for animals to live, but is there any truth to this?',
            'uploader_id': 'dnews',
            'uploader': 'DNews',
        }
    }
    _API_KEY = 'ba9c741bce1b9d8e3defcc22193f3651b8867e62'

    def _real_extract(self, url):
        """Return the info dict for the first item of the requested playlist.

        Raises whatever ``_download_json`` raises on network/JSON failure,
        and ``KeyError``/``IndexError`` when the mandatory ``items``,
        ``media`` or ``title`` fields are absent from the response.
        """
        mobj = re.match(self._VALID_URL, url)
        playlist_id = mobj.group('playlist_id')
        # Embed URLs carry no playlist type, so the id is then sent as the
        # ``video_id`` query parameter.
        playlist_type = mobj.group('playlist_type') or 'video_id'
        video_data = self._download_json(
            'http://revision3.com/api/getPlaylist.json', playlist_id, query={
                'api_key': self._API_KEY,
                'codecs': 'h264,vp8,theora',
                playlist_type: playlist_id,
            })['items'][0]

        formats = []
        for vcodec, media in video_data['media'].items():
            for quality_id, quality in media.items():
                media_url = quality.get('url')
                if not media_url:
                    # A rendition without a URL is unusable; skip it rather
                    # than aborting the whole extraction with a KeyError.
                    continue
                if quality_id == 'hls':
                    formats.extend(self._extract_m3u8_formats(
                        media_url, playlist_id, 'mp4',
                        'm3u8_native', m3u8_id='hls', fatal=False))
                else:
                    formats.append({
                        'url': media_url,
                        'format_id': '%s-%s' % (vcodec, quality_id),
                        'tbr': int_or_none(quality.get('bitrate')),
                        'vcodec': vcodec,
                    })
        self._sort_formats(formats)

        # ``show`` is optional metadata and may be present but null, in which
        # case ``.get('show', {})`` would hand back None.
        show = video_data.get('show') or {}

        return {
            'id': playlist_id,
            'title': unescapeHTML(video_data['title']),
            'description': unescapeHTML(video_data.get('summary')),
            'uploader': show.get('name'),
            'uploader_id': show.get('slug'),
            'duration': int_or_none(video_data.get('duration')),
            'formats': formats,
        }
|  | ||||
|  | ||||
class Revision3IE(InfoExtractor):
    """Extractor for revision3.com / animalist.com pages.

    A page is described by the site's ``apiProxy/ddn`` JSON endpoint. Pages
    of type ``episode`` or ``embed`` yield a single ``url_transparent``
    result; any other page type is treated as a paginated episode listing
    and yields a playlist.
    """

    IE_NAME = 'revision'
    _VALID_URL = r'https?://(?:www\.)?(?P<domain>(?:revision3|animalist)\.com)/(?P<id>[^/]+(?:/[^/?#]+)?)'
    _TESTS = [{
        'url': 'http://www.revision3.com/technobuffalo/5-google-predictions-for-2016',
        'md5': 'd94a72d85d0a829766de4deb8daaf7df',
        'info_dict': {
            'id': '71089',
            'display_id': 'technobuffalo/5-google-predictions-for-2016',
            'ext': 'webm',
            'title': '5 Google Predictions for 2016',
            'description': 'Google had a great 2015, but it\'s already time to look ahead. Here are our five predictions for 2016.',
            'upload_date': '20151228',
            'timestamp': 1451325600,
            'duration': 187,
            'uploader': 'TechnoBuffalo',
            'uploader_id': 'technobuffalo',
        }
    }, {
        # Show
        'url': 'http://revision3.com/variant',
        'only_matching': True,
    }, {
        # Tag
        'url': 'http://revision3.com/vr',
        'only_matching': True,
    }]
    _PAGE_DATA_TEMPLATE = 'http://www.%s/apiProxy/ddn/%s?domain=%s'

    def _real_extract(self, url):
        """Download the page description and dispatch on its ``type``."""
        domain, display_id = re.match(self._VALID_URL, url).groups()
        site = domain.split('.')[0]
        page_info = self._download_json(
            self._PAGE_DATA_TEMPLATE % (domain, display_id, domain), display_id)

        page_data = page_info['data']
        page_type = page_data['type']
        if page_type in ('episode', 'embed'):
            return self._episode_result(page_data, page_type, display_id, site)
        return self._listing_result(
            page_info, page_type, domain, display_id, site)

    def _episode_result(self, page_data, page_type, display_id, site):
        """Build the ``url_transparent`` result for an episode or embed page."""
        show_data = page_data['show']['data']
        video_data = page_data['video']['data']
        page_id = compat_str(page_data['id'])
        video_id = compat_str(video_data['id'])

        preference = qualities(['mini', 'small', 'medium', 'large'])
        thumbnails = [{
            'url': image_url,
            'id': image_id,
            'preference': preference(image_id)
        } for image_id, image_url in page_data.get('images', {}).items()]

        # Embed pages point at an external player URL; episodes are handed
        # to the ``revision3:<id>`` pseudo-URL.
        if page_type == 'embed':
            target_url = video_data['embed']
        else:
            target_url = 'revision3:%s' % video_id

        return {
            '_type': 'url_transparent',
            'url': target_url,
            'id': page_id,
            'display_id': display_id,
            'title': unescapeHTML(page_data['name']),
            'description': unescapeHTML(page_data.get('summary')),
            'timestamp': parse_iso8601(page_data.get('publishTime'), ' '),
            'author': page_data.get('author'),
            'uploader': show_data.get('name'),
            'uploader_id': show_data.get('slug'),
            'thumbnails': thumbnails,
            'extractor_key': site,
        }

    def _listing_result(self, page_info, page_type, domain, display_id, site):
        """Collect every episode of a listing page into a playlist result."""
        list_data = page_info[page_type]['data']
        episodes_data = page_info['episodes']['data']
        num_episodes = page_info['meta']['totalEpisodes']
        entries = []
        page_num = 1
        # Stop on an empty page or once the advertised total is reached or
        # exceeded. A strict ``==`` test never terminates when the total is
        # inaccurate or a page comes back empty.
        while episodes_data:
            entries.extend([{
                '_type': 'url',
                'url': 'http://%s%s' % (domain, episode['path']),
                'id': compat_str(episode['id']),
                'ie_key': 'Revision3',
                'extractor_key': site,
            } for episode in episodes_data])
            if len(entries) >= num_episodes:
                break
            page_num += 1
            episodes_data = self._download_json(self._PAGE_DATA_TEMPLATE % (
                domain, display_id + '/' + compat_str(page_num), domain),
                display_id)['episodes']['data']

        return self.playlist_result(
            entries, compat_str(list_data['id']),
            list_data.get('name'), list_data.get('summary'))
| @@ -4,34 +4,37 @@ from __future__ import unicode_literals | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     get_element_by_class, | ||||
|     strip_or_none, | ||||
| ) | ||||
|  | ||||
|  | ||||
| class SeekerIE(InfoExtractor): | ||||
|     _VALID_URL = r'https?://(?:www\.)?seeker\.com/(?P<display_id>.*)-(?P<article_id>\d+)\.html' | ||||
|     _TESTS = [{ | ||||
|         # player.loadRevision3Item | ||||
|         'url': 'http://www.seeker.com/should-trump-be-required-to-release-his-tax-returns-1833805621.html', | ||||
|         'md5': '30c1dc4030cc715cf05b423d0947ac18', | ||||
|         'md5': '897d44bbe0d8986a2ead96de565a92db', | ||||
|         'info_dict': { | ||||
|             'id': '76243', | ||||
|             'ext': 'webm', | ||||
|             'id': 'Elrn3gnY', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'Should Trump Be Required To Release His Tax Returns?', | ||||
|             'description': 'Donald Trump has been secretive about his "big," "beautiful" tax returns. So what can we learn if he decides to release them?', | ||||
|             'uploader': 'Seeker Daily', | ||||
|             'uploader_id': 'seekerdaily', | ||||
|             'description': 'md5:41efa8cfa8d627841045eec7b018eb45', | ||||
|             'timestamp': 1490090165, | ||||
|             'upload_date': '20170321', | ||||
|         } | ||||
|     }, { | ||||
|         'url': 'http://www.seeker.com/changes-expected-at-zoos-following-recent-gorilla-lion-shootings-1834116536.html', | ||||
|         'playlist': [ | ||||
|             { | ||||
|                 'md5': '83bcd157cab89ad7318dd7b8c9cf1306', | ||||
|                 'md5': '0497b9f20495174be73ae136949707d2', | ||||
|                 'info_dict': { | ||||
|                     'id': '67558', | ||||
|                     'id': 'FihYQ8AE', | ||||
|                     'ext': 'mp4', | ||||
|                     'title': 'The Pros & Cons Of Zoos', | ||||
|                     'description': 'Zoos are often depicted as a terrible place for animals to live, but is there any truth to this?', | ||||
|                     'uploader': 'DNews', | ||||
|                     'uploader_id': 'dnews', | ||||
|                     'description': 'md5:d88f99a8ea8e7d25e6ff77f271b1271c', | ||||
|                     'timestamp': 1490039133, | ||||
|                     'upload_date': '20170320', | ||||
|                 }, | ||||
|             } | ||||
|         ], | ||||
| @@ -45,13 +48,11 @@ class SeekerIE(InfoExtractor): | ||||
|     def _real_extract(self, url): | ||||
|         display_id, article_id = re.match(self._VALID_URL, url).groups() | ||||
|         webpage = self._download_webpage(url, display_id) | ||||
|         mobj = re.search(r"player\.loadRevision3Item\('([^']+)'\s*,\s*(\d+)\);", webpage) | ||||
|         if mobj: | ||||
|             playlist_type, playlist_id = mobj.groups() | ||||
|             return self.url_result( | ||||
|                 'revision3:%s:%s' % (playlist_type, playlist_id), 'Revision3Embed', playlist_id) | ||||
|         else: | ||||
|             entries = [self.url_result('revision3:video_id:%s' % video_id, 'Revision3Embed', video_id) for video_id in re.findall( | ||||
|                 r'<iframe[^>]+src=[\'"](?:https?:)?//api\.seekernetwork\.com/player/embed\?videoId=(\d+)', webpage)] | ||||
|             return self.playlist_result( | ||||
|                 entries, article_id, self._og_search_title(webpage), self._og_search_description(webpage)) | ||||
|         entries = [] | ||||
|         for jwp_id in re.findall(r'data-video-id="([a-zA-Z0-9]{8})"', webpage): | ||||
|             entries.append(self.url_result( | ||||
|                 'jwplatform:' + jwp_id, 'JWPlatform', jwp_id)) | ||||
|         return self.playlist_result( | ||||
|             entries, article_id, | ||||
|             self._og_search_title(webpage), | ||||
|             strip_or_none(get_element_by_class('subtitle__text', webpage)) or self._og_search_description(webpage)) | ||||
|   | ||||
		Reference in New Issue
	
	Block a user
	 Remita Amine
					Remita Amine