[crackle] Fix extraction (closes #15969)
This commit is contained in:
		| @@ -1,31 +1,41 @@ | ||||
| # coding: utf-8 | ||||
| from __future__ import unicode_literals, division | ||||
|  | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import int_or_none | ||||
| from ..compat import compat_str | ||||
| from ..utils import ( | ||||
|     determine_ext, | ||||
|     float_or_none, | ||||
|     int_or_none, | ||||
|     parse_age_limit, | ||||
|     parse_duration, | ||||
| ) | ||||
|  | ||||
|  | ||||
| class CrackleIE(InfoExtractor): | ||||
|     _GEO_COUNTRIES = ['US'] | ||||
|     _VALID_URL = r'(?:crackle:|https?://(?:(?:www|m)\.)?crackle\.com/(?:playlist/\d+/|(?:[^/]+/)+))(?P<id>\d+)' | ||||
|     _TEST = { | ||||
|         'url': 'http://www.crackle.com/comedians-in-cars-getting-coffee/2498934', | ||||
|         'url': 'https://www.crackle.com/andromeda/2502343', | ||||
|         'info_dict': { | ||||
|             'id': '2498934', | ||||
|             'id': '2502343', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'Everybody Respects A Bloody Nose', | ||||
|             'description': 'Jerry is kaffeeklatsching in L.A. with funnyman J.B. Smoove (Saturday Night Live, Real Husbands of Hollywood). They’re headed for brew at 10 Speed Coffee in a 1964 Studebaker Avanti.', | ||||
|             'thumbnail': r're:^https?://.*\.jpg', | ||||
|             'duration': 906, | ||||
|             'series': 'Comedians In Cars Getting Coffee', | ||||
|             'season_number': 8, | ||||
|             'episode_number': 4, | ||||
|             'subtitles': { | ||||
|                 'en-US': [ | ||||
|                     {'ext': 'vtt'}, | ||||
|                     {'ext': 'tt'}, | ||||
|                 ] | ||||
|             }, | ||||
|             'title': 'Under The Night', | ||||
|             'description': 'md5:d2b8ca816579ae8a7bf28bfff8cefc8a', | ||||
|             'duration': 2583, | ||||
|             'view_count': int, | ||||
|             'average_rating': 0, | ||||
|             'age_limit': 14, | ||||
|             'genre': 'Action, Sci-Fi', | ||||
|             'creator': 'Allan Kroeker', | ||||
|             'artist': 'Keith Hamilton Cobb, Kevin Sorbo, Lisa Ryder, Lexa Doig, Robert Hewitt Wolfe', | ||||
|             'release_year': 2000, | ||||
|             'series': 'Andromeda', | ||||
|             'episode': 'Under The Night', | ||||
|             'season_number': 1, | ||||
|             'episode_number': 1, | ||||
|         }, | ||||
|         'params': { | ||||
|             # m3u8 download | ||||
| @@ -33,108 +43,95 @@ class CrackleIE(InfoExtractor): | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     _THUMBNAIL_RES = [ | ||||
|         (120, 90), | ||||
|         (208, 156), | ||||
|         (220, 124), | ||||
|         (220, 220), | ||||
|         (240, 180), | ||||
|         (250, 141), | ||||
|         (315, 236), | ||||
|         (320, 180), | ||||
|         (360, 203), | ||||
|         (400, 300), | ||||
|         (421, 316), | ||||
|         (460, 330), | ||||
|         (460, 460), | ||||
|         (462, 260), | ||||
|         (480, 270), | ||||
|         (587, 330), | ||||
|         (640, 480), | ||||
|         (700, 330), | ||||
|         (700, 394), | ||||
|         (854, 480), | ||||
|         (1024, 1024), | ||||
|         (1920, 1080), | ||||
|     ] | ||||
|  | ||||
|     # extracted from http://legacyweb-us.crackle.com/flash/ReferrerRedirect.ashx | ||||
|     _MEDIA_FILE_SLOTS = { | ||||
|         'c544.flv': { | ||||
|             'width': 544, | ||||
|             'height': 306, | ||||
|         }, | ||||
|         '360p.mp4': { | ||||
|             'width': 640, | ||||
|             'height': 360, | ||||
|         }, | ||||
|         '480p.mp4': { | ||||
|             'width': 852, | ||||
|             'height': 478, | ||||
|         }, | ||||
|         '480p_1mbps.mp4': { | ||||
|             'width': 852, | ||||
|             'height': 478, | ||||
|         }, | ||||
|     } | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         video_id = self._match_id(url) | ||||
|  | ||||
|         config_doc = self._download_xml( | ||||
|             'http://legacyweb-us.crackle.com/flash/QueryReferrer.ashx?site=16', | ||||
|             video_id, 'Downloading config') | ||||
|         media = self._download_json( | ||||
|             'https://web-api-us.crackle.com/Service.svc/details/media/%s/%s' | ||||
|             % (video_id, self._GEO_COUNTRIES[0]), video_id, query={ | ||||
|                 'disableProtocols': 'true', | ||||
|                 'format': 'json' | ||||
|             }) | ||||
|  | ||||
|         item = self._download_xml( | ||||
|             'http://legacyweb-us.crackle.com/app/revamp/vidwallcache.aspx?flags=-1&fm=%s' % video_id, | ||||
|             video_id, headers=self.geo_verification_headers()).find('i') | ||||
|         title = item.attrib['t'] | ||||
|         title = media['Title'] | ||||
|  | ||||
|         formats = [] | ||||
|         for e in media['MediaURLs']: | ||||
|             if e.get('UseDRM') is True: | ||||
|                 continue | ||||
|             format_url = e.get('Path') | ||||
|             if not format_url or not isinstance(format_url, compat_str): | ||||
|                 continue | ||||
|             ext = determine_ext(format_url) | ||||
|             if ext == 'm3u8': | ||||
|                 formats.extend(self._extract_m3u8_formats( | ||||
|                     format_url, video_id, 'mp4', entry_protocol='m3u8_native', | ||||
|                     m3u8_id='hls', fatal=False)) | ||||
|             elif ext == 'mpd': | ||||
|                 formats.extend(self._extract_mpd_formats( | ||||
|                     format_url, video_id, mpd_id='dash', fatal=False)) | ||||
|         self._sort_formats(formats) | ||||
|  | ||||
|         description = media.get('Description') | ||||
|         duration = int_or_none(media.get( | ||||
|             'DurationInSeconds')) or parse_duration(media.get('Duration')) | ||||
|         view_count = int_or_none(media.get('CountViews')) | ||||
|         average_rating = float_or_none(media.get('UserRating')) | ||||
|         age_limit = parse_age_limit(media.get('Rating')) | ||||
|         genre = media.get('Genre') | ||||
|         release_year = int_or_none(media.get('ReleaseYear')) | ||||
|         creator = media.get('Directors') | ||||
|         artist = media.get('Cast') | ||||
|  | ||||
|         if media.get('MediaTypeDisplayValue') == 'Full Episode': | ||||
|             series = media.get('ShowName') | ||||
|             episode = title | ||||
|             season_number = int_or_none(media.get('Season')) | ||||
|             episode_number = int_or_none(media.get('Episode')) | ||||
|         else: | ||||
|             series = episode = season_number = episode_number = None | ||||
|  | ||||
|         subtitles = {} | ||||
|         formats = self._extract_m3u8_formats( | ||||
|             'http://content.uplynk.com/ext/%s/%s.m3u8' % (config_doc.attrib['strUplynkOwnerId'], video_id), | ||||
|             video_id, 'mp4', m3u8_id='hls', fatal=None) | ||||
|         cc_files = media.get('ClosedCaptionFiles') | ||||
|         if isinstance(cc_files, list): | ||||
|             for cc_file in cc_files: | ||||
|                 if not isinstance(cc_file, dict): | ||||
|                     continue | ||||
|                 cc_url = cc_file.get('Path') | ||||
|                 if not cc_url or not isinstance(cc_url, compat_str): | ||||
|                     continue | ||||
|                 lang = cc_file.get('Locale') or 'en' | ||||
|                 subtitles.setdefault(lang, []).append({'url': cc_url}) | ||||
|  | ||||
|         thumbnails = [] | ||||
|         path = item.attrib.get('p') | ||||
|         if path: | ||||
|             for width, height in self._THUMBNAIL_RES: | ||||
|                 res = '%dx%d' % (width, height) | ||||
|         images = media.get('Images') | ||||
|         if isinstance(images, list): | ||||
|             for image_key, image_url in images.items(): | ||||
|                 mobj = re.search(r'Img_(\d+)[xX](\d+)', image_key) | ||||
|                 if not mobj: | ||||
|                     continue | ||||
|                 thumbnails.append({ | ||||
|                     'id': res, | ||||
|                     'url': 'http://images-us-am.crackle.com/%stnl_%s.jpg' % (path, res), | ||||
|                     'width': width, | ||||
|                     'height': height, | ||||
|                     'resolution': res, | ||||
|                     'url': image_url, | ||||
|                     'width': int(mobj.group(1)), | ||||
|                     'height': int(mobj.group(2)), | ||||
|                 }) | ||||
|             http_base_url = 'http://ahttp.crackle.com/' + path | ||||
|             for mfs_path, mfs_info in self._MEDIA_FILE_SLOTS.items(): | ||||
|                 formats.append({ | ||||
|                     'url': http_base_url + mfs_path, | ||||
|                     'format_id': 'http-' + mfs_path.split('.')[0], | ||||
|                     'width': mfs_info['width'], | ||||
|                     'height': mfs_info['height'], | ||||
|                 }) | ||||
|             for cc in item.findall('cc'): | ||||
|                 locale = cc.attrib.get('l') | ||||
|                 v = cc.attrib.get('v') | ||||
|                 if locale and v: | ||||
|                     if locale not in subtitles: | ||||
|                         subtitles[locale] = [] | ||||
|                     for url_ext, ext in (('vtt', 'vtt'), ('xml', 'tt')): | ||||
|                         subtitles.setdefault(locale, []).append({ | ||||
|                             'url': '%s/%s%s_%s.%s' % (config_doc.attrib['strSubtitleServer'], path, locale, v, url_ext), | ||||
|                             'ext': ext, | ||||
|                         }) | ||||
|         self._sort_formats(formats, ('width', 'height', 'tbr', 'format_id')) | ||||
|  | ||||
|         return { | ||||
|             'id': video_id, | ||||
|             'title': title, | ||||
|             'description': item.attrib.get('d'), | ||||
|             'duration': int(item.attrib.get('r'), 16) / 1000 if item.attrib.get('r') else None, | ||||
|             'series': item.attrib.get('sn'), | ||||
|             'season_number': int_or_none(item.attrib.get('se')), | ||||
|             'episode_number': int_or_none(item.attrib.get('ep')), | ||||
|             'description': description, | ||||
|             'duration': duration, | ||||
|             'view_count': view_count, | ||||
|             'average_rating': average_rating, | ||||
|             'age_limit': age_limit, | ||||
|             'genre': genre, | ||||
|             'creator': creator, | ||||
|             'artist': artist, | ||||
|             'release_year': release_year, | ||||
|             'series': series, | ||||
|             'episode': episode, | ||||
|             'season_number': season_number, | ||||
|             'episode_number': episode_number, | ||||
|             'thumbnails': thumbnails, | ||||
|             'subtitles': subtitles, | ||||
|             'formats': formats, | ||||
|   | ||||
		Reference in New Issue
	
	Block a user
	 Sergey M․
					Sergey M․