[crackle] Fix extraction (closes #15969)
This commit is contained in:
		| @@ -1,31 +1,41 @@ | |||||||
| # coding: utf-8 | # coding: utf-8 | ||||||
| from __future__ import unicode_literals, division | from __future__ import unicode_literals, division | ||||||
|  |  | ||||||
|  | import re | ||||||
|  |  | ||||||
| from .common import InfoExtractor | from .common import InfoExtractor | ||||||
| from ..utils import int_or_none | from ..compat import compat_str | ||||||
|  | from ..utils import ( | ||||||
|  |     determine_ext, | ||||||
|  |     float_or_none, | ||||||
|  |     int_or_none, | ||||||
|  |     parse_age_limit, | ||||||
|  |     parse_duration, | ||||||
|  | ) | ||||||
|  |  | ||||||
|  |  | ||||||
| class CrackleIE(InfoExtractor): | class CrackleIE(InfoExtractor): | ||||||
|     _GEO_COUNTRIES = ['US'] |     _GEO_COUNTRIES = ['US'] | ||||||
|     _VALID_URL = r'(?:crackle:|https?://(?:(?:www|m)\.)?crackle\.com/(?:playlist/\d+/|(?:[^/]+/)+))(?P<id>\d+)' |     _VALID_URL = r'(?:crackle:|https?://(?:(?:www|m)\.)?crackle\.com/(?:playlist/\d+/|(?:[^/]+/)+))(?P<id>\d+)' | ||||||
|     _TEST = { |     _TEST = { | ||||||
|         'url': 'http://www.crackle.com/comedians-in-cars-getting-coffee/2498934', |         'url': 'https://www.crackle.com/andromeda/2502343', | ||||||
|         'info_dict': { |         'info_dict': { | ||||||
|             'id': '2498934', |             'id': '2502343', | ||||||
|             'ext': 'mp4', |             'ext': 'mp4', | ||||||
|             'title': 'Everybody Respects A Bloody Nose', |             'title': 'Under The Night', | ||||||
|             'description': 'Jerry is kaffeeklatsching in L.A. with funnyman J.B. Smoove (Saturday Night Live, Real Husbands of Hollywood). They’re headed for brew at 10 Speed Coffee in a 1964 Studebaker Avanti.', |             'description': 'md5:d2b8ca816579ae8a7bf28bfff8cefc8a', | ||||||
|             'thumbnail': r're:^https?://.*\.jpg', |             'duration': 2583, | ||||||
|             'duration': 906, |             'view_count': int, | ||||||
|             'series': 'Comedians In Cars Getting Coffee', |             'average_rating': 0, | ||||||
|             'season_number': 8, |             'age_limit': 14, | ||||||
|             'episode_number': 4, |             'genre': 'Action, Sci-Fi', | ||||||
|             'subtitles': { |             'creator': 'Allan Kroeker', | ||||||
|                 'en-US': [ |             'artist': 'Keith Hamilton Cobb, Kevin Sorbo, Lisa Ryder, Lexa Doig, Robert Hewitt Wolfe', | ||||||
|                     {'ext': 'vtt'}, |             'release_year': 2000, | ||||||
|                     {'ext': 'tt'}, |             'series': 'Andromeda', | ||||||
|                 ] |             'episode': 'Under The Night', | ||||||
|             }, |             'season_number': 1, | ||||||
|  |             'episode_number': 1, | ||||||
|         }, |         }, | ||||||
|         'params': { |         'params': { | ||||||
|             # m3u8 download |             # m3u8 download | ||||||
| @@ -33,108 +43,95 @@ class CrackleIE(InfoExtractor): | |||||||
|         } |         } | ||||||
|     } |     } | ||||||
|  |  | ||||||
|     _THUMBNAIL_RES = [ |  | ||||||
|         (120, 90), |  | ||||||
|         (208, 156), |  | ||||||
|         (220, 124), |  | ||||||
|         (220, 220), |  | ||||||
|         (240, 180), |  | ||||||
|         (250, 141), |  | ||||||
|         (315, 236), |  | ||||||
|         (320, 180), |  | ||||||
|         (360, 203), |  | ||||||
|         (400, 300), |  | ||||||
|         (421, 316), |  | ||||||
|         (460, 330), |  | ||||||
|         (460, 460), |  | ||||||
|         (462, 260), |  | ||||||
|         (480, 270), |  | ||||||
|         (587, 330), |  | ||||||
|         (640, 480), |  | ||||||
|         (700, 330), |  | ||||||
|         (700, 394), |  | ||||||
|         (854, 480), |  | ||||||
|         (1024, 1024), |  | ||||||
|         (1920, 1080), |  | ||||||
|     ] |  | ||||||
|  |  | ||||||
|     # extracted from http://legacyweb-us.crackle.com/flash/ReferrerRedirect.ashx |  | ||||||
|     _MEDIA_FILE_SLOTS = { |  | ||||||
|         'c544.flv': { |  | ||||||
|             'width': 544, |  | ||||||
|             'height': 306, |  | ||||||
|         }, |  | ||||||
|         '360p.mp4': { |  | ||||||
|             'width': 640, |  | ||||||
|             'height': 360, |  | ||||||
|         }, |  | ||||||
|         '480p.mp4': { |  | ||||||
|             'width': 852, |  | ||||||
|             'height': 478, |  | ||||||
|         }, |  | ||||||
|         '480p_1mbps.mp4': { |  | ||||||
|             'width': 852, |  | ||||||
|             'height': 478, |  | ||||||
|         }, |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     def _real_extract(self, url): |     def _real_extract(self, url): | ||||||
|         video_id = self._match_id(url) |         video_id = self._match_id(url) | ||||||
|  |  | ||||||
|         config_doc = self._download_xml( |         media = self._download_json( | ||||||
|             'http://legacyweb-us.crackle.com/flash/QueryReferrer.ashx?site=16', |             'https://web-api-us.crackle.com/Service.svc/details/media/%s/%s' | ||||||
|             video_id, 'Downloading config') |             % (video_id, self._GEO_COUNTRIES[0]), video_id, query={ | ||||||
|  |                 'disableProtocols': 'true', | ||||||
|  |                 'format': 'json' | ||||||
|  |             }) | ||||||
|  |  | ||||||
|         item = self._download_xml( |         title = media['Title'] | ||||||
|             'http://legacyweb-us.crackle.com/app/revamp/vidwallcache.aspx?flags=-1&fm=%s' % video_id, |  | ||||||
|             video_id, headers=self.geo_verification_headers()).find('i') |         formats = [] | ||||||
|         title = item.attrib['t'] |         for e in media['MediaURLs']: | ||||||
|  |             if e.get('UseDRM') is True: | ||||||
|  |                 continue | ||||||
|  |             format_url = e.get('Path') | ||||||
|  |             if not format_url or not isinstance(format_url, compat_str): | ||||||
|  |                 continue | ||||||
|  |             ext = determine_ext(format_url) | ||||||
|  |             if ext == 'm3u8': | ||||||
|  |                 formats.extend(self._extract_m3u8_formats( | ||||||
|  |                     format_url, video_id, 'mp4', entry_protocol='m3u8_native', | ||||||
|  |                     m3u8_id='hls', fatal=False)) | ||||||
|  |             elif ext == 'mpd': | ||||||
|  |                 formats.extend(self._extract_mpd_formats( | ||||||
|  |                     format_url, video_id, mpd_id='dash', fatal=False)) | ||||||
|  |         self._sort_formats(formats) | ||||||
|  |  | ||||||
|  |         description = media.get('Description') | ||||||
|  |         duration = int_or_none(media.get( | ||||||
|  |             'DurationInSeconds')) or parse_duration(media.get('Duration')) | ||||||
|  |         view_count = int_or_none(media.get('CountViews')) | ||||||
|  |         average_rating = float_or_none(media.get('UserRating')) | ||||||
|  |         age_limit = parse_age_limit(media.get('Rating')) | ||||||
|  |         genre = media.get('Genre') | ||||||
|  |         release_year = int_or_none(media.get('ReleaseYear')) | ||||||
|  |         creator = media.get('Directors') | ||||||
|  |         artist = media.get('Cast') | ||||||
|  |  | ||||||
|  |         if media.get('MediaTypeDisplayValue') == 'Full Episode': | ||||||
|  |             series = media.get('ShowName') | ||||||
|  |             episode = title | ||||||
|  |             season_number = int_or_none(media.get('Season')) | ||||||
|  |             episode_number = int_or_none(media.get('Episode')) | ||||||
|  |         else: | ||||||
|  |             series = episode = season_number = episode_number = None | ||||||
|  |  | ||||||
|         subtitles = {} |         subtitles = {} | ||||||
|         formats = self._extract_m3u8_formats( |         cc_files = media.get('ClosedCaptionFiles') | ||||||
|             'http://content.uplynk.com/ext/%s/%s.m3u8' % (config_doc.attrib['strUplynkOwnerId'], video_id), |         if isinstance(cc_files, list): | ||||||
|             video_id, 'mp4', m3u8_id='hls', fatal=None) |             for cc_file in cc_files: | ||||||
|  |                 if not isinstance(cc_file, dict): | ||||||
|  |                     continue | ||||||
|  |                 cc_url = cc_file.get('Path') | ||||||
|  |                 if not cc_url or not isinstance(cc_url, compat_str): | ||||||
|  |                     continue | ||||||
|  |                 lang = cc_file.get('Locale') or 'en' | ||||||
|  |                 subtitles.setdefault(lang, []).append({'url': cc_url}) | ||||||
|  |  | ||||||
|         thumbnails = [] |         thumbnails = [] | ||||||
|         path = item.attrib.get('p') |         images = media.get('Images') | ||||||
|         if path: |         if isinstance(images, dict): | ||||||
|             for width, height in self._THUMBNAIL_RES: |             for image_key, image_url in images.items(): | ||||||
|                 res = '%dx%d' % (width, height) |                 mobj = re.search(r'Img_(\d+)[xX](\d+)', image_key) | ||||||
|  |                 if not mobj: | ||||||
|  |                     continue | ||||||
|                 thumbnails.append({ |                 thumbnails.append({ | ||||||
|                     'id': res, |                     'url': image_url, | ||||||
|                     'url': 'http://images-us-am.crackle.com/%stnl_%s.jpg' % (path, res), |                     'width': int(mobj.group(1)), | ||||||
|                     'width': width, |                     'height': int(mobj.group(2)), | ||||||
|                     'height': height, |  | ||||||
|                     'resolution': res, |  | ||||||
|                 }) |                 }) | ||||||
|             http_base_url = 'http://ahttp.crackle.com/' + path |  | ||||||
|             for mfs_path, mfs_info in self._MEDIA_FILE_SLOTS.items(): |  | ||||||
|                 formats.append({ |  | ||||||
|                     'url': http_base_url + mfs_path, |  | ||||||
|                     'format_id': 'http-' + mfs_path.split('.')[0], |  | ||||||
|                     'width': mfs_info['width'], |  | ||||||
|                     'height': mfs_info['height'], |  | ||||||
|                 }) |  | ||||||
|             for cc in item.findall('cc'): |  | ||||||
|                 locale = cc.attrib.get('l') |  | ||||||
|                 v = cc.attrib.get('v') |  | ||||||
|                 if locale and v: |  | ||||||
|                     if locale not in subtitles: |  | ||||||
|                         subtitles[locale] = [] |  | ||||||
|                     for url_ext, ext in (('vtt', 'vtt'), ('xml', 'tt')): |  | ||||||
|                         subtitles.setdefault(locale, []).append({ |  | ||||||
|                             'url': '%s/%s%s_%s.%s' % (config_doc.attrib['strSubtitleServer'], path, locale, v, url_ext), |  | ||||||
|                             'ext': ext, |  | ||||||
|                         }) |  | ||||||
|         self._sort_formats(formats, ('width', 'height', 'tbr', 'format_id')) |  | ||||||
|  |  | ||||||
|         return { |         return { | ||||||
|             'id': video_id, |             'id': video_id, | ||||||
|             'title': title, |             'title': title, | ||||||
|             'description': item.attrib.get('d'), |             'description': description, | ||||||
|             'duration': int(item.attrib.get('r'), 16) / 1000 if item.attrib.get('r') else None, |             'duration': duration, | ||||||
|             'series': item.attrib.get('sn'), |             'view_count': view_count, | ||||||
|             'season_number': int_or_none(item.attrib.get('se')), |             'average_rating': average_rating, | ||||||
|             'episode_number': int_or_none(item.attrib.get('ep')), |             'age_limit': age_limit, | ||||||
|  |             'genre': genre, | ||||||
|  |             'creator': creator, | ||||||
|  |             'artist': artist, | ||||||
|  |             'release_year': release_year, | ||||||
|  |             'series': series, | ||||||
|  |             'episode': episode, | ||||||
|  |             'season_number': season_number, | ||||||
|  |             'episode_number': episode_number, | ||||||
|             'thumbnails': thumbnails, |             'thumbnails': thumbnails, | ||||||
|             'subtitles': subtitles, |             'subtitles': subtitles, | ||||||
|             'formats': formats, |             'formats': formats, | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user
	 Sergey M․
					Sergey M․