[drbonanza] Simplify and fix duration (#4687)
This commit is contained in:
		| @@ -1,11 +1,15 @@ | |||||||
| from __future__ import unicode_literals | from __future__ import unicode_literals | ||||||
|  |  | ||||||
| from .common import InfoExtractor |  | ||||||
| from .common import ExtractorError |  | ||||||
| from ..utils import parse_iso8601 |  | ||||||
| import json | import json | ||||||
| import re | import re | ||||||
|  |  | ||||||
|  | from .common import InfoExtractor | ||||||
|  | from ..utils import ( | ||||||
|  |     int_or_none, | ||||||
|  |     parse_iso8601, | ||||||
|  | ) | ||||||
|  |  | ||||||
|  |  | ||||||
| class DRBonanzaIE(InfoExtractor): | class DRBonanzaIE(InfoExtractor): | ||||||
|     _VALID_URL = r'https?://(?:www\.)?dr\.dk/bonanza/(?:[^/]+/)+(?:[^/])+?(?:assetId=(?P<id>\d+))?(?:[#&]|$)' |     _VALID_URL = r'https?://(?:www\.)?dr\.dk/bonanza/(?:[^/]+/)+(?:[^/])+?(?:assetId=(?P<id>\d+))?(?:[#&]|$)' | ||||||
|  |  | ||||||
| @@ -17,9 +21,10 @@ class DRBonanzaIE(InfoExtractor): | |||||||
|             'ext': 'mp4', |             'ext': 'mp4', | ||||||
|             'title': 'Talkshowet - Leonard Cohen', |             'title': 'Talkshowet - Leonard Cohen', | ||||||
|             'description': 'md5:8f34194fb30cd8c8a30ad8b27b70c0ca', |             'description': 'md5:8f34194fb30cd8c8a30ad8b27b70c0ca', | ||||||
|  |             'thumbnail': 're:^https?://.*\.(?:gif|jpg)$', | ||||||
|             'timestamp': 1295537932, |             'timestamp': 1295537932, | ||||||
|             'upload_date': '20110120', |             'upload_date': '20110120', | ||||||
|             'duration': 3664000, |             'duration': 3664, | ||||||
|         }, |         }, | ||||||
|     }, { |     }, { | ||||||
|         'url': 'http://www.dr.dk/bonanza/radio/serie/sport/fodbold.htm?assetId=59410', |         'url': 'http://www.dr.dk/bonanza/radio/serie/sport/fodbold.htm?assetId=59410', | ||||||
| @@ -29,36 +34,46 @@ class DRBonanzaIE(InfoExtractor): | |||||||
|             'ext': 'mp3', |             'ext': 'mp3', | ||||||
|             'title': 'EM fodbold 1992 Danmark - Tyskland finale Transmission', |             'title': 'EM fodbold 1992 Danmark - Tyskland finale Transmission', | ||||||
|             'description': 'md5:501e5a195749480552e214fbbed16c4e', |             'description': 'md5:501e5a195749480552e214fbbed16c4e', | ||||||
|  |             'thumbnail': 're:^https?://.*\.(?:gif|jpg)$', | ||||||
|             'timestamp': 1223274900, |             'timestamp': 1223274900, | ||||||
|             'upload_date': '20081006', |             'upload_date': '20081006', | ||||||
|             'duration': 7369000, |             'duration': 7369, | ||||||
|         }, |         }, | ||||||
|     }] |     }] | ||||||
|  |  | ||||||
|     def _real_extract(self, url): |     def _real_extract(self, url): | ||||||
|         url_id = self._match_id(url) |         url_id = self._match_id(url) | ||||||
|          |         webpage = self._download_webpage(url, url_id) | ||||||
|         webpage = self._download_webpage(url, url_id if url_id else "") |  | ||||||
|  |  | ||||||
|         if url_id: |         if url_id: | ||||||
|             info = json.loads(self._html_search_regex(r'({.*?' + url_id + '.*})', webpage, 'json')) |             info = json.loads(self._html_search_regex(r'({.*?%s.*})' % url_id, webpage, 'json')) | ||||||
|         else: |         else: | ||||||
|             # Just fetch the first video on that page |             # Just fetch the first video on that page | ||||||
|             info = json.loads(self._html_search_regex(r'bonanzaFunctions.newPlaylist\(({.*})\)', webpage, 'json')) |             info = json.loads(self._html_search_regex(r'bonanzaFunctions.newPlaylist\(({.*})\)', webpage, 'json')) | ||||||
|  |  | ||||||
|         asset_id = str(info['AssetId']) |         asset_id = str(info['AssetId']) | ||||||
|         title = info['Title'].rstrip(' \'\"-,.:;!?') |         title = info['Title'].rstrip(' \'\"-,.:;!?') | ||||||
|         duration = info['Duration'] |         duration = int_or_none(info.get('Duration'), scale=1000) | ||||||
|         timestamp = parse_iso8601(re.sub(r'\.\d+$', '', info['Created'])) # First published online. "FirstPublished" contains the date for original airing. |         # First published online. "FirstPublished" contains the date for original airing. | ||||||
|  |         timestamp = parse_iso8601( | ||||||
|  |             re.sub(r'\.\d+$', '', info['Created'])) | ||||||
|  |  | ||||||
|         def parse_filename_info(url): |         def parse_filename_info(url): | ||||||
|             match = re.search(r'/\d+_(?P<width>\d+)x(?P<height>\d+)x(?P<bitrate>\d+)K\.(?P<ext>\w+)$', url) |             match = re.search(r'/\d+_(?P<width>\d+)x(?P<height>\d+)x(?P<bitrate>\d+)K\.(?P<ext>\w+)$', url) | ||||||
|             if match: |             if match: | ||||||
|                 return {'width': int(match.group(1)), 'height': int(match.group(2)), 'bitrate': int(match.group(3)), 'ext': match.group(4)} |                 return { | ||||||
|  |                     'width': int(match.group('width')), | ||||||
|  |                     'height': int(match.group('height')), | ||||||
|  |                     'vbr': int(match.group('bitrate')), | ||||||
|  |                     'ext': match.group('ext') | ||||||
|  |                 } | ||||||
|             match = re.search(r'/\d+_(?P<bitrate>\d+)K\.(?P<ext>\w+)$', url) |             match = re.search(r'/\d+_(?P<bitrate>\d+)K\.(?P<ext>\w+)$', url) | ||||||
|             if match: |             if match: | ||||||
|                 return {'bitrate': int(match.group(1)), 'ext': match.group(2)} |                 return { | ||||||
|             return {'width': None, 'height': None, 'bitrate': None, 'ext': None} |                     'vbr': int(match.group('bitrate')), | ||||||
|  |                     'ext': match.group(2) | ||||||
|  |                 } | ||||||
|  |             return {} | ||||||
|  |  | ||||||
|         video_types = ['VideoHigh', 'VideoMid', 'VideoLow'] |         video_types = ['VideoHigh', 'VideoMid', 'VideoLow'] | ||||||
|         preferencemap = { |         preferencemap = { | ||||||
| @@ -72,37 +87,33 @@ class DRBonanzaIE(InfoExtractor): | |||||||
|         for file in info['Files']: |         for file in info['Files']: | ||||||
|             if info['Type'] == "Video": |             if info['Type'] == "Video": | ||||||
|                 if file['Type'] in video_types: |                 if file['Type'] in video_types: | ||||||
|                     fileinfo = parse_filename_info(file['Location']) |                     format = parse_filename_info(file['Location']) | ||||||
|                     formats.append({ |                     format.update({ | ||||||
|                         'url': file['Location'], |                         'url': file['Location'], | ||||||
|                         'format_id': file['Type'].replace('Video', ''), |                         'format_id': file['Type'].replace('Video', ''), | ||||||
|                         'preference': preferencemap.get(file['Type'], -10), |                         'preference': preferencemap.get(file['Type'], -10), | ||||||
|                         'width': fileinfo['width'], |  | ||||||
|                         'height': fileinfo['height'], |  | ||||||
|                         'vbr': fileinfo['bitrate'], |  | ||||||
|                         'ext': fileinfo['ext'], |  | ||||||
|                     }) |                     }) | ||||||
|  |                     formats.append(format) | ||||||
|                 elif file['Type'] == "Thumb": |                 elif file['Type'] == "Thumb": | ||||||
|                     thumbnail = file['Location'] |                     thumbnail = file['Location'] | ||||||
|             elif info['Type'] == "Audio": |             elif info['Type'] == "Audio": | ||||||
|                 if file['Type'] == "Audio": |                 if file['Type'] == "Audio": | ||||||
|                     fileinfo = parse_filename_info(file['Location']) |                     format = parse_filename_info(file['Location']) | ||||||
|                     formats.append({ |                     format.update({ | ||||||
|                         'url': file['Location'], |                         'url': file['Location'], | ||||||
|                         'format_id': file['Type'], |                         'format_id': file['Type'], | ||||||
|                         'abr': fileinfo['bitrate'], |  | ||||||
|                         'ext': fileinfo['ext'], |  | ||||||
|                         'vcodec': 'none', |                         'vcodec': 'none', | ||||||
|                     }) |                     }) | ||||||
|  |                     formats.append(format) | ||||||
|                 elif file['Type'] == "Thumb": |                 elif file['Type'] == "Thumb": | ||||||
|                     thumbnail = file['Location'] |                     thumbnail = file['Location'] | ||||||
|  |  | ||||||
|         description = "{}\n{}\n{}\n".format(info['Description'], info['Actors'], info['Colophon']) |         description = '%s\n%s\n%s\n' % ( | ||||||
|  |             info['Description'], info['Actors'], info['Colophon']) | ||||||
|  |  | ||||||
|         for f in formats: |         for f in formats: | ||||||
|             f['url'] = f['url'].replace('rtmp://vod-bonanza.gss.dr.dk/bonanza/', 'http://vodfiles.dr.dk/') |             f['url'] = f['url'].replace('rtmp://vod-bonanza.gss.dr.dk/bonanza/', 'http://vodfiles.dr.dk/') | ||||||
|             f['url'] = f['url'].replace('mp4:bonanza', 'bonanza') |             f['url'] = f['url'].replace('mp4:bonanza', 'bonanza') | ||||||
|          |  | ||||||
|         self._sort_formats(formats) |         self._sort_formats(formats) | ||||||
|  |  | ||||||
|         display_id = re.sub(r'[^\w\d-]', '', re.sub(r' ', '-', title.lower())) + '-' + asset_id |         display_id = re.sub(r'[^\w\d-]', '', re.sub(r' ', '-', title.lower())) + '-' + asset_id | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user
	Philipp Hagemeister