Add support for direct links to a video (#1973)
This commit is contained in:
		| @@ -13,6 +13,8 @@ from ..utils import ( | ||||
|     ExtractorError, | ||||
|     smuggle_url, | ||||
|     unescapeHTML, | ||||
|     unified_strdate, | ||||
|     url_basename, | ||||
| ) | ||||
| from .brightcove import BrightcoveIE | ||||
|  | ||||
| @@ -71,6 +73,17 @@ class GenericIE(InfoExtractor): | ||||
|                 u'skip_download': True, | ||||
|             }, | ||||
|         }, | ||||
|         # Direct link to a video | ||||
|         { | ||||
|             u'url': u'http://media.w3.org/2010/05/sintel/trailer.mp4', | ||||
|             u'file': u'trailer.mp4', | ||||
|             u'md5': u'67d406c2bcb6af27fa886f31aa934bbe', | ||||
|             u'info_dict': { | ||||
|                 u'id': u'trailer', | ||||
|                 u'title': u'trailer', | ||||
|                 u'upload_date': u'20100513', | ||||
|             } | ||||
|         } | ||||
|     ] | ||||
|  | ||||
|     def report_download_webpage(self, video_id): | ||||
| @@ -83,7 +96,7 @@ class GenericIE(InfoExtractor): | ||||
|         """Report information extraction.""" | ||||
|         self._downloader.to_screen(u'[redirect] Following redirect to %s' % new_url) | ||||
|  | ||||
|     def _test_redirect(self, url): | ||||
|     def _send_head(self, url): | ||||
|         """Check if it is a redirect, like url shorteners, in case return the new url.""" | ||||
|         class HeadRequest(compat_urllib_request.Request): | ||||
|             def get_method(self): | ||||
| @@ -131,29 +144,46 @@ class GenericIE(InfoExtractor): | ||||
|         response = opener.open(HeadRequest(url)) | ||||
|         if response is None: | ||||
|             raise ExtractorError(u'Invalid URL protocol') | ||||
|         new_url = response.geturl() | ||||
|  | ||||
|         if url == new_url: | ||||
|             return False | ||||
|  | ||||
|         self.report_following_redirect(new_url) | ||||
|         return new_url | ||||
|         return response | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         parsed_url = compat_urlparse.urlparse(url) | ||||
|         if not parsed_url.scheme: | ||||
|             self._downloader.report_warning('The url doesn\'t specify the protocol, trying with http') | ||||
|             return self.url_result('http://' + url) | ||||
|         video_id = os.path.splitext(url.split('/')[-1])[0] | ||||
|  | ||||
|         try: | ||||
|             new_url = self._test_redirect(url) | ||||
|             if new_url: | ||||
|             response = self._send_head(url) | ||||
|  | ||||
|             # Check for redirect | ||||
|             new_url = response.geturl() | ||||
|             if url != new_url: | ||||
|                 self.report_following_redirect(new_url) | ||||
|                 return self.url_result(new_url) | ||||
|  | ||||
|             # Check for direct link to a video | ||||
|             content_type = response.headers.get('Content-Type', '') | ||||
|             m = re.match(r'^(?:audio|video)/(?P<format_id>.+)$', content_type) | ||||
|             if m: | ||||
|                 upload_date = response.headers.get('Last-Modified') | ||||
|                 if upload_date: | ||||
|                     upload_date = unified_strdate(upload_date) | ||||
|                 assert (url_basename(url) == 'trailer.mp4') | ||||
|                 return { | ||||
|                     'id': video_id, | ||||
|                     'title': os.path.splitext(url_basename(url))[0], | ||||
|                     'formats': [{ | ||||
|                         'format_id': m.group('format_id'), | ||||
|                         'url': url, | ||||
|                     }], | ||||
|                     'upload_date': upload_date, | ||||
|                 } | ||||
|  | ||||
|         except compat_urllib_error.HTTPError: | ||||
|             # This may be a stupid server that doesn't like HEAD, our UA, or so | ||||
|             pass | ||||
|  | ||||
|         video_id = url.split('/')[-1] | ||||
|         try: | ||||
|             webpage = self._download_webpage(url, video_id) | ||||
|         except ValueError: | ||||
|   | ||||
| @@ -761,12 +761,17 @@ def unified_strdate(date_str): | ||||
|         '%Y-%m-%dT%H:%M:%S.%fZ', | ||||
|         '%Y-%m-%dT%H:%M:%S.%f0Z', | ||||
|         '%Y-%m-%dT%H:%M:%S', | ||||
|         '%Y-%m-%dT%H:%M:%S', | ||||
|     ] | ||||
|     for expression in format_expressions: | ||||
|         try: | ||||
|             upload_date = datetime.datetime.strptime(date_str, expression).strftime('%Y%m%d') | ||||
|         except: | ||||
|             pass | ||||
|     if upload_date is None: | ||||
|         timetuple = email.utils.parsedate_tz(date_str) | ||||
|         if timetuple: | ||||
|             upload_date = datetime.datetime(*timetuple[:6]).strftime('%Y%m%d') | ||||
|     return upload_date | ||||
|  | ||||
| def determine_ext(url, default_ext=u'unknown_video'): | ||||
|   | ||||
		Reference in New Issue
	
	Block a user
	 Philipp Hagemeister
					Philipp Hagemeister