Move MixCloud into its own file
This commit is contained in:
		| @@ -33,6 +33,7 @@ from .extractor.googleplus import GooglePlusIE | |||||||
| from .extractor.googlesearch import GoogleSearchIE | from .extractor.googlesearch import GoogleSearchIE | ||||||
| from .extractor.infoq import InfoQIE | from .extractor.infoq import InfoQIE | ||||||
| from .extractor.metacafe import MetacafeIE | from .extractor.metacafe import MetacafeIE | ||||||
|  | from .extractor.mixcloud import MixcloudIE | ||||||
| from .extractor.mtv import MTVIE | from .extractor.mtv import MTVIE | ||||||
| from .extractor.myvideo import MyVideoIE | from .extractor.myvideo import MyVideoIE | ||||||
| from .extractor.nba import NBAIE | from .extractor.nba import NBAIE | ||||||
| @@ -49,110 +50,6 @@ from .extractor.zdf import ZDFIE | |||||||
|  |  | ||||||
|  |  | ||||||
|  |  | ||||||
| class MixcloudIE(InfoExtractor): |  | ||||||
|     """Information extractor for www.mixcloud.com""" |  | ||||||
|  |  | ||||||
|     _WORKING = False # New API, but it seems good http://www.mixcloud.com/developers/documentation/ |  | ||||||
|     _VALID_URL = r'^(?:https?://)?(?:www\.)?mixcloud\.com/([\w\d-]+)/([\w\d-]+)' |  | ||||||
|     IE_NAME = u'mixcloud' |  | ||||||
|  |  | ||||||
|     def report_download_json(self, file_id): |  | ||||||
|         """Report JSON download.""" |  | ||||||
|         self.to_screen(u'Downloading json') |  | ||||||
|  |  | ||||||
|     def get_urls(self, jsonData, fmt, bitrate='best'): |  | ||||||
|         """Get urls from 'audio_formats' section in json""" |  | ||||||
|         file_url = None |  | ||||||
|         try: |  | ||||||
|             bitrate_list = jsonData[fmt] |  | ||||||
|             if bitrate is None or bitrate == 'best' or bitrate not in bitrate_list: |  | ||||||
|                 bitrate = max(bitrate_list) # select highest |  | ||||||
|  |  | ||||||
|             url_list = jsonData[fmt][bitrate] |  | ||||||
|         except TypeError: # we have no bitrate info. |  | ||||||
|             url_list = jsonData[fmt] |  | ||||||
|         return url_list |  | ||||||
|  |  | ||||||
|     def check_urls(self, url_list): |  | ||||||
|         """Returns 1st active url from list""" |  | ||||||
|         for url in url_list: |  | ||||||
|             try: |  | ||||||
|                 compat_urllib_request.urlopen(url) |  | ||||||
|                 return url |  | ||||||
|             except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err: |  | ||||||
|                 url = None |  | ||||||
|  |  | ||||||
|         return None |  | ||||||
|  |  | ||||||
|     def _print_formats(self, formats): |  | ||||||
|         print('Available formats:') |  | ||||||
|         for fmt in formats.keys(): |  | ||||||
|             for b in formats[fmt]: |  | ||||||
|                 try: |  | ||||||
|                     ext = formats[fmt][b][0] |  | ||||||
|                     print('%s\t%s\t[%s]' % (fmt, b, ext.split('.')[-1])) |  | ||||||
|                 except TypeError: # we have no bitrate info |  | ||||||
|                     ext = formats[fmt][0] |  | ||||||
|                     print('%s\t%s\t[%s]' % (fmt, '??', ext.split('.')[-1])) |  | ||||||
|                     break |  | ||||||
|  |  | ||||||
|     def _real_extract(self, url): |  | ||||||
|         mobj = re.match(self._VALID_URL, url) |  | ||||||
|         if mobj is None: |  | ||||||
|             raise ExtractorError(u'Invalid URL: %s' % url) |  | ||||||
|         # extract uploader & filename from url |  | ||||||
|         uploader = mobj.group(1).decode('utf-8') |  | ||||||
|         file_id = uploader + "-" + mobj.group(2).decode('utf-8') |  | ||||||
|  |  | ||||||
|         # construct API request |  | ||||||
|         file_url = 'http://www.mixcloud.com/api/1/cloudcast/' + '/'.join(url.split('/')[-3:-1]) + '.json' |  | ||||||
|         # retrieve .json file with links to files |  | ||||||
|         request = compat_urllib_request.Request(file_url) |  | ||||||
|         try: |  | ||||||
|             self.report_download_json(file_url) |  | ||||||
|             jsonData = compat_urllib_request.urlopen(request).read() |  | ||||||
|         except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err: |  | ||||||
|             raise ExtractorError(u'Unable to retrieve file: %s' % compat_str(err)) |  | ||||||
|  |  | ||||||
|         # parse JSON |  | ||||||
|         json_data = json.loads(jsonData) |  | ||||||
|         player_url = json_data['player_swf_url'] |  | ||||||
|         formats = dict(json_data['audio_formats']) |  | ||||||
|  |  | ||||||
|         req_format = self._downloader.params.get('format', None) |  | ||||||
|         bitrate = None |  | ||||||
|  |  | ||||||
|         if self._downloader.params.get('listformats', None): |  | ||||||
|             self._print_formats(formats) |  | ||||||
|             return |  | ||||||
|  |  | ||||||
|         if req_format is None or req_format == 'best': |  | ||||||
|             for format_param in formats.keys(): |  | ||||||
|                 url_list = self.get_urls(formats, format_param) |  | ||||||
|                 # check urls |  | ||||||
|                 file_url = self.check_urls(url_list) |  | ||||||
|                 if file_url is not None: |  | ||||||
|                     break # got it! |  | ||||||
|         else: |  | ||||||
|             if req_format not in formats: |  | ||||||
|                 raise ExtractorError(u'Format is not available') |  | ||||||
|  |  | ||||||
|             url_list = self.get_urls(formats, req_format) |  | ||||||
|             file_url = self.check_urls(url_list) |  | ||||||
|             format_param = req_format |  | ||||||
|  |  | ||||||
|         return [{ |  | ||||||
|             'id': file_id.decode('utf-8'), |  | ||||||
|             'url': file_url.decode('utf-8'), |  | ||||||
|             'uploader': uploader.decode('utf-8'), |  | ||||||
|             'upload_date': None, |  | ||||||
|             'title': json_data['name'], |  | ||||||
|             'ext': file_url.split('.')[-1].decode('utf-8'), |  | ||||||
|             'format': (format_param is None and u'NA' or format_param.decode('utf-8')), |  | ||||||
|             'thumbnail': json_data['thumbnail_url'], |  | ||||||
|             'description': json_data['description'], |  | ||||||
|             'player_url': player_url.decode('utf-8'), |  | ||||||
|         }] |  | ||||||
|  |  | ||||||
|  |  | ||||||
|  |  | ||||||
|   | |||||||
							
								
								
									
										115
									
								
								youtube_dl/extractor/mixcloud.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										115
									
								
								youtube_dl/extractor/mixcloud.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,115 @@ | |||||||
|  | import json | ||||||
|  | import re | ||||||
|  | import socket | ||||||
|  |  | ||||||
|  | from .common import InfoExtractor | ||||||
|  | from ..utils import ( | ||||||
|  |     compat_http_client, | ||||||
|  |     compat_str, | ||||||
|  |     compat_urllib_error, | ||||||
|  |     compat_urllib_request, | ||||||
|  |  | ||||||
|  |     ExtractorError, | ||||||
|  | ) | ||||||
|  |  | ||||||
|  |  | ||||||
|  | class MixcloudIE(InfoExtractor): | ||||||
|  |     _WORKING = False # New API, but it seems good http://www.mixcloud.com/developers/documentation/ | ||||||
|  |     _VALID_URL = r'^(?:https?://)?(?:www\.)?mixcloud\.com/([\w\d-]+)/([\w\d-]+)' | ||||||
|  |     IE_NAME = u'mixcloud' | ||||||
|  |  | ||||||
|  |     def report_download_json(self, file_id): | ||||||
|  |         """Report JSON download.""" | ||||||
|  |         self.to_screen(u'Downloading json') | ||||||
|  |  | ||||||
|  |     def get_urls(self, jsonData, fmt, bitrate='best'): | ||||||
|  |         """Get urls from 'audio_formats' section in json""" | ||||||
|  |         try: | ||||||
|  |             bitrate_list = jsonData[fmt] | ||||||
|  |             if bitrate is None or bitrate == 'best' or bitrate not in bitrate_list: | ||||||
|  |                 bitrate = max(bitrate_list) # select highest | ||||||
|  |  | ||||||
|  |             url_list = jsonData[fmt][bitrate] | ||||||
|  |         except TypeError: # we have no bitrate info. | ||||||
|  |             url_list = jsonData[fmt] | ||||||
|  |         return url_list | ||||||
|  |  | ||||||
|  |     def check_urls(self, url_list): | ||||||
|  |         """Returns 1st active url from list""" | ||||||
|  |         for url in url_list: | ||||||
|  |             try: | ||||||
|  |                 compat_urllib_request.urlopen(url) | ||||||
|  |                 return url | ||||||
|  |             except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error): | ||||||
|  |                 url = None | ||||||
|  |  | ||||||
|  |         return None | ||||||
|  |  | ||||||
|  |     def _print_formats(self, formats): | ||||||
|  |         print('Available formats:') | ||||||
|  |         for fmt in formats.keys(): | ||||||
|  |             for b in formats[fmt]: | ||||||
|  |                 try: | ||||||
|  |                     ext = formats[fmt][b][0] | ||||||
|  |                     print('%s\t%s\t[%s]' % (fmt, b, ext.split('.')[-1])) | ||||||
|  |                 except TypeError: # we have no bitrate info | ||||||
|  |                     ext = formats[fmt][0] | ||||||
|  |                     print('%s\t%s\t[%s]' % (fmt, '??', ext.split('.')[-1])) | ||||||
|  |                     break | ||||||
|  |  | ||||||
|  |     def _real_extract(self, url): | ||||||
|  |         mobj = re.match(self._VALID_URL, url) | ||||||
|  |         if mobj is None: | ||||||
|  |             raise ExtractorError(u'Invalid URL: %s' % url) | ||||||
|  |         # extract uploader & filename from url | ||||||
|  |         uploader = mobj.group(1).decode('utf-8') | ||||||
|  |         file_id = uploader + "-" + mobj.group(2).decode('utf-8') | ||||||
|  |  | ||||||
|  |         # construct API request | ||||||
|  |         file_url = 'http://www.mixcloud.com/api/1/cloudcast/' + '/'.join(url.split('/')[-3:-1]) + '.json' | ||||||
|  |         # retrieve .json file with links to files | ||||||
|  |         request = compat_urllib_request.Request(file_url) | ||||||
|  |         try: | ||||||
|  |             self.report_download_json(file_url) | ||||||
|  |             jsonData = compat_urllib_request.urlopen(request).read() | ||||||
|  |         except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err: | ||||||
|  |             raise ExtractorError(u'Unable to retrieve file: %s' % compat_str(err)) | ||||||
|  |  | ||||||
|  |         # parse JSON | ||||||
|  |         json_data = json.loads(jsonData) | ||||||
|  |         player_url = json_data['player_swf_url'] | ||||||
|  |         formats = dict(json_data['audio_formats']) | ||||||
|  |  | ||||||
|  |         req_format = self._downloader.params.get('format', None) | ||||||
|  |  | ||||||
|  |         if self._downloader.params.get('listformats', None): | ||||||
|  |             self._print_formats(formats) | ||||||
|  |             return | ||||||
|  |  | ||||||
|  |         if req_format is None or req_format == 'best': | ||||||
|  |             for format_param in formats.keys(): | ||||||
|  |                 url_list = self.get_urls(formats, format_param) | ||||||
|  |                 # check urls | ||||||
|  |                 file_url = self.check_urls(url_list) | ||||||
|  |                 if file_url is not None: | ||||||
|  |                     break # got it! | ||||||
|  |         else: | ||||||
|  |             if req_format not in formats: | ||||||
|  |                 raise ExtractorError(u'Format is not available') | ||||||
|  |  | ||||||
|  |             url_list = self.get_urls(formats, req_format) | ||||||
|  |             file_url = self.check_urls(url_list) | ||||||
|  |             format_param = req_format | ||||||
|  |  | ||||||
|  |         return [{ | ||||||
|  |             'id': file_id.decode('utf-8'), | ||||||
|  |             'url': file_url.decode('utf-8'), | ||||||
|  |             'uploader': uploader.decode('utf-8'), | ||||||
|  |             'upload_date': None, | ||||||
|  |             'title': json_data['name'], | ||||||
|  |             'ext': file_url.split('.')[-1].decode('utf-8'), | ||||||
|  |             'format': (format_param is None and u'NA' or format_param.decode('utf-8')), | ||||||
|  |             'thumbnail': json_data['thumbnail_url'], | ||||||
|  |             'description': json_data['description'], | ||||||
|  |             'player_url': player_url.decode('utf-8'), | ||||||
|  |         }] | ||||||
		Reference in New Issue
	
	Block a user
	 Philipp Hagemeister
					Philipp Hagemeister