Compare commits

89 commits: 2013.09.10...2013.09.20
| SHA1 |
|---|
| 63037593c0 |
| 7a878d47fa |
| bc4b900898 |
| c5e743f66f |
| 6c36d8d6fb |
| 71c82637e7 |
| 2dad310e2c |
| d0ae9e3a8d |
| a19413c311 |
| 1ef80b55dd |
| eb03f4dad3 |
| 830dd1944a |
| 1237c9a3a5 |
| 5d13df79a5 |
| 6523223a4c |
| 4a67aafb7e |
| f3f34c5b0f |
| 6ae8ee3f54 |
| e8f8e80097 |
| 4dc0ff3ecf |
| 4b6462fc1e |
| c4ece78564 |
| 0761d02b0b |
| 71c107fc57 |
| 7459e3a290 |
| f9e66fb993 |
| 6c603ccce3 |
| ef66b0c6ef |
| 22b50ecb2f |
| 5a6fecc3de |
| cdbccafed9 |
| e69ae5b9e7 |
| 92790f4e54 |
| 471a5ee908 |
| 19e1d35989 |
| 0b7f31184d |
| fad84d50fe |
| 9a1c32dc54 |
| a921f40799 |
| 74ac9bdd82 |
| 94518f2087 |
| 535f59bbcf |
| 71cedb3c0c |
| dd01d6558a |
| ce85f022d2 |
| ad94a6fe44 |
| 353ba14060 |
| 83de794223 |
| bfd5c93af9 |
| c247d87ef3 |
| 07ac9e2cc2 |
| 6bc520c207 |
| f1d20fa39f |
| e3dc22ca3a |
| d665f8d3cb |
| 055e6f3657 |
| ac4f319ba1 |
| 542cca0e8c |
| 6a2449df3b |
| 7fad1c6328 |
| d82134c339 |
| 54d39d8b2f |
| de7f3446e0 |
| f8e52269c1 |
| cf1dd0c59e |
| 22c8b52545 |
| 1f7dc42cd0 |
| aa8f2641da |
| 648d25d43d |
| df3e61003a |
| 6b361ad5ee |
| 5d8afe69f7 |
| 72836fcee4 |
| d6e203b3dc |
| b3f0e53048 |
| 06a401c845 |
| bd2dee6c67 |
| 18b4e04f1c |
| d80a064eff |
| d55de6eec2 |
| 69df680b97 |
| 447591e1ae |
| 33eb0ce4c4 |
| 505c28aac9 |
| 8377574c9c |
| 372297e713 |
| 953e32b2c1 |
| 5898e28272 |
| 67dfbc0cb9 |
.gitignore (vendored, 8 changes)
```diff
@@ -17,4 +17,10 @@ youtube-dl.tar.gz
 .coverage
 cover/
 updates_key.pem
 *.egg-info
+*.srt
+*.sbv
+*.vtt
+*.flv
+*.mp4
+*.part
```
README.md

```diff
@@ -19,7 +19,8 @@ which means you can modify it, redistribute it or use it however you like.
     -U, --update               update this program to latest version. Make sure
                                that you have sufficient permissions (run with
                                sudo if needed)
-    -i, --ignore-errors        continue on download errors
+    -i, --ignore-errors        continue on download errors, for example to to
+                               skip unavailable videos in a playlist
     --dump-user-agent          display the current browser identification
     --user-agent UA            specify a custom user agent
     --referer REF              specify a custom referer, use if the video access
@@ -123,10 +124,8 @@ which means you can modify it, redistribute it or use it however you like.
                                only)
 
 ## Subtitle Options:
-    --write-sub                write subtitle file (currently youtube only)
-    --write-auto-sub           write automatic subtitle file (currently youtube
-                               only)
-    --only-sub                 [deprecated] alias of --skip-download
+    --write-sub                write subtitle file
+    --write-auto-sub           write automatic subtitle file (youtube only)
     --all-subs                 downloads all the available subtitles of the
                                video
     --list-subs                lists all available subtitles for the video
```
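For readers tracking the option changes, these CLI flags correspond to `YoutubeDL` params that appear in the `youtube_dl/YoutubeDL.py` and `youtube_dl/__init__.py` hunks later in this compare. A rough programmatic equivalent (the exact 2013-era constructor and call signatures are assumed from context, not confirmed by this diff):

```python
# Hedged sketch: the subtitle flags map onto these YoutubeDL params.
# Param names come from the hunks below; the constructor shape is assumed.
from youtube_dl import YoutubeDL

ydl = YoutubeDL({
    'writesubtitles': True,          # --write-sub
    'writeautomaticsub': False,      # --write-auto-sub (youtube only)
    'allsubtitles': False,           # --all-subs (now implies --write-sub)
    'subtitlesformat': 'srt',        # --sub-format [srt/sbv/vtt]
    'subtitleslangs': ['en', 'pt'],  # --sub-lang en,pt (renamed from subtitleslang)
    'skip_download': True,           # fetch subtitles without the video
})
ydl.add_default_info_extractors()
ydl.download(['http://www.dailymotion.com/video/xczg00'])
```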
devscripts/buildserver.py (new file, 405 lines)
```python
#!/usr/bin/python3

from http.server import HTTPServer, BaseHTTPRequestHandler
from socketserver import ThreadingMixIn
import argparse
import ctypes
import functools
import sys
import threading
import traceback
import os.path


class BuildHTTPServer(ThreadingMixIn, HTTPServer):
    allow_reuse_address = True


advapi32 = ctypes.windll.advapi32

SC_MANAGER_ALL_ACCESS = 0xf003f
SC_MANAGER_CREATE_SERVICE = 0x02
SERVICE_WIN32_OWN_PROCESS = 0x10
SERVICE_AUTO_START = 0x2
SERVICE_ERROR_NORMAL = 0x1
DELETE = 0x00010000
SERVICE_STATUS_START_PENDING = 0x00000002
SERVICE_STATUS_RUNNING = 0x00000004
SERVICE_ACCEPT_STOP = 0x1

SVCNAME = 'youtubedl_builder'

LPTSTR = ctypes.c_wchar_p
START_CALLBACK = ctypes.WINFUNCTYPE(None, ctypes.c_int, ctypes.POINTER(LPTSTR))


class SERVICE_TABLE_ENTRY(ctypes.Structure):
    _fields_ = [
        ('lpServiceName', LPTSTR),
        ('lpServiceProc', START_CALLBACK)
    ]


HandlerEx = ctypes.WINFUNCTYPE(
    ctypes.c_int,     # return
    ctypes.c_int,     # dwControl
    ctypes.c_int,     # dwEventType
    ctypes.c_void_p,  # lpEventData,
    ctypes.c_void_p,  # lpContext,
)


def _ctypes_array(c_type, py_array):
    ar = (c_type * len(py_array))()
    ar[:] = py_array
    return ar


def win_OpenSCManager():
    res = advapi32.OpenSCManagerW(None, None, SC_MANAGER_ALL_ACCESS)
    if not res:
        raise Exception('Opening service manager failed - '
                        'are you running this as administrator?')
    return res


def win_install_service(service_name, cmdline):
    manager = win_OpenSCManager()
    try:
        h = advapi32.CreateServiceW(
            manager, service_name, None,
            SC_MANAGER_CREATE_SERVICE, SERVICE_WIN32_OWN_PROCESS,
            SERVICE_AUTO_START, SERVICE_ERROR_NORMAL,
            cmdline, None, None, None, None, None)
        if not h:
            raise OSError('Service creation failed: %s' % ctypes.FormatError())

        advapi32.CloseServiceHandle(h)
    finally:
        advapi32.CloseServiceHandle(manager)


def win_uninstall_service(service_name):
    manager = win_OpenSCManager()
    try:
        h = advapi32.OpenServiceW(manager, service_name, DELETE)
        if not h:
            raise OSError('Could not find service %s: %s' % (
                service_name, ctypes.FormatError()))

        try:
            if not advapi32.DeleteService(h):
                raise OSError('Deletion failed: %s' % ctypes.FormatError())
        finally:
            advapi32.CloseServiceHandle(h)
    finally:
        advapi32.CloseServiceHandle(manager)


def win_service_report_event(service_name, msg, is_error=True):
    with open('C:/sshkeys/log', 'a', encoding='utf-8') as f:
        f.write(msg + '\n')

    event_log = advapi32.RegisterEventSourceW(None, service_name)
    if not event_log:
        raise OSError('Could not report event: %s' % ctypes.FormatError())

    try:
        type_id = 0x0001 if is_error else 0x0004
        event_id = 0xc0000000 if is_error else 0x40000000
        lines = _ctypes_array(LPTSTR, [msg])

        if not advapi32.ReportEventW(
                event_log, type_id, 0, event_id, None, len(lines), 0,
                lines, None):
            raise OSError('Event reporting failed: %s' % ctypes.FormatError())
    finally:
        advapi32.DeregisterEventSource(event_log)


def win_service_handler(stop_event, *args):
    try:
        raise ValueError('Handler called with args ' + repr(args))
        TODO
    except Exception as e:
        tb = traceback.format_exc()
        msg = str(e) + '\n' + tb
        win_service_report_event(service_name, msg, is_error=True)
        raise


def win_service_set_status(handle, status_code):
    svcStatus = SERVICE_STATUS()
    svcStatus.dwServiceType = SERVICE_WIN32_OWN_PROCESS
    svcStatus.dwCurrentState = status_code
    svcStatus.dwControlsAccepted = SERVICE_ACCEPT_STOP

    svcStatus.dwServiceSpecificExitCode = 0

    if not advapi32.SetServiceStatus(handle, ctypes.byref(svcStatus)):
        raise OSError('SetServiceStatus failed: %r' % ctypes.FormatError())


def win_service_main(service_name, real_main, argc, argv_raw):
    try:
        #args = [argv_raw[i].value for i in range(argc)]
        stop_event = threading.Event()
        handler = HandlerEx(functools.partial(stop_event, win_service_handler))
        h = advapi32.RegisterServiceCtrlHandlerExW(service_name, handler, None)
        if not h:
            raise OSError('Handler registration failed: %s' %
                          ctypes.FormatError())

        TODO
    except Exception as e:
        tb = traceback.format_exc()
        msg = str(e) + '\n' + tb
        win_service_report_event(service_name, msg, is_error=True)
        raise


def win_service_start(service_name, real_main):
    try:
        cb = START_CALLBACK(
            functools.partial(win_service_main, service_name, real_main))
        dispatch_table = _ctypes_array(SERVICE_TABLE_ENTRY, [
            SERVICE_TABLE_ENTRY(
                service_name,
                cb
            ),
            SERVICE_TABLE_ENTRY(None, ctypes.cast(None, START_CALLBACK))
        ])

        if not advapi32.StartServiceCtrlDispatcherW(dispatch_table):
            raise OSError('ctypes start failed: %s' % ctypes.FormatError())
    except Exception as e:
        tb = traceback.format_exc()
        msg = str(e) + '\n' + tb
        win_service_report_event(service_name, msg, is_error=True)
        raise


def main(args=None):
    parser = argparse.ArgumentParser()
    parser.add_argument('-i', '--install',
                        action='store_const', dest='action', const='install',
                        help='Launch at Windows startup')
    parser.add_argument('-u', '--uninstall',
                        action='store_const', dest='action', const='uninstall',
                        help='Remove Windows service')
    parser.add_argument('-s', '--service',
                        action='store_const', dest='action', const='service',
                        help='Run as a Windows service')
    parser.add_argument('-b', '--bind', metavar='<host:port>',
                        action='store', default='localhost:8142',
                        help='Bind to host:port (default %default)')
    options = parser.parse_args(args=args)

    if options.action == 'install':
        fn = os.path.abspath(__file__).replace('v:', '\\\\vboxsrv\\vbox')
        cmdline = '%s %s -s -b %s' % (sys.executable, fn, options.bind)
        win_install_service(SVCNAME, cmdline)
        return

    if options.action == 'uninstall':
        win_uninstall_service(SVCNAME)
        return

    if options.action == 'service':
        win_service_start(SVCNAME, main)
        return

    host, port_str = options.bind.split(':')
    port = int(port_str)

    print('Listening on %s:%d' % (host, port))
    srv = BuildHTTPServer((host, port), BuildHTTPRequestHandler)
    thr = threading.Thread(target=srv.serve_forever)
    thr.start()
    input('Press ENTER to shut down')
    srv.shutdown()
    thr.join()


def rmtree(path):
    for name in os.listdir(path):
        fname = os.path.join(path, name)
        if os.path.isdir(fname):
            rmtree(fname)
        else:
            os.chmod(fname, 0o666)
            os.remove(fname)
    os.rmdir(path)

#==============================================================================

class BuildError(Exception):
    def __init__(self, output, code=500):
        self.output = output
        self.code = code

    def __str__(self):
        return self.output


class HTTPError(BuildError):
    pass


class PythonBuilder(object):
    def __init__(self, **kwargs):
        pythonVersion = kwargs.pop('python', '2.7')
        try:
            key = _winreg.OpenKey(_winreg.HKEY_LOCAL_MACHINE, r'SOFTWARE\Python\PythonCore\%s\InstallPath' % pythonVersion)
            try:
                self.pythonPath, _ = _winreg.QueryValueEx(key, '')
            finally:
                _winreg.CloseKey(key)
        except Exception:
            raise BuildError('No such Python version: %s' % pythonVersion)

        super(PythonBuilder, self).__init__(**kwargs)


class GITInfoBuilder(object):
    def __init__(self, **kwargs):
        try:
            self.user, self.repoName = kwargs['path'][:2]
            self.rev = kwargs.pop('rev')
        except ValueError:
            raise BuildError('Invalid path')
        except KeyError as e:
            raise BuildError('Missing mandatory parameter "%s"' % e.args[0])

        path = os.path.join(os.environ['APPDATA'], 'Build archive', self.repoName, self.user)
        if not os.path.exists(path):
            os.makedirs(path)
        self.basePath = tempfile.mkdtemp(dir=path)
        self.buildPath = os.path.join(self.basePath, 'build')

        super(GITInfoBuilder, self).__init__(**kwargs)


class GITBuilder(GITInfoBuilder):
    def build(self):
        try:
            subprocess.check_output(['git', 'clone', 'git://github.com/%s/%s.git' % (self.user, self.repoName), self.buildPath])
            subprocess.check_output(['git', 'checkout', self.rev], cwd=self.buildPath)
        except subprocess.CalledProcessError as e:
            raise BuildError(e.output)

        super(GITBuilder, self).build()


class YoutubeDLBuilder(object):
    authorizedUsers = ['fraca7', 'phihag', 'rg3', 'FiloSottile']

    def __init__(self, **kwargs):
        if self.repoName != 'youtube-dl':
            raise BuildError('Invalid repository "%s"' % self.repoName)
        if self.user not in self.authorizedUsers:
            raise HTTPError('Unauthorized user "%s"' % self.user, 401)

        super(YoutubeDLBuilder, self).__init__(**kwargs)

    def build(self):
        try:
            subprocess.check_output([os.path.join(self.pythonPath, 'python.exe'), 'setup.py', 'py2exe'],
                                    cwd=self.buildPath)
        except subprocess.CalledProcessError as e:
            raise BuildError(e.output)

        super(YoutubeDLBuilder, self).build()


class DownloadBuilder(object):
    def __init__(self, **kwargs):
        self.handler = kwargs.pop('handler')
        self.srcPath = os.path.join(self.buildPath, *tuple(kwargs['path'][2:]))
        self.srcPath = os.path.abspath(os.path.normpath(self.srcPath))
        if not self.srcPath.startswith(self.buildPath):
            raise HTTPError(self.srcPath, 401)

        super(DownloadBuilder, self).__init__(**kwargs)

    def build(self):
        if not os.path.exists(self.srcPath):
            raise HTTPError('No such file', 404)
        if os.path.isdir(self.srcPath):
            raise HTTPError('Is a directory: %s' % self.srcPath, 401)

        self.handler.send_response(200)
        self.handler.send_header('Content-Type', 'application/octet-stream')
        self.handler.send_header('Content-Disposition', 'attachment; filename=%s' % os.path.split(self.srcPath)[-1])
        self.handler.send_header('Content-Length', str(os.stat(self.srcPath).st_size))
        self.handler.end_headers()

        with open(self.srcPath, 'rb') as src:
            shutil.copyfileobj(src, self.handler.wfile)

        super(DownloadBuilder, self).build()


class CleanupTempDir(object):
    def build(self):
        try:
            rmtree(self.basePath)
        except Exception as e:
            print('WARNING deleting "%s": %s' % (self.basePath, e))

        super(CleanupTempDir, self).build()


class Null(object):
    def __init__(self, **kwargs):
        pass

    def start(self):
        pass

    def close(self):
        pass

    def build(self):
        pass


class Builder(PythonBuilder, GITBuilder, YoutubeDLBuilder, DownloadBuilder, CleanupTempDir, Null):
    pass


class BuildHTTPRequestHandler(BaseHTTPRequestHandler):
    actionDict = { 'build': Builder, 'download': Builder } # They're the same, no more caching.

    def do_GET(self):
        path = urlparse.urlparse(self.path)
        paramDict = dict([(key, value[0]) for key, value in urlparse.parse_qs(path.query).items()])
        action, _, path = path.path.strip('/').partition('/')
        if path:
            path = path.split('/')
            if action in self.actionDict:
                try:
                    builder = self.actionDict[action](path=path, handler=self, **paramDict)
                    builder.start()
                    try:
                        builder.build()
                    finally:
                        builder.close()
                except BuildError as e:
                    self.send_response(e.code)
                    msg = unicode(e).encode('UTF-8')
                    self.send_header('Content-Type', 'text/plain; charset=UTF-8')
                    self.send_header('Content-Length', len(msg))
                    self.end_headers()
                    self.wfile.write(msg)
                except HTTPError as e:
                    self.send_response(e.code, str(e))
            else:
                self.send_response(500, 'Unknown build method "%s"' % action)
        else:
            self.send_response(500, 'Malformed URL')

#==============================================================================

if __name__ == '__main__':
    main()
```
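A client's view of the server above: `do_GET` routes `/build/<user>/<repo>/<file>?rev=...` through the `Builder` chain and streams the resulting binary back, which is exactly how `release.sh` fetches `youtube-dl.exe` further down. A minimal fetch sketch (host, port, and `rev` value are placeholders):

```python
# Minimal client sketch for the build server above, mirroring the URL
# release.sh uses below. localhost:8142 is the server's default bind.
try:
    from urllib.request import urlopen  # Python 3
except ImportError:
    from urllib2 import urlopen         # Python 2

rev = 'HEAD'  # placeholder; release.sh passes $(git rev-parse HEAD)
url = 'http://localhost:8142/build/rg3/youtube-dl/youtube-dl.exe?rev=' + rev
with open('youtube-dl.exe', 'wb') as f:
    f.write(urlopen(url).read())
```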
```diff
@@ -14,7 +14,7 @@ def main():
         template = tmplf.read()
 
     ie_htmls = []
-    for ie in sorted(youtube_dl.gen_extractors(), key=lambda i: i.IE_NAME):
+    for ie in sorted(youtube_dl.gen_extractors(), key=lambda i: i.IE_NAME.lower()):
         ie_html = '<b>{}</b>'.format(ie.IE_NAME)
        try:
            ie_html += ': {}'.format(ie.IE_DESC)
```
devscripts/release.sh

```diff
@@ -55,8 +55,8 @@ git push origin "$version"
 /bin/echo -e "\n### OK, now it is time to build the binaries..."
 REV=$(git rev-parse HEAD)
 make youtube-dl youtube-dl.tar.gz
-wget "http://jeromelaheurte.net:8142/download/rg3/youtube-dl/youtube-dl.exe?rev=$REV" -O youtube-dl.exe || \
-	wget "http://jeromelaheurte.net:8142/build/rg3/youtube-dl/youtube-dl.exe?rev=$REV" -O youtube-dl.exe
+read -p "VM running? (y/n) " -n 1
+wget "http://localhost:8142/build/rg3/youtube-dl/youtube-dl.exe?rev=$REV" -O youtube-dl.exe
 mkdir -p "build/$version"
 mv youtube-dl youtube-dl.exe "build/$version"
 mv youtube-dl.tar.gz "build/$version/youtube-dl-$version.tar.gz"
```
devscripts/youtube_genalgo.py

```diff
@@ -1,10 +1,14 @@
 #!/usr/bin/env python
+# encoding: utf-8
 
 # Generate youtube signature algorithm from test cases
 
 import sys
 
 tests = [
+    # 93 - vfl79wBKW 2013/07/20
+    (u"qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[]}|:;?/>.<'`~\"€",
+     u".>/?;:|}][{=+-_)(*&^%$#@!MNBVCXZASDFGHJKLPOIUYTREWQ098765'321mnbvcxzasdfghjklpoiu"),
     # 92 - vflQw-fB4 2013/07/17
     ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[]}|:;?/>.<'`~\"",
      "mrtyuioplkjhgfdsazxcvbnq1234567890QWERTY}IOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[]\"|:;"),
@@ -23,18 +27,18 @@ tests = [
     # 86 - vfluy6kdb 2013/09/06
     ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[|};?/>.<",
      "yuioplkjhgfdsazxcvbnm12345678q0QWrRTYUIOELKJHGFD-AZXCVBNM!@#$%^&*()_<+={[|};?/>.S"),
-    # 85
-    ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[};?/>.<",
-     ".>/?;}[{=+-_)(*&^%$#@!MNBVCXZASDFGHJKLPOIUYTREWQ0q876543r1mnbvcx9asdfghjklpoiuyt2"),
+    # 85 - vflkuzxcs 2013/09/11
+    ('0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ!"#$%&\'()*+,-./:;<=>?@[',
+     '3456789a0cdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRS[UVWXYZ!"#$%&\'()*+,-./:;<=>?@'),
     # 84 - vflg0g8PQ 2013/08/29 (sporadic)
     ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[};?>.<",
      ">?;}[{=+-_)(*&^%$#@!MNBVCXZASDFGHJKLPOIUYTREWq0987654321mnbvcxzasdfghjklpoiuytr"),
     # 83
     ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!#$%^&*()_+={[};?/>.<",
      ".>/?;}[{=+_)(*&^%<#!MNBVCXZASPFGHJKLwOIUYTREWQ0987654321mnbvcxzasdfghjklpoiuytreq"),
-    # 82 - vflZK4ZYR 2013/08/23
+    # 82 - vflGNjMhJ 2013/09/12
     ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKHGFDSAZXCVBNM!@#$%^&*(-+={[};?/>.<",
-     "wertyuioplkjhgfdsaqxcvbnm1234567890QWERTYUIOPLKHGFDSAZXCVBNM!@#$%^&z(-+={[};?/>.<"),
+     ".>/?;}[<=+-(*&^%$#@!MNBVCXeASDFGHKLPOqUYTREWQ0987654321mnbvcxzasdfghjklpoiuytrIwZ"),
     # 81 - vflLC8JvQ 2013/07/25
     ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKHGFDSAZXCVBNM!@#$%^&*(-+={[};?/>.",
      "C>/?;}[{=+-(*&^%$#@!MNBVYXZASDFGHKLPOIU.TREWQ0q87659321mnbvcxzasdfghjkl4oiuytrewp"),
```
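Each test pair above fully determines a character permutation, which is how a candidate signature function can be validated. A small sketch using the 81-character pair at the end of the hunk (copied verbatim from the test list):

```python
# Sketch: derive the index permutation a (plain, scrambled) test pair
# encodes, then check a candidate signature function against it.
plain = "qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKHGFDSAZXCVBNM!@#$%^&*(-+={[};?/>."
scrambled = "C>/?;}[{=+-(*&^%$#@!MNBVYXZASDFGHKLPOIU.TREWQ0q87659321mnbvcxzasdfghjkl4oiuytrewp"

# Output position i takes the character at plain.index(scrambled[i]).
mapping = [plain.index(c) for c in scrambled]
apply_sig = lambda sig: ''.join(sig[i] for i in mapping)
assert apply_sig(plain) == scrambled
```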
test/parameters.json

```diff
@@ -38,7 +38,6 @@
     "writedescription": false, 
     "writeinfojson": true, 
     "writesubtitles": false,
-    "onlysubtitles": false,
     "allsubtitles": false,
     "listssubtitles": false
 }
```
test/test_all_urls.py

```diff
@@ -36,6 +36,7 @@ class TestAllURLsMatching(unittest.TestCase):
         self.assertFalse(YoutubeIE.suitable(u'https://www.youtube.com/watch?v=AV6J6_AeFEQ&playnext=1&list=PL4023E734DA416012')) #668
         self.assertMatch('http://youtu.be/BaW_jenozKc', ['youtube'])
         self.assertMatch('http://www.youtube.com/v/BaW_jenozKc', ['youtube'])
+        self.assertMatch('https://youtube.googleapis.com/v/BaW_jenozKc', ['youtube'])
 
     def test_youtube_channel_matching(self):
         assertChannel = lambda url: self.assertMatch(url, ['youtube:channel'])
```
test/test_dailymotion_subtitles.py (new file, 71 lines)
```python
#!/usr/bin/env python

import sys
import unittest
import json
import io
import hashlib

# Allow direct execution
import os
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

from youtube_dl.extractor import DailymotionIE
from youtube_dl.utils import *
from helper import FakeYDL

md5 = lambda s: hashlib.md5(s.encode('utf-8')).hexdigest()

class TestDailymotionSubtitles(unittest.TestCase):
    def setUp(self):
        self.DL = FakeYDL()
        self.url = 'http://www.dailymotion.com/video/xczg00'
    def getInfoDict(self):
        IE = DailymotionIE(self.DL)
        info_dict = IE.extract(self.url)
        return info_dict
    def getSubtitles(self):
        info_dict = self.getInfoDict()
        return info_dict[0]['subtitles']
    def test_no_writesubtitles(self):
        subtitles = self.getSubtitles()
        self.assertEqual(subtitles, None)
    def test_subtitles(self):
        self.DL.params['writesubtitles'] = True
        subtitles = self.getSubtitles()
        self.assertEqual(md5(subtitles['en']), '976553874490cba125086bbfea3ff76f')
    def test_subtitles_lang(self):
        self.DL.params['writesubtitles'] = True
        self.DL.params['subtitleslangs'] = ['fr']
        subtitles = self.getSubtitles()
        self.assertEqual(md5(subtitles['fr']), '594564ec7d588942e384e920e5341792')
    def test_allsubtitles(self):
        self.DL.params['writesubtitles'] = True
        self.DL.params['allsubtitles'] = True
        subtitles = self.getSubtitles()
        self.assertEqual(len(subtitles.keys()), 5)
    def test_list_subtitles(self):
        self.DL.params['listsubtitles'] = True
        info_dict = self.getInfoDict()
        self.assertEqual(info_dict, None)
    def test_automatic_captions(self):
        self.DL.params['writeautomaticsub'] = True
        self.DL.params['subtitleslang'] = ['en']
        subtitles = self.getSubtitles()
        self.assertTrue(len(subtitles.keys()) == 0)
    def test_nosubtitles(self):
        self.url = 'http://www.dailymotion.com/video/x12u166_le-zapping-tele-star-du-08-aout-2013_tv'
        self.DL.params['writesubtitles'] = True
        self.DL.params['allsubtitles'] = True
        subtitles = self.getSubtitles()
        self.assertEqual(len(subtitles), 0)
    def test_multiple_langs(self):
        self.DL.params['writesubtitles'] = True
        langs = ['es', 'fr', 'de']
        self.DL.params['subtitleslangs'] = langs
        subtitles = self.getSubtitles()
        for lang in langs:
            self.assertTrue(subtitles.get(lang) is not None, u'Subtitles for \'%s\' not extracted' % lang)

if __name__ == '__main__':
    unittest.main()
```
test/test_playlists.py

```diff
@@ -8,7 +8,7 @@ import json
 import os
 sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
 
-from youtube_dl.extractor import DailymotionPlaylistIE, VimeoChannelIE
+from youtube_dl.extractor import DailymotionPlaylistIE, VimeoChannelIE, UstreamChannelIE, SoundcloudUserIE
 from youtube_dl.utils import *
 
 from helper import FakeYDL
@@ -34,5 +34,21 @@ class TestPlaylists(unittest.TestCase):
         self.assertEqual(result['title'], u'Vimeo Tributes')
         self.assertTrue(len(result['entries']) > 24)
 
+    def test_ustream_channel(self):
+        dl = FakeYDL()
+        ie = UstreamChannelIE(dl)
+        result = ie.extract('http://www.ustream.tv/channel/young-americans-for-liberty')
+        self.assertIsPlaylist(result)
+        self.assertEqual(result['id'], u'5124905')
+        self.assertTrue(len(result['entries']) >= 11)
+
+    def test_soundcloud_user(self):
+        dl = FakeYDL()
+        ie = SoundcloudUserIE(dl)
+        result = ie.extract('https://soundcloud.com/the-concept-band')
+        self.assertIsPlaylist(result)
+        self.assertEqual(result['id'], u'9615865')
+        self.assertTrue(len(result['entries']) >= 12)
+
 if __name__ == '__main__':
     unittest.main()
```
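The two new tests assert on the standard playlist result shape (assertIsPlaylist presumably checks the `_type` field). For reference, a minimal result of that shape, with placeholder values:

```python
# Minimal playlist-shaped result as the new assertions expect: a dict
# with _type 'playlist', an id, and a list of entries. All values here
# are placeholders, not real extractor output.
result = {
    '_type': 'playlist',
    'id': u'5124905',
    'title': u'placeholder title',
    'entries': [{'_type': 'url', 'url': 'http://www.ustream.tv/recorded/0'}],
}
assert result['_type'] == 'playlist'
assert len(result['entries']) >= 1
```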
test/test_utils.py

```diff
@@ -11,13 +11,16 @@ import os
 sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
 
 #from youtube_dl.utils import htmlentity_transform
-from youtube_dl.utils import timeconvert
-from youtube_dl.utils import sanitize_filename
-from youtube_dl.utils import unescapeHTML
-from youtube_dl.utils import orderedSet
-from youtube_dl.utils import DateRange
-from youtube_dl.utils import unified_strdate
-from youtube_dl.utils import find_xpath_attr
+from youtube_dl.utils import (
+    timeconvert,
+    sanitize_filename,
+    unescapeHTML,
+    orderedSet,
+    DateRange,
+    unified_strdate,
+    find_xpath_attr,
+    get_meta_content,
+)
 
 if sys.version_info < (3, 0):
     _compat_str = lambda b: b.decode('unicode-escape')
@@ -127,5 +130,16 @@ class TestUtil(unittest.TestCase):
         self.assertEqual(find_xpath_attr(doc, './/node', 'x', 'a'), doc[1])
         self.assertEqual(find_xpath_attr(doc, './/node', 'y', 'c'), doc[2])
 
+    def test_meta_parser(self):
+        testhtml = u'''
+        <head>
+            <meta name="description" content="foo & bar">
+            <meta content='Plato' name='author'/>
+        </head>
+        '''
+        get_meta = lambda name: get_meta_content(name, testhtml)
+        self.assertEqual(get_meta('description'), u'foo & bar')
+        self.assertEqual(get_meta('author'), 'Plato')
+
 if __name__ == '__main__':
     unittest.main()
```
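The new test pins down the contract of `get_meta_content(name, html)`, which the hunk adds to the import list. A hypothetical stand-in (not youtube_dl's actual implementation) that satisfies this test, accepting either attribute order:

```python
import re

def get_meta_content(name, html):
    # Hypothetical stand-in, NOT the real youtube_dl.utils code: scan
    # every <meta> tag and accept name/content in either order.
    for m in re.finditer(r'<meta[^>]*>', html):
        tag = m.group(0)
        name_m = re.search(r'''name=["']([^"']+)["']''', tag)
        content_m = re.search(r'''content=["']([^"']*)["']''', tag)
        if name_m and content_m and name_m.group(1) == name:
            return content_m.group(1)
    return None
```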
test/test_youtube_subtitles.py

```diff
@@ -18,85 +18,65 @@ md5 = lambda s: hashlib.md5(s.encode('utf-8')).hexdigest()
 
 class TestYoutubeSubtitles(unittest.TestCase):
     def setUp(self):
-        DL = FakeYDL()
-        DL.params['allsubtitles'] = False
-        DL.params['writesubtitles'] = False
-        DL.params['subtitlesformat'] = 'srt'
-        DL.params['listsubtitles'] = False
-    def test_youtube_no_subtitles(self):
-        DL = FakeYDL()
-        DL.params['writesubtitles'] = False
-        IE = YoutubeIE(DL)
-        info_dict = IE.extract('QRS8MkLhQmM')
-        subtitles = info_dict[0]['subtitles']
+        self.DL = FakeYDL()
+        self.url = 'QRS8MkLhQmM'
+    def getInfoDict(self):
+        IE = YoutubeIE(self.DL)
+        info_dict = IE.extract(self.url)
+        return info_dict
+    def getSubtitles(self):
+        info_dict = self.getInfoDict()
+        return info_dict[0]['subtitles']
+    def test_youtube_no_writesubtitles(self):
+        self.DL.params['writesubtitles'] = False
+        subtitles = self.getSubtitles()
         self.assertEqual(subtitles, None)
     def test_youtube_subtitles(self):
-        DL = FakeYDL()
-        DL.params['writesubtitles'] = True
-        IE = YoutubeIE(DL)
-        info_dict = IE.extract('QRS8MkLhQmM')
-        sub = info_dict[0]['subtitles']['en']
-        self.assertEqual(md5(sub), '4cd9278a35ba2305f47354ee13472260')
-    def test_youtube_subtitles_it(self):
-        DL = FakeYDL()
-        DL.params['writesubtitles'] = True
-        DL.params['subtitleslangs'] = ['it']
-        IE = YoutubeIE(DL)
-        info_dict = IE.extract('QRS8MkLhQmM')
-        sub = info_dict[0]['subtitles']['it']
-        self.assertEqual(md5(sub), '164a51f16f260476a05b50fe4c2f161d')
-    def test_youtube_onlysubtitles(self):
-        DL = FakeYDL()
-        DL.params['writesubtitles'] = True
-        DL.params['onlysubtitles'] = True
-        IE = YoutubeIE(DL)
-        info_dict = IE.extract('QRS8MkLhQmM')
-        sub = info_dict[0]['subtitles']['en']
-        self.assertEqual(md5(sub), '4cd9278a35ba2305f47354ee13472260')
+        self.DL.params['writesubtitles'] = True
+        subtitles = self.getSubtitles()
+        self.assertEqual(md5(subtitles['en']), '4cd9278a35ba2305f47354ee13472260')
+    def test_youtube_subtitles_lang(self):
+        self.DL.params['writesubtitles'] = True
+        self.DL.params['subtitleslangs'] = ['it']
+        subtitles = self.getSubtitles()
+        self.assertEqual(md5(subtitles['it']), '164a51f16f260476a05b50fe4c2f161d')
     def test_youtube_allsubtitles(self):
-        DL = FakeYDL()
-        DL.params['allsubtitles'] = True
-        IE = YoutubeIE(DL)
-        info_dict = IE.extract('QRS8MkLhQmM')
-        subtitles = info_dict[0]['subtitles']
+        self.DL.params['writesubtitles'] = True
+        self.DL.params['allsubtitles'] = True
+        subtitles = self.getSubtitles()
         self.assertEqual(len(subtitles.keys()), 13)
     def test_youtube_subtitles_sbv_format(self):
-        DL = FakeYDL()
-        DL.params['writesubtitles'] = True
-        DL.params['subtitlesformat'] = 'sbv'
-        IE = YoutubeIE(DL)
-        info_dict = IE.extract('QRS8MkLhQmM')
-        sub = info_dict[0]['subtitles']['en']
-        self.assertEqual(md5(sub), '13aeaa0c245a8bed9a451cb643e3ad8b')
+        self.DL.params['writesubtitles'] = True
+        self.DL.params['subtitlesformat'] = 'sbv'
+        subtitles = self.getSubtitles()
+        self.assertEqual(md5(subtitles['en']), '13aeaa0c245a8bed9a451cb643e3ad8b')
     def test_youtube_subtitles_vtt_format(self):
-        DL = FakeYDL()
-        DL.params['writesubtitles'] = True
-        DL.params['subtitlesformat'] = 'vtt'
-        IE = YoutubeIE(DL)
-        info_dict = IE.extract('QRS8MkLhQmM')
-        sub = info_dict[0]['subtitles']['en']
-        self.assertEqual(md5(sub), '356cdc577fde0c6783b9b822e7206ff7')
+        self.DL.params['writesubtitles'] = True
+        self.DL.params['subtitlesformat'] = 'vtt'
+        subtitles = self.getSubtitles()
+        self.assertEqual(md5(subtitles['en']), '356cdc577fde0c6783b9b822e7206ff7')
     def test_youtube_list_subtitles(self):
-        DL = FakeYDL()
-        DL.params['listsubtitles'] = True
-        IE = YoutubeIE(DL)
-        info_dict = IE.extract('QRS8MkLhQmM')
+        self.DL.params['listsubtitles'] = True
+        info_dict = self.getInfoDict()
         self.assertEqual(info_dict, None)
     def test_youtube_automatic_captions(self):
-        DL = FakeYDL()
-        DL.params['writeautomaticsub'] = True
-        DL.params['subtitleslangs'] = ['it']
-        IE = YoutubeIE(DL)
-        info_dict = IE.extract('8YoUxe5ncPo')
-        sub = info_dict[0]['subtitles']['it']
-        self.assertTrue(sub is not None)
+        self.url = '8YoUxe5ncPo'
+        self.DL.params['writeautomaticsub'] = True
+        self.DL.params['subtitleslangs'] = ['it']
+        subtitles = self.getSubtitles()
+        self.assertTrue(subtitles['it'] is not None)
+    def test_youtube_nosubtitles(self):
+        self.url = 'sAjKT8FhjI8'
+        self.DL.params['writesubtitles'] = True
+        self.DL.params['allsubtitles'] = True
+        subtitles = self.getSubtitles()
+        self.assertEqual(len(subtitles), 0)
     def test_youtube_multiple_langs(self):
-        DL = FakeYDL()
-        DL.params['writesubtitles'] = True
+        self.url = 'QRS8MkLhQmM'
+        self.DL.params['writesubtitles'] = True
         langs = ['it', 'fr', 'de']
-        DL.params['subtitleslangs'] = langs
-        IE = YoutubeIE(DL)
-        subtitles = IE.extract('QRS8MkLhQmM')[0]['subtitles']
+        self.DL.params['subtitleslangs'] = langs
+        subtitles = self.getSubtitles()
         for lang in langs:
             self.assertTrue(subtitles.get(lang) is not None, u'Subtitles for \'%s\' not extracted' % lang)
 
```
youtube_dl/YoutubeDL.py

```diff
@@ -74,6 +74,7 @@ class YoutubeDL(object):
     writesubtitles:    Write the video subtitles to a file
     writeautomaticsub: Write the automatic subtitles to a file
     allsubtitles:      Downloads all the subtitles of the video
+                       (requires writesubtitles or writeautomaticsub)
     listsubtitles:     Lists all available subtitles for the video
     subtitlesformat:   Subtitle format [srt/sbv/vtt] (default=srt)
     subtitleslangs:    List of languages of the subtitles to download
@@ -141,14 +142,10 @@ class YoutubeDL(object):
 
     def to_screen(self, message, skip_eol=False):
         """Print message to stdout if not in quiet mode."""
-        assert type(message) == type(u'')
         if not self.params.get('quiet', False):
             terminator = [u'\n', u''][skip_eol]
             output = message + terminator
-            if 'b' in getattr(self._screen_file, 'mode', '') or sys.version_info[0] < 3: # Python 2 lies about the mode of sys.stdout/sys.stderr
-                output = output.encode(preferredencoding(), 'ignore')
-            self._screen_file.write(output)
-            self._screen_file.flush()
+            write_string(output, self._screen_file)
 
     def to_stderr(self, message):
         """Print message to stderr."""
@@ -492,13 +489,14 @@ class YoutubeDL(object):
                 self.report_writedescription(descfn)
                 with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
                     descfile.write(info_dict['description'])
+            except (KeyError, TypeError):
+                self.report_warning(u'There\'s no description to write.')
             except (OSError, IOError):
                 self.report_error(u'Cannot write description file ' + descfn)
                 return
 
         subtitles_are_requested = any([self.params.get('writesubtitles', False),
-                                       self.params.get('writeautomaticsub'),
-                                       self.params.get('allsubtitles', False)])
+                                       self.params.get('writeautomaticsub')])
 
         if  subtitles_are_requested and 'subtitles' in info_dict and info_dict['subtitles']:
             # subtitles download errors are already managed as troubles in relevant IE
```
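to_screen's inline encoding dance is folded into a `write_string` helper here; judging from the removed lines, the helper must encode for byte streams on Python 2. A plausible reconstruction (an assumption pieced together from the removed logic; the real helper lives in youtube_dl/utils.py):

```python
import sys
from youtube_dl.utils import preferredencoding  # referenced in the removed lines

def write_string(s, out=None):
    # Assumed shape of the new helper, mirroring the encoding logic the
    # diff removes from to_screen above; not confirmed by this compare.
    assert type(s) == type(u'')
    if out is None:
        out = sys.stderr
    # Python 2 lies about the mode of sys.stdout/sys.stderr
    if 'b' in getattr(out, 'mode', '') or sys.version_info[0] < 3:
        s = s.encode(preferredencoding(), 'ignore')
    out.write(s)
    out.flush()
```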
youtube_dl/__init__.py

```diff
@@ -29,6 +29,8 @@ __authors__  = (
     'Albert Kim',
     'Pierre Rudloff',
     'Huarong Huo',
+    'Ismael Mejía',
+    'Steffan \'Ruirize\' James',
 )
 
 __license__ = 'Public Domain'
@@ -148,7 +150,7 @@ def parseOpts(overrideArguments=None):
     general.add_option('-U', '--update',
             action='store_true', dest='update_self', help='update this program to latest version. Make sure that you have sufficient permissions (run with sudo if needed)')
     general.add_option('-i', '--ignore-errors',
-            action='store_true', dest='ignoreerrors', help='continue on download errors', default=False)
+            action='store_true', dest='ignoreerrors', help='continue on download errors, for example to to skip unavailable videos in a playlist', default=False)
     general.add_option('--dump-user-agent',
             action='store_true', dest='dump_user_agent',
             help='display the current browser identification', default=False)
@@ -205,13 +207,10 @@ def parseOpts(overrideArguments=None):
 
     subtitles.add_option('--write-sub', '--write-srt',
             action='store_true', dest='writesubtitles',
-            help='write subtitle file (currently youtube only)', default=False)
+            help='write subtitle file', default=False)
     subtitles.add_option('--write-auto-sub', '--write-automatic-sub',
             action='store_true', dest='writeautomaticsub',
-            help='write automatic subtitle file (currently youtube only)', default=False)
-    subtitles.add_option('--only-sub',
-            action='store_true', dest='skip_download',
-            help='[deprecated] alias of --skip-download', default=False)
+            help='write automatic subtitle file (youtube only)', default=False)
     subtitles.add_option('--all-subs',
             action='store_true', dest='allsubtitles',
             help='downloads all the available subtitles of the video', default=False)
@@ -222,7 +221,7 @@ def parseOpts(overrideArguments=None):
             action='store', dest='subtitlesformat', metavar='FORMAT',
             help='subtitle format (default=srt) ([sbv/vtt] youtube only)', default='srt')
     subtitles.add_option('--sub-lang', '--sub-langs', '--srt-lang',
-            action='callback', dest='subtitleslang', metavar='LANGS', type='str',
+            action='callback', dest='subtitleslangs', metavar='LANGS', type='str',
             default=[], callback=_comma_separated_values_options_callback,
             help='languages of the subtitles to download (optional) separated by commas, use IETF language tags like \'en,pt\'')
 
@@ -356,7 +355,7 @@ def parseOpts(overrideArguments=None):
     if overrideArguments is not None:
         opts, args = parser.parse_args(overrideArguments)
         if opts.verbose:
-            sys.stderr.write(u'[debug] Override config: ' + repr(overrideArguments) + '\n')
+            write_string(u'[debug] Override config: ' + repr(overrideArguments) + '\n')
     else:
         xdg_config_home = os.environ.get('XDG_CONFIG_HOME')
         if xdg_config_home:
@@ -369,9 +368,9 @@ def parseOpts(overrideArguments=None):
         argv = systemConf + userConf + commandLineConf
         opts, args = parser.parse_args(argv)
         if opts.verbose:
-            sys.stderr.write(u'[debug] System config: ' + repr(_hide_login_info(systemConf)) + '\n')
-            sys.stderr.write(u'[debug] User config: ' + repr(_hide_login_info(userConf)) + '\n')
-            sys.stderr.write(u'[debug] Command-line args: ' + repr(_hide_login_info(commandLineConf)) + '\n')
+            write_string(u'[debug] System config: ' + repr(_hide_login_info(systemConf)) + '\n')
+            write_string(u'[debug] User config: ' + repr(_hide_login_info(userConf)) + '\n')
+            write_string(u'[debug] Command-line args: ' + repr(_hide_login_info(commandLineConf)) + '\n')
 
     return parser, opts, args
 
@@ -394,7 +393,7 @@ def _real_main(argv=None):
         except (IOError, OSError) as err:
             if opts.verbose:
                 traceback.print_exc()
-            sys.stderr.write(u'ERROR: unable to open cookie file\n')
+            write_string(u'ERROR: unable to open cookie file\n')
             sys.exit(101)
     # Set user agent
     if opts.user_agent is not None:
@@ -421,7 +420,7 @@ def _real_main(argv=None):
             batchurls = [x.strip() for x in batchurls]
             batchurls = [x for x in batchurls if len(x) > 0 and not re.search(r'^[#/;]', x)]
             if opts.verbose:
-                sys.stderr.write(u'[debug] Batch file urls: ' + repr(batchurls) + u'\n')
+                write_string(u'[debug] Batch file urls: ' + repr(batchurls) + u'\n')
         except IOError:
             sys.exit(u'ERROR: batch file could not be read')
     all_urls = batchurls + args
@@ -535,6 +534,11 @@ def _real_main(argv=None):
     else:
         date = DateRange(opts.dateafter, opts.datebefore)
 
+    # --all-sub automatically sets --write-sub if --write-auto-sub is not given
+    # this was the old behaviour if only --all-sub was given.
+    if opts.allsubtitles and (opts.writeautomaticsub == False):
+        opts.writesubtitles = True
+
     if sys.version_info < (3,):
         # In Python 2, sys.argv is a bytestring (also note http://bugs.python.org/issue2128 for Windows systems)
         if opts.outtmpl is not None:
@@ -593,7 +597,7 @@ def _real_main(argv=None):
         'allsubtitles': opts.allsubtitles,
         'listsubtitles': opts.listsubtitles,
         'subtitlesformat': opts.subtitlesformat,
-        'subtitleslangs': opts.subtitleslang,
+        'subtitleslangs': opts.subtitleslangs,
         'matchtitle': decodeOption(opts.matchtitle),
         'rejecttitle': decodeOption(opts.rejecttitle),
         'max_downloads': opts.max_downloads,
@@ -608,7 +612,7 @@ def _real_main(argv=None):
         })
 
     if opts.verbose:
-        sys.stderr.write(u'[debug] youtube-dl version ' + __version__ + u'\n')
+        write_string(u'[debug] youtube-dl version ' + __version__ + u'\n')
         try:
             sp = subprocess.Popen(
                 ['git', 'rev-parse', '--short', 'HEAD'],
@@ -617,14 +621,14 @@ def _real_main(argv=None):
             out, err = sp.communicate()
             out = out.decode().strip()
             if re.match('[0-9a-f]+', out):
-                sys.stderr.write(u'[debug] Git HEAD: ' + out + u'\n')
+                write_string(u'[debug] Git HEAD: ' + out + u'\n')
         except:
            try:
                sys.exc_clear()
            except:
                pass
-        sys.stderr.write(u'[debug] Python version %s - %s' %(platform.python_version(), platform_name()) + u'\n')
-        sys.stderr.write(u'[debug] Proxy map: ' + str(proxy_handler.proxies) + u'\n')
+        write_string(u'[debug] Python version %s - %s' %(platform.python_version(), platform_name()) + u'\n')
+        write_string(u'[debug] Proxy map: ' + str(proxy_handler.proxies) + u'\n')
 
     ydl.add_default_info_extractors()
 
```
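One behavioural note from the added `_real_main` block above: `--all-subs` by itself now also switches subtitle writing on. Condensed into a standalone snippet:

```python
# Condensed from the lines added to _real_main above; `opts` stands in
# for the parsed optparse values object.
class opts:
    allsubtitles = True        # --all-subs given
    writeautomaticsub = False  # --write-auto-sub not given
    writesubtitles = False     # --write-sub not given

if opts.allsubtitles and (opts.writeautomaticsub == False):
    opts.writesubtitles = True

assert opts.writesubtitles  # --all-subs now implies --write-sub
```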
youtube_dl/extractor/__init__.py

```diff
@@ -6,6 +6,7 @@ from .arte import ArteTvIE
 from .auengine import AUEngineIE
 from .bandcamp import BandcampIE
 from .bliptv import BlipTVIE, BlipTVUserIE
+from .bloomberg import BloombergIE
 from .breakcom import BreakIE
 from .brightcove import BrightcoveIE
 from .c56 import C56IE
@@ -28,7 +29,15 @@ from .eighttracks import EightTracksIE
 from .escapist import EscapistIE
 from .exfm import ExfmIE
 from .facebook import FacebookIE
+from .fktv import (
+    FKTVIE,
+    FKTVPosteckeIE,
+)
 from .flickr import FlickrIE
+from .francetv import (
+    PluzzIE,
+    FranceTvInfoIE,
+)
 from .freesound import FreesoundIE
 from .funnyordie import FunnyOrDieIE
 from .gamespot import GameSpotIE
@@ -48,6 +57,7 @@ from .jeuxvideo import JeuxVideoIE
 from .jukebox import JukeboxIE
 from .justintv import JustinTVIE
 from .kankan import KankanIE
+from .kickstarter import KickStarterIE
 from .keek import KeekIE
 from .liveleak import LiveLeakIE
 from .livestream import LivestreamIE
@@ -62,6 +72,7 @@ from .myvideo import MyVideoIE
 from .naver import NaverIE
 from .nba import NBAIE
 from .nbc import NBCNewsIE
+from .newgrounds import NewgroundsIE
 from .ooyala import OoyalaIE
 from .orf import ORFIE
 from .pbs import PBSIE
@@ -77,7 +88,8 @@ from .sina import SinaIE
 from .slashdot import SlashdotIE
 from .slideshare import SlideshareIE
 from .sohu import SohuIE
-from .soundcloud import SoundcloudIE, SoundcloudSetIE
+from .soundcloud import SoundcloudIE, SoundcloudSetIE, SoundcloudUserIE
+from .southparkstudios import SouthParkStudiosIE
 from .spiegel import SpiegelIE
 from .stanfordoc import StanfordOpenClassroomIE
 from .statigram import StatigramIE
@@ -92,11 +104,12 @@ from .tudou import TudouIE
 from .tumblr import TumblrIE
 from .tutv import TutvIE
 from .unistra import UnistraIE
-from .ustream import UstreamIE
+from .ustream import UstreamIE, UstreamChannelIE
 from .vbox7 import Vbox7IE
 from .veehd import VeeHDIE
 from .veoh import VeohIE
 from .vevo import VevoIE
+from .vice import ViceIE
 from .videofyme import VideofyMeIE
 from .vimeo import VimeoIE, VimeoChannelIE
 from .vine import VineIE
```
youtube_dl/extractor/archiveorg.py

```diff
@@ -46,6 +46,8 @@ class ArchiveOrgIE(InfoExtractor):
             for fn,fdata in data['files'].items()
             if 'Video' in fdata['format']]
         formats.sort(key=lambda fdata: fdata['file_size'])
+        for f in formats:
+            f['ext'] = determine_ext(f['url'])
 
         info = {
             '_type': 'video',
@@ -61,7 +63,6 @@ class ArchiveOrgIE(InfoExtractor):
             info['thumbnail'] = thumbnail
 
         # TODO: Remove when #980 has been merged
-        info['url'] = formats[-1]['url']
-        info['ext'] = determine_ext(formats[-1]['url'])
+        info.update(formats[-1])
 
         return info
```
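The archive.org change keeps the single-format shim flagged by the TODO: formats are sorted by file size and the best one's fields are merged into the top-level dict. In miniature, with made-up values:

```python
# Miniature of the pattern above, with made-up entries: sort by
# file_size, then surface the largest format's fields at the top level.
formats = [
    {'url': 'http://example.org/video.ogv', 'file_size': 10, 'ext': 'ogv'},
    {'url': 'http://example.org/video.mp4', 'file_size': 99, 'ext': 'mp4'},
]
formats.sort(key=lambda fdata: fdata['file_size'])
info = {'_type': 'video', 'id': 'example', 'formats': formats}
info.update(formats[-1])
print(info['url'])  # the largest file wins until #980 removes the shim
```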
youtube_dl/extractor/bloomberg.py (new file, 27 lines)
```python
import re

from .common import InfoExtractor


class BloombergIE(InfoExtractor):
    _VALID_URL = r'https?://www\.bloomberg\.com/video/(?P<name>.+?).html'

    _TEST = {
        u'url': u'http://www.bloomberg.com/video/shah-s-presentation-on-foreign-exchange-strategies-qurhIVlJSB6hzkVi229d8g.html',
        u'file': u'12bzhqZTqQHmmlA8I-i0NpzJgcG5NNYX.mp4',
        u'info_dict': {
            u'title': u'Shah\'s Presentation on Foreign-Exchange Strategies',
            u'description': u'md5:abc86e5236f9f0e4866c59ad36736686',
        },
        u'params': {
            # Requires ffmpeg (m3u8 manifest)
            u'skip_download': True,
        },
    }

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        name = mobj.group('name')
        webpage = self._download_webpage(url, name)
        ooyala_url = self._og_search_video_url(webpage)
        return self.url_result(ooyala_url, ie='Ooyala')
```
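BloombergIE never touches the media itself: it pulls the og:video URL out of the page and delegates to the Ooyala extractor through `url_result`. That helper returns a `'_type': 'url'` result dict, roughly of this shape (reconstructed from the common InfoExtractor of this era, so treat the exact fields as an assumption):

```python
# Assumed shape of InfoExtractor.url_result(url, ie='Ooyala'): a result
# telling the core to re-dispatch the URL to the named extractor.
def url_result(url, ie=None):
    return {'_type': 'url', 'url': url, 'ie_key': ie}
```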
youtube_dl/extractor/canalc2.py

```diff
@@ -5,7 +5,7 @@ from .common import InfoExtractor
 
 
 class Canalc2IE(InfoExtractor):
-    _IE_NAME = 'canalc2.tv'
+    IE_NAME = 'canalc2.tv'
     _VALID_URL = r'http://.*?\.canalc2\.tv/video\.asp\?idVideo=(\d+)&voir=oui'
 
     _TEST = {
```
| @@ -1,3 +1,4 @@ | ||||
| # encoding: utf-8 | ||||
| import re | ||||
| import xml.etree.ElementTree | ||||
|  | ||||
| @@ -5,24 +6,29 @@ from .common import InfoExtractor | ||||
| from ..utils import unified_strdate | ||||
|  | ||||
| class CanalplusIE(InfoExtractor): | ||||
|     _VALID_URL = r'https?://(www\.canalplus\.fr/.*?\?vid=|player\.canalplus\.fr/#/)(?P<id>\d+)' | ||||
|     _VALID_URL = r'https?://(www\.canalplus\.fr/.*?/(?P<path>.*)|player\.canalplus\.fr/#/(?P<id>\d+))' | ||||
|     _VIDEO_INFO_TEMPLATE = 'http://service.canal-plus.com/video/rest/getVideosLiees/cplus/%s' | ||||
|     IE_NAME = u'canalplus.fr' | ||||
|  | ||||
|     _TEST = { | ||||
|         u'url': u'http://www.canalplus.fr/c-divertissement/pid3351-c-le-petit-journal.html?vid=889861', | ||||
|         u'file': u'889861.flv', | ||||
|         u'md5': u'590a888158b5f0d6832f84001fbf3e99', | ||||
|         u'url': u'http://www.canalplus.fr/c-infos-documentaires/pid1830-c-zapping.html?vid=922470', | ||||
|         u'file': u'922470.flv', | ||||
|         u'info_dict': { | ||||
|             u'title': u'Le Petit Journal 20/06/13 - La guerre des drone', | ||||
|             u'upload_date': u'20130620', | ||||
|             u'title': u'Zapping - 26/08/13', | ||||
|             u'description': u'Le meilleur de toutes les chaînes, tous les jours.\nEmission du 26 août 2013', | ||||
|             u'upload_date': u'20130826', | ||||
|         }, | ||||
|         u'params': { | ||||
|             u'skip_download': True, | ||||
|         }, | ||||
|         u'skip': u'Requires rtmpdump' | ||||
|     } | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         video_id = mobj.group('id') | ||||
|         if video_id is None: | ||||
|             webpage = self._download_webpage(url, mobj.group('path')) | ||||
|             video_id = self._search_regex(r'videoId = "(\d+)";', webpage, u'video id') | ||||
|         info_url = self._VIDEO_INFO_TEMPLATE % video_id | ||||
|         info_page = self._download_webpage(info_url, video_id, | ||||
|                                            u'Downloading video info') | ||||
| @@ -43,4 +49,6 @@ class CanalplusIE(InfoExtractor): | ||||
|                 'ext': 'flv', | ||||
|                 'upload_date': unified_strdate(infos.find('PUBLICATION/DATE').text), | ||||
|                 'thumbnail': media.find('IMAGES/GRAND').text, | ||||
|                 'description': infos.find('DESCRIPTION').text, | ||||
|                 'view_count': int(infos.find('NB_VUES').text), | ||||
|                 } | ||||
|   | ||||
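For reference, a standalone sketch of the two-branch URL handling introduced above (regexes copied from the hunk, page content faked): player URLs carry the numeric id directly, while site URLs fall back to scraping videoId out of the page.

    import re

    _VALID_URL = r'https?://(www\.canalplus\.fr/.*?/(?P<path>.*)|player\.canalplus\.fr/#/(?P<id>\d+))'

    def video_id_for(url, webpage=''):
        mobj = re.match(_VALID_URL, url)
        video_id = mobj.group('id')
        if video_id is None:
            # site URL: scrape the id out of the downloaded page
            video_id = re.search(r'videoId = "(\d+)";', webpage).group(1)
        return video_id

    print(video_id_for('http://player.canalplus.fr/#/922470'))
    # -> 922470
    print(video_id_for('http://www.canalplus.fr/c-infos-documentaires/pid1830-c-zapping.html',
                       webpage='var videoId = "922470";'))
    # -> 922470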
| @@ -3,15 +3,19 @@ import json | ||||
| import itertools | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from .subtitles import SubtitlesInfoExtractor | ||||
|  | ||||
| from ..utils import ( | ||||
|     compat_urllib_request, | ||||
|     compat_str, | ||||
|     get_element_by_attribute, | ||||
|     get_element_by_id, | ||||
|  | ||||
|     ExtractorError, | ||||
| ) | ||||
|  | ||||
| class DailymotionIE(InfoExtractor): | ||||
|  | ||||
| class DailymotionIE(SubtitlesInfoExtractor): | ||||
|     """Information Extractor for Dailymotion""" | ||||
|  | ||||
|     _VALID_URL = r'(?i)(?:https?://)?(?:www\.)?dailymotion\.[a-z]{2,3}/(?:embed/)?video/([^/]+)' | ||||
| @@ -73,6 +77,12 @@ class DailymotionIE(InfoExtractor): | ||||
|             raise ExtractorError(u'Unable to extract video URL') | ||||
|         video_url = info[max_quality] | ||||
|  | ||||
|         # subtitles | ||||
|         video_subtitles = self.extract_subtitles(video_id) | ||||
|         if self._downloader.params.get('listsubtitles', False): | ||||
|             self._list_available_subtitles(video_id) | ||||
|             return | ||||
|  | ||||
|         return [{ | ||||
|             'id':       video_id, | ||||
|             'url':      video_url, | ||||
| @@ -80,9 +90,25 @@ class DailymotionIE(InfoExtractor): | ||||
|             'upload_date':  video_upload_date, | ||||
|             'title':    self._og_search_title(webpage), | ||||
|             'ext':      video_extension, | ||||
|             'subtitles':    video_subtitles, | ||||
|             'thumbnail': info['thumbnail_url'] | ||||
|         }] | ||||
|  | ||||
|     def _get_available_subtitles(self, video_id): | ||||
|         try: | ||||
|             sub_list = self._download_webpage( | ||||
|                 'https://api.dailymotion.com/video/%s/subtitles?fields=id,language,url' % video_id, | ||||
|                 video_id, note=False) | ||||
|         except ExtractorError as err: | ||||
|             self._downloader.report_warning(u'unable to download video subtitles: %s' % compat_str(err)) | ||||
|             return {} | ||||
|         info = json.loads(sub_list) | ||||
|         if (info['total'] > 0): | ||||
|             sub_lang_list = dict((l['language'], l['url']) for l in info['list']) | ||||
|             return sub_lang_list | ||||
|         self._downloader.report_warning(u'video doesn\'t have subtitles') | ||||
|         return {} | ||||
|  | ||||
|  | ||||
| class DailymotionPlaylistIE(InfoExtractor): | ||||
|     _VALID_URL = r'(?:https?://)?(?:www\.)?dailymotion\.[a-z]{2,3}/playlist/(?P<id>.+?)/' | ||||
|   | ||||
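The new _get_available_subtitles above boils down to one JSON-to-dict step. A sketch of just that step, with a canned response standing in for the api.dailymotion.com request:

    import json

    # canned reply for .../video/<id>/subtitles?fields=id,language,url
    sub_list = json.dumps({
        'total': 2,
        'list': [
            {'id': 'x1', 'language': 'en', 'url': 'http://example.com/x.en.srt'},
            {'id': 'x2', 'language': 'fr', 'url': 'http://example.com/x.fr.srt'},
        ],
    })

    info = json.loads(sub_list)
    if info['total'] > 0:
        sub_lang_list = dict((l['language'], l['url']) for l in info['list'])
    else:
        sub_lang_list = {}  # the extractor warns: video doesn't have subtitles

    print(sorted(sub_lang_list))  # ['en', 'fr']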
| @@ -54,6 +54,7 @@ class DreiSatIE(InfoExtractor): | ||||
|             'width': int(fe.find('./width').text), | ||||
|             'height': int(fe.find('./height').text), | ||||
|             'url': fe.find('./url').text, | ||||
|             'ext': determine_ext(fe.find('./url').text), | ||||
|             'filesize': int(fe.find('./filesize').text), | ||||
|             'video_bitrate': int(fe.find('./videoBitrate').text), | ||||
|             '3sat_qualityname': fe.find('./quality').text, | ||||
| @@ -79,7 +80,6 @@ class DreiSatIE(InfoExtractor): | ||||
|         } | ||||
|  | ||||
|         # TODO: Remove when #980 has been merged | ||||
|         info['url'] = formats[-1]['url'] | ||||
|         info['ext'] = determine_ext(formats[-1]['url']) | ||||
|         info.update(formats[-1]) | ||||
|  | ||||
|         return info | ||||
|   | ||||
79  youtube_dl/extractor/fktv.py  Normal file
							| @@ -0,0 +1,79 @@ | ||||
| import re | ||||
| import random | ||||
| import json | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     determine_ext, | ||||
|     get_element_by_id, | ||||
|     clean_html, | ||||
| ) | ||||
|  | ||||
|  | ||||
| class FKTVIE(InfoExtractor): | ||||
|     IE_NAME = u'fernsehkritik.tv' | ||||
|     _VALID_URL = r'(?:http://)?(?:www\.)?fernsehkritik.tv/folge-(?P<ep>[0-9]+)(?:/.*)?' | ||||
|  | ||||
|     _TEST = { | ||||
|         u'url': u'http://fernsehkritik.tv/folge-1', | ||||
|         u'file': u'00011.flv', | ||||
|         u'info_dict': { | ||||
|             u'title': u'Folge 1 vom 10. April 2007', | ||||
|             u'description': u'md5:fb4818139c7cfe6907d4b83412a6864f', | ||||
|         }, | ||||
|     } | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         episode = int(mobj.group('ep')) | ||||
|  | ||||
|         server = random.randint(2, 4) | ||||
|         video_thumbnail = 'http://fernsehkritik.tv/images/magazin/folge%d.jpg' % episode | ||||
|         start_webpage = self._download_webpage('http://fernsehkritik.tv/folge-%d/Start' % episode, | ||||
|             episode) | ||||
|         playlist = self._search_regex(r'playlist = (\[.*?\]);', start_webpage, | ||||
|             u'playlist', flags=re.DOTALL) | ||||
|         files = json.loads(re.sub('{[^{}]*?}', '{}', playlist)) | ||||
|         # TODO: return a single multipart video | ||||
|         videos = [] | ||||
|         for i, _ in enumerate(files, 1): | ||||
|             video_id = '%04d%d' % (episode, i) | ||||
|             video_url = 'http://dl%d.fernsehkritik.tv/fernsehkritik%d%s.flv' % (server, episode, '' if i == 1 else '-%d' % i) | ||||
|             video_title = 'Fernsehkritik %d.%d' % (episode, i) | ||||
|             videos.append({ | ||||
|                 'id': video_id, | ||||
|                 'url': video_url, | ||||
|                 'ext': determine_ext(video_url), | ||||
|                 'title': clean_html(get_element_by_id('eptitle', start_webpage)), | ||||
|                 'description': clean_html(get_element_by_id('contentlist', start_webpage)), | ||||
|                 'thumbnail': video_thumbnail | ||||
|             }) | ||||
|         return videos | ||||
|  | ||||
|  | ||||
| class FKTVPosteckeIE(InfoExtractor): | ||||
|     IE_NAME = u'fernsehkritik.tv:postecke' | ||||
|     _VALID_URL = r'(?:http://)?(?:www\.)?fernsehkritik.tv/inline-video/postecke.php\?(.*&)?ep=(?P<ep>[0-9]+)(&|$)' | ||||
|     _TEST = { | ||||
|         u'url': u'http://fernsehkritik.tv/inline-video/postecke.php?iframe=true&width=625&height=440&ep=120', | ||||
|         u'file': u'0120.flv', | ||||
|         u'md5': u'262f0adbac80317412f7e57b4808e5c4', | ||||
|         u'info_dict': { | ||||
|             u"title": u"Postecke 120" | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         episode = int(mobj.group('ep')) | ||||
|  | ||||
|         server = random.randint(2, 4) | ||||
|         video_id = '%04d' % episode | ||||
|         video_url = 'http://dl%d.fernsehkritik.tv/postecke/postecke%d.flv' % (server, episode) | ||||
|         video_title = 'Postecke %d' % episode | ||||
|         return { | ||||
|             'id':       video_id, | ||||
|             'url':      video_url, | ||||
|             'ext':      determine_ext(video_url), | ||||
|             'title':    video_title, | ||||
|         } | ||||
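The playlist parsing above relies on a small trick worth spelling out: the page's JavaScript playlist is not valid JSON, so every nested object literal is blanked to {} before json.loads; only the number of parts matters afterwards. A sketch with a fabricated playlist string:

    import json
    import re

    playlist = "[{file: 'part1.flv', duration: 10}, {file: 'part2.flv', duration: 12}]"
    files = json.loads(re.sub('{[^{}]*?}', '{}', playlist))
    print(len(files))  # -> 2, one download entry per episode part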
67  youtube_dl/extractor/francetv.py  Normal file
							| @@ -0,0 +1,67 @@ | ||||
| # encoding: utf-8 | ||||
| import re | ||||
| import xml.etree.ElementTree | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     compat_urlparse, | ||||
| ) | ||||
|  | ||||
|  | ||||
| class FranceTVBaseInfoExtractor(InfoExtractor): | ||||
|     def _extract_video(self, video_id): | ||||
|         xml_desc = self._download_webpage( | ||||
|             'http://www.francetvinfo.fr/appftv/webservices/video/' | ||||
|             'getInfosOeuvre.php?id-diffusion=' | ||||
|             + video_id, video_id, 'Downloading XML config') | ||||
|         info = xml.etree.ElementTree.fromstring(xml_desc.encode('utf-8')) | ||||
|  | ||||
|         manifest_url = info.find('videos/video/url').text | ||||
|         video_url = manifest_url.replace('manifest.f4m', 'index_2_av.m3u8') | ||||
|         video_url = video_url.replace('/z/', '/i/') | ||||
|         thumbnail_path = info.find('image').text | ||||
|  | ||||
|         return {'id': video_id, | ||||
|                 'ext': 'mp4', | ||||
|                 'url': video_url, | ||||
|                 'title': info.find('titre').text, | ||||
|                 'thumbnail': compat_urlparse.urljoin('http://pluzz.francetv.fr', thumbnail_path), | ||||
|                 'description': info.find('synopsis').text, | ||||
|                 } | ||||
|  | ||||
|  | ||||
| class PluzzIE(FranceTVBaseInfoExtractor): | ||||
|     IE_NAME = u'pluzz.francetv.fr' | ||||
|     _VALID_URL = r'https?://pluzz\.francetv\.fr/videos/(.*?)\.html' | ||||
|  | ||||
|     # Can't use tests, videos expire in 7 days | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         title = re.match(self._VALID_URL, url).group(1) | ||||
|         webpage = self._download_webpage(url, title) | ||||
|         video_id = self._search_regex( | ||||
|             r'data-diffusion="(\d+)"', webpage, 'ID') | ||||
|         return self._extract_video(video_id) | ||||
|  | ||||
|  | ||||
| class FranceTvInfoIE(FranceTVBaseInfoExtractor): | ||||
|     IE_NAME = u'francetvinfo.fr' | ||||
|     _VALID_URL = r'https?://www\.francetvinfo\.fr/replay.*/(?P<title>.+).html' | ||||
|  | ||||
|     _TEST = { | ||||
|         u'url': u'http://www.francetvinfo.fr/replay-jt/france-3/soir-3/jt-grand-soir-3-lundi-26-aout-2013_393427.html', | ||||
|         u'file': u'84981923.mp4', | ||||
|         u'info_dict': { | ||||
|             u'title': u'Soir 3', | ||||
|         }, | ||||
|         u'params': { | ||||
|             u'skip_download': True, | ||||
|         }, | ||||
|     } | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         page_title = mobj.group('title') | ||||
|         webpage = self._download_webpage(url, page_title) | ||||
|         video_id = self._search_regex(r'id-video=(\d+?)"', webpage, u'video id') | ||||
|         return self._extract_video(video_id) | ||||
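The _extract_video helper above turns the HDS manifest from the XML config into a playable HLS URL by plain string substitution. A sketch of that rewrite (CDN host and path invented):

    manifest_url = 'http://example-cdn.francetv.fr/z/streaming/84981923/manifest.f4m'
    video_url = manifest_url.replace('manifest.f4m', 'index_2_av.m3u8')
    video_url = video_url.replace('/z/', '/i/')
    print(video_url)
    # -> http://example-cdn.francetv.fr/i/streaming/84981923/index_2_av.m3u8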
| @@ -21,7 +21,7 @@ class FunnyOrDieIE(InfoExtractor): | ||||
|         video_id = mobj.group('id') | ||||
|         webpage = self._download_webpage(url, video_id) | ||||
|  | ||||
|         video_url = self._search_regex(r'type: "video/mp4", src: "(.*?)"', | ||||
|         video_url = self._search_regex(r'type="video/mp4" src="(.*?)"', | ||||
|             webpage, u'video URL', flags=re.DOTALL) | ||||
|  | ||||
|         info = { | ||||
|   | ||||
| @@ -14,7 +14,7 @@ class GameSpotIE(InfoExtractor): | ||||
|         u"file": u"6410818.mp4", | ||||
|         u"md5": u"b2a30deaa8654fcccd43713a6b6a4825", | ||||
|         u"info_dict": { | ||||
|             u"title": u"Arma III - Community Guide: SITREP I", | ||||
|             u"title": u"Arma 3 - Community Guide: SITREP I", | ||||
|             u"upload_date": u"20130627",  | ||||
|         } | ||||
|     } | ||||
|   | ||||
| @@ -40,7 +40,8 @@ class GooglePlusIE(InfoExtractor): | ||||
|         self.report_extraction(video_id) | ||||
|  | ||||
|         # Extract update date | ||||
|         upload_date = self._html_search_regex('title="Timestamp">(.*?)</a>', | ||||
|         upload_date = self._html_search_regex( | ||||
|             ['title="Timestamp">(.*?)</a>', r'<a.+?class="g-M.+?>(.+?)</a>'], | ||||
|             webpage, u'upload date', fatal=False) | ||||
|         if upload_date: | ||||
|             # Convert timestring to a format suitable for filename | ||||
|   | ||||
| @@ -7,11 +7,11 @@ from .common import InfoExtractor | ||||
| class HotNewHipHopIE(InfoExtractor): | ||||
|     _VALID_URL = r'http://www\.hotnewhiphop.com/.*\.(?P<id>.*)\.html' | ||||
|     _TEST = { | ||||
|         u'url': u"http://www.hotnewhiphop.com/freddie-gibbs-lay-it-down-song.1435540.html'", | ||||
|         u'url': u"http://www.hotnewhiphop.com/freddie-gibbs-lay-it-down-song.1435540.html", | ||||
|         u'file': u'1435540.mp3', | ||||
|         u'md5': u'2c2cd2f76ef11a9b3b581e8b232f3d96', | ||||
|         u'info_dict': { | ||||
|             u"title": u"Freddie Gibbs Songs - Lay It Down" | ||||
|             u"title": u"Freddie Gibbs - Lay It Down" | ||||
|         } | ||||
|     } | ||||
|  | ||||
|   | ||||
| @@ -19,8 +19,7 @@ class HowcastIE(InfoExtractor): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|  | ||||
|         video_id = mobj.group('id') | ||||
|         webpage_url = 'http://www.howcast.com/videos/' + video_id | ||||
|         webpage = self._download_webpage(webpage_url, video_id) | ||||
|         webpage = self._download_webpage(url, video_id) | ||||
|  | ||||
|         self.report_extraction(video_id) | ||||
|  | ||||
|   | ||||
37  youtube_dl/extractor/kickstarter.py  Normal file
							| @@ -0,0 +1,37 @@ | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
|  | ||||
|  | ||||
| class KickStarterIE(InfoExtractor): | ||||
|     _VALID_URL = r'https?://www\.kickstarter\.com/projects/(?P<id>\d*)/.*' | ||||
|     _TEST = { | ||||
|         u"url": u"https://www.kickstarter.com/projects/1404461844/intersection-the-story-of-josh-grant?ref=home_location", | ||||
|         u"file": u"1404461844.mp4", | ||||
|         u"md5": u"c81addca81327ffa66c642b5d8b08cab", | ||||
|         u"info_dict": { | ||||
|             u"title": u"Intersection: The Story of Josh Grant by Kyle Cowling", | ||||
|         }, | ||||
|     } | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         m = re.match(self._VALID_URL, url) | ||||
|         video_id = m.group('id') | ||||
|         webpage_src = self._download_webpage(url, video_id) | ||||
|  | ||||
|         video_url = self._search_regex(r'data-video="(.*?)">', | ||||
|             webpage_src, u'video URL') | ||||
|         if 'mp4' in video_url: | ||||
|             ext = 'mp4' | ||||
|         else: | ||||
|             ext = 'flv' | ||||
|         video_title = self._html_search_regex(r"<title>(.*?)</title>", | ||||
|             webpage_src, u'title').rpartition(u'\u2014 Kickstarter')[0].strip() | ||||
|  | ||||
|         results = [{ | ||||
|                     'id': video_id, | ||||
|                     'url': video_url, | ||||
|                     'title': video_title, | ||||
|                     'ext': ext, | ||||
|                     }] | ||||
|         return results | ||||
| @@ -5,34 +5,27 @@ import socket | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     compat_http_client, | ||||
|     compat_str, | ||||
|     compat_urllib_error, | ||||
|     compat_urllib_request, | ||||
|  | ||||
|     ExtractorError, | ||||
|     unified_strdate, | ||||
| ) | ||||
|  | ||||
|  | ||||
| class MixcloudIE(InfoExtractor): | ||||
|     _WORKING = False # New API, but it seems good http://www.mixcloud.com/developers/documentation/ | ||||
|     _VALID_URL = r'^(?:https?://)?(?:www\.)?mixcloud\.com/([\w\d-]+)/([\w\d-]+)' | ||||
|     IE_NAME = u'mixcloud' | ||||
|  | ||||
|     def report_download_json(self, file_id): | ||||
|         """Report JSON download.""" | ||||
|         self.to_screen(u'Downloading json') | ||||
|  | ||||
|     def get_urls(self, jsonData, fmt, bitrate='best'): | ||||
|         """Get urls from 'audio_formats' section in json""" | ||||
|         try: | ||||
|             bitrate_list = jsonData[fmt] | ||||
|             if bitrate is None or bitrate == 'best' or bitrate not in bitrate_list: | ||||
|                 bitrate = max(bitrate_list) # select highest | ||||
|  | ||||
|             url_list = jsonData[fmt][bitrate] | ||||
|         except TypeError: # we have no bitrate info. | ||||
|             url_list = jsonData[fmt] | ||||
|         return url_list | ||||
|     _TEST = { | ||||
|         u'url': u'http://www.mixcloud.com/dholbach/cryptkeeper/', | ||||
|         u'file': u'dholbach-cryptkeeper.mp3', | ||||
|         u'info_dict': { | ||||
|             u'title': u'Cryptkeeper', | ||||
|             u'description': u'After quite a long silence from myself, finally another Drum\'n\'Bass mix with my favourite current dance floor bangers.', | ||||
|             u'uploader': u'Daniel Holbach', | ||||
|             u'uploader_id': u'dholbach', | ||||
|             u'upload_date': u'20111115', | ||||
|         }, | ||||
|     } | ||||
|  | ||||
|     def check_urls(self, url_list): | ||||
|         """Returns 1st active url from list""" | ||||
| @@ -45,71 +38,32 @@ class MixcloudIE(InfoExtractor): | ||||
|  | ||||
|         return None | ||||
|  | ||||
|     def _print_formats(self, formats): | ||||
|         print('Available formats:') | ||||
|         for fmt in formats.keys(): | ||||
|             for b in formats[fmt]: | ||||
|                 try: | ||||
|                     ext = formats[fmt][b][0] | ||||
|                     print('%s\t%s\t[%s]' % (fmt, b, ext.split('.')[-1])) | ||||
|                 except TypeError: # we have no bitrate info | ||||
|                     ext = formats[fmt][0] | ||||
|                     print('%s\t%s\t[%s]' % (fmt, '??', ext.split('.')[-1])) | ||||
|                     break | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         if mobj is None: | ||||
|             raise ExtractorError(u'Invalid URL: %s' % url) | ||||
|         # extract uploader & filename from url | ||||
|         uploader = mobj.group(1).decode('utf-8') | ||||
|         file_id = uploader + "-" + mobj.group(2).decode('utf-8') | ||||
|  | ||||
|         # construct API request | ||||
|         file_url = 'http://www.mixcloud.com/api/1/cloudcast/' + '/'.join(url.split('/')[-3:-1]) + '.json' | ||||
|         # retrieve .json file with links to files | ||||
|         request = compat_urllib_request.Request(file_url) | ||||
|         try: | ||||
|             self.report_download_json(file_url) | ||||
|             jsonData = compat_urllib_request.urlopen(request).read() | ||||
|         except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err: | ||||
|             raise ExtractorError(u'Unable to retrieve file: %s' % compat_str(err)) | ||||
|         uploader = mobj.group(1) | ||||
|         cloudcast_name = mobj.group(2) | ||||
|         track_id = '-'.join((uploader, cloudcast_name)) | ||||
|         api_url = 'http://api.mixcloud.com/%s/%s/' % (uploader, cloudcast_name) | ||||
|         webpage = self._download_webpage(url, track_id) | ||||
|         json_data = self._download_webpage(api_url, track_id, | ||||
|             u'Downloading cloudcast info') | ||||
|         info = json.loads(json_data) | ||||
|  | ||||
|         # parse JSON | ||||
|         json_data = json.loads(jsonData) | ||||
|         player_url = json_data['player_swf_url'] | ||||
|         formats = dict(json_data['audio_formats']) | ||||
|         preview_url = self._search_regex(r'data-preview-url="(.+?)"', webpage, u'preview url') | ||||
|         song_url = preview_url.replace('/previews/', '/cloudcasts/originals/') | ||||
|         template_url = re.sub(r'(stream\d*)', 'stream%d', song_url) | ||||
|         final_song_url = self.check_urls(template_url % i for i in range(30)) | ||||
|  | ||||
|         req_format = self._downloader.params.get('format', None) | ||||
|  | ||||
|         if self._downloader.params.get('listformats', None): | ||||
|             self._print_formats(formats) | ||||
|             return | ||||
|  | ||||
|         if req_format is None or req_format == 'best': | ||||
|             for format_param in formats.keys(): | ||||
|                 url_list = self.get_urls(formats, format_param) | ||||
|                 # check urls | ||||
|                 file_url = self.check_urls(url_list) | ||||
|                 if file_url is not None: | ||||
|                     break # got it! | ||||
|         else: | ||||
|             if req_format not in formats: | ||||
|                 raise ExtractorError(u'Format is not available') | ||||
|  | ||||
|             url_list = self.get_urls(formats, req_format) | ||||
|             file_url = self.check_urls(url_list) | ||||
|             format_param = req_format | ||||
|  | ||||
|         return [{ | ||||
|             'id': file_id.decode('utf-8'), | ||||
|             'url': file_url.decode('utf-8'), | ||||
|             'uploader': uploader.decode('utf-8'), | ||||
|             'upload_date': None, | ||||
|             'title': json_data['name'], | ||||
|             'ext': file_url.split('.')[-1].decode('utf-8'), | ||||
|             'format': (format_param is None and u'NA' or format_param.decode('utf-8')), | ||||
|             'thumbnail': json_data['thumbnail_url'], | ||||
|             'description': json_data['description'], | ||||
|             'player_url': player_url.decode('utf-8'), | ||||
|         }] | ||||
|         return { | ||||
|             'id': track_id, | ||||
|             'title': info['name'], | ||||
|             'url': final_song_url, | ||||
|             'ext': 'mp3', | ||||
|             'description': info['description'], | ||||
|             'thumbnail': info['pictures'].get('extra_large'), | ||||
|             'uploader': info['user']['name'], | ||||
|             'uploader_id': info['user']['username'], | ||||
|             'upload_date': unified_strdate(info['created_time']), | ||||
|             'view_count': info['play_count'], | ||||
|         } | ||||
|   | ||||
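The rewritten Mixcloud extractor finds the real audio by rewriting the preview path and probing numbered stream servers until one answers. A sketch with the network probe stubbed out (host names and the "alive" server are invented; the real check_urls issues HTTP requests):

    import re

    preview_url = 'http://stream4.mixcloud.com/previews/dholbach/cryptkeeper.mp3'
    song_url = preview_url.replace('/previews/', '/cloudcasts/originals/')
    template_url = re.sub(r'(stream\d*)', 'stream%d', song_url)

    ALIVE = {'http://stream11.mixcloud.com/cloudcasts/originals/dholbach/cryptkeeper.mp3'}

    def check_urls(url_list):
        # return the first URL whose probe succeeds (stubbed via ALIVE)
        for url in url_list:
            if url in ALIVE:
                return url
        return None

    final_song_url = check_urls(template_url % i for i in range(30))
    print(final_song_url)  # the stream11 variant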
38  youtube_dl/extractor/newgrounds.py  Normal file
							| @@ -0,0 +1,38 @@ | ||||
| import json | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import determine_ext | ||||
|  | ||||
|  | ||||
| class NewgroundsIE(InfoExtractor): | ||||
|     _VALID_URL = r'(?:https?://)?(?:www\.)?newgrounds\.com/audio/listen/(?P<id>\d+)' | ||||
|     _TEST = { | ||||
|         u'url': u'http://www.newgrounds.com/audio/listen/549479', | ||||
|         u'file': u'549479.mp3', | ||||
|         u'md5': u'fe6033d297591288fa1c1f780386f07a', | ||||
|         u'info_dict': { | ||||
|             u"title": u"B7 - BusMode", | ||||
|             u"uploader": u"Burn7", | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         music_id = mobj.group('id') | ||||
|         webpage = self._download_webpage(url, music_id) | ||||
|          | ||||
|         title = self._html_search_regex(r',"name":"([^"]+)",', webpage, u'music title') | ||||
|         uploader = self._html_search_regex(r',"artist":"([^"]+)",', webpage, u'music uploader') | ||||
|          | ||||
|         music_url_json_string = self._html_search_regex(r'({"url":"[^"]+"),', webpage, u'music url') + '}' | ||||
|         music_url_json = json.loads(music_url_json_string) | ||||
|         music_url = music_url_json['url'] | ||||
|  | ||||
|         return { | ||||
|             'id':       music_id, | ||||
|             'title':    title, | ||||
|             'url':      music_url, | ||||
|             'uploader': uploader, | ||||
|             'ext':      determine_ext(music_url), | ||||
|         } | ||||
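The URL extraction above captures just the first key/value pair of an embedded JavaScript object, which happens to be valid JSON once a closing brace is appended. A sketch with a fabricated page fragment:

    import json
    import re

    webpage = 'embedController([{"url":"http://audio.example/549479.mp3","is_published":true}]);'
    music_url_json_string = re.search(r'({"url":"[^"]+"),', webpage).group(1) + '}'
    music_url = json.loads(music_url_json_string)['url']
    print(music_url)  # -> http://audio.example/549479.mp3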
| @@ -18,11 +18,15 @@ class OoyalaIE(InfoExtractor): | ||||
|         }, | ||||
|     } | ||||
|  | ||||
|     @staticmethod | ||||
|     def _url_for_embed_code(embed_code): | ||||
|         return 'http://player.ooyala.com/player.js?embedCode=%s' % embed_code | ||||
|  | ||||
|     def _extract_result(self, info, more_info): | ||||
|         return {'id': info['embedCode'], | ||||
|                 'ext': 'mp4', | ||||
|                 'title': unescapeHTML(info['title']), | ||||
|                 'url': info['url'], | ||||
|                 'url': info.get('ipad_url') or info['url'], | ||||
|                 'description': unescapeHTML(more_info['description']), | ||||
|                 'thumbnail': more_info['promo'], | ||||
|                 } | ||||
| @@ -35,7 +39,9 @@ class OoyalaIE(InfoExtractor): | ||||
|         mobile_url = self._search_regex(r'mobile_player_url="(.+?)&device="', | ||||
|                                         player, u'mobile player url') | ||||
|         mobile_player = self._download_webpage(mobile_url, embedCode) | ||||
|         videos_info = self._search_regex(r'eval\("\((\[{.*?stream_redirect.*?}\])\)"\);', mobile_player, u'info').replace('\\"','"') | ||||
|         videos_info = self._search_regex( | ||||
|             r'var streams=window.oo_testEnv\?\[\]:eval\("\((\[{.*?}\])\)"\);', | ||||
|             mobile_player, u'info').replace('\\"','"') | ||||
|         videos_more_info = self._search_regex(r'eval\("\(({.*?\\"promo\\".*?})\)"', mobile_player, u'more info').replace('\\"','"') | ||||
|         videos_info = json.loads(videos_info) | ||||
|         videos_more_info = json.loads(videos_more_info) | ||||
|   | ||||
| @@ -1,10 +1,12 @@ | ||||
| import json | ||||
| import re | ||||
| import itertools | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     compat_str, | ||||
|     compat_urlparse, | ||||
|     compat_urllib_parse, | ||||
|  | ||||
|     ExtractorError, | ||||
|     unified_strdate, | ||||
| @@ -53,10 +55,11 @@ class SoundcloudIE(InfoExtractor): | ||||
|     def _resolv_url(cls, url): | ||||
|         return 'http://api.soundcloud.com/resolve.json?url=' + url + '&client_id=' + cls._CLIENT_ID | ||||
|  | ||||
|     def _extract_info_dict(self, info, full_title=None): | ||||
|     def _extract_info_dict(self, info, full_title=None, quiet=False): | ||||
|         video_id = info['id'] | ||||
|         name = full_title or video_id | ||||
|         self.report_extraction(name) | ||||
|         if not quiet: | ||||
|             self.report_extraction(name) | ||||
|  | ||||
|         thumbnail = info['artwork_url'] | ||||
|         if thumbnail is not None: | ||||
| @@ -198,3 +201,41 @@ class SoundcloudSetIE(SoundcloudIE): | ||||
|                 'id': info['id'], | ||||
|                 'title': info['title'], | ||||
|                 } | ||||
|  | ||||
|  | ||||
| class SoundcloudUserIE(SoundcloudIE): | ||||
|     _VALID_URL = r'https?://(www\.)?soundcloud.com/(?P<user>[^/]+)(/?(tracks/)?)?(\?.*)?$' | ||||
|     IE_NAME = u'soundcloud:user' | ||||
|  | ||||
|     # it's in tests/test_playlists.py | ||||
|     _TEST = None | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         uploader = mobj.group('user') | ||||
|  | ||||
|         url = 'http://soundcloud.com/%s/' % uploader | ||||
|         resolv_url = self._resolv_url(url) | ||||
|         user_json = self._download_webpage(resolv_url, uploader, | ||||
|             u'Downloading user info') | ||||
|         user = json.loads(user_json) | ||||
|  | ||||
|         tracks = [] | ||||
|         for i in itertools.count(): | ||||
|             data = compat_urllib_parse.urlencode({'offset': i*50, | ||||
|                                                   'client_id': self._CLIENT_ID, | ||||
|                                                   }) | ||||
|             tracks_url = 'http://api.soundcloud.com/users/%s/tracks.json?' % user['id'] + data | ||||
|             response = self._download_webpage(tracks_url, uploader,  | ||||
|                 u'Downloading tracks page %s' % (i+1)) | ||||
|             new_tracks = json.loads(response) | ||||
|             tracks.extend(self._extract_info_dict(track, quiet=True) for track in new_tracks) | ||||
|             if len(new_tracks) < 50: | ||||
|                 break | ||||
|  | ||||
|         return { | ||||
|             '_type': 'playlist', | ||||
|             'id': compat_str(user['id']), | ||||
|             'title': user['username'], | ||||
|             'entries': tracks, | ||||
|         } | ||||
|   | ||||
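SoundcloudUserIE pages through the tracks API 50 entries at a time and stops at the first short page. The same loop with the HTTP request replaced by a list slice:

    import itertools

    PAGE_SIZE = 50
    ALL_TRACKS = [{'id': n} for n in range(123)]  # pretend API data

    def fetch_page(offset):
        # stands in for the tracks.json request with ?offset=<offset>
        return ALL_TRACKS[offset:offset + PAGE_SIZE]

    tracks = []
    for i in itertools.count():
        new_tracks = fetch_page(i * PAGE_SIZE)
        tracks.extend(new_tracks)
        if len(new_tracks) < PAGE_SIZE:
            break  # a short page means the listing is exhausted

    print(len(tracks))  # -> 123, gathered in three pages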
34  youtube_dl/extractor/southparkstudios.py  Normal file
							| @@ -0,0 +1,34 @@ | ||||
| import re | ||||
|  | ||||
| from .mtv import MTVIE, _media_xml_tag | ||||
|  | ||||
|  | ||||
| class SouthParkStudiosIE(MTVIE): | ||||
|     IE_NAME = u'southparkstudios.com' | ||||
|     _VALID_URL = r'https?://www\.southparkstudios\.com/clips/(?P<id>\d+)' | ||||
|  | ||||
|     _FEED_URL = 'http://www.southparkstudios.com/feeds/video-player/mrss' | ||||
|  | ||||
|     _TEST = { | ||||
|         u'url': u'http://www.southparkstudios.com/clips/104437/bat-daded#tab=featured', | ||||
|         u'file': u'a7bff6c2-ed00-11e0-aca6-0026b9414f30.mp4', | ||||
|         u'info_dict': { | ||||
|             u'title': u'Bat Daded', | ||||
|             u'description': u'Randy disqualifies South Park by getting into a fight with Bat Dad.', | ||||
|         }, | ||||
|     } | ||||
|  | ||||
|     # Overwrite MTVIE properties we don't want | ||||
|     _TESTS = [] | ||||
|  | ||||
|     def _get_thumbnail_url(self, uri, itemdoc): | ||||
|         search_path = '%s/%s' % (_media_xml_tag('group'), _media_xml_tag('thumbnail')) | ||||
|         return itemdoc.find(search_path).attrib['url'] | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         video_id = mobj.group('id') | ||||
|         webpage = self._download_webpage(url, video_id) | ||||
|         mgid = self._search_regex(r'swfobject.embedSWF\(".*?(mgid:.*?)"', | ||||
|                                   webpage, u'mgid') | ||||
|         return self._get_videos_info(mgid) | ||||
91  youtube_dl/extractor/subtitles.py  Normal file
							| @@ -0,0 +1,91 @@ | ||||
| from .common import InfoExtractor | ||||
|  | ||||
| from ..utils import ( | ||||
|     compat_str, | ||||
|     ExtractorError, | ||||
| ) | ||||
|  | ||||
|  | ||||
| class SubtitlesInfoExtractor(InfoExtractor): | ||||
|     @property | ||||
|     def _have_to_download_any_subtitles(self): | ||||
|         return any([self._downloader.params.get('writesubtitles', False), | ||||
|                     self._downloader.params.get('writeautomaticsub')]) | ||||
|  | ||||
|     def _list_available_subtitles(self, video_id, webpage=None): | ||||
|         """ outputs the available subtitles for the video """ | ||||
|         sub_lang_list = self._get_available_subtitles(video_id) | ||||
|         auto_captions_list = self._get_available_automatic_caption(video_id, webpage) | ||||
|         sub_lang = ",".join(list(sub_lang_list.keys())) | ||||
|         self.to_screen(u'%s: Available subtitles for video: %s' % | ||||
|                        (video_id, sub_lang)) | ||||
|         auto_lang = ",".join(auto_captions_list.keys()) | ||||
|         self.to_screen(u'%s: Available automatic captions for video: %s' % | ||||
|                        (video_id, auto_lang)) | ||||
|  | ||||
|     def extract_subtitles(self, video_id, video_webpage=None): | ||||
|         """ | ||||
|         returns {sub_lang: sub} ,{} if subtitles not found or None if the | ||||
|         subtitles aren't requested. | ||||
|         """ | ||||
|         if not self._have_to_download_any_subtitles: | ||||
|             return None | ||||
|         available_subs_list = {} | ||||
|         if self._downloader.params.get('writeautomaticsub', False): | ||||
|             available_subs_list.update(self._get_available_automatic_caption(video_id, video_webpage)) | ||||
|         if self._downloader.params.get('writesubtitles', False): | ||||
|             available_subs_list.update(self._get_available_subtitles(video_id)) | ||||
|  | ||||
|         if not available_subs_list:  # error, it didn't get the available subtitles | ||||
|             return {} | ||||
|         if self._downloader.params.get('allsubtitles', False): | ||||
|             sub_lang_list = available_subs_list | ||||
|         else: | ||||
|             if self._downloader.params.get('subtitleslangs', False): | ||||
|                 requested_langs = self._downloader.params.get('subtitleslangs') | ||||
|             elif 'en' in available_subs_list: | ||||
|                 requested_langs = ['en'] | ||||
|             else: | ||||
|                 requested_langs = [list(available_subs_list.keys())[0]] | ||||
|  | ||||
|             sub_lang_list = {} | ||||
|             for sub_lang in requested_langs: | ||||
|                 if sub_lang not in available_subs_list: | ||||
|                     self._downloader.report_warning(u'no closed captions found in the specified language "%s"' % sub_lang) | ||||
|                     continue | ||||
|                 sub_lang_list[sub_lang] = available_subs_list[sub_lang] | ||||
|  | ||||
|         subtitles = {} | ||||
|         for sub_lang, url in sub_lang_list.items(): | ||||
|             subtitle = self._request_subtitle_url(sub_lang, url) | ||||
|             if subtitle: | ||||
|                 subtitles[sub_lang] = subtitle | ||||
|         return subtitles | ||||
|  | ||||
|     def _request_subtitle_url(self, sub_lang, url): | ||||
|         """ makes the http request for the subtitle """ | ||||
|         try: | ||||
|             sub = self._download_webpage(url, None, note=False) | ||||
|         except ExtractorError as err: | ||||
|             self._downloader.report_warning(u'unable to download video subtitles for %s: %s' % (sub_lang, compat_str(err))) | ||||
|             return | ||||
|         if not sub: | ||||
|             self._downloader.report_warning(u'Did not fetch video subtitles') | ||||
|             return | ||||
|         return sub | ||||
|  | ||||
|     def _get_available_subtitles(self, video_id): | ||||
|         """ | ||||
|         returns {sub_lang: url} or {} if not available | ||||
|         Must be redefined by the subclasses | ||||
|         """ | ||||
|         pass | ||||
|  | ||||
|     def _get_available_automatic_caption(self, video_id, webpage): | ||||
|         """ | ||||
|         returns {sub_lang: url} or {} if not available | ||||
|         Must be redefined by the subclasses that support automatic captions, | ||||
|         otherwise it will return {} | ||||
|         """ | ||||
|         self._downloader.report_warning(u'Automatic Captions not supported by this server') | ||||
|         return {} | ||||
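A sketch of how an extractor plugs into this new mixin (ExampleSubtitleIE and its URLs are invented; it assumes a youtube_dl of this vintage is importable): the subclass only maps language codes to subtitle URLs, while language selection, fetching, and warnings are inherited.

    import re

    from youtube_dl.extractor.subtitles import SubtitlesInfoExtractor

    class ExampleSubtitleIE(SubtitlesInfoExtractor):
        _VALID_URL = r'https?://example\.com/video/(?P<id>\d+)'

        def _get_available_subtitles(self, video_id):
            # {sub_lang: url}, or {} when the site offers none
            return {
                'en': 'http://example.com/subs/%s.en.srt' % video_id,
                'de': 'http://example.com/subs/%s.de.srt' % video_id,
            }

        def _real_extract(self, url):
            video_id = re.match(self._VALID_URL, url).group('id')
            video_subtitles = self.extract_subtitles(video_id)
            if self._downloader.params.get('listsubtitles', False):
                self._list_available_subtitles(video_id)
                return
            return {'id': video_id, 'ext': 'mp4', 'title': 'Example',
                    'url': 'http://example.com/%s.mp4' % video_id,
                    'subtitles': video_subtitles}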
| @@ -52,6 +52,7 @@ class TriluliluIE(InfoExtractor): | ||||
|             { | ||||
|                 'format': fnode.text, | ||||
|                 'url': video_url_template % fnode.text, | ||||
|                 'ext': fnode.text.partition('-')[0] | ||||
|             } | ||||
|  | ||||
|             for fnode in format_doc.findall('./formats/format') | ||||
| @@ -67,7 +68,6 @@ class TriluliluIE(InfoExtractor): | ||||
|         } | ||||
|  | ||||
|         # TODO: Remove when #980 has been merged | ||||
|         info['url'] = formats[-1]['url'] | ||||
|         info['ext'] = formats[-1]['format'].partition('-')[0] | ||||
|         info.update(formats[-1]) | ||||
|  | ||||
|         return info | ||||
|   | ||||
| @@ -1,6 +1,11 @@ | ||||
| import json | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     compat_urlparse, | ||||
|     get_meta_content, | ||||
| ) | ||||
|  | ||||
|  | ||||
| class UstreamIE(InfoExtractor): | ||||
| @@ -43,3 +48,25 @@ class UstreamIE(InfoExtractor): | ||||
|                 'thumbnail': thumbnail, | ||||
|                } | ||||
|         return info | ||||
|  | ||||
| class UstreamChannelIE(InfoExtractor): | ||||
|     _VALID_URL = r'https?://www\.ustream\.tv/channel/(?P<slug>.+)' | ||||
|     IE_NAME = u'ustream:channel' | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         m = re.match(self._VALID_URL, url) | ||||
|         slug = m.group('slug') | ||||
|         webpage = self._download_webpage(url, slug) | ||||
|         channel_id = get_meta_content('ustream:channel_id', webpage) | ||||
|  | ||||
|         BASE = 'http://www.ustream.tv' | ||||
|         next_url = '/ajax/socialstream/videos/%s/1.json' % channel_id | ||||
|         video_ids = [] | ||||
|         while next_url: | ||||
|             reply = json.loads(self._download_webpage(compat_urlparse.urljoin(BASE, next_url), channel_id)) | ||||
|             video_ids.extend(re.findall(r'data-content-id="(\d.*)"', reply['data'])) | ||||
|             next_url = reply['nextUrl'] | ||||
|  | ||||
|         urls = ['http://www.ustream.tv/recorded/' + vid for vid in video_ids] | ||||
|         url_entries = [self.url_result(eurl, 'Ustream') for eurl in urls] | ||||
|         return self.playlist_result(url_entries, channel_id) | ||||
|   | ||||
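UstreamChannelIE walks a server-provided nextUrl chain, harvesting data-content-id attributes until the field comes back empty. The same loop with canned replies in place of _download_webpage plus json.loads:

    import re

    PAGES = {
        '/ajax/socialstream/videos/123/1.json':
            {'data': '<div data-content-id="101">', 'nextUrl': '/ajax/socialstream/videos/123/2.json'},
        '/ajax/socialstream/videos/123/2.json':
            {'data': '<div data-content-id="102">', 'nextUrl': ''},
    }

    video_ids = []
    next_url = '/ajax/socialstream/videos/123/1.json'
    while next_url:
        reply = PAGES[next_url]  # really: json.loads(self._download_webpage(...))
        video_ids.extend(re.findall(r'data-content-id="(\d.*)"', reply['data']))
        next_url = reply['nextUrl']

    print(['http://www.ustream.tv/recorded/' + vid for vid in video_ids])
    # -> ['http://www.ustream.tv/recorded/101', 'http://www.ustream.tv/recorded/102']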
38  youtube_dl/extractor/vice.py  Normal file
							| @@ -0,0 +1,38 @@ | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from .ooyala import OoyalaIE | ||||
| from ..utils import ExtractorError | ||||
|  | ||||
|  | ||||
| class ViceIE(InfoExtractor): | ||||
|     _VALID_URL = r'http://www.vice.com/.*?/(?P<name>.+)' | ||||
|  | ||||
|     _TEST = { | ||||
|         u'url': u'http://www.vice.com/Fringes/cowboy-capitalists-part-1', | ||||
|         u'file': u'43cW1mYzpia9IlestBjVpd23Yu3afAfp.mp4', | ||||
|         u'info_dict': { | ||||
|             u'title': u'VICE_COWBOYCAPITALISTS_PART01_v1_VICE_WM_1080p.mov', | ||||
|         }, | ||||
|         u'params': { | ||||
|             # Requires ffmpeg (m3u8 manifest) | ||||
|             u'skip_download': True, | ||||
|         }, | ||||
|     } | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         name = mobj.group('name') | ||||
|         webpage = self._download_webpage(url, name) | ||||
|         try: | ||||
|             ooyala_url = self._og_search_video_url(webpage) | ||||
|         except ExtractorError: | ||||
|             try: | ||||
|                 embed_code = self._search_regex( | ||||
|                     r'OO.Player.create\(\'ooyalaplayer\', \'(.+?)\'', webpage, | ||||
|                     u'ooyala embed code') | ||||
|                 ooyala_url = OoyalaIE._url_for_embed_code(embed_code) | ||||
|             except ExtractorError: | ||||
|                 raise ExtractorError(u'The page doesn\'t contain a video', expected=True) | ||||
|         return self.url_result(ooyala_url, ie='Ooyala') | ||||
|  | ||||
| @@ -11,8 +11,8 @@ from ..utils import ( | ||||
|  | ||||
| class XHamsterIE(InfoExtractor): | ||||
|     """Information Extractor for xHamster""" | ||||
|     _VALID_URL = r'(?:http://)?(?:www.)?xhamster\.com/movies/(?P<id>[0-9]+)/.*\.html' | ||||
|     _TEST = { | ||||
|     _VALID_URL = r'(?:http://)?(?:www\.)?xhamster\.com/movies/(?P<id>[0-9]+)/(?P<seo>.+?)\.html(?:\?.*)?' | ||||
|     _TESTS = [{ | ||||
|         u'url': u'http://xhamster.com/movies/1509445/femaleagent_shy_beauty_takes_the_bait.html', | ||||
|         u'file': u'1509445.flv', | ||||
|         u'md5': u'9f48e0e8d58e3076bb236ff412ab62fa', | ||||
| @@ -21,13 +21,24 @@ class XHamsterIE(InfoExtractor): | ||||
|             u"uploader_id": u"Ruseful2011",  | ||||
|             u"title": u"FemaleAgent Shy beauty takes the bait" | ||||
|         } | ||||
|     } | ||||
|     }, | ||||
|     { | ||||
|         u'url': u'http://xhamster.com/movies/2221348/britney_spears_sexy_booty.html?hd', | ||||
|         u'file': u'2221348.flv', | ||||
|         u'md5': u'e767b9475de189320f691f49c679c4c7', | ||||
|         u'info_dict': { | ||||
|             u"upload_date": u"20130914",  | ||||
|             u"uploader_id": u"jojo747400",  | ||||
|             u"title": u"Britney Spears  Sexy Booty" | ||||
|         } | ||||
|     }] | ||||
|  | ||||
|     def _real_extract(self,url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|  | ||||
|         video_id = mobj.group('id') | ||||
|         mrss_url = 'http://xhamster.com/movies/%s/.html' % video_id | ||||
|         seo = mobj.group('seo') | ||||
|         mrss_url = 'http://xhamster.com/movies/%s/%s.html?hd' % (video_id, seo) | ||||
|         webpage = self._download_webpage(mrss_url, video_id) | ||||
|  | ||||
|         mobj = re.search(r'\'srv\': \'(?P<server>[^\']*)\',\s*\'file\': \'(?P<file>[^\']+)\',', webpage) | ||||
|   | ||||
| @@ -5,8 +5,10 @@ import netrc | ||||
| import re | ||||
| import socket | ||||
| import itertools | ||||
| import xml.etree.ElementTree | ||||
|  | ||||
| from .common import InfoExtractor, SearchInfoExtractor | ||||
| from .subtitles import SubtitlesInfoExtractor | ||||
| from ..utils import ( | ||||
|     compat_http_client, | ||||
|     compat_parse_qs, | ||||
| @@ -130,13 +132,15 @@ class YoutubeBaseInfoExtractor(InfoExtractor): | ||||
|             return | ||||
|         self._confirm_age() | ||||
|  | ||||
| class YoutubeIE(YoutubeBaseInfoExtractor): | ||||
|  | ||||
| class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): | ||||
|     IE_DESC = u'YouTube.com' | ||||
|     _VALID_URL = r"""^ | ||||
|                      ( | ||||
|                          (?:https?://)?                                       # http(s):// (optional) | ||||
|                          (?:(?:(?:(?:\w+\.)?youtube(?:-nocookie)?\.com/| | ||||
|                             tube\.majestyc\.net/)                             # the various hostnames, with wildcard subdomains | ||||
|                             tube\.majestyc\.net/| | ||||
|                             youtube\.googleapis\.com/)                        # the various hostnames, with wildcard subdomains | ||||
|                          (?:.*?\#/)?                                          # handle anchor (#/) redirect urls | ||||
|                          (?:                                                  # the various things that can precede the ID: | ||||
|                              (?:(?:v|embed|e)/)                               # v/ or embed/ or e/ | ||||
| @@ -397,19 +401,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor): | ||||
|         """Report attempt to download video info webpage.""" | ||||
|         self.to_screen(u'%s: Downloading video info webpage' % video_id) | ||||
|  | ||||
|     def report_video_subtitles_download(self, video_id): | ||||
|         """Report attempt to download video info webpage.""" | ||||
|         self.to_screen(u'%s: Checking available subtitles' % video_id) | ||||
|  | ||||
|     def report_video_subtitles_request(self, video_id, sub_lang, format): | ||||
|         """Report attempt to download video info webpage.""" | ||||
|         self.to_screen(u'%s: Downloading video subtitles for %s.%s' % (video_id, sub_lang, format)) | ||||
|  | ||||
|     def report_video_subtitles_available(self, video_id, sub_lang_list): | ||||
|         """Report available subtitles.""" | ||||
|         sub_lang = ",".join(list(sub_lang_list.keys())) | ||||
|         self.to_screen(u'%s: Available subtitles for video: %s' % (video_id, sub_lang)) | ||||
|  | ||||
|     def report_information_extraction(self, video_id): | ||||
|         """Report attempt to extract video information.""" | ||||
|         self.to_screen(u'%s: Extracting video information' % video_id) | ||||
| @@ -425,7 +416,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor): | ||||
|     def _decrypt_signature(self, s): | ||||
|         """Turn the encrypted s field into a working signature""" | ||||
|  | ||||
|         if len(s) == 92: | ||||
|         if len(s) == 93: | ||||
|             return s[86:29:-1] + s[88] + s[28:5:-1] | ||||
|         elif len(s) == 92: | ||||
|             return s[25] + s[3:25] + s[0] + s[26:42] + s[79] + s[43:79] + s[91] + s[80:83] | ||||
|         elif len(s) == 90: | ||||
|             return s[25] + s[3:25] + s[2] + s[26:40] + s[77] + s[41:77] + s[89] + s[78:81] | ||||
| @@ -438,13 +431,13 @@ class YoutubeIE(YoutubeBaseInfoExtractor): | ||||
|         elif len(s) == 86: | ||||
|             return s[5:34] + s[0] + s[35:38] + s[3] + s[39:45] + s[38] + s[46:53] + s[73] + s[54:73] + s[85] + s[74:85] + s[53] | ||||
|         elif len(s) == 85: | ||||
|             return s[83:34:-1] + s[0] + s[33:27:-1] + s[3] + s[26:19:-1] + s[34] + s[18:3:-1] + s[27] | ||||
|             return s[3:11] + s[0] + s[12:55] + s[84] + s[56:84] | ||||
|         elif len(s) == 84: | ||||
|             return s[81:36:-1] + s[0] + s[35:2:-1] | ||||
|         elif len(s) == 83: | ||||
|             return s[81:64:-1] + s[82] + s[63:52:-1] + s[45] + s[51:45:-1] + s[1] + s[44:1:-1] + s[0] | ||||
|         elif len(s) == 82: | ||||
|             return s[1:19] + s[0] + s[20:68] + s[19] + s[69:82] | ||||
|             return s[80:73:-1] + s[81] + s[72:54:-1] + s[2] + s[53:43:-1] + s[0] + s[42:2:-1] + s[43] + s[1] + s[54] | ||||
|         elif len(s) == 81: | ||||
|             return s[56] + s[79:56:-1] + s[41] + s[55:41:-1] + s[80] + s[40:34:-1] + s[0] + s[33:29:-1] + s[34] + s[28:9:-1] + s[29] + s[8:0:-1] + s[9] | ||||
|         elif len(s) == 80: | ||||
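Each of these length-keyed branches is pure string slicing, so a new rule (like the 93-character one added above) can be exercised in isolation against a dummy signature; only the transform itself is taken from the hunk.

    s = ''.join(chr(33 + i) for i in range(93))  # stand-in scrambled signature

    def decrypt_93(s):
        return s[86:29:-1] + s[88] + s[28:5:-1]

    sig = decrypt_93(s)
    print(len(s), '->', len(sig))  # 93 -> 81: the transform also drops characters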
| @@ -464,56 +457,38 @@ class YoutubeIE(YoutubeBaseInfoExtractor): | ||||
|             # Fall back to the other algorithms | ||||
|             return self._decrypt_signature(s) | ||||
|  | ||||
|  | ||||
|     def _get_available_subtitles(self, video_id): | ||||
|         self.report_video_subtitles_download(video_id) | ||||
|         request = compat_urllib_request.Request('http://video.google.com/timedtext?hl=en&type=list&v=%s' % video_id) | ||||
|         try: | ||||
|             sub_list = compat_urllib_request.urlopen(request).read().decode('utf-8') | ||||
|         except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err: | ||||
|             sub_list = self._download_webpage( | ||||
|                 'http://video.google.com/timedtext?hl=en&type=list&v=%s' % video_id, | ||||
|                 video_id, note=False) | ||||
|         except ExtractorError as err: | ||||
|             self._downloader.report_warning(u'unable to download video subtitles: %s' % compat_str(err)) | ||||
|             return {} | ||||
|         sub_lang_list = re.findall(r'name="([^"]*)"[^>]+lang_code="([\w\-]+)"', sub_list) | ||||
|         sub_lang_list = dict((l[1], l[0]) for l in sub_lang_list) | ||||
|         lang_list = re.findall(r'name="([^"]*)"[^>]+lang_code="([\w\-]+)"', sub_list) | ||||
|  | ||||
|         sub_lang_list = {} | ||||
|         for l in lang_list: | ||||
|             lang = l[1] | ||||
|             params = compat_urllib_parse.urlencode({ | ||||
|                 'lang': lang, | ||||
|                 'v': video_id, | ||||
|                 'fmt': self._downloader.params.get('subtitlesformat'), | ||||
|             }) | ||||
|             url = u'http://www.youtube.com/api/timedtext?' + params | ||||
|             sub_lang_list[lang] = url | ||||
|         if not sub_lang_list: | ||||
|             self._downloader.report_warning(u'video doesn\'t have subtitles') | ||||
|             return {} | ||||
|         return sub_lang_list | ||||
|  | ||||
|     def _list_available_subtitles(self, video_id): | ||||
|         sub_lang_list = self._get_available_subtitles(video_id) | ||||
|         self.report_video_subtitles_available(video_id, sub_lang_list) | ||||
|  | ||||
|     def _request_subtitle(self, sub_lang, sub_name, video_id, format): | ||||
|         """ | ||||
|         Return the subtitle as a string or None if they are not found | ||||
|         """ | ||||
|         self.report_video_subtitles_request(video_id, sub_lang, format) | ||||
|         params = compat_urllib_parse.urlencode({ | ||||
|             'lang': sub_lang, | ||||
|             'name': sub_name, | ||||
|             'v': video_id, | ||||
|             'fmt': format, | ||||
|         }) | ||||
|         url = 'http://www.youtube.com/api/timedtext?' + params | ||||
|         try: | ||||
|             sub = compat_urllib_request.urlopen(url).read().decode('utf-8') | ||||
|         except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err: | ||||
|             self._downloader.report_warning(u'unable to download video subtitles for %s: %s' % (sub_lang, compat_str(err))) | ||||
|             return | ||||
|         if not sub: | ||||
|             self._downloader.report_warning(u'Did not fetch video subtitles') | ||||
|             return | ||||
|         return sub | ||||
|  | ||||
|     def _request_automatic_caption(self, video_id, webpage): | ||||
|     def _get_available_automatic_caption(self, video_id, webpage): | ||||
|         """We need the webpage for getting the captions url, pass it as an | ||||
|            argument to speed up the process.""" | ||||
|         sub_lang = (self._downloader.params.get('subtitleslangs') or ['en'])[0] | ||||
|         sub_format = self._downloader.params.get('subtitlesformat') | ||||
|         self.to_screen(u'%s: Looking for automatic captions' % video_id) | ||||
|         mobj = re.search(r';ytplayer.config = ({.*?});', webpage) | ||||
|         err_msg = u'Couldn\'t find automatic captions for "%s"' % sub_lang | ||||
|         err_msg = u'Couldn\'t find automatic captions for %s' % video_id | ||||
|         if mobj is None: | ||||
|             self._downloader.report_warning(err_msg) | ||||
|             return {} | ||||
| @@ -522,53 +497,38 @@ class YoutubeIE(YoutubeBaseInfoExtractor): | ||||
|             args = player_config[u'args'] | ||||
|             caption_url = args[u'ttsurl'] | ||||
|             timestamp = args[u'timestamp'] | ||||
|             params = compat_urllib_parse.urlencode({ | ||||
|                 'lang': 'en', | ||||
|                 'tlang': sub_lang, | ||||
|                 'fmt': sub_format, | ||||
|                 'ts': timestamp, | ||||
|                 'kind': 'asr', | ||||
|             # We get the available subtitles | ||||
|             list_params = compat_urllib_parse.urlencode({ | ||||
|                 'type': 'list', | ||||
|                 'tlangs': 1, | ||||
|                 'asrs': 1, | ||||
|             }) | ||||
|             subtitles_url = caption_url + '&' + params | ||||
|             sub = self._download_webpage(subtitles_url, video_id, u'Downloading automatic captions') | ||||
|             return {sub_lang: sub} | ||||
|             list_url = caption_url + '&' + list_params | ||||
|             list_page = self._download_webpage(list_url, video_id) | ||||
|             caption_list = xml.etree.ElementTree.fromstring(list_page.encode('utf-8')) | ||||
|             original_lang_node = caption_list.find('track') | ||||
|             if original_lang_node.attrib.get('kind') != 'asr': | ||||
|                 self._downloader.report_warning(u'Video doesn\'t have automatic captions') | ||||
|                 return {} | ||||
|             original_lang = original_lang_node.attrib['lang_code'] | ||||
|  | ||||
|             sub_lang_list = {} | ||||
|             for lang_node in caption_list.findall('target'): | ||||
|                 sub_lang = lang_node.attrib['lang_code'] | ||||
|                 params = compat_urllib_parse.urlencode({ | ||||
|                     'lang': original_lang, | ||||
|                     'tlang': sub_lang, | ||||
|                     'fmt': sub_format, | ||||
|                     'ts': timestamp, | ||||
|                     'kind': 'asr', | ||||
|                 }) | ||||
|                 sub_lang_list[sub_lang] = caption_url + '&' + params | ||||
|             return sub_lang_list | ||||
|         # An extractor error can be raised by the download process if there are | ||||
|         # no automatic captions but there are subtitles | ||||
|         except (KeyError, ExtractorError): | ||||
|             self._downloader.report_warning(err_msg) | ||||
|             return {} | ||||
|      | ||||
|     def _extract_subtitles(self, video_id): | ||||
|         """ | ||||
|         Return a dictionary: {language: subtitles} or {} if the subtitles | ||||
|         couldn't be found | ||||
|         """ | ||||
|         available_subs_list = self._get_available_subtitles(video_id) | ||||
|         sub_format = self._downloader.params.get('subtitlesformat') | ||||
|         if  not available_subs_list: #There was some error, it didn't get the available subtitles | ||||
|             return {} | ||||
|         if self._downloader.params.get('allsubtitles', False): | ||||
|             sub_lang_list = available_subs_list | ||||
|         else: | ||||
|             if self._downloader.params.get('subtitleslangs', False): | ||||
|                 requested_langs = self._downloader.params.get('subtitleslangs') | ||||
|             elif 'en' in available_subs_list: | ||||
|                 requested_langs = ['en'] | ||||
|             else: | ||||
|                 requested_langs = [list(available_subs_list.keys())[0]] | ||||
|  | ||||
|             sub_lang_list = {} | ||||
|             for sub_lang in requested_langs: | ||||
|                 if sub_lang not in available_subs_list: | ||||
|                     self._downloader.report_warning(u'no closed captions found in the specified language "%s"' % sub_lang) | ||||
|                     continue | ||||
|                 sub_lang_list[sub_lang] = available_subs_list[sub_lang] | ||||
|         subtitles = {} | ||||
|         for sub_lang in sub_lang_list: | ||||
|             subtitle = self._request_subtitle(sub_lang, sub_lang_list[sub_lang].encode('utf-8'), video_id, sub_format) | ||||
|             if subtitle: | ||||
|                 subtitles[sub_lang] = subtitle | ||||
|         return subtitles | ||||
|  | ||||
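The language selection above falls back in order: --all-subs takes every available track, --sub-lang takes the requested list, then English, then the first available language. Condensed into one hypothetical helper (a sketch of the same fallback chain, not the extractor's actual code):

def pick_sub_langs(available, params):
    if params.get('allsubtitles', False):
        return list(available)
    requested = (params.get('subtitleslangs')
                 or (['en'] if 'en' in available else list(available)[:1]))
    # Drop languages the video doesn't actually carry; the extractor warns instead
    return [lang for lang in requested if lang in available]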
|     def _print_formats(self, formats): | ||||
|         print('Available formats:') | ||||
| @@ -768,15 +728,10 @@ class YoutubeIE(YoutubeBaseInfoExtractor): | ||||
|                 video_description = u'' | ||||
|  | ||||
|         # subtitles | ||||
|         video_subtitles = None | ||||
|  | ||||
|         if self._downloader.params.get('writesubtitles', False) or self._downloader.params.get('allsubtitles', False): | ||||
|             video_subtitles = self._extract_subtitles(video_id) | ||||
|         elif self._downloader.params.get('writeautomaticsub', False): | ||||
|             video_subtitles = self._request_automatic_caption(video_id, video_webpage) | ||||
|         video_subtitles = self.extract_subtitles(video_id, video_webpage) | ||||
|  | ||||
|         if self._downloader.params.get('listsubtitles', False): | ||||
|             self._list_available_subtitles(video_id) | ||||
|             self._list_available_subtitles(video_id, video_webpage) | ||||
|             return | ||||
|  | ||||
|         if 'length_seconds' not in video_info: | ||||
| @@ -830,10 +785,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): | ||||
|                         if self._downloader.params.get('verbose'): | ||||
|                             s = url_data['s'][0] | ||||
|                             if age_gate: | ||||
|                                 player_version = self._search_regex(r'ad3-(.+?)\.swf', | ||||
|                                     video_info['ad3_module'][0] if 'ad3_module' in video_info else 'NOT FOUND', | ||||
|                                     'flash player', fatal=False) | ||||
|                                 player = 'flash player %s' % player_version | ||||
|                                 player = 'flash player' | ||||
|                             else: | ||||
|                                 player = u'html5 player %s' % self._search_regex(r'html5player-(.+?)\.js', video_webpage, | ||||
|                                     'html5 player', fatal=False) | ||||
| @@ -1055,6 +1007,9 @@ class YoutubeUserIE(InfoExtractor): | ||||
|                 response = json.loads(page) | ||||
|             except ValueError as err: | ||||
|                 raise ExtractorError(u'Invalid JSON in API response: ' + compat_str(err)) | ||||
|             if 'entry' not in response['feed']: | ||||
|                 # An empty page means the total video count is an exact multiple of self._MAX_RESULTS; stop paginating | ||||
|                 break | ||||
|  | ||||
|             # Extract video identifiers | ||||
|             ids_in_page = [] | ||||
|   | ||||
| @@ -249,7 +249,17 @@ def htmlentity_transform(matchobj): | ||||
|     return (u'&%s;' % entity) | ||||
|  | ||||
| compat_html_parser.locatestarttagend = re.compile(r"""<[a-zA-Z][-.a-zA-Z0-9:_]*(?:\s+(?:(?<=['"\s])[^\s/>][^\s/=>]*(?:\s*=+\s*(?:'[^']*'|"[^"]*"|(?!['"])[^>\s]*))?\s*)*)?\s*""", re.VERBOSE) # backport bugfix | ||||
| class AttrParser(compat_html_parser.HTMLParser): | ||||
| class BaseHTMLParser(compat_html_parser.HTMLParser): | ||||
|     def __init__(self): | ||||
|         compat_html_parser.HTMLParser.__init__(self) | ||||
|         self.html = None | ||||
|  | ||||
|     def loads(self, html): | ||||
|         self.html = html | ||||
|         self.feed(html) | ||||
|         self.close() | ||||
|  | ||||
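Hoisting loads() into BaseHTMLParser lets any parser subclass consume a whole document in one call. A hypothetical subclass that merely records tag names shows the pattern:

class TagCollector(BaseHTMLParser):
    def __init__(self):
        BaseHTMLParser.__init__(self)
        self.tags = []

    def handle_starttag(self, tag, attrs):
        self.tags.append(tag)

parser = TagCollector()
parser.loads(u'<div><p>Hello</p></div>')
# parser.tags is now ['div', 'p']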
| class AttrParser(BaseHTMLParser): | ||||
|     """Modified HTMLParser that isolates a tag with the specified attribute""" | ||||
|     def __init__(self, attribute, value): | ||||
|         self.attribute = attribute | ||||
| @@ -257,10 +267,9 @@ class AttrParser(compat_html_parser.HTMLParser): | ||||
|         self.result = None | ||||
|         self.started = False | ||||
|         self.depth = {} | ||||
|         self.html = None | ||||
|         self.watch_startpos = False | ||||
|         self.error_count = 0 | ||||
|         compat_html_parser.HTMLParser.__init__(self) | ||||
|         BaseHTMLParser.__init__(self) | ||||
|  | ||||
|     def error(self, message): | ||||
|         if self.error_count > 10 or self.started: | ||||
| @@ -269,11 +278,6 @@ class AttrParser(compat_html_parser.HTMLParser): | ||||
|         self.error_count += 1 | ||||
|         self.goahead(1) | ||||
|  | ||||
|     def loads(self, html): | ||||
|         self.html = html | ||||
|         self.feed(html) | ||||
|         self.close() | ||||
|  | ||||
|     def handle_starttag(self, tag, attrs): | ||||
|         attrs = dict(attrs) | ||||
|         if self.started: | ||||
| @@ -334,6 +338,38 @@ def get_element_by_attribute(attribute, value, html): | ||||
|         pass | ||||
|     return parser.get_result() | ||||
|  | ||||
| class MetaParser(BaseHTMLParser): | ||||
|     """ | ||||
|     Modified HTMLParser that isolates a meta tag with the specified name  | ||||
|     attribute. | ||||
|     """ | ||||
|     def __init__(self, name): | ||||
|         BaseHTMLParser.__init__(self) | ||||
|         self.name = name | ||||
|         self.content = None | ||||
|         self.result = None | ||||
|  | ||||
|     def handle_starttag(self, tag, attrs): | ||||
|         if tag != 'meta': | ||||
|             return | ||||
|         attrs = dict(attrs) | ||||
|         if attrs.get('name') == self.name: | ||||
|             self.result = attrs.get('content') | ||||
|  | ||||
|     def get_result(self): | ||||
|         return self.result | ||||
|  | ||||
| def get_meta_content(name, html): | ||||
|     """ | ||||
|     Return the content attribute from the meta tag with the given name attribute. | ||||
|     """ | ||||
|     parser = MetaParser(name) | ||||
|     try: | ||||
|         parser.loads(html) | ||||
|     except compat_html_parser.HTMLParseError: | ||||
|         pass | ||||
|     return parser.get_result() | ||||
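A quick illustration of get_meta_content, assuming a page whose head carries a description meta tag:

html = u'<html><head><meta name="description" content="A demo page"></head></html>'
get_meta_content('description', html)  # -> u'A demo page'
get_meta_content('keywords', html)     # -> None, no matching meta tag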
|  | ||||
|  | ||||
| def clean_html(html): | ||||
|     """Clean an HTML snippet into a readable string""" | ||||
| @@ -664,7 +700,16 @@ def unified_strdate(date_str): | ||||
|     date_str = date_str.replace(',',' ') | ||||
|     # %z (UTC offset) is only supported in python>=3.2 | ||||
|     date_str = re.sub(r' (\+|-)[\d]*$', '', date_str) | ||||
|     format_expressions = ['%d %B %Y', '%B %d %Y', '%b %d %Y', '%Y-%m-%d', '%d/%m/%Y', '%Y/%m/%d %H:%M:%S', '%d.%m.%Y %H:%M'] | ||||
|     format_expressions = [ | ||||
|         '%d %B %Y', | ||||
|         '%B %d %Y', | ||||
|         '%b %d %Y', | ||||
|         '%Y-%m-%d', | ||||
|         '%d/%m/%Y', | ||||
|         '%Y/%m/%d %H:%M:%S', | ||||
|         '%d.%m.%Y %H:%M', | ||||
|         '%Y-%m-%dT%H:%M:%SZ', | ||||
|     ] | ||||
|     for expression in format_expressions: | ||||
|         try: | ||||
|             upload_date = datetime.datetime.strptime(date_str, expression).strftime('%Y%m%d') | ||||
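The new '%Y-%m-%dT%H:%M:%SZ' entry lets ISO-8601 timestamps with a literal Z suffix normalize to YYYYMMDD like the other formats, for example:

import datetime

datetime.datetime.strptime('2013-09-20T08:14:33Z', '%Y-%m-%dT%H:%M:%SZ').strftime('%Y%m%d')
# -> '20130920'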
| @@ -745,6 +790,18 @@ def platform_name(): | ||||
|     return res | ||||
|  | ||||
|  | ||||
| def write_string(s, out=None): | ||||
|     if out is None: | ||||
|         out = sys.stderr | ||||
|     assert type(s) == type(u'') | ||||
|  | ||||
|     if ('b' in getattr(out, 'mode', '') or | ||||
|             sys.version_info[0] < 3):  # Python 2 lies about mode of sys.stderr | ||||
|         s = s.encode(preferredencoding(), 'ignore') | ||||
|     out.write(s) | ||||
|     out.flush() | ||||
|  | ||||
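write_string centralizes the Python 2/3 output handling: when the target stream is binary, or on Python 2 (where sys.stderr misreports its mode), the unicode string is encoded with the user's preferred encoding before being written. Typical calls might look like:

import sys

write_string(u'[youtube] extracting captions\n')  # defaults to sys.stderr
write_string(u'done\n', out=sys.stdout)           # any writable stream works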
|  | ||||
| def bytes_to_intlist(bs): | ||||
|     if not bs: | ||||
|         return [] | ||||
|   | ||||
| @@ -1,2 +1,2 @@ | ||||
|  | ||||
| __version__ = '2013.09.10' | ||||
| __version__ = '2013.09.20' | ||||
|   | ||||