release 2013.12.23.4

Merge remote-tracking branch 'jaimeMF/split-downloaders'
[youtube] Prefer videos with sound
2013-12-23 05:08:35 +01:00 · 2013-12-23 05:03:32 +01:00 · 2013-12-23 04:51:42 +01:00 · 2013-12-23 04:39:55 +01:00 · 2013-12-23 04:37:40 +01:00 · 2013-12-23 04:31:45 +01:00
15 changed files with 979 additions and 924 deletions
--- a/README.md
+++ b/README.md
@@ -39,7 +39,8 @@ which means you can modify it, redistribute it or use it however you like.
                               /youtube-dl .
    --no-cache-dir             Disable filesystem caching
    --bidi-workaround          Work around terminals that lack bidirectional
-                               text support. Requires fribidi executable in PATH
+                               text support. Requires bidiv or fribidi
                               executable in PATH
 ## Video Selection:
    --playlist-start NUMBER    playlist video to start at (default is 1)
--- a/setup.py
+++ b/setup.py
@@ -71,7 +71,7 @@ setup(
    author_email='ytdl@yt-dl.org',
    maintainer='Philipp Hagemeister',
    maintainer_email='phihag@phihag.de',
-    packages=['youtube_dl', 'youtube_dl.extractor'],
+    packages=['youtube_dl', 'youtube_dl.extractor', 'youtube_dl.downloader'],
    # Provokes warning on most systems (why?!)
    # test_suite = 'nose.collector',
--- a/test/test_download.py
+++ b/test/test_download.py
@@ -90,7 +90,7 @@ def generator(test_case):
        def _hook(status):
            if status['status'] == 'finished':
                finished_hook_called.add(status['filename'])
-        ydl.fd.add_progress_hook(_hook)
+        ydl.add_downloader_progress_hook(_hook)
        def get_tc_filename(tc):
            return tc.get('file') or ydl.prepare_filename(tc.get('info_dict', {}))
--- a/youtube_dl/FileDownloader.py
+++ b/youtube_dl/FileDownloader.py
@@ -1,724 +1,12 @@
-import os
+# Legacy file for backwards compatibility, use youtube_dl.downloader instead!
-import re
+from .downloader import FileDownloader as RealFileDownloader
-import subprocess
+from .downloader import get_suitable_downloader
 import sys
 import time
 from .utils import (
    compat_urllib_error,
    compat_urllib_request,
    ContentTooShortError,
    determine_ext,
    encodeFilename,
    format_bytes,
    sanitize_open,
    timeconvert,
 )
 class FileDownloader(object):
    """File Downloader class.
    File downloader objects are the ones responsible for downloading the
    actual video file and writing it to disk.
    File downloaders accept a lot of parameters. In order not to saturate
    the object constructor with arguments, it receives a dictionary of
    options instead.
    Available options:
    verbose:           Print additional info to stdout.
    quiet:             Do not print messages to stdout.
    ratelimit:         Download speed limit, in bytes/sec.
    retries:           Number of times to retry for HTTP error 5xx
    buffersize:        Size of download buffer in bytes.
    noresizebuffer:    Do not automatically resize the download buffer.
    continuedl:        Try to continue downloads if possible.
    noprogress:        Do not print the progress bar.
    logtostderr:       Log messages to stderr instead of stdout.
    consoletitle:      Display progress in console window's titlebar.
    nopart:            Do not use temporary .part files.
    updatetime:        Use the Last-modified header to set output file timestamps.
    test:              Download only first bytes to test the downloader.
    min_filesize:      Skip files smaller than this size
    max_filesize:      Skip files larger than this size
    """
    params = None
    def __init__(self, ydl, params):
        """Create a FileDownloader object with the given options."""
        self.ydl = ydl
        self._progress_hooks = []
        self.params = params
    @staticmethod
    def format_seconds(seconds):
        (mins, secs) = divmod(seconds, 60)
        (hours, mins) = divmod(mins, 60)
        if hours > 99:
            return '--:--:--'
        if hours == 0:
            return '%02d:%02d' % (mins, secs)
        else:
            return '%02d:%02d:%02d' % (hours, mins, secs)
    @staticmethod
    def calc_percent(byte_counter, data_len):
        if data_len is None:
            return None
        return float(byte_counter) / float(data_len) * 100.0
    @staticmethod
    def format_percent(percent):
        if percent is None:
            return '---.-%'
        return '%6s' % ('%3.1f%%' % percent)
    @staticmethod
    def calc_eta(start, now, total, current):
        if total is None:
            return None
        dif = now - start
        if current == 0 or dif < 0.001: # One millisecond
            return None
        rate = float(current) / dif
        return int((float(total) - float(current)) / rate)
    @staticmethod
    def format_eta(eta):
        if eta is None:
            return '--:--'
        return FileDownloader.format_seconds(eta)
    @staticmethod
    def calc_speed(start, now, bytes):
        dif = now - start
        if bytes == 0 or dif < 0.001: # One millisecond
            return None
        return float(bytes) / dif
    @staticmethod
    def format_speed(speed):
        if speed is None:
            return '%10s' % '---b/s'
        return '%10s' % ('%s/s' % format_bytes(speed))
    @staticmethod
    def best_block_size(elapsed_time, bytes):
        new_min = max(bytes / 2.0, 1.0)
        new_max = min(max(bytes * 2.0, 1.0), 4194304) # Do not surpass 4 MB
        if elapsed_time < 0.001:
            return int(new_max)
        rate = bytes / elapsed_time
        if rate > new_max:
            return int(new_max)
        if rate < new_min:
            return int(new_min)
        return int(rate)
    @staticmethod
    def parse_bytes(bytestr):
        """Parse a string indicating a byte quantity into an integer."""
        matchobj = re.match(r'(?i)^(\d+(?:\.\d+)?)([kMGTPEZY]?)$', bytestr)
        if matchobj is None:
            return None
        number = float(matchobj.group(1))
        multiplier = 1024.0 ** 'bkmgtpezy'.index(matchobj.group(2).lower())
        return int(round(number * multiplier))
    def to_screen(self, *args, **kargs):
        self.ydl.to_screen(*args, **kargs)
    def to_stderr(self, message):
        self.ydl.to_screen(message)
    def to_console_title(self, message):
        self.ydl.to_console_title(message)
    def trouble(self, *args, **kargs):
        self.ydl.trouble(*args, **kargs)
    def report_warning(self, *args, **kargs):
        self.ydl.report_warning(*args, **kargs)
    def report_error(self, *args, **kargs):
        self.ydl.report_error(*args, **kargs)
    def slow_down(self, start_time, byte_counter):
        """Sleep if the download speed is over the rate limit."""
        rate_limit = self.params.get('ratelimit', None)
        if rate_limit is None or byte_counter == 0:
            return
        now = time.time()
        elapsed = now - start_time
        if elapsed <= 0.0:
            return
        speed = float(byte_counter) / elapsed
        if speed > rate_limit:
            time.sleep((byte_counter - rate_limit * (now - start_time)) / rate_limit)
    def temp_name(self, filename):
        """Returns a temporary filename for the given filename."""
        if self.params.get('nopart', False) or filename == u'-' or \
                (os.path.exists(encodeFilename(filename)) and not os.path.isfile(encodeFilename(filename))):
            return filename
        return filename + u'.part'
    def undo_temp_name(self, filename):
        if filename.endswith(u'.part'):
            return filename[:-len(u'.part')]
        return filename
    def try_rename(self, old_filename, new_filename):
        try:
            if old_filename == new_filename:
                return
            os.rename(encodeFilename(old_filename), encodeFilename(new_filename))
        except (IOError, OSError):
            self.report_error(u'unable to rename file')
    def try_utime(self, filename, last_modified_hdr):
        """Try to set the last-modified time of the given file."""
        if last_modified_hdr is None:
            return
        if not os.path.isfile(encodeFilename(filename)):
            return
        timestr = last_modified_hdr
        if timestr is None:
            return
        filetime = timeconvert(timestr)
        if filetime is None:
            return filetime
        # Ignore obviously invalid dates
        if filetime == 0:
            return
        try:
            os.utime(filename, (time.time(), filetime))
        except:
            pass
        return filetime
    def report_destination(self, filename):
        """Report destination filename."""
        self.to_screen(u'[download] Destination: ' + filename)
    def _report_progress_status(self, msg, is_last_line=False):
        fullmsg = u'[download] ' + msg
        if self.params.get('progress_with_newline', False):
            self.to_screen(fullmsg)
        else:
            if os.name == 'nt':
                prev_len = getattr(self, '_report_progress_prev_line_length',
                                   0)
                if prev_len > len(fullmsg):
                    fullmsg += u' ' * (prev_len - len(fullmsg))
                self._report_progress_prev_line_length = len(fullmsg)
                clear_line = u'\r'
            else:
                clear_line = (u'\r\x1b[K' if sys.stderr.isatty() else u'\r')
            self.to_screen(clear_line + fullmsg, skip_eol=not is_last_line)
        self.to_console_title(u'youtube-dl ' + msg)
    def report_progress(self, percent, data_len_str, speed, eta):
        """Report download progress."""
        if self.params.get('noprogress', False):
            return
        if eta is not None:
            eta_str = self.format_eta(eta)
        else:
            eta_str = 'Unknown ETA'
        if percent is not None:
            percent_str = self.format_percent(percent)
        else:
            percent_str = 'Unknown %'
        speed_str = self.format_speed(speed)
        msg = (u'%s of %s at %s ETA %s' %
               (percent_str, data_len_str, speed_str, eta_str))
        self._report_progress_status(msg)
    def report_progress_live_stream(self, downloaded_data_len, speed, elapsed):
        if self.params.get('noprogress', False):
            return
        downloaded_str = format_bytes(downloaded_data_len)
        speed_str = self.format_speed(speed)
        elapsed_str = FileDownloader.format_seconds(elapsed)
        msg = u'%s at %s (%s)' % (downloaded_str, speed_str, elapsed_str)
        self._report_progress_status(msg)
    def report_finish(self, data_len_str, tot_time):
        """Report download finished."""
        if self.params.get('noprogress', False):
            self.to_screen(u'[download] Download completed')
        else:
            self._report_progress_status(
                (u'100%% of %s in %s' %
                 (data_len_str, self.format_seconds(tot_time))),
                is_last_line=True)
    def report_resuming_byte(self, resume_len):
        """Report attempt to resume at given byte."""
        self.to_screen(u'[download] Resuming download at byte %s' % resume_len)
    def report_retry(self, count, retries):
        """Report retry in case of HTTP error 5xx"""
        self.to_screen(u'[download] Got server HTTP error. Retrying (attempt %d of %d)...' % (count, retries))
    def report_file_already_downloaded(self, file_name):
        """Report file has already been fully downloaded."""
        try:
            self.to_screen(u'[download] %s has already been downloaded' % file_name)
        except UnicodeEncodeError:
            self.to_screen(u'[download] The file has already been downloaded')
    def report_unable_to_resume(self):
        """Report it was impossible to resume download."""
        self.to_screen(u'[download] Unable to resume')
    def _download_with_rtmpdump(self, filename, url, player_url, page_url, play_path, tc_url, live, conn):
        """Download an RTMP stream to *filename* by driving the rtmpdump executable.

        filename:   final destination; data is first written to
                    temp_name(filename) and renamed on success.
        url:        stream URL, passed to rtmpdump with -r.
        player_url: passed as --swfVfy when not None.
        page_url:   passed as --pageUrl when not None.
        play_path:  passed as --playpath when not None.
        tc_url:     passed as --tcUrl when not None.
        live:       when true, --live is added.
        conn:       when true, passed as --conn.

        Returns True when the download finished (progress hooks then receive
        a 'finished' status) and False when rtmpdump could not be run or
        exited with an error.
        """
        def run_rtmpdump(args):
            """Run *args*, turn rtmpdump's stderr into progress reports, return the exit code."""
            start = time.time()
            resume_percent = None
            resume_downloaded_data_len = None
            proc = subprocess.Popen(args, stderr=subprocess.PIPE)
            cursor_in_new_line = True
            proc_stderr_closed = False
            while not proc_stderr_closed:
                # read line from stderr; rtmpdump ends progress updates with
                # either \r or \n, so lines are assembled byte by byte
                line = u''
                while True:
                    char = proc.stderr.read(1)
                    if not char:
                        proc_stderr_closed = True
                        break
                    if char in [b'\r', b'\n']:
                        break
                    line += char.decode('ascii', 'replace')
                if not line:
                    # proc_stderr_closed is True
                    continue
                mobj = re.search(r'([0-9]+\.[0-9]{3}) kB / [0-9]+\.[0-9]{2} sec \(([0-9]{1,2}\.[0-9])%\)', line)
                if mobj:
                    downloaded_data_len = int(float(mobj.group(1))*1024)
                    percent = float(mobj.group(2))
                    if not resume_percent:
                        # First progress line: remember where this run started
                        resume_percent = percent
                        resume_downloaded_data_len = downloaded_data_len
                    eta = self.calc_eta(start, time.time(), 100-resume_percent, percent-resume_percent)
                    speed = self.calc_speed(start, time.time(), downloaded_data_len-resume_downloaded_data_len)
                    data_len = None
                    if percent > 0:
                        # Extrapolate the total size from the percentage
                        data_len = int(downloaded_data_len * 100 / percent)
                    data_len_str = u'~' + format_bytes(data_len)
                    self.report_progress(percent, data_len_str, speed, eta)
                    cursor_in_new_line = False
                    self._hook_progress({
                        'downloaded_bytes': downloaded_data_len,
                        'total_bytes': data_len,
                        'tmpfilename': tmpfilename,
                        'filename': filename,
                        'status': 'downloading',
                        'eta': eta,
                        'speed': speed,
                    })
                else:
                    # no percent for live streams
                    mobj = re.search(r'([0-9]+\.[0-9]{3}) kB / [0-9]+\.[0-9]{2} sec', line)
                    if mobj:
                        downloaded_data_len = int(float(mobj.group(1))*1024)
                        time_now = time.time()
                        speed = self.calc_speed(start, time_now, downloaded_data_len)
                        self.report_progress_live_stream(downloaded_data_len, speed, time_now - start)
                        cursor_in_new_line = False
                        self._hook_progress({
                            'downloaded_bytes': downloaded_data_len,
                            'tmpfilename': tmpfilename,
                            'filename': filename,
                            'status': 'downloading',
                            'speed': speed,
                        })
                    elif self.params.get('verbose', False):
                        if not cursor_in_new_line:
                            self.to_screen(u'')
                        cursor_in_new_line = True
                        self.to_screen(u'[rtmpdump] '+line)
            proc.wait()
            if not cursor_in_new_line:
                self.to_screen(u'')
            return proc.returncode

        self.report_destination(filename)
        tmpfilename = self.temp_name(filename)
        test = self.params.get('test', False)

        # Check for rtmpdump first
        try:
            # Close the devnull handle again instead of leaking it
            with open(os.path.devnull, 'w') as devnull:
                subprocess.call(['rtmpdump', '-h'], stdout=devnull, stderr=subprocess.STDOUT)
        except (OSError, IOError):
            self.report_error(u'RTMP download detected but "rtmpdump" could not be run')
            return False

        # Download using rtmpdump. rtmpdump returns exit code 2 when
        # the connection was interrupted and resuming appears to be
        # possible. This is part of rtmpdump's normal usage, AFAIK.
        basic_args = ['rtmpdump', '--verbose', '-r', url, '-o', tmpfilename]
        if player_url is not None:
            basic_args += ['--swfVfy', player_url]
        if page_url is not None:
            basic_args += ['--pageUrl', page_url]
        if play_path is not None:
            basic_args += ['--playpath', play_path]
        if tc_url is not None:
            # Pass the tcUrl itself (the stream URL was passed here before)
            basic_args += ['--tcUrl', tc_url]
        if test:
            basic_args += ['--stop', '1']
        if live:
            basic_args += ['--live']
        if conn:
            basic_args += ['--conn', conn]
        args = list(basic_args)
        if self.params.get('continuedl', False):
            args += ['--resume', '--skip', '1']

        if sys.platform == 'win32' and sys.version_info < (3, 0):
            # Windows subprocess module does not actually support Unicode
            # on Python 2.x
            # See http://stackoverflow.com/a/9951851/35070
            subprocess_encoding = sys.getfilesystemencoding()
            args = [a.encode(subprocess_encoding, 'ignore') for a in args]
        else:
            subprocess_encoding = None

        if self.params.get('verbose', False):
            if subprocess_encoding:
                str_args = [
                    a.decode(subprocess_encoding) if isinstance(a, bytes) else a
                    for a in args]
            else:
                str_args = args
            try:
                import pipes
                shell_quote = lambda quoted_args: ' '.join(map(pipes.quote, quoted_args))
            except ImportError:
                shell_quote = repr
            self.to_screen(u'[debug] rtmpdump command line: ' + shell_quote(str_args))

        retval = run_rtmpdump(args)

        while (retval == 2 or retval == 1) and not test:
            prevsize = os.path.getsize(encodeFilename(tmpfilename))
            self.to_screen(u'[rtmpdump] %s bytes' % prevsize)
            time.sleep(5.0) # This seems to be needed
            resume_args = basic_args + ['-e']
            if retval == 1:
                resume_args += ['-k', '1']
            retval = run_rtmpdump(resume_args)
            cursize = os.path.getsize(encodeFilename(tmpfilename))
            if prevsize == cursize and retval == 1:
                break
            # Some rtmp streams seem to abort after ~ 99.8%. Don't complain for those
            if prevsize == cursize and retval == 2 and cursize > 1024:
                self.to_screen(u'[rtmpdump] Could not download the whole video. This can happen for some advertisements.')
                retval = 0
                break
        if retval == 0 or (test and retval == 2):
            fsize = os.path.getsize(encodeFilename(tmpfilename))
            self.to_screen(u'[rtmpdump] %s bytes' % fsize)
            self.try_rename(tmpfilename, filename)
            self._hook_progress({
                'downloaded_bytes': fsize,
                'total_bytes': fsize,
                'filename': filename,
                'status': 'finished',
            })
            return True
        else:
            self.to_stderr(u"\n")
            self.report_error(u'rtmpdump exited with code %d' % retval)
            return False
    def _download_with_mplayer(self, filename, url):
        """Dump the stream at *url* to *filename* using the mplayer executable.

        Data is written to temp_name(filename) and renamed on success.
        Returns True when mplayer exited with code 0 (progress hooks then
        receive a 'finished' status), False when mplayer could not be run or
        exited with another code.
        """
        self.report_destination(filename)
        tmpfilename = self.temp_name(filename)

        args = ['mplayer', '-really-quiet', '-vo', 'null', '-vc', 'dummy', '-dumpstream', '-dumpfile', tmpfilename, url]
        # Check for mplayer first
        try:
            # Close the devnull handle again instead of leaking it
            with open(os.path.devnull, 'w') as devnull:
                subprocess.call(['mplayer', '-h'], stdout=devnull, stderr=subprocess.STDOUT)
        except (OSError, IOError):
            self.report_error(u'MMS or RTSP download detected but "%s" could not be run' % args[0])
            return False

        # Download using mplayer.
        retval = subprocess.call(args)
        if retval == 0:
            fsize = os.path.getsize(encodeFilename(tmpfilename))
            self.to_screen(u'\r[%s] %s bytes' % (args[0], fsize))
            self.try_rename(tmpfilename, filename)
            self._hook_progress({
                'downloaded_bytes': fsize,
                'total_bytes': fsize,
                'filename': filename,
                'status': 'finished',
            })
            return True
        else:
            self.to_stderr(u"\n")
            self.report_error(u'mplayer exited with code %d' % retval)
            return False
    def _download_m3u8_with_ffmpeg(self, filename, url):
        """Download an m3u8 stream to *filename* with avconv or ffmpeg.

        avconv is tried first, then ffmpeg; the first one that can be
        launched is used. Data is written to temp_name(filename) and renamed
        on success. Returns True when the converter exited with code 0
        (progress hooks then receive a 'finished' status), False when no
        converter could be run or it exited with another code.
        """
        self.report_destination(filename)
        tmpfilename = self.temp_name(filename)

        args = ['-y', '-i', url, '-f', 'mp4', '-c', 'copy',
            '-bsf:a', 'aac_adtstoasc', tmpfilename]

        for program in ['avconv', 'ffmpeg']:
            try:
                # Close the devnull handle again instead of leaking it
                with open(os.path.devnull, 'w') as devnull:
                    subprocess.call([program, '-version'], stdout=devnull, stderr=subprocess.STDOUT)
                break
            except (OSError, IOError):
                pass
        else:
            self.report_error(u'm3u8 download detected but ffmpeg or avconv could not be found')
            # Without a converter there is nothing to run; stop here instead
            # of trying to launch a program known to be missing.
            return False
        cmd = [program] + args

        retval = subprocess.call(cmd)
        if retval == 0:
            fsize = os.path.getsize(encodeFilename(tmpfilename))
            # cmd[0] is the program name (args[0] is the '-y' flag)
            self.to_screen(u'\r[%s] %s bytes' % (cmd[0], fsize))
            self.try_rename(tmpfilename, filename)
            self._hook_progress({
                'downloaded_bytes': fsize,
                'total_bytes': fsize,
                'filename': filename,
                'status': 'finished',
            })
            return True
        else:
            self.to_stderr(u"\n")
            # Name the program that actually ran, which may be avconv
            self.report_error(u'%s exited with code %d' % (program, retval))
            return False
 # This class reproduces the old behaviour of FileDownloader
 class FileDownloader(RealFileDownloader):
    def _do_download(self, filename, info_dict):
-        url = info_dict['url']
+        real_fd = get_suitable_downloader(info_dict)(self.ydl, self.params)
        # Check file already present
        if self.params.get('continuedl', False) and os.path.isfile(encodeFilename(filename)) and not self.params.get('nopart', False):
            self.report_file_already_downloaded(filename)
            self._hook_progress({
                'filename': filename,
                'status': 'finished',
                'total_bytes': os.path.getsize(encodeFilename(filename)),
            })
            return True
        # Attempt to download using rtmpdump
        if url.startswith('rtmp'):
            return self._download_with_rtmpdump(filename, url,
                                                info_dict.get('player_url', None),
                                                info_dict.get('page_url', None),
                                                info_dict.get('play_path', None),
                                                info_dict.get('tc_url', None),
                                                info_dict.get('rtmp_live', False),
                                                info_dict.get('rtmp_conn', None))
        # Attempt to download using mplayer
        if url.startswith('mms') or url.startswith('rtsp'):
            return self._download_with_mplayer(filename, url)
        # m3u8 manifest are downloaded with ffmpeg
        if determine_ext(url) == u'm3u8':
            return self._download_m3u8_with_ffmpeg(filename, url)
        tmpfilename = self.temp_name(filename)
        stream = None
        # Do not include the Accept-Encoding header
        headers = {'Youtubedl-no-compression': 'True'}
        if 'user_agent' in info_dict:
            headers['Youtubedl-user-agent'] = info_dict['user_agent']
        basic_request = compat_urllib_request.Request(url, None, headers)
        request = compat_urllib_request.Request(url, None, headers)
        if self.params.get('test', False):
            request.add_header('Range','bytes=0-10240')
        # Establish possible resume length
        if os.path.isfile(encodeFilename(tmpfilename)):
            resume_len = os.path.getsize(encodeFilename(tmpfilename))
        else:
            resume_len = 0
        open_mode = 'wb'
        if resume_len != 0:
            if self.params.get('continuedl', False):
                self.report_resuming_byte(resume_len)
                request.add_header('Range','bytes=%d-' % resume_len)
                open_mode = 'ab'
            else:
                resume_len = 0
        count = 0
        retries = self.params.get('retries', 0)
        while count <= retries:
            # Establish connection
            try:
                if count == 0 and 'urlhandle' in info_dict:
                    data = info_dict['urlhandle']
                data = compat_urllib_request.urlopen(request)
                break
            except (compat_urllib_error.HTTPError, ) as err:
                if (err.code < 500 or err.code >= 600) and err.code != 416:
                    # Unexpected HTTP error
                    raise
                elif err.code == 416:
                    # Unable to resume (requested range not satisfiable)
                    try:
                        # Open the connection again without the range header
                        data = compat_urllib_request.urlopen(basic_request)
                        content_length = data.info()['Content-Length']
                    except (compat_urllib_error.HTTPError, ) as err:
                        if err.code < 500 or err.code >= 600:
                            raise
                    else:
                        # Examine the reported length
                        if (content_length is not None and
                                (resume_len - 100 < int(content_length) < resume_len + 100)):
                            # The file had already been fully downloaded.
                            # Explanation to the above condition: in issue #175 it was revealed that
                            # YouTube sometimes adds or removes a few bytes from the end of the file,
                            # changing the file size slightly and causing problems for some users. So
                            # I decided to implement a suggested change and consider the file
                            # completely downloaded if the file size differs less than 100 bytes from
                            # the one in the hard drive.
                            self.report_file_already_downloaded(filename)
                            self.try_rename(tmpfilename, filename)
                            self._hook_progress({
                                'filename': filename,
                                'status': 'finished',
                            })
                            return True
                        else:
                            # The length does not match, we start the download over
                            self.report_unable_to_resume()
                            open_mode = 'wb'
                            break
            # Retry
            count += 1
            if count <= retries:
                self.report_retry(count, retries)
        if count > retries:
            self.report_error(u'giving up after %s retries' % retries)
            return False
        data_len = data.info().get('Content-length', None)
        if data_len is not None:
            data_len = int(data_len) + resume_len
            min_data_len = self.params.get("min_filesize", None)
            max_data_len =  self.params.get("max_filesize", None)
            if min_data_len is not None and data_len < min_data_len:
                self.to_screen(u'\r[download] File is smaller than min-filesize (%s bytes < %s bytes). Aborting.' % (data_len, min_data_len))
                return False
            if max_data_len is not None and data_len > max_data_len:
                self.to_screen(u'\r[download] File is larger than max-filesize (%s bytes > %s bytes). Aborting.' % (data_len, max_data_len))
                return False
        data_len_str = format_bytes(data_len)
        byte_counter = 0 + resume_len
        block_size = self.params.get('buffersize', 1024)
        start = time.time()
        while True:
            # Download and write
            before = time.time()
            data_block = data.read(block_size)
            after = time.time()
            if len(data_block) == 0:
                break
            byte_counter += len(data_block)
            # Open file just in time
            if stream is None:
                try:
                    (stream, tmpfilename) = sanitize_open(tmpfilename, open_mode)
                    assert stream is not None
                    filename = self.undo_temp_name(tmpfilename)
                    self.report_destination(filename)
                except (OSError, IOError) as err:
                    self.report_error(u'unable to open for writing: %s' % str(err))
                    return False
            try:
                stream.write(data_block)
            except (IOError, OSError) as err:
                self.to_stderr(u"\n")
                self.report_error(u'unable to write data: %s' % str(err))
                return False
            if not self.params.get('noresizebuffer', False):
                block_size = self.best_block_size(after - before, len(data_block))
            # Progress message
            speed = self.calc_speed(start, time.time(), byte_counter - resume_len)
            if data_len is None:
                eta = percent = None
            else:
                percent = self.calc_percent(byte_counter, data_len)
                eta = self.calc_eta(start, time.time(), data_len - resume_len, byte_counter - resume_len)
            self.report_progress(percent, data_len_str, speed, eta)
            self._hook_progress({
                'downloaded_bytes': byte_counter,
                'total_bytes': data_len,
                'tmpfilename': tmpfilename,
                'filename': filename,
                'status': 'downloading',
                'eta': eta,
                'speed': speed,
            })
            # Apply rate limit
            self.slow_down(start, byte_counter - resume_len)
        if stream is None:
            self.to_stderr(u"\n")
            self.report_error(u'Did not get any data blocks')
            return False
        stream.close()
        self.report_finish(data_len_str, (time.time() - start))
        if data_len is not None and byte_counter != data_len:
            raise ContentTooShortError(byte_counter, int(data_len))
        self.try_rename(tmpfilename, filename)
        # Update file modification time
        if self.params.get('updatetime', True):
            info_dict['filetime'] = self.try_utime(filename, data.info().get('last-modified', None))
        self._hook_progress({
            'downloaded_bytes': byte_counter,
            'total_bytes': byte_counter,
            'filename': filename,
            'status': 'finished',
        })
        return True
    def _hook_progress(self, status):
        for ph in self._progress_hooks:
-            ph(status)
+            real_fd.add_progress_hook(ph)
-
+        return real_fd.download(filename, info_dict)
    def add_progress_hook(self, ph):
        """ ph gets called on download progress, with a dictionary with the entries
        * filename: The final filename
        * status: One of "downloading" and "finished"
        It can also have some of the following entries:
        * downloaded_bytes: Bytes on disks
        * total_bytes: Total bytes, None if unknown
        * tmpfilename: The filename we're currently writing to
        * eta: The estimated time in seconds, None if unknown
        * speed: The download speed in bytes/second, None if unknown
        Hooks are guaranteed to be called at least once (with status "finished")
        if the download is successful.
        """
        self._progress_hooks.append(ph)
--- a/youtube_dl/YoutubeDL.py
+++ b/youtube_dl/YoutubeDL.py
@@ -53,7 +53,7 @@ from .utils import (
    YoutubeDLHandler,
 )
 from .extractor import get_info_extractor, gen_extractors
-from .FileDownloader import FileDownloader
+from .downloader import get_suitable_downloader
 from .version import __version__
@@ -167,7 +167,7 @@ class YoutubeDL(object):
        self._ies = []
        self._ies_instances = {}
        self._pps = []
-        self._progress_hooks = []
+        self._fd_progress_hooks = []
        self._download_retcode = 0
        self._num_downloads = 0
        self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
@@ -183,12 +183,18 @@ class YoutubeDL(object):
                    width_args = []
                else:
                    width_args = ['-w', str(width)]
-                self._fribidi = subprocess.Popen(
+                sp_kwargs = dict(
                    ['fribidi', '-c', 'UTF-8'] + width_args,
                    stdin=subprocess.PIPE,
                    stdout=slave,
                    stderr=self._err_file)
-                self._fribidi_channel = os.fdopen(master, 'rb')
+                try:
                    self._output_process = subprocess.Popen(
                        ['bidiv'] + width_args, **sp_kwargs
                    )
                except OSError:
                    self._output_process = subprocess.Popen(
                        ['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
                self._output_channel = os.fdopen(master, 'rb')
            except OSError as ose:
                if ose.errno == 2:
                    self.report_warning(u'Could not find fribidi executable, ignoring --bidi-workaround . Make sure that  fribidi  is an executable file in one of the directories in your $PATH.')
@@ -205,8 +211,6 @@ class YoutubeDL(object):
                u'Set the LC_ALL environment variable to fix this.')
            self.params['restrictfilenames'] = True
        self.fd = FileDownloader(self, self.params)
        if '%(stitle)s' in self.params.get('outtmpl', ''):
            self.report_warning(u'%(stitle)s is deprecated. Use the %(title)s and the --restrict-filenames flag(which also secures %(uploader)s et al) instead.')
@@ -242,15 +246,20 @@ class YoutubeDL(object):
        self._pps.append(pp)
        pp.set_downloader(self)
    def add_downloader_progress_hook(self, ph):
        """Remember ph so it can be attached to file downloaders.

        The hook is only stored here, in self._fd_progress_hooks.
        """
        pending_hooks = self._fd_progress_hooks
        pending_hooks.append(ph)
    def _bidi_workaround(self, message):
-        if not hasattr(self, '_fribidi_channel'):
+        if not hasattr(self, '_output_channel'):
            return message
        assert hasattr(self, '_output_process')
        assert type(message) == type(u'')
        line_count = message.count(u'\n') + 1
-        self._fribidi.stdin.write((message + u'\n').encode('utf-8'))
+        self._output_process.stdin.write((message + u'\n').encode('utf-8'))
-        self._fribidi.stdin.flush()
+        self._output_process.stdin.flush()
-        res = u''.join(self._fribidi_channel.readline().decode('utf-8')
+        res = u''.join(self._output_channel.readline().decode('utf-8')
                       for _ in range(line_count))
        return res[:-len(u'\n')]
@@ -636,7 +645,7 @@ class YoutubeDL(object):
            info_dict['playlist_index'] = None
        # This extractors handle format selection themselves
-        if info_dict['extractor'] in [u'youtube', u'Youku']:
+        if info_dict['extractor'] in [u'Youku']:
            if download:
                self.process_info(info_dict)
            return info_dict
@@ -662,10 +671,6 @@ class YoutubeDL(object):
            if 'ext' not in format:
                format['ext'] = determine_ext(format['url'])
        if self.params.get('listformats', None):
            self.list_formats(info_dict)
            return
        format_limit = self.params.get('format_limit', None)
        if format_limit:
            formats = list(takewhile_inclusive(
@@ -678,9 +683,16 @@ class YoutubeDL(object):
                except ValueError:
                    ext_ord = -1
                # We only compare the extension if they have the same height and width
-                return (f.get('height'), f.get('width'), ext_ord)
+                return (f.get('height') if f.get('height') is not None else -1,
                        f.get('width') if f.get('width') is not None else -1,
                        ext_ord)
            formats = sorted(formats, key=_free_formats_key)
        info_dict['formats'] = formats
        if self.params.get('listformats', None):
            self.list_formats(info_dict)
            return
        req_format = self.params.get('format', 'best')
        if req_format is None:
            req_format = 'best'
@@ -870,7 +882,10 @@ class YoutubeDL(object):
                success = True
            else:
                try:
-                    success = self.fd._do_download(filename, info_dict)
+                    fd = get_suitable_downloader(info_dict)(self, self.params)
                    for ph in self._fd_progress_hooks:
                        fd.add_progress_hook(ph)
                    success = fd.download(filename, info_dict)
                except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
                    self.report_error(u'unable to download video data: %s' % str(err))
                    return
--- a/youtube_dl/init.py
+++ b/youtube_dl/init.py
@@ -194,7 +194,7 @@ def parseOpts(overrideArguments=None):
        type=float, default=None, help=optparse.SUPPRESS_HELP)
    general.add_option(
        '--bidi-workaround', dest='bidi_workaround', action='store_true',
-        help=u'Work around terminals that lack bidirectional text support. Requires fribidi executable in PATH')
+        help=u'Work around terminals that lack bidirectional text support. Requires bidiv or fribidi executable in PATH')
    selection.add_option(
--- a/youtube_dl/downloader/init.py
+++ b/youtube_dl/downloader/init.py
@@ -0,0 +1,23 @@
 from .common import FileDownloader
 from .hls import HlsFD
 from .http import HttpFD
 from .mplayer import MplayerFD
 from .rtmp import RtmpFD
 from ..utils import (
    determine_ext,
 )
 def get_suitable_downloader(info_dict):
    """Return the downloader class matching info_dict['url'].

    Checks are made in this order: an 'rtmp' prefix selects RtmpFD, an
    'm3u8' extension selects HlsFD, an 'mms' or 'rtsp' prefix selects
    MplayerFD, and everything else falls back to HttpFD.
    """
    target = info_dict['url']
    if target.startswith('rtmp'):
        chosen = RtmpFD
    elif determine_ext(target) == u'm3u8':
        chosen = HlsFD
    elif target.startswith(('mms', 'rtsp')):
        chosen = MplayerFD
    else:
        chosen = HttpFD
    return chosen
--- a/youtube_dl/downloader/common.py
+++ b/youtube_dl/downloader/common.py
@@ -0,0 +1,321 @@
 import math
 import os
 import re
 import subprocess
 import sys
 import time
 from ..utils import (
    encodeFilename,
    timeconvert,
    format_bytes,
 )
 class FileDownloader(object):
    """File Downloader class.

    File downloader objects are the ones responsible of downloading the
    actual video file and writing it to disk.

    File downloaders accept a lot of parameters. In order not to saturate
    the object constructor with arguments, it receives a dictionary of
    options instead.

    Available options:

    verbose:           Print additional info to stdout.
    quiet:             Do not print messages to stdout.
    ratelimit:         Download speed limit, in bytes/sec.
    retries:           Number of times to retry for HTTP error 5xx
    buffersize:        Size of download buffer in bytes.
    noresizebuffer:    Do not automatically resize the download buffer.
    continuedl:        Try to continue downloads if possible.
    noprogress:        Do not print the progress bar.
    logtostderr:       Log messages to stderr instead of stdout.
    consoletitle:      Display progress in console window's titlebar.
    nopart:            Do not use temporary .part files.
    updatetime:        Use the Last-modified header to set output file timestamps.
    test:              Download only first bytes to test the downloader.
    min_filesize:      Skip files smaller than this size
    max_filesize:      Skip files larger than this size

    Subclasses of this one must re-define the real_download method.
    """

    params = None

    def __init__(self, ydl, params):
        """Create a FileDownloader object with the given options."""
        self.ydl = ydl
        self._progress_hooks = []
        self.params = params

    @staticmethod
    def format_seconds(seconds):
        """Format a duration as MM:SS or HH:MM:SS ('--:--:--' above 99 hours)."""
        (mins, secs) = divmod(seconds, 60)
        (hours, mins) = divmod(mins, 60)
        if hours > 99:
            return '--:--:--'
        if hours == 0:
            return '%02d:%02d' % (mins, secs)
        else:
            return '%02d:%02d:%02d' % (hours, mins, secs)

    @staticmethod
    def calc_percent(byte_counter, data_len):
        """Return the completed percentage as a float, or None if data_len is None."""
        if data_len is None:
            return None
        return float(byte_counter) / float(data_len) * 100.0

    @staticmethod
    def format_percent(percent):
        """Format a percentage right-aligned to six characters ('---.-%' for None)."""
        if percent is None:
            return '---.-%'
        return '%6s' % ('%3.1f%%' % percent)

    @staticmethod
    def calc_eta(start, now, total, current):
        """Estimate the remaining seconds, or None when it cannot be estimated.

        None is returned when total is unknown, nothing has been transferred
        yet, or less than a millisecond has elapsed.
        """
        if total is None:
            return None
        dif = now - start
        if current == 0 or dif < 0.001: # One millisecond
            return None
        rate = float(current) / dif
        return int((float(total) - float(current)) / rate)

    @staticmethod
    def format_eta(eta):
        """Format an ETA in seconds for display ('--:--' for None)."""
        if eta is None:
            return '--:--'
        return FileDownloader.format_seconds(eta)

    @staticmethod
    def calc_speed(start, now, bytes):
        """Return the average speed in bytes/second, or None if not measurable."""
        dif = now - start
        if bytes == 0 or dif < 0.001: # One millisecond
            return None
        return float(bytes) / dif

    @staticmethod
    def format_speed(speed):
        """Format a speed right-aligned to ten characters."""
        if speed is None:
            return '%10s' % '---b/s'
        return '%10s' % ('%s/s' % format_bytes(speed))

    @staticmethod
    def best_block_size(elapsed_time, bytes):
        """Pick the next read size from how fast the last block arrived.

        The result is the observed rate clamped between half and double the
        last block size, and never above 4 MB.
        """
        new_min = max(bytes / 2.0, 1.0)
        new_max = min(max(bytes * 2.0, 1.0), 4194304) # Do not surpass 4 MB
        if elapsed_time < 0.001:
            return int(new_max)
        rate = bytes / elapsed_time
        if rate > new_max:
            return int(new_max)
        if rate < new_min:
            return int(new_min)
        return int(rate)

    @staticmethod
    def parse_bytes(bytestr):
        """Parse a string indicating a byte quantity into an integer.

        Accepts a decimal number with an optional, case-insensitive
        k/M/G/T/P/E/Z/Y suffix (powers of 1024). Returns None if bytestr
        does not match that format.
        """
        matchobj = re.match(r'(?i)^(\d+(?:\.\d+)?)([kMGTPEZY]?)$', bytestr)
        if matchobj is None:
            return None
        number = float(matchobj.group(1))
        # An empty suffix yields index 0, i.e. a multiplier of 1
        multiplier = 1024.0 ** 'bkmgtpezy'.index(matchobj.group(2).lower())
        return int(round(number * multiplier))

    def to_screen(self, *args, **kargs):
        """Forward to the owning YoutubeDL object's to_screen."""
        self.ydl.to_screen(*args, **kargs)

    def to_stderr(self, message):
        """Forward message to the owning YoutubeDL object."""
        # NOTE(review): this calls ydl.to_screen, not a stderr-specific
        # method - confirm whether that is intentional.
        self.ydl.to_screen(message)

    def to_console_title(self, message):
        """Forward to the owning YoutubeDL object's to_console_title."""
        self.ydl.to_console_title(message)

    def trouble(self, *args, **kargs):
        """Forward to the owning YoutubeDL object's trouble."""
        self.ydl.trouble(*args, **kargs)

    def report_warning(self, *args, **kargs):
        """Forward to the owning YoutubeDL object's report_warning."""
        self.ydl.report_warning(*args, **kargs)

    def report_error(self, *args, **kargs):
        """Forward to the owning YoutubeDL object's report_error."""
        self.ydl.report_error(*args, **kargs)

    def slow_down(self, start_time, byte_counter):
        """Sleep if the download speed is over the rate limit."""
        rate_limit = self.params.get('ratelimit', None)
        if rate_limit is None or byte_counter == 0:
            return
        now = time.time()
        elapsed = now - start_time
        if elapsed <= 0.0:
            return
        speed = float(byte_counter) / elapsed
        if speed > rate_limit:
            # Sleep for as long as it takes the average to drop to the limit
            time.sleep((byte_counter - rate_limit * elapsed) / rate_limit)

    def temp_name(self, filename):
        """Returns a temporary filename for the given filename."""
        if self.params.get('nopart', False) or filename == u'-' or \
                (os.path.exists(encodeFilename(filename)) and not os.path.isfile(encodeFilename(filename))):
            return filename
        return filename + u'.part'

    def undo_temp_name(self, filename):
        """Strip a trailing '.part' from filename, if present."""
        if filename.endswith(u'.part'):
            return filename[:-len(u'.part')]
        return filename

    def try_rename(self, old_filename, new_filename):
        """Rename old_filename to new_filename, reporting an error on failure."""
        try:
            if old_filename == new_filename:
                return
            os.rename(encodeFilename(old_filename), encodeFilename(new_filename))
        except (IOError, OSError):
            self.report_error(u'unable to rename file')

    def try_utime(self, filename, last_modified_hdr):
        """Try to set the last-modified time of the given file.

        Returns the value produced by timeconvert for the header, or None
        when the header is missing, the file does not exist, or the parsed
        time is None or 0. Failures of os.utime itself are ignored.
        """
        if last_modified_hdr is None:
            return None
        if not os.path.isfile(encodeFilename(filename)):
            return None
        filetime = timeconvert(last_modified_hdr)
        # Ignore unparsable and obviously invalid dates
        if filetime is None or filetime == 0:
            return None
        try:
            os.utime(filename, (time.time(), filetime))
        except Exception:
            # Setting the timestamp is best effort, but do not swallow
            # KeyboardInterrupt or SystemExit as a bare except would.
            pass
        return filetime

    def report_destination(self, filename):
        """Report destination filename."""
        self.to_screen(u'[download] Destination: ' + filename)

    def _report_progress_status(self, msg, is_last_line=False):
        """Print a '[download]' status line and mirror it to the console title."""
        fullmsg = u'[download] ' + msg
        if self.params.get('progress_with_newline', False):
            self.to_screen(fullmsg)
        else:
            if os.name == 'nt':
                # Pad with spaces so a shorter line fully overwrites the
                # previous one after the carriage return.
                prev_len = getattr(self, '_report_progress_prev_line_length',
                                   0)
                if prev_len > len(fullmsg):
                    fullmsg += u' ' * (prev_len - len(fullmsg))
                self._report_progress_prev_line_length = len(fullmsg)
                clear_line = u'\r'
            else:
                clear_line = (u'\r\x1b[K' if sys.stderr.isatty() else u'\r')
            self.to_screen(clear_line + fullmsg, skip_eol=not is_last_line)
        self.to_console_title(u'youtube-dl ' + msg)

    def report_progress(self, percent, data_len_str, speed, eta):
        """Report download progress."""
        if self.params.get('noprogress', False):
            return
        if eta is not None:
            eta_str = self.format_eta(eta)
        else:
            eta_str = 'Unknown ETA'
        if percent is not None:
            percent_str = self.format_percent(percent)
        else:
            percent_str = 'Unknown %'
        speed_str = self.format_speed(speed)

        msg = (u'%s of %s at %s ETA %s' %
               (percent_str, data_len_str, speed_str, eta_str))
        self._report_progress_status(msg)

    def report_progress_live_stream(self, downloaded_data_len, speed, elapsed):
        """Report progress for a stream whose total size is unknown."""
        if self.params.get('noprogress', False):
            return
        downloaded_str = format_bytes(downloaded_data_len)
        speed_str = self.format_speed(speed)
        elapsed_str = FileDownloader.format_seconds(elapsed)
        msg = u'%s at %s (%s)' % (downloaded_str, speed_str, elapsed_str)
        self._report_progress_status(msg)

    def report_finish(self, data_len_str, tot_time):
        """Report download finished."""
        if self.params.get('noprogress', False):
            self.to_screen(u'[download] Download completed')
        else:
            self._report_progress_status(
                (u'100%% of %s in %s' %
                 (data_len_str, self.format_seconds(tot_time))),
                is_last_line=True)

    def report_resuming_byte(self, resume_len):
        """Report attempt to resume at given byte."""
        self.to_screen(u'[download] Resuming download at byte %s' % resume_len)

    def report_retry(self, count, retries):
        """Report retry in case of HTTP error 5xx"""
        self.to_screen(u'[download] Got server HTTP error. Retrying (attempt %d of %d)...' % (count, retries))

    def report_file_already_downloaded(self, file_name):
        """Report file has already been fully downloaded."""
        try:
            self.to_screen(u'[download] %s has already been downloaded' % file_name)
        except UnicodeEncodeError:
            self.to_screen(u'[download] The file has already been downloaded')

    def report_unable_to_resume(self):
        """Report it was impossible to resume download."""
        self.to_screen(u'[download] Unable to resume')

    def download(self, filename, info_dict):
        """Download to a filename using the info from info_dict

        Return True on success and False otherwise
        """
        # Check file already present
        if self.params.get('continuedl', False) and os.path.isfile(encodeFilename(filename)) and not self.params.get('nopart', False):
            self.report_file_already_downloaded(filename)
            self._hook_progress({
                'filename': filename,
                'status': 'finished',
                'total_bytes': os.path.getsize(encodeFilename(filename)),
            })
            return True
        else:
            return self.real_download(filename, info_dict)

    def real_download(self, filename, info_dict):
        """Real download process. Redefine in subclasses."""
        raise NotImplementedError(u'This method must be implemented by sublcasses')

    def _hook_progress(self, status):
        """Call every registered progress hook with the status dictionary."""
        for ph in self._progress_hooks:
            ph(status)

    def add_progress_hook(self, ph):
        """ ph gets called on download progress, with a dictionary with the entries
        * filename: The final filename
        * status: One of "downloading" and "finished"

        It can also have some of the following entries:

        * downloaded_bytes: Bytes on disks
        * total_bytes: Total bytes, None if unknown
        * tmpfilename: The filename we're currently writing to
        * eta: The estimated time in seconds, None if unknown
        * speed: The download speed in bytes/second, None if unknown

        Hooks are guaranteed to be called at least once (with status "finished")
        if the download is successful.
        """
        self._progress_hooks.append(ph)
--- a/youtube_dl/downloader/hls.py
+++ b/youtube_dl/downloader/hls.py
@@ -0,0 +1,44 @@
 import os
 import subprocess
 from .common import FileDownloader
 from ..utils import (
    encodeFilename,
 )
 class HlsFD(FileDownloader):
    """Downloader for m3u8 URLs that delegates to avconv or ffmpeg."""

    def real_download(self, filename, info_dict):
        """Download info_dict['url'] to filename with avconv/ffmpeg.

        Returns True when the external program exits with code 0 and
        False otherwise, including when neither program can be run.
        """
        url = info_dict['url']
        self.report_destination(filename)
        tmpfilename = self.temp_name(filename)

        args = ['-y', '-i', url, '-f', 'mp4', '-c', 'copy',
            '-bsf:a', 'aac_adtstoasc', tmpfilename]

        # Use the first of avconv/ffmpeg that can actually be executed
        for program in ['avconv', 'ffmpeg']:
            try:
                with open(os.path.devnull, 'w') as devnull:
                    subprocess.call([program, '-version'], stdout=devnull, stderr=subprocess.STDOUT)
                break
            except (OSError, IOError):
                pass
        else:
            self.report_error(u'm3u8 download detected but ffmpeg or avconv could not be found')
            # Without this the code would go on to run a missing program
            return False
        cmd = [program] + args

        retval = subprocess.call(cmd)
        if retval == 0:
            fsize = os.path.getsize(encodeFilename(tmpfilename))
            # Name the program that ran (args[0] is the '-y' flag)
            self.to_screen(u'\r[%s] %s bytes' % (program, fsize))
            self.try_rename(tmpfilename, filename)
            self._hook_progress({
                'downloaded_bytes': fsize,
                'total_bytes': fsize,
                'filename': filename,
                'status': 'finished',
            })
            return True
        else:
            self.to_stderr(u"\n")
            self.report_error(u'%s exited with code %d' % (program, retval))
            return False
--- a/youtube_dl/downloader/http.py
+++ b/youtube_dl/downloader/http.py
@@ -0,0 +1,191 @@
 import os
 import re
 import subprocess
 import sys
 import time
 from .common import FileDownloader
 from ..utils import (
    compat_urllib_request,
    compat_urllib_error,
    ContentTooShortError,
    encodeFilename,
    sanitize_open,
    format_bytes,
 )
 class HttpFD(FileDownloader):
    """Downloader that fetches info_dict['url'] through urllib."""

    def real_download(self, filename, info_dict):
        """Download info_dict['url'] into filename.

        Data is written to the name given by temp_name() and renamed on
        completion. If that file already exists and the 'continuedl'
        option is set, a Range request is used to resume.

        Returns True on success and False on failure. Raises
        ContentTooShortError when fewer bytes than announced were
        received, and re-raises HTTP errors other than 5xx and 416.
        """
        url = info_dict['url']
        tmpfilename = self.temp_name(filename)
        stream = None

        # Do not include the Accept-Encoding header
        headers = {'Youtubedl-no-compression': 'True'}
        if 'user_agent' in info_dict:
            headers['Youtubedl-user-agent'] = info_dict['user_agent']
        # basic_request never gets a Range header; it is used to restart
        # from scratch when the server rejects the resume range.
        basic_request = compat_urllib_request.Request(url, None, headers)
        request = compat_urllib_request.Request(url, None, headers)

        if self.params.get('test', False):
            request.add_header('Range','bytes=0-10240')

        # Establish possible resume length
        if os.path.isfile(encodeFilename(tmpfilename)):
            resume_len = os.path.getsize(encodeFilename(tmpfilename))
        else:
            resume_len = 0

        open_mode = 'wb'
        if resume_len != 0:
            if self.params.get('continuedl', False):
                self.report_resuming_byte(resume_len)
                request.add_header('Range','bytes=%d-' % resume_len)
                open_mode = 'ab'
            else:
                resume_len = 0

        count = 0
        retries = self.params.get('retries', 0)
        while count <= retries:
            # Establish connection
            try:
                # NOTE(review): the handle taken from info_dict is
                # immediately replaced by the urlopen() result below -
                # confirm whether reusing 'urlhandle' was intended.
                if count == 0 and 'urlhandle' in info_dict:
                    data = info_dict['urlhandle']
                data = compat_urllib_request.urlopen(request)
                break
            except (compat_urllib_error.HTTPError, ) as err:
                if (err.code < 500 or err.code >= 600) and err.code != 416:
                    # Unexpected HTTP error
                    raise
                elif err.code == 416:
                    # Unable to resume (requested range not satisfiable)
                    try:
                        # Open the connection again without the range header
                        data = compat_urllib_request.urlopen(basic_request)
                        content_length = data.info()['Content-Length']
                    except (compat_urllib_error.HTTPError, ) as err:
                        if err.code < 500 or err.code >= 600:
                            raise
                    else:
                        # Examine the reported length
                        if (content_length is not None and
                                (resume_len - 100 < int(content_length) < resume_len + 100)):
                            # The file had already been fully downloaded.
                            # Explanation to the above condition: in issue #175 it was revealed that
                            # YouTube sometimes adds or removes a few bytes from the end of the file,
                            # changing the file size slightly and causing problems for some users. So
                            # I decided to implement a suggested change and consider the file
                            # completely downloaded if the file size differs less than 100 bytes from
                            # the one in the hard drive.
                            self.report_file_already_downloaded(filename)
                            self.try_rename(tmpfilename, filename)
                            self._hook_progress({
                                'filename': filename,
                                'status': 'finished',
                            })
                            return True
                        else:
                            # The length does not match, we start the download over
                            self.report_unable_to_resume()
                            open_mode = 'wb'
                            break
            # Retry
            count += 1
            if count <= retries:
                self.report_retry(count, retries)

        if count > retries:
            self.report_error(u'giving up after %s retries' % retries)
            return False

        data_len = data.info().get('Content-length', None)
        if data_len is not None:
            # Add the bytes already on disk to the announced length
            data_len = int(data_len) + resume_len
            min_data_len = self.params.get("min_filesize", None)
            max_data_len =  self.params.get("max_filesize", None)
            if min_data_len is not None and data_len < min_data_len:
                self.to_screen(u'\r[download] File is smaller than min-filesize (%s bytes < %s bytes). Aborting.' % (data_len, min_data_len))
                return False
            if max_data_len is not None and data_len > max_data_len:
                self.to_screen(u'\r[download] File is larger than max-filesize (%s bytes > %s bytes). Aborting.' % (data_len, max_data_len))
                return False

        data_len_str = format_bytes(data_len)
        byte_counter = 0 + resume_len
        block_size = self.params.get('buffersize', 1024)
        start = time.time()
        while True:
            # Download and write
            before = time.time()
            data_block = data.read(block_size)
            after = time.time()
            if len(data_block) == 0:
                break
            byte_counter += len(data_block)

            # Open file just in time
            if stream is None:
                try:
                    (stream, tmpfilename) = sanitize_open(tmpfilename, open_mode)
                    assert stream is not None
                    filename = self.undo_temp_name(tmpfilename)
                    self.report_destination(filename)
                except (OSError, IOError) as err:
                    self.report_error(u'unable to open for writing: %s' % str(err))
                    return False
            try:
                stream.write(data_block)
            except (IOError, OSError) as err:
                # err must be bound here; the message below formats it
                self.to_stderr(u"\n")
                self.report_error(u'unable to write data: %s' % str(err))
                return False
            if not self.params.get('noresizebuffer', False):
                block_size = self.best_block_size(after - before, len(data_block))

            # Progress message
            speed = self.calc_speed(start, time.time(), byte_counter - resume_len)
            if data_len is None:
                eta = percent = None
            else:
                percent = self.calc_percent(byte_counter, data_len)
                eta = self.calc_eta(start, time.time(), data_len - resume_len, byte_counter - resume_len)
            self.report_progress(percent, data_len_str, speed, eta)

            self._hook_progress({
                'downloaded_bytes': byte_counter,
                'total_bytes': data_len,
                'tmpfilename': tmpfilename,
                'filename': filename,
                'status': 'downloading',
                'eta': eta,
                'speed': speed,
            })

            # Apply rate limit
            self.slow_down(start, byte_counter - resume_len)

        if stream is None:
            self.to_stderr(u"\n")
            self.report_error(u'Did not get any data blocks')
            return False
        stream.close()
        self.report_finish(data_len_str, (time.time() - start))
        if data_len is not None and byte_counter != data_len:
            raise ContentTooShortError(byte_counter, int(data_len))
        self.try_rename(tmpfilename, filename)

        # Update file modification time
        if self.params.get('updatetime', True):
            info_dict['filetime'] = self.try_utime(filename, data.info().get('last-modified', None))

        self._hook_progress({
            'downloaded_bytes': byte_counter,
            'total_bytes': byte_counter,
            'filename': filename,
            'status': 'finished',
        })

        return True
--- a/youtube_dl/downloader/mplayer.py
+++ b/youtube_dl/downloader/mplayer.py
@@ -0,0 +1,39 @@
 import os
 import subprocess
 from .common import FileDownloader
 from ..utils import (
    encodeFilename,
 )
 class MplayerFD(FileDownloader):
    """Downloader that dumps a stream to disk using the mplayer executable."""

    def real_download(self, filename, info_dict):
        """Dump info_dict['url'] to filename with mplayer.

        Returns True when mplayer exits with code 0 and False otherwise,
        including when mplayer cannot be run at all.
        """
        # The command line below needs the URL; it was previously used
        # without ever being defined.
        url = info_dict['url']
        self.report_destination(filename)
        tmpfilename = self.temp_name(filename)

        args = ['mplayer', '-really-quiet', '-vo', 'null', '-vc', 'dummy', '-dumpstream', '-dumpfile', tmpfilename, url]
        # Check for mplayer first
        try:
            with open(os.path.devnull, 'w') as devnull:
                subprocess.call(['mplayer', '-h'], stdout=devnull, stderr=subprocess.STDOUT)
        except (OSError, IOError):
            self.report_error(u'MMS or RTSP download detected but "%s" could not be run' % args[0] )
            return False

        # Download using mplayer. 
        retval = subprocess.call(args)
        if retval == 0:
            fsize = os.path.getsize(encodeFilename(tmpfilename))
            self.to_screen(u'\r[%s] %s bytes' % (args[0], fsize))
            self.try_rename(tmpfilename, filename)
            self._hook_progress({
                'downloaded_bytes': fsize,
                'total_bytes': fsize,
                'filename': filename,
                'status': 'finished',
            })
            return True
        else:
            self.to_stderr(u"\n")
            self.report_error(u'mplayer exited with code %d' % retval)
            return False
--- a/youtube_dl/downloader/rtmp.py
+++ b/youtube_dl/downloader/rtmp.py
@@ -0,0 +1,178 @@
 import os
 import re
 import subprocess
 import sys
 import time
 from .common import FileDownloader
 from ..utils import (
    encodeFilename,
    format_bytes,
 )
 class RtmpFD(FileDownloader):
    """Downloader that fetches RTMP streams by driving the external rtmpdump binary."""

    def real_download(self, filename, info_dict):
        """Download the RTMP stream described by info_dict into filename.

        Reads 'url' (required) and the optional 'player_url', 'page_url',
        'play_path', 'tc_url', 'rtmp_live' and 'rtmp_conn' keys of info_dict
        and maps them onto rtmpdump command line options. Data is written to
        a temporary file that is renamed to filename on success.

        Returns True on success (after firing the 'finished' progress hook),
        False when rtmpdump cannot be run or finishes with a failing exit code.
        """
        def run_rtmpdump(args):
            """Run rtmpdump with args, relay its progress output and return its exit code."""
            start = time.time()
            resume_percent = None
            resume_downloaded_data_len = None
            proc = subprocess.Popen(args, stderr=subprocess.PIPE)
            cursor_in_new_line = True
            proc_stderr_closed = False
            while not proc_stderr_closed:
                # read line from stderr; rtmpdump terminates progress updates
                # with '\r', so the stream is split on both '\r' and '\n'
                line = u''
                while True:
                    char = proc.stderr.read(1)
                    if not char:
                        proc_stderr_closed = True
                        break
                    if char in [b'\r', b'\n']:
                        break
                    line += char.decode('ascii', 'replace')
                if not line:
                    # either an empty line or proc_stderr_closed is True
                    continue
                mobj = re.search(r'([0-9]+\.[0-9]{3}) kB / [0-9]+\.[0-9]{2} sec \(([0-9]{1,2}\.[0-9])%\)', line)
                if mobj:
                    downloaded_data_len = int(float(mobj.group(1))*1024)
                    percent = float(mobj.group(2))
                    # Remember where this run started so that ETA and speed
                    # only account for data fetched during this run.
                    if not resume_percent:
                        resume_percent = percent
                        resume_downloaded_data_len = downloaded_data_len
                    eta = self.calc_eta(start, time.time(), 100-resume_percent, percent-resume_percent)
                    speed = self.calc_speed(start, time.time(), downloaded_data_len-resume_downloaded_data_len)
                    # Estimate the total size from the reported percentage.
                    data_len = None
                    if percent > 0:
                        data_len = int(downloaded_data_len * 100 / percent)
                    data_len_str = u'~' + format_bytes(data_len)
                    self.report_progress(percent, data_len_str, speed, eta)
                    cursor_in_new_line = False
                    self._hook_progress({
                        'downloaded_bytes': downloaded_data_len,
                        'total_bytes': data_len,
                        'tmpfilename': tmpfilename,
                        'filename': filename,
                        'status': 'downloading',
                        'eta': eta,
                        'speed': speed,
                    })
                else:
                    # no percent for live streams
                    mobj = re.search(r'([0-9]+\.[0-9]{3}) kB / [0-9]+\.[0-9]{2} sec', line)
                    if mobj:
                        downloaded_data_len = int(float(mobj.group(1))*1024)
                        time_now = time.time()
                        speed = self.calc_speed(start, time_now, downloaded_data_len)
                        self.report_progress_live_stream(downloaded_data_len, speed, time_now - start)
                        cursor_in_new_line = False
                        self._hook_progress({
                            'downloaded_bytes': downloaded_data_len,
                            'tmpfilename': tmpfilename,
                            'filename': filename,
                            'status': 'downloading',
                            'speed': speed,
                        })
                    elif self.params.get('verbose', False):
                        # Any other rtmpdump output is only shown in verbose mode,
                        # on a line of its own.
                        if not cursor_in_new_line:
                            self.to_screen(u'')
                        cursor_in_new_line = True
                        self.to_screen(u'[rtmpdump] '+line)
            proc.wait()
            if not cursor_in_new_line:
                self.to_screen(u'')
            return proc.returncode

        url = info_dict['url']
        player_url = info_dict.get('player_url', None)
        page_url = info_dict.get('page_url', None)
        play_path = info_dict.get('play_path', None)
        tc_url = info_dict.get('tc_url', None)
        live = info_dict.get('rtmp_live', False)
        conn = info_dict.get('rtmp_conn', None)
        self.report_destination(filename)
        tmpfilename = self.temp_name(filename)
        test = self.params.get('test', False)
        # Check for rtmpdump first: an OSError/IOError here means the binary
        # could not be started at all.
        try:
            # Discard the help output; the handle is closed as soon as the
            # probe finishes.
            with open(os.path.devnull, 'w') as devnull:
                subprocess.call(['rtmpdump', '-h'], stdout=devnull, stderr=subprocess.STDOUT)
        except (OSError, IOError):
            self.report_error(u'RTMP download detected but "rtmpdump" could not be run')
            return False
        # Download using rtmpdump. rtmpdump returns exit code 2 when
        # the connection was interrupted and resuming appears to be
        # possible. This is part of rtmpdump's normal usage, AFAIK.
        basic_args = ['rtmpdump', '--verbose', '-r', url, '-o', tmpfilename]
        if player_url is not None:
            basic_args += ['--swfVfy', player_url]
        if page_url is not None:
            basic_args += ['--pageUrl', page_url]
        if play_path is not None:
            basic_args += ['--playpath', play_path]
        if tc_url is not None:
            # Pass the tcUrl supplied by the extractor, not the stream URL.
            basic_args += ['--tcUrl', tc_url]
        if test:
            basic_args += ['--stop', '1']
        if live:
            basic_args += ['--live']
        if conn:
            basic_args += ['--conn', conn]
        # The boolean 'continuedl' setting indexes the pair: False -> no extra
        # options, True -> resume options.
        args = basic_args + [[], ['--resume', '--skip', '1']][self.params.get('continuedl', False)]
        if sys.platform == 'win32' and sys.version_info < (3, 0):
            # Windows subprocess module does not actually support Unicode
            # on Python 2.x
            # See http://stackoverflow.com/a/9951851/35070
            subprocess_encoding = sys.getfilesystemencoding()
            args = [a.encode(subprocess_encoding, 'ignore') for a in args]
        else:
            subprocess_encoding = None
        if self.params.get('verbose', False):
            # Decode the arguments again so that the debug line is text even
            # when they were encoded for subprocess above.
            if subprocess_encoding:
                str_args = [
                    a.decode(subprocess_encoding) if isinstance(a, bytes) else a
                    for a in args]
            else:
                str_args = args
            try:
                import pipes
                shell_quote = lambda args: ' '.join(map(pipes.quote, args))
            except ImportError:
                shell_quote = repr
            self.to_screen(u'[debug] rtmpdump command line: ' + shell_quote(str_args))
        retval = run_rtmpdump(args)
        # Keep resuming while rtmpdump reports a failed (1) or incomplete (2)
        # transfer, unless running in test mode.
        while (retval == 2 or retval == 1) and not test:
            prevsize = os.path.getsize(encodeFilename(tmpfilename))
            self.to_screen(u'[rtmpdump] %s bytes' % prevsize)
            time.sleep(5.0) # This seems to be needed
            # NOTE(review): '-e' and '-k 1' look like rtmpdump's short forms
            # of --resume and --skip 1 - confirm against the rtmpdump manual.
            retval = run_rtmpdump(basic_args + ['-e'] + [[], ['-k', '1']][retval == 1])
            cursize = os.path.getsize(encodeFilename(tmpfilename))
            # No progress after a failed retry: give up and report the error.
            if prevsize == cursize and retval == 1:
                break
            # Some rtmp streams seem abort after ~ 99.8%. Don't complain for those
            if prevsize == cursize and retval == 2 and cursize > 1024:
                self.to_screen(u'[rtmpdump] Could not download the whole video. This can happen for some advertisements.')
                retval = 0
                break
        # In test mode the download is cut short on purpose (--stop 1), so an
        # "incomplete" exit code still counts as success.
        if retval == 0 or (test and retval == 2):
            fsize = os.path.getsize(encodeFilename(tmpfilename))
            self.to_screen(u'[rtmpdump] %s bytes' % fsize)
            self.try_rename(tmpfilename, filename)
            self._hook_progress({
                'downloaded_bytes': fsize,
                'total_bytes': fsize,
                'filename': filename,
                'status': 'finished',
            })
            return True
        else:
            self.to_stderr(u"\n")
            self.report_error(u'rtmpdump exited with code %d' % retval)
            return False
--- a/youtube_dl/extractor/bliptv.py
+++ b/youtube_dl/extractor/bliptv.py
@@ -70,13 +70,14 @@ class BlipTVIE(InfoExtractor):
        info = None
        urlh = self._request_webpage(request, None, False,
            u'unable to download video info webpage')
        if urlh.headers.get('Content-Type', '').startswith('video/'): # Direct download
            basename = url.split('/')[-1]
            title,ext = os.path.splitext(basename)
            title = title.decode('UTF-8')
            ext = ext.replace('.', '')
            self.report_direct_download(title)
-            info = {
+            return {
                'id': title,
                'url': url,
                'uploader': None,
@@ -85,7 +86,7 @@ class BlipTVIE(InfoExtractor):
                'ext': ext,
                'urlhandle': urlh
            }
-        if info is None: # Regular URL
+
        try:
            json_code_bytes = urlh.read()
            json_code = json_code_bytes.decode('utf-8')
@@ -111,7 +112,7 @@ class BlipTVIE(InfoExtractor):
                raise ValueError('Can not determine filename extension')
            ext = umobj.group(1)
-                info = {
+            return {
                'id': compat_str(data['item_id']),
                'url': video_url,
                'uploader': data['display_name'],
@@ -124,11 +125,9 @@ class BlipTVIE(InfoExtractor):
                'player_url': data['embedUrl'],
                'user_agent': 'iTunes/10.6.1',
            }
-            except (ValueError,KeyError) as err:
+        except (ValueError, KeyError) as err:
            raise ExtractorError(u'Unable to parse video information: %s' % repr(err))
        return [info]
 class BlipTVUserIE(InfoExtractor):
    """Information Extractor for blip.tv users."""
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@@ -162,23 +162,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
                          # Dash audio
                          '141', '172', '140', '171', '139',
                          ]
    _available_formats_prefer_free = ['38', '46', '37', '45', '22', '44', '35', '43', '34', '18', '6', '5', '36', '17', '13',
                                      # Apple HTTP Live Streaming
                                      '96', '95', '94', '93', '92', '132', '151',
                                      # 3D
                                      '85', '102', '84', '101', '83', '100', '82',
                                      # Dash video
                                      '138', '248', '137', '247', '136', '246', '245',
                                      '244', '135', '243', '134', '242', '133', '160',
                                      # Dash audio
                                      '172', '141', '171', '140', '139',
                                      ]
    _video_formats_map = {
        'flv': ['35', '34', '6', '5'],
        '3gp': ['36', '17', '13'],
        'mp4': ['38', '37', '22', '18'],
        'webm': ['46', '45', '44', '43'],
    }
    _video_extensions = {
        '13': '3gp',
        '17': '3gp',
@@ -236,54 +219,54 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
        '248': 'webm',
    }
    _video_dimensions = {
-        '5': '400x240',
+        '5': {'width': 400, 'height': 240},
-        '6': '???',
+        '6': {},
-        '13': '???',
+        '13': {},
-        '17': '176x144',
+        '17': {'width': 176, 'height': 144},
-        '18': '640x360',
+        '18': {'width': 640, 'height': 360},
-        '22': '1280x720',
+        '22': {'width': 1280, 'height': 720},
-        '34': '640x360',
+        '34': {'width': 640, 'height': 360},
-        '35': '854x480',
+        '35': {'width': 854, 'height': 480},
-        '36': '320x240',
+        '36': {'width': 320, 'height': 240},
-        '37': '1920x1080',
+        '37': {'width': 1920, 'height': 1080},
-        '38': '4096x3072',
+        '38': {'width': 4096, 'height': 3072},
-        '43': '640x360',
+        '43': {'width': 640, 'height': 360},
-        '44': '854x480',
+        '44': {'width': 854, 'height': 480},
-        '45': '1280x720',
+        '45': {'width': 1280, 'height': 720},
-        '46': '1920x1080',
+        '46': {'width': 1920, 'height': 1080},
-        '82': '360p',
+        '82': {'height': 360, 'display': '360p'},
-        '83': '480p',
+        '83': {'height': 480, 'display': '480p'},
-        '84': '720p',
+        '84': {'height': 720, 'display': '720p'},
-        '85': '1080p',
+        '85': {'height': 1080, 'display': '1080p'},
-        '92': '240p',
+        '92': {'height': 240, 'display': '240p'},
-        '93': '360p',
+        '93': {'height': 360, 'display': '360p'},
-        '94': '480p',
+        '94': {'height': 480, 'display': '480p'},
-        '95': '720p',
+        '95': {'height': 720, 'display': '720p'},
-        '96': '1080p',
+        '96': {'height': 1080, 'display': '1080p'},
-        '100': '360p',
+        '100': {'height': 360, 'display': '360p'},
-        '101': '480p',
+        '101': {'height': 480, 'display': '480p'},
-        '102': '720p',
+        '102': {'height': 720, 'display': '720p'},
-        '132': '240p',
+        '132': {'height': 240, 'display': '240p'},
-        '151': '72p',
+        '151': {'height': 72, 'display': '72p'},
-        '133': '240p',
+        '133': {'height': 240, 'display': '240p'},
-        '134': '360p',
+        '134': {'height': 360, 'display': '360p'},
-        '135': '480p',
+        '135': {'height': 480, 'display': '480p'},
-        '136': '720p',
+        '136': {'height': 720, 'display': '720p'},
-        '137': '1080p',
+        '137': {'height': 1080, 'display': '1080p'},
-        '138': '>1080p',
+        '138': {'height': 1081, 'display': '>1080p'},
-        '139': '48k',
+        '139': {'display': '48k'},
-        '140': '128k',
+        '140': {'display': '128k'},
-        '141': '256k',
+        '141': {'display': '256k'},
-        '160': '192p',
+        '160': {'height': 192, 'display': '192p'},
-        '171': '128k',
+        '171': {'display': '128k'},
-        '172': '256k',
+        '172': {'display': '256k'},
-        '242': '240p',
+        '242': {'height': 240, 'display': '240p'},
-        '243': '360p',
+        '243': {'height': 360, 'display': '360p'},
-        '244': '480p',
+        '244': {'height': 480, 'display': '480p'},
-        '245': '480p',
+        '245': {'height': 480, 'display': '480p'},
-        '246': '480p',
+        '246': {'height': 480, 'display': '480p'},
-        '247': '720p',
+        '247': {'height': 720, 'display': '720p'},
-        '248': '1080p',
+        '248': {'height': 1080, 'display': '1080p'},
    }
    _special_itags = {
        '82': '3D',
@@ -1153,13 +1136,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
            self._downloader.report_warning(err_msg)
            return {}
    def _print_formats(self, formats):
        print('Available formats:')
        for x in formats:
            print('%s\t:\t%s\t[%s]%s' %(x, self._video_extensions.get(x, 'flv'),
                                        self._video_dimensions.get(x, '???'),
                                        ' ('+self._special_itags[x]+')' if x in self._special_itags else ''))
    def _extract_id(self, url):
        mobj = re.match(self._VALID_URL, url, re.VERBOSE)
        if mobj is None:
@@ -1172,48 +1148,11 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
        Transform a dictionary in the format {itag:url} to a list of (itag, url)
        with the requested formats.
        """
-        req_format = self._downloader.params.get('format', None)
+        existing_formats = [x for x in self._available_formats if x in url_map]
        format_limit = self._downloader.params.get('format_limit', None)
        available_formats = self._available_formats_prefer_free if self._downloader.params.get('prefer_free_formats', False) else self._available_formats
        if format_limit is not None and format_limit in available_formats:
            format_list = available_formats[available_formats.index(format_limit):]
        else:
            format_list = available_formats
        existing_formats = [x for x in format_list if x in url_map]
        if len(existing_formats) == 0:
            raise ExtractorError(u'no known formats available for video')
        if self._downloader.params.get('listformats', None):
            self._print_formats(existing_formats)
            return
        if req_format is None or req_format == 'best':
            video_url_list = [(existing_formats[0], url_map[existing_formats[0]])] # Best quality
        elif req_format == 'worst':
            video_url_list = [(existing_formats[-1], url_map[existing_formats[-1]])] # worst quality
        elif req_format in ('-1', 'all'):
        video_url_list = [(f, url_map[f]) for f in existing_formats] # All formats
-        else:
+        video_url_list.reverse() # order worst to best
            # Specific formats. We pick the first in a slash-delimeted sequence.
            # Format can be specified as itag or 'mp4' or 'flv' etc. We pick the highest quality
            # available in the specified format. For example,
            # if '1/2/3/4' is requested and '2' and '4' are available, we pick '2'.
            # if '1/mp4/3/4' is requested and '1' and '5' (is a mp4) are available, we pick '1'.
            # if '1/mp4/3/4' is requested and '4' and '5' (is a mp4) are available, we pick '5'.
            req_formats = req_format.split('/')
            video_url_list = None
            for rf in req_formats:
                if rf in url_map:
                    video_url_list = [(rf, url_map[rf])]
                    break
                if rf in self._video_formats_map:
                    for srf in self._video_formats_map[rf]:
                        if srf in url_map:
                            video_url_list = [(srf, url_map[srf])]
                            break
                    else:
                        continue
                    break
            if video_url_list is None:
                raise ExtractorError(u'requested format not available')
        return video_url_list
    def _extract_from_m3u8(self, manifest_url, video_id):
@@ -1462,40 +1401,57 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
                        url += '&ratebypass=yes'
                    url_map[url_data['itag'][0]] = url
            video_url_list = self._get_video_url_list(url_map)
            if not video_url_list:
                return
        elif video_info.get('hlsvp'):
            manifest_url = video_info['hlsvp'][0]
            url_map = self._extract_from_m3u8(manifest_url, video_id)
            video_url_list = self._get_video_url_list(url_map)
            if not video_url_list:
                return
        else:
            raise ExtractorError(u'no conn, hlsvp or url_encoded_fmt_stream_map information found in video info')
-        results = []
+        formats = []
        for itag, video_real_url in video_url_list:
            # Extension
            video_extension = self._video_extensions.get(itag, 'flv')
            resolution = self._video_dimensions.get(itag, {}).get('display')
            width = self._video_dimensions.get(itag, {}).get('width')
            height = self._video_dimensions.get(itag, {}).get('height')
            note = self._special_itags.get(itag)
            video_format = '{0} - {1}{2}'.format(itag if itag else video_extension,
-                                              self._video_dimensions.get(itag, '???'),
+                                              '%dx%d' % (width, height) if width is not None and height is not None else (resolution if resolution is not None else '???'),
                                              ' ('+self._special_itags[itag]+')' if itag in self._special_itags else '')
-            results.append({
+            formats.append({
                'id':       video_id,
                'url':         video_real_url,
                'ext':         video_extension,
                'format':      video_format,
                'format_id':   itag,
                'player_url':  player_url,
                '_resolution': resolution,
                'width':       width,
                'height':      height,
                'format_note': note,
            })
        def _formats_key(f):
            note = f.get('format_note')
            if note is None:
                note = u''
            is_dash = u'DASH' in note
            return (
                0 if is_dash else 1,
                f.get('height') if f.get('height') is not None else -1,
                f.get('width') if f.get('width') is not None else -1)
        formats.sort(key=_formats_key)
        return {
            'id':           video_id,
            'uploader':     video_uploader,
            'uploader_id':  video_uploader_id,
            'upload_date':  upload_date,
            'title':        video_title,
                'ext':      video_extension,
                'format':   video_format,
                'format_id': itag,
            'thumbnail':    video_thumbnail,
            'description':  video_description,
                'player_url':   player_url,
            'subtitles':    video_subtitles,
            'duration':     video_duration,
            'age_limit':    18 if age_gate else 0,
@@ -1504,8 +1460,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
            'view_count':   view_count,
            'like_count': like_count,
            'dislike_count': dislike_count,
-            })
+            'formats':      formats,
-        return results
+        }
 class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
    IE_DESC = u'YouTube.com playlists'
--- a/youtube_dl/version.py
+++ b/youtube_dl/version.py
@@ -1,2 +1,2 @@
-__version__ = '2013.12.23'
+__version__ = '2013.12.23.4'
Author	SHA1	Message	Date
Philipp Hagemeister	8958b6916c	release 2013.12.23.4	2013-12-23 05:08:35 +01:00
Philipp Hagemeister	9fc3bef87a	Merge remote-tracking branch 'jaimeMF/split-downloaders'	2013-12-23 05:03:32 +01:00
Philipp Hagemeister	d80044c235	[youtube] Prefer videos with sound	2013-12-23 04:51:42 +01:00
Philipp Hagemeister	bc2103f3bf	release 2013.12.23.3	2013-12-23 04:39:55 +01:00
Philipp Hagemeister	f82b18efc1	Merge remote-tracking branch 'rzhxeo/youtube'	2013-12-23 04:37:40 +01:00
Philipp Hagemeister	504c668d3b	release 2013.12.23.2	2013-12-23 04:31:45 +01:00
Philipp Hagemeister	466617f539	[bliptv] Simplify (From #2000 )	2013-12-23 04:31:38 +01:00
Philipp Hagemeister	196938835a	Remove debugging code Introduced by accident in `5d681e960d`	2013-12-23 04:30:57 +01:00
Philipp Hagemeister	a94e129a65	release 2013.12.23.1	2013-12-23 04:20:25 +01:00
Philipp Hagemeister	5d681e960d	Use bidiv instead of fribidi if available (Fixes #1912 )	2013-12-23 04:19:50 +01:00
rzhxeo	62d68c43ed	Make prefer_free_formats sorting more robust	2013-12-18 21:25:13 +01:00
rzhxeo	bfaae0a768	Filter and sort videos before calling list_formats	2013-12-18 21:24:39 +01:00
rzhxeo	e56f22ae20	[YoutubeIE] Sort formats by resolution	2013-12-18 21:22:37 +01:00
rzhxeo	dbd1988ed9	[YoutubeIE] Add width and height to format dict	2013-12-18 21:21:25 +01:00
rzhxeo	4ea3be0a5c	[YoutubeIE] Externalize format selection	2013-12-18 03:30:55 +01:00
Jaime Marquínez Ferrándiz	3bc2ddccc8	Move FileDownloader to its own module and create a new class for each download process A suitable downloader can be found using the 'get_suitable_downloader' function. Each subclass implements 'real_download', for downloading an info dict you call the 'download' method, which first checks if the video has already been downloaded	2013-12-11 16:18:48 +01:00
Jaime Marquínez Ferrándiz	8ab470f1b2	Now a new FileDownloader is created when downloading a video The progress hooks can be added using the method "add_downloader_progress_hook"	2013-12-11 16:04:42 +01:00
`@@ -1,2 +1,2 @@`

	`__version__ = '2013.12.23'`	`__version__ = '2013.12.23.4'`