release 2013.10.23.2

[dailymotion] Extract all the available formats (closes #1028 )
[nowvideo] Fix key extraction
2013-10-23 18:38:09 +02:00 · 2013-10-23 17:33:38 +02:00 · 2013-10-23 17:00:33 +02:00 · 2013-10-23 16:32:17 +02:00 · 2013-10-23 16:31:53 +02:00 · 2013-10-23 15:12:33 +02:00
32 changed files with 625 additions and 157 deletions
--- a/13
+++ b/13
@@ -13,13 +13,13 @@ PYTHON=/usr/bin/env python

 # set SYSCONFDIR to /etc if PREFIX=/usr or PREFIX=/usr/local
 ifeq ($(PREFIX),/usr)
-    SYSCONFDIR=/etc
+  SYSCONFDIR=/etc
 else
-    ifeq ($(PREFIX),/usr/local)
-        SYSCONFDIR=/etc
-    else
-        SYSCONFDIR=$(PREFIX)/etc
-    endif
+  ifeq ($(PREFIX),/usr/local)
+    SYSCONFDIR=/etc
+  else
+    SYSCONFDIR=$(PREFIX)/etc
+  endif
 endif

 install: youtube-dl youtube-dl.1 youtube-dl.bash-completion
@@ -71,6 +71,7 @@ youtube-dl.tar.gz: youtube-dl README.md README.txt youtube-dl.1 youtube-dl.bash-
 		--exclude '*~' \
 		--exclude '__pycache' \
 		--exclude '.git' \
+		--exclude 'testdata' \
 		-- \
 		bin devscripts test youtube_dl \
 		CHANGELOG LICENSE README.md README.txt \
--- a/README.md
+++ b/README.md
@@ -21,6 +21,8 @@ which means you can modify it, redistribute it or use it however you like.
                               sudo if needed)
    -i, --ignore-errors        continue on download errors, for example to
                               skip unavailable videos in a playlist
+    --abort-on-error           Abort downloading of further videos (in the
+                               playlist or the command line) if an error occurs
    --dump-user-agent          display the current browser identification
    --user-agent UA            specify a custom user agent
    --referer REF              specify a custom referer, use if the video access
@@ -30,7 +32,7 @@ which means you can modify it, redistribute it or use it however you like.
    --extractor-descriptions   Output descriptions of all supported extractors
    --proxy URL                Use the specified HTTP/HTTPS proxy
    --no-check-certificate     Suppress HTTPS certificate validation.
-    --cache-dir None           Location in the filesystem where youtube-dl can
+    --cache-dir DIR            Location in the filesystem where youtube-dl can
                               store downloaded information permanently. By
                               default $XDG_CACHE_HOME/youtube-dl or ~/.cache
                               /youtube-dl .
@@ -76,15 +78,17 @@ which means you can modify it, redistribute it or use it however you like.
                               %(uploader_id)s for the uploader nickname if
                               different, %(autonumber)s to get an automatically
                               incremented number, %(ext)s for the filename
-                               extension, %(upload_date)s for the upload date
-                               (YYYYMMDD), %(extractor)s for the provider
-                               (youtube, metacafe, etc), %(id)s for the video id
-                               , %(playlist)s for the playlist the video is in,
-                               %(playlist_index)s for the position in the
-                               playlist and %% for a literal percent. Use - to
-                               output to stdout. Can also be used to download to
-                               a different directory, for example with -o '/my/d
-                               ownloads/%(uploader)s/%(title)s-%(id)s.%(ext)s' .
+                               extension, %(format)s for the format description
+                               (like "22 - 1280x720" or "HD"), %(upload_date)s
+                               for the upload date (YYYYMMDD), %(extractor)s for
+                               the provider (youtube, metacafe, etc), %(id)s for
+                               the video id , %(playlist)s for the playlist the
+                               video is in, %(playlist_index)s for the position
+                               in the playlist and %% for a literal percent. Use
+                               - to output to stdout. Can also be used to
+                               download to a different directory, for example
+                               with -o '/my/downloads/%(uploader)s/%(title)s-%(i
+                               d)s.%(ext)s' .
    --autonumber-size NUMBER   Specifies the number of digits in %(autonumber)s
                               when it is present in output filename template or
                               --autonumber option is given
--- a/devscripts/bash-completion.in
+++ b/devscripts/bash-completion.in
@@ -1,4 +1,4 @@
-__youtube-dl()
+__youtube_dl()
 {
    local cur prev opts
    COMPREPLY=()
@@ -15,4 +15,4 @@ __youtube-dl()
    fi
 }

-complete -F __youtube-dl youtube-dl
+complete -F __youtube_dl youtube-dl
--- a/devscripts/release.sh
+++ b/devscripts/release.sh
@@ -88,10 +88,6 @@ ROOT=$(pwd)
    "$ROOT/devscripts/gh-pages/update-sites.py"
    git add *.html *.html.in update
    git commit -m "release $version"
-    git show HEAD
-    read -p "Is it good, can I push? (y/n) " -n 1
-    if [[ ! $REPLY =~ ^[Yy]$ ]]; then exit 1; fi
-    echo
    git push "$ROOT" gh-pages
    git push "$ORIGIN_URL" gh-pages
 )
--- a/setup.py
+++ b/setup.py
@@ -63,6 +63,7 @@ setup(
    ' YouTube.com and other video sites.',
    url='https://github.com/rg3/youtube-dl',
    author='Ricardo Garcia',
+    author_email='ytdl@yt-dl.org',
    maintainer='Philipp Hagemeister',
    maintainer_email='phihag@phihag.de',
    packages=['youtube_dl', 'youtube_dl.extractor'],
--- a/test/helper.py
+++ b/test/helper.py
@@ -34,10 +34,10 @@ def try_rm(filename):


 class FakeYDL(YoutubeDL):
-    def __init__(self):
+    def __init__(self, override=None):
        # Different instances of the downloader can't share the same dictionary
        # some test set the "sublang" parameter, which would break the md5 checks.
-        params = get_params()
+        params = get_params(override=override)
        super(FakeYDL, self).__init__(params)
        self.result = []
        
--- a/test/test_YoutubeDL.py
+++ b/test/test_YoutubeDL.py
@@ -0,0 +1,133 @@
+#!/usr/bin/env python
+
+# Allow direct execution
+import os
+import sys
+import unittest
+sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
+from test.helper import FakeYDL
+
+
+class YDL(FakeYDL):
+    """FakeYDL variant that records instead of downloading or printing.
+
+    Every info dict handed to process_info and every message handed to
+    to_screen is appended to a list so tests can inspect them afterwards.
+    """
+
+    def __init__(self, *args, **kwargs):
+        super(YDL, self).__init__(*args, **kwargs)
+        # Messages passed to to_screen, in call order
+        self.msgs = []
+        # Info dicts passed to process_info, in call order
+        self.downloaded_info_dicts = []
+
+    def to_screen(self, msg):
+        """Record the message instead of printing it."""
+        self.msgs += [msg]
+
+    def process_info(self, info_dict):
+        """Record the info dict instead of downloading the video."""
+        self.downloaded_info_dicts += [info_dict]
+
+
+class TestFormatSelection(unittest.TestCase):
+    """Format selection as performed through YDL.process_ie_result."""
+
+    def test_prefer_free_formats(self):
+        """prefer_free_formats only decides between equal resolutions."""
+        def make_info(formats):
+            # process_ie_result updates the dict it receives, so every
+            # scenario below starts from a fresh one.
+            return {u'formats': formats, u'extractor': u'test'}
+
+        # Same resolution => download webm
+        ydl = YDL()
+        ydl.params['prefer_free_formats'] = True
+        formats = [
+            {u'ext': u'webm', u'height': 460},
+            {u'ext': u'mp4',  u'height': 460},
+        ]
+        ydl.process_ie_result(make_info(formats))
+        downloaded = ydl.downloaded_info_dicts[0]
+        self.assertEqual(downloaded[u'ext'], u'webm')
+
+        # Different resolution => download best quality (mp4)
+        ydl = YDL()
+        ydl.params['prefer_free_formats'] = True
+        formats = [
+            {u'ext': u'webm', u'height': 720},
+            {u'ext': u'mp4', u'height': 1080},
+        ]
+        ydl.process_ie_result(make_info(formats))
+        downloaded = ydl.downloaded_info_dicts[0]
+        self.assertEqual(downloaded[u'ext'], u'mp4')
+
+        # No prefer_free_formats => keep original formats order
+        ydl = YDL()
+        ydl.params['prefer_free_formats'] = False
+        formats = [
+            {u'ext': u'webm', u'height': 720},
+            {u'ext': u'flv', u'height': 720},
+        ]
+        ydl.process_ie_result(make_info(formats))
+        downloaded = ydl.downloaded_info_dicts[0]
+        self.assertEqual(downloaded[u'ext'], u'flv')
+
+    def test_format_limit(self):
+        """format_limit caps the best format that may be picked."""
+        formats = [
+            {u'format_id': u'meh'},
+            {u'format_id': u'good'},
+            {u'format_id': u'great'},
+            {u'format_id': u'excellent'},
+        ]
+        info_dict = {
+            u'formats': formats, u'extractor': u'test', 'id': 'testvid'}
+
+        # No limit => the last (best) format is picked
+        ydl = YDL()
+        ydl.process_ie_result(info_dict)
+        downloaded = ydl.downloaded_info_dicts[0]
+        self.assertEqual(downloaded[u'format_id'], u'excellent')
+
+        # Limit passed through the constructor override
+        ydl = YDL({'format_limit': 'good'})
+        # assertEqual instead of a bare assert, which "python -O" strips
+        self.assertEqual(ydl.params['format_limit'], 'good')
+        ydl.process_ie_result(info_dict)
+        downloaded = ydl.downloaded_info_dicts[0]
+        self.assertEqual(downloaded[u'format_id'], u'good')
+
+        # 'all' downloads every format up to and including the limit
+        ydl = YDL({'format_limit': 'great', 'format': 'all'})
+        ydl.process_ie_result(info_dict)
+        self.assertEqual(ydl.downloaded_info_dicts[0][u'format_id'], u'meh')
+        self.assertEqual(ydl.downloaded_info_dicts[1][u'format_id'], u'good')
+        self.assertEqual(ydl.downloaded_info_dicts[2][u'format_id'], u'great')
+        # The announcement mentions the number of formats
+        self.assertTrue('3' in ydl.msgs[0])
+
+        # Limit equal to the best format changes nothing
+        ydl = YDL()
+        ydl.params['format_limit'] = 'excellent'
+        ydl.process_ie_result(info_dict)
+        downloaded = ydl.downloaded_info_dicts[0]
+        self.assertEqual(downloaded[u'format_id'], u'excellent')
+
+    def test_format_selection(self):
+        """Slash-separated specs are tried from left to right."""
+        formats = [
+            {u'format_id': u'35', u'ext': u'mp4'},
+            {u'format_id': u'45', u'ext': u'webm'},
+            {u'format_id': u'47', u'ext': u'webm'},
+            {u'format_id': u'2', u'ext': u'flv'},
+        ]
+        info_dict = {u'formats': formats, u'extractor': u'test'}
+
+        # Unknown id is skipped, the next alternative matches
+        ydl = YDL({'format': u'20/47'})
+        ydl.process_ie_result(info_dict)
+        downloaded = ydl.downloaded_info_dicts[0]
+        self.assertEqual(downloaded['format_id'], u'47')
+
+        # 'worst' is the first format of the list
+        ydl = YDL({'format': u'20/71/worst'})
+        ydl.process_ie_result(info_dict)
+        downloaded = ydl.downloaded_info_dicts[0]
+        self.assertEqual(downloaded['format_id'], u'35')
+
+        # Default is 'best', i.e. the last format of the list
+        ydl = YDL()
+        ydl.process_ie_result(info_dict)
+        downloaded = ydl.downloaded_info_dicts[0]
+        self.assertEqual(downloaded['format_id'], u'2')
+
+        # An extension selects the last format with that extension
+        ydl = YDL({'format': u'webm/mp4'})
+        ydl.process_ie_result(info_dict)
+        downloaded = ydl.downloaded_info_dicts[0]
+        self.assertEqual(downloaded['format_id'], u'47')
+
+        ydl = YDL({'format': u'3gp/40/mp4'})
+        ydl.process_ie_result(info_dict)
+        downloaded = ydl.downloaded_info_dicts[0]
+        self.assertEqual(downloaded['format_id'], u'35')
+
+
+if __name__ == '__main__':
+    unittest.main()
--- a/youtube_dl/YoutubeDL.py
+++ b/youtube_dl/YoutubeDL.py
@@ -91,7 +91,7 @@ class YoutubeDL(object):
    downloadarchive:   File name of a file where all downloads are recorded.
                       Videos already present in the file are not downloaded
                       again.
-    
+
    The following parameters are not used by YoutubeDL itself, they are used by
    the FileDownloader:
    nopart, updatetime, buffersize, ratelimit, min_filesize, max_filesize, test,
@@ -216,10 +216,10 @@ class YoutubeDL(object):
        If stderr is a tty file the 'WARNING:' will be colored
        '''
        if sys.stderr.isatty() and os.name != 'nt':
-            _msg_header=u'\033[0;33mWARNING:\033[0m'
+            _msg_header = u'\033[0;33mWARNING:\033[0m'
        else:
-            _msg_header=u'WARNING:'
-        warning_message=u'%s %s' % (_msg_header,message)
+            _msg_header = u'WARNING:'
+        warning_message = u'%s %s' % (_msg_header, message)
        self.to_stderr(warning_message)

    def report_error(self, message, tb=None):
@@ -234,19 +234,6 @@ class YoutubeDL(object):
        error_message = u'%s %s' % (_msg_header, message)
        self.trouble(error_message, tb)

-    def slow_down(self, start_time, byte_counter):
-        """Sleep if the download speed is over the rate limit."""
-        rate_limit = self.params.get('ratelimit', None)
-        if rate_limit is None or byte_counter == 0:
-            return
-        now = time.time()
-        elapsed = now - start_time
-        if elapsed <= 0.0:
-            return
-        speed = float(byte_counter) / elapsed
-        if speed > rate_limit:
-            time.sleep((byte_counter - rate_limit * (now - start_time)) / rate_limit)
-
    def report_writedescription(self, descfn):
        """ Report that the description file is being written """
        self.to_screen(u'[info] Writing video description to: ' + descfn)
@@ -288,13 +275,15 @@ class YoutubeDL(object):
            if template_dict['playlist_index'] is not None:
                template_dict['playlist_index'] = u'%05d' % template_dict['playlist_index']

-            sanitize = lambda k,v: sanitize_filename(
+            sanitize = lambda k, v: sanitize_filename(
                u'NA' if v is None else compat_str(v),
                restricted=self.params.get('restrictfilenames'),
-                is_id=(k==u'id'))
-            template_dict = dict((k, sanitize(k, v)) for k,v in template_dict.items())
+                is_id=(k == u'id'))
+            template_dict = dict((k, sanitize(k, v))
+                                 for k, v in template_dict.items())

-            filename = self.params['outtmpl'] % template_dict
+            tmpl = os.path.expanduser(self.params['outtmpl'])
+            filename = tmpl % template_dict
            return filename
        except KeyError as err:
            self.report_error(u'Erroneous output template')
@@ -328,14 +317,14 @@ class YoutubeDL(object):
            return (u'%(title)s has already been recorded in archive'
                    % info_dict)
        return None
-        
+
    def extract_info(self, url, download=True, ie_key=None, extra_info={}):
        '''
        Returns a list with a dictionary for each video we find.
        If 'download', also downloads the videos.
        extra_info is a dict containing the extra values to add to each result
         '''
-        
+
        if ie_key:
            ies = [self.get_info_extractor(ie_key)]
        else:
@@ -377,7 +366,7 @@ class YoutubeDL(object):
                    raise
        else:
            self.report_error(u'no suitable InfoExtractor: %s' % url)
-        
+
    def process_ie_result(self, ie_result, download=True, extra_info={}):
        """
        Take the result of the ie(may be modified) and resolve all unresolved
@@ -390,13 +379,7 @@ class YoutubeDL(object):
        result_type = ie_result.get('_type', 'video') # If not given we suppose it's a video, support the default old system
        if result_type == 'video':
            ie_result.update(extra_info)
-            if 'playlist' not in ie_result:
-                # It isn't part of a playlist
-                ie_result['playlist'] = None
-                ie_result['playlist_index'] = None
-            if download:
-                self.process_info(ie_result)
-            return ie_result
+            return self.process_video_result(ie_result)
        elif result_type == 'url':
            # We have to add extra_info to the results because it may be
            # contained in a playlist
@@ -407,7 +390,7 @@ class YoutubeDL(object):
        elif result_type == 'playlist':
            # We process each entry in the playlist
            playlist = ie_result.get('title', None) or ie_result.get('id', None)
-            self.to_screen(u'[download] Downloading playlist: %s'  % playlist)
+            self.to_screen(u'[download] Downloading playlist: %s' % playlist)

            playlist_results = []

@@ -425,12 +408,12 @@ class YoutubeDL(object):
            self.to_screen(u"[%s] playlist '%s': Collected %d video ids (downloading %d of them)" %
                (ie_result['extractor'], playlist, n_all_entries, n_entries))

-            for i,entry in enumerate(entries,1):
-                self.to_screen(u'[download] Downloading video #%s of %s' %(i, n_entries))
+            for i, entry in enumerate(entries, 1):
+                self.to_screen(u'[download] Downloading video #%s of %s' % (i, n_entries))
                extra = {
-                         'playlist': playlist, 
-                         'playlist_index': i + playliststart,
-                         }
+                    'playlist': playlist,
+                    'playlist_index': i + playliststart,
+                }
                if not 'extractor' in entry:
                    # We set the extractor, if it's an url it will be set then to
                    # the new extractor, but if it's already a video we must make
@@ -454,6 +437,103 @@ class YoutubeDL(object):
        else:
            raise Exception('Invalid result type: %s' % result_type)

+    def select_format(self, format_spec, available_formats):
+        """Pick the format matching a single format specifier.
+
+        format_spec is None or 'best' (last format), 'worst' (first
+        format), one of the known file extensions, or a format_id.
+        available_formats is the list of format dicts to choose from.
+        When several formats match, the last one is returned.
+
+        Returns the selected format dict, or None if nothing matches
+        (including when available_formats is empty).
+        """
+        if not available_formats:
+            # Nothing to choose from: report "no match" to the caller
+            # instead of failing with an IndexError below.
+            return None
+        if format_spec == 'best' or format_spec is None:
+            return available_formats[-1]
+        if format_spec == 'worst':
+            return available_formats[0]
+
+        extensions = [u'mp4', u'flv', u'webm', u'3gp']
+        # A known extension selects by 'ext', anything else by 'format_id'
+        key = 'ext' if format_spec in extensions else 'format_id'
+        # .get(): a format lacking the field simply does not match
+        matches = [f for f in available_formats if f.get(key) == format_spec]
+        if matches:
+            return matches[-1]
+        return None
+
+    def process_video_result(self, info_dict, download=True):
+        """Select the format(s) of a resolved video and process them.
+
+        info_dict must be a 'video' result.  If it carries a 'formats'
+        list, the formats to process are chosen from it according to the
+        'format_limit', 'prefer_free_formats' and 'format' params;
+        otherwise info_dict itself is treated as the only format.
+        If download is true, process_info is called once per selected
+        format with a copy of info_dict merged with that format.
+
+        Returns info_dict updated with the fields of the last selected
+        format, or None when the 'listformats' param is set (the formats
+        are only listed).  Raises ExtractorError if none of the requested
+        formats is available.
+        """
+        assert info_dict.get('_type', 'video') == 'video'
+
+        if 'playlist' not in info_dict:
+            # It isn't part of a playlist
+            info_dict['playlist'] = None
+            info_dict['playlist_index'] = None
+
+        # These extractors handle format selection themselves
+        if info_dict['extractor'] in [u'youtube', u'Youku', u'YouPorn', u'mixcloud']:
+            if download:
+                self.process_info(info_dict)
+            return info_dict
+
+        # We now pick which formats have to be downloaded
+        if info_dict.get('formats') is None:
+            # There's only one format available
+            formats = [info_dict]
+        else:
+            formats = info_dict['formats']
+
+        # We check that all the formats have the format and format_id fields
+        for (i, format) in enumerate(formats):
+            if format.get('format_id') is None:
+                format['format_id'] = compat_str(i)
+            if format.get('format') is None:
+                note = format.get('format_note')
+                format['format'] = u'{id} - {res}{note}'.format(
+                    id=format['format_id'],
+                    res=self.format_resolution(format),
+                    # Explicit field index: a bare '{}' needs Python >= 2.7
+                    note=u' ({0})'.format(note) if note is not None else '',
+                )
+
+        if self.params.get('listformats', None):
+            self.list_formats(info_dict)
+            return
+
+        format_limit = self.params.get('format_limit', None)
+        if format_limit:
+            # Keep everything up to and including the limiting format
+            formats = list(takewhile_inclusive(
+                lambda f: f['format_id'] != format_limit, formats
+            ))
+        if self.params.get('prefer_free_formats'):
+            def _free_formats_key(f):
+                try:
+                    # .get(): a format without 'ext' ranks like an unknown one
+                    ext_ord = [u'flv', u'mp4', u'webm'].index(f.get('ext'))
+                except ValueError:
+                    ext_ord = -1
+                # Unknown dimensions sort first; None cannot be ordered
+                # against integers on Python 3.
+                height = f.get('height') if f.get('height') is not None else -1
+                width = f.get('width') if f.get('width') is not None else -1
+                # We only compare the extension if they have the same height and width
+                return (height, width, ext_ord)
+            formats = sorted(formats, key=_free_formats_key)
+
+        req_format = self.params.get('format', 'best')
+        if req_format is None:
+            req_format = 'best'
+        formats_to_download = []
+        # The -1 is for supporting YoutubeIE
+        if req_format in ('-1', 'all'):
+            formats_to_download = formats
+        else:
+            # We can accept formats requested in the format: 34/5/best, we pick
+            # the first that is available, starting from left
+            req_formats = req_format.split('/')
+            for rf in req_formats:
+                selected_format = self.select_format(rf, formats)
+                if selected_format is not None:
+                    formats_to_download = [selected_format]
+                    break
+        if not formats_to_download:
+            raise ExtractorError(u'requested format not available')
+
+        if download:
+            if len(formats_to_download) > 1:
+                self.to_screen(u'[info] %s: downloading video in %s formats' % (info_dict['id'], len(formats_to_download)))
+            for format in formats_to_download:
+                # Each format is processed with its own merged copy
+                new_info = dict(info_dict)
+                new_info.update(format)
+                self.process_info(new_info)
+        # We update the info dict with the best quality format (backwards compatibility)
+        info_dict.update(formats_to_download[-1])
+        return info_dict
+
    def process_info(self, info_dict):
        """Process a single resolved IE result."""

@@ -529,20 +609,20 @@ class YoutubeDL(object):

        if self.params.get('writeannotations', False):
            try:
-               annofn = filename + u'.annotations.xml'
-               self.report_writeannotations(annofn)
-               with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
-                   annofile.write(info_dict['annotations'])
+                annofn = filename + u'.annotations.xml'
+                self.report_writeannotations(annofn)
+                with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
+                    annofile.write(info_dict['annotations'])
            except (KeyError, TypeError):
                self.report_warning(u'There are no annotations to write.')
            except (OSError, IOError):
-                 self.report_error(u'Cannot write annotations file: ' + annofn)
-                 return
+                self.report_error(u'Cannot write annotations file: ' + annofn)
+                return

        subtitles_are_requested = any([self.params.get('writesubtitles', False),
                                       self.params.get('writeautomaticsub')])

-        if  subtitles_are_requested and 'subtitles' in info_dict and info_dict['subtitles']:
+        if subtitles_are_requested and 'subtitles' in info_dict and info_dict['subtitles']:
            # subtitles download errors are already managed as troubles in relevant IE
            # that way it will silently go on when used with unsupporting IE
            subtitles = info_dict['subtitles']
@@ -564,7 +644,7 @@ class YoutubeDL(object):
            infofn = filename + u'.info.json'
            self.report_writeinfojson(infofn)
            try:
-                json_info_dict = dict((k, v) for k,v in info_dict.items() if not k in ['urlhandle'])
+                json_info_dict = dict((k, v) for k, v in info_dict.items() if not k in ['urlhandle'])
                write_json_file(json_info_dict, encodeFilename(infofn))
            except (OSError, IOError):
                self.report_error(u'Cannot write metadata to JSON file ' + infofn)
@@ -634,7 +714,7 @@ class YoutubeDL(object):
        keep_video = None
        for pp in self._pps:
            try:
-                keep_video_wish,new_info = pp.run(info)
+                keep_video_wish, new_info = pp.run(info)
                if keep_video_wish is not None:
                    if keep_video_wish:
                        keep_video = keep_video_wish
@@ -672,3 +752,32 @@ class YoutubeDL(object):
        vid_id = info_dict['extractor'] + u' ' + info_dict['id']
        with locked_file(fn, 'a', encoding='utf-8') as archive_file:
            archive_file.write(vid_id + u'\n')
+
+    @staticmethod
+    def format_resolution(format):
+        """Return a short resolution string for a format dict.
+
+        'WIDTHxHEIGHT' if both dimensions are known, 'HEIGHTp' if only the
+        height is known, and '???' when the height is missing.
+        """
+        height = format.get('height')
+        if height is None:
+            return '???'
+        width = format.get('width')
+        if width is None:
+            return u'%sp' % height
+        return u'%sx%s' % (width, height)
+
+    def list_formats(self, info_dict):
+        """Print a table of the formats available for a video.
+
+        Uses info_dict['formats'] if it is set; otherwise info_dict itself
+        is listed as the only format.  Each listed format must provide
+        'format_id' and 'ext'.  When more than one format is listed, the
+        first row is marked '(worst)' and the last one '(best)'.
+        """
+        formats = info_dict.get('formats')
+        if formats is None:
+            # Key missing or explicitly None: the info dict is the format
+            formats = [info_dict]
+        formats_s = []
+        for format in formats:
+            formats_s.append(u'%-15s: %-5s     %-15s[%s]' % (
+                format['format_id'],
+                format['ext'],
+                format.get('format_note') or '-',
+                self.format_resolution(format),
+                )
+            )
+        # Only label the extremes when there is something to compare;
+        # this also avoids indexing into an empty list.
+        if len(formats_s) > 1:
+            formats_s[0] += ' (worst)'
+            formats_s[-1] += ' (best)'
+        formats_s = "\n".join(formats_s)
+        self.to_screen(u'[info] Available formats for %s:\n'
+            u'format code    extension   note           resolution\n%s' % (
+                info_dict['id'], formats_s))
--- a/youtube_dl/init.py
+++ b/youtube_dl/init.py
@@ -133,7 +133,7 @@ def parseOpts(overrideArguments=None):

    def _hide_login_info(opts):
        opts = list(opts)
-        for private_opt in ['-p', '--password', '-u', '--username']:
+        for private_opt in ['-p', '--password', '-u', '--username', '--video-password']:
            try:
                i = opts.index(private_opt)
                opts[i+1] = '<PRIVATE>'
@@ -179,6 +179,9 @@ def parseOpts(overrideArguments=None):
            action='store_true', dest='update_self', help='update this program to latest version. Make sure that you have sufficient permissions (run with sudo if needed)')
    general.add_option('-i', '--ignore-errors',
            action='store_true', dest='ignoreerrors', help='continue on download errors, for example to to skip unavailable videos in a playlist', default=False)
+    general.add_option('--abort-on-error',
+            action='store_false', dest='ignoreerrors',
+            help='Abort downloading of further videos (in the playlist or the command line) if an error occurs')
    general.add_option('--dump-user-agent',
            action='store_true', dest='dump_user_agent',
            help='display the current browser identification', default=False)
@@ -196,7 +199,7 @@ def parseOpts(overrideArguments=None):
    general.add_option('--proxy', dest='proxy', default=None, help='Use the specified HTTP/HTTPS proxy', metavar='URL')
    general.add_option('--no-check-certificate', action='store_true', dest='no_check_certificate', default=False, help='Suppress HTTPS certificate validation.')
    general.add_option(
-        '--cache-dir', dest='cachedir', default=get_cachedir(),
+        '--cache-dir', dest='cachedir', default=get_cachedir(), metavar='DIR',
        help='Location in the filesystem where youtube-dl can store downloaded information permanently. By default $XDG_CACHE_HOME/youtube-dl or ~/.cache/youtube-dl .')
    general.add_option(
        '--no-cache-dir', action='store_const', const=None, dest='cachedir',
@@ -235,7 +238,7 @@ def parseOpts(overrideArguments=None):


    video_format.add_option('-f', '--format',
-            action='store', dest='format', metavar='FORMAT',
+            action='store', dest='format', metavar='FORMAT', default='best',
            help='video format code, specifiy the order of preference using slashes: "-f 22/17/18". "-f mp4" and "-f flv" are also supported')
    video_format.add_option('--all-formats',
            action='store_const', dest='format', help='download all available video formats', const='all')
@@ -332,7 +335,9 @@ def parseOpts(overrideArguments=None):
            help=('output filename template. Use %(title)s to get the title, '
                  '%(uploader)s for the uploader name, %(uploader_id)s for the uploader nickname if different, '
                  '%(autonumber)s to get an automatically incremented number, '
-                  '%(ext)s for the filename extension, %(upload_date)s for the upload date (YYYYMMDD), '
+                  '%(ext)s for the filename extension, '
+                  '%(format)s for the format description (like "22 - 1280x720" or "HD")'
+                  '%(upload_date)s for the upload date (YYYYMMDD), '
                  '%(extractor)s for the provider (youtube, metacafe, etc), '
                  '%(id)s for the video id , %(playlist)s for the playlist the video is in, '
                  '%(playlist_index)s for the position in the playlist and %% for a literal percent. '
--- a/youtube_dl/extractor/init.py
+++ b/youtube_dl/extractor/init.py
@@ -102,6 +102,7 @@ from .ro220 import Ro220IE
 from .rottentomatoes import RottenTomatoesIE
 from .roxwel import RoxwelIE
 from .rtlnow import RTLnowIE
+from .rutube import RutubeIE
 from .sina import SinaIE
 from .slashdot import SlashdotIE
 from .slideshare import SlideshareIE
@@ -114,6 +115,7 @@ from .statigram import StatigramIE
 from .steam import SteamIE
 from .sztvhu import SztvHuIE
 from .teamcoco import TeamcocoIE
+from .techtalks import TechTalksIE
 from .ted import TEDIE
 from .tf1 import TF1IE
 from .thisav import ThisAVIE
--- a/youtube_dl/extractor/arte.py
+++ b/youtube_dl/extractor/arte.py
@@ -174,12 +174,27 @@ class ArteTVPlus7IE(InfoExtractor):
        # Some formats use the m3u8 protocol
        formats = filter(lambda f: f.get('videoFormat') != 'M3U8', formats)
        # We order the formats by quality
-        formats = sorted(formats, key=lambda f: int(f.get('height',-1)))
+        formats = list(formats) # in python3 filter returns an iterator
+        if re.match(r'[A-Z]Q', formats[0]['quality']) is not None:
+            sort_key = lambda f: ['HQ', 'MQ', 'EQ', 'SQ'].index(f['quality'])
+        else:
+            sort_key = lambda f: int(f.get('height',-1))
+        formats = sorted(formats, key=sort_key)
        # Prefer videos without subtitles in the same language
        formats = sorted(formats, key=lambda f: re.match(r'VO(F|A)-STM\1', f.get('versionCode', '')) is None)
        # Pick the best quality
        def _format(format_info):
+            quality = format_info['quality']
+            m_quality = re.match(r'\w*? - (\d*)p', quality)
+            if m_quality is not None:
+                quality = m_quality.group(1)
+            if format_info.get('versionCode') is not None:
+                format_id = u'%s-%s' % (quality, format_info['versionCode'])
+            else:
+                format_id = quality
            info = {
+                'format_id': format_id,
+                'format_note': format_info.get('versionLibelle'),
                'width': format_info.get('width'),
                'height': format_info.get('height'),
            }
@@ -192,8 +207,6 @@ class ArteTVPlus7IE(InfoExtractor):
                info['ext'] = determine_ext(info['url'])
            return info
        info_dict['formats'] = [_format(f) for f in formats]
-        # TODO: Remove when #980 has been merged 
-        info_dict.update(info_dict['formats'][-1])

        return info_dict

@@ -207,7 +220,7 @@ class ArteTVCreativeIE(ArteTVPlus7IE):
        u'url': u'http://creative.arte.tv/de/magazin/agentur-amateur-corporate-design',
        u'file': u'050489-002.mp4',
        u'info_dict': {
-            u'title': u'Agentur Amateur #2 - Corporate Design',
+            u'title': u'Agentur Amateur / Agence Amateur #2 : Corporate Design',
        },
    }

--- a/youtube_dl/extractor/brightcove.py
+++ b/youtube_dl/extractor/brightcove.py
@@ -53,6 +53,8 @@ class BrightcoveIE(InfoExtractor):
        # Fix up some stupid HTML, see https://github.com/rg3/youtube-dl/issues/1553
        object_str = re.sub(r'(<param name="[^"]+" value="[^"]+")>',
                            lambda m: m.group(1) + '/>', object_str)
+        # Fix up some stupid XML, see https://github.com/rg3/youtube-dl/issues/1608
+        object_str = object_str.replace(u'<--', u'<!--')

        object_doc = xml.etree.ElementTree.fromstring(object_str)
        assert u'BrightcoveExperience' in object_doc.attrib['class']
@@ -96,7 +98,10 @@ class BrightcoveIE(InfoExtractor):
        playlist_info = self._download_webpage(self._PLAYLIST_URL_TEMPLATE % player_key,
                                               player_key, u'Downloading playlist information')

-        playlist_info = json.loads(playlist_info)['videoList']
+        json_data = json.loads(playlist_info)
+        if 'videoList' not in json_data:
+            raise ExtractorError(u'Empty playlist')
+        playlist_info = json_data['videoList']
        videos = [self._extract_video_info(video_info) for video_info in playlist_info['mediaCollectionDTO']['videoDTOs']]

        return self.playlist_result(videos, playlist_id=playlist_info['id'],
--- a/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@@ -14,6 +14,7 @@ from ..utils import (
    clean_html,
    compiled_regex_type,
    ExtractorError,
+    RegexNotFoundError,
    unescapeHTML,
 )

@@ -61,9 +62,12 @@ class InfoExtractor(object):
                    * ext       Will be calculated from url if missing
                    * format    A human-readable description of the format
                                ("mp4 container with h264/opus").
-                                Calculated from width and height if missing.
+                                Calculated from the format_id, width, height 
+                                and format_note fields if missing.
                    * format_id A short description of the format
                                ("mp4_h264_opus" or "19")
+                    * format_note Additional info about the format
+                                ("3D" or "DASH video")
                    * width     Width of the video, if known
                    * height    Height of the video, if known

@@ -228,7 +232,7 @@ class InfoExtractor(object):
        Perform a regex search on the given string, using a single or a list of
        patterns returning the first matching group.
        In case of failure return a default value or raise a WARNING or a
-        ExtractorError, depending on fatal, specifying the field name.
+        RegexNotFoundError, depending on fatal, specifying the field name.
        """
        if isinstance(pattern, (str, compat_str, compiled_regex_type)):
            mobj = re.search(pattern, string, flags)
@@ -248,7 +252,7 @@ class InfoExtractor(object):
        elif default is not None:
            return default
        elif fatal:
-            raise ExtractorError(u'Unable to extract %s' % _name)
+            raise RegexNotFoundError(u'Unable to extract %s' % _name)
        else:
            self._downloader.report_warning(u'unable to extract %s; '
                u'please report this issue on http://yt-dl.org/bug' % _name)
@@ -365,7 +369,7 @@ class SearchInfoExtractor(InfoExtractor):

    def _get_n_results(self, query, n):
        """Get a specified number of results for a query"""
-        raise NotImplementedError("This method must be implemented by sublclasses")
+        raise NotImplementedError("This method must be implemented by subclasses")

    @property
    def SEARCH_KEY(self):
--- a/youtube_dl/extractor/dailymotion.py
+++ b/youtube_dl/extractor/dailymotion.py
@@ -28,6 +28,15 @@ class DailymotionIE(DailymotionBaseInfoExtractor, SubtitlesInfoExtractor):

    _VALID_URL = r'(?i)(?:https?://)?(?:www\.)?dailymotion\.[a-z]{2,3}/(?:embed/)?video/([^/]+)'
    IE_NAME = u'dailymotion'
+
+    _FORMATS = [  # (info key holding the stream URL, format_id) pairs
+        (u'stream_h264_ld_url', u'ld'),
+        (u'stream_h264_url', u'standard'),
+        (u'stream_h264_hq_url', u'hq'),
+        (u'stream_h264_hd_url', u'hd'),
+        (u'stream_h264_hd1080_url', u'hd1080'),
+    ]
+
    _TESTS = [
        {
            u'url': u'http://www.dailymotion.com/video/x33vw9_tutoriel-de-youtubeur-dl-des-video_tech',
@@ -60,7 +69,6 @@ class DailymotionIE(DailymotionBaseInfoExtractor, SubtitlesInfoExtractor):

        video_id = mobj.group(1).split('_')[0].split('?')[0]

-        video_extension = 'mp4'
        url = 'http://www.dailymotion.com/video/%s' % video_id

        # Retrieve video webpage to extract further information
@@ -99,18 +107,24 @@ class DailymotionIE(DailymotionBaseInfoExtractor, SubtitlesInfoExtractor):
            msg = 'Couldn\'t get video, Dailymotion says: %s' % info['error']['title']
            raise ExtractorError(msg, expected=True)

-        # TODO: support choosing qualities
-
-        for key in ['stream_h264_hd1080_url','stream_h264_hd_url',
-                    'stream_h264_hq_url','stream_h264_url',
-                    'stream_h264_ld_url']:
-            if info.get(key):#key in info and info[key]:
-                max_quality = key
-                self.to_screen(u'Using %s' % key)
-                break
-        else:
+        formats = []
+        for (key, format_id) in self._FORMATS:
+            video_url = info.get(key)
+            if video_url is not None:
+                m_size = re.search(r'H264-(\d+)x(\d+)', video_url)
+                if m_size is not None:
+                    width, height = m_size.group(1), m_size.group(2)
+                else:
+                    width, height = None, None
+                formats.append({
+                    'url': video_url,
+                    'ext': 'mp4',
+                    'format_id': format_id,
+                    'width': width,
+                    'height': height,
+                })
+        if not formats:
            raise ExtractorError(u'Unable to extract video URL')
-        video_url = info[max_quality]

        # subtitles
        video_subtitles = self.extract_subtitles(video_id)
@@ -120,11 +134,10 @@ class DailymotionIE(DailymotionBaseInfoExtractor, SubtitlesInfoExtractor):

        return [{
            'id':       video_id,
-            'url':      video_url,
+            'formats': formats,
            'uploader': video_uploader,
            'upload_date':  video_upload_date,
            'title':    self._og_search_title(webpage),
-            'ext':      video_extension,
            'subtitles':    video_subtitles,
            'thumbnail': info['thumbnail_url']
        }]
--- a/youtube_dl/extractor/generic.py
+++ b/youtube_dl/extractor/generic.py
@@ -142,12 +142,19 @@ class GenericIE(InfoExtractor):

        # Look for embedded Vimeo player
        mobj = re.search(
-            r'<iframe\s+src="(https?://player.vimeo.com/video/.*?)"', webpage)
+            r'<iframe[^>]+?src="(https?://player.vimeo.com/video/.+?)"', webpage)
        if mobj:
            player_url = unescapeHTML(mobj.group(1))
            surl = smuggle_url(player_url, {'Referer': url})
            return self.url_result(surl, 'Vimeo')

+        # Look for embedded YouTube player
+        mobj = re.search(
+            r'<iframe[^>]+?src="(https?://(?:www\.)?youtube.com/embed/.+?)"', webpage)
+        if mobj:
+            surl = unescapeHTML(mobj.group(1))
+            return self.url_result(surl, 'Youtube')
+
        # Start with something easy: JW Player in SWFObject
        mobj = re.search(r'flashvars: [\'"](?:.*&)?file=(http[^\'"&]*)', webpage)
        if mobj is None:
--- a/youtube_dl/extractor/googleplus.py
+++ b/youtube_dl/extractor/googleplus.py
@@ -41,9 +41,9 @@ class GooglePlusIE(InfoExtractor):

        # Extract update date
        upload_date = self._html_search_regex(
-            r'''(?x)<a.+?class="o-T-s\s[^"]+"\s+style="display:\s*none"\s*>
+            r'''(?x)<a.+?class="o-U-s\s[^"]+"\s+style="display:\s*none"\s*>
                    ([0-9]{4}-[0-9]{2}-[0-9]{2})</a>''',
-            webpage, u'upload date', fatal=False)
+            webpage, u'upload date', fatal=False, flags=re.VERBOSE)
        if upload_date:
            # Convert timestring to a format suitable for filename
            upload_date = datetime.datetime.strptime(upload_date, "%Y-%m-%d")
--- a/youtube_dl/extractor/internetvideoarchive.py
+++ b/youtube_dl/extractor/internetvideoarchive.py
@@ -19,7 +19,7 @@ class InternetVideoArchiveIE(InfoExtractor):
        u'info_dict': {
            u'title': u'SKYFALL',
            u'description': u'In SKYFALL, Bond\'s loyalty to M is tested as her past comes back to haunt her. As MI6 comes under attack, 007 must track down and destroy the threat, no matter how personal the cost.',
-            u'duration': 156,
+            u'duration': 153,
        },
    }

@@ -74,7 +74,7 @@ class InternetVideoArchiveIE(InfoExtractor):
            })
        formats = sorted(formats, key=lambda f: f['bitrate'])

-        info = {
+        return {
            'id': video_id,
            'title': item.find('title').text,
            'formats': formats,
@@ -82,6 +82,3 @@ class InternetVideoArchiveIE(InfoExtractor):
            'description': item.find('description').text,
            'duration': int(attr['duration']),
        }
-        # TODO: Remove when #980 has been merged
-        info.update(formats[-1])
-        return info
--- a/youtube_dl/extractor/nhl.py
+++ b/youtube_dl/extractor/nhl.py
@@ -90,8 +90,8 @@ class NHLVideocenterIE(NHLBaseInfoExtractor):
             r'{statusIndex:0,index:0,.*?id:(.*?),'],
            webpage, u'category id')
        playlist_title = self._html_search_regex(
-            r'\?catid=%s">(.*?)</a>' % cat_id,
-            webpage, u'playlist title', flags=re.DOTALL)
+            r'tab0"[^>]*?>(.*?)</td>',
+            webpage, u'playlist title', flags=re.DOTALL).lower().capitalize()

        data = compat_urllib_parse.urlencode({
            'cid': cat_id,
--- a/youtube_dl/extractor/nowvideo.py
+++ b/youtube_dl/extractor/nowvideo.py
@@ -20,7 +20,10 @@ class NowVideoIE(InfoExtractor):

        video_id = mobj.group('id')
        webpage_url = 'http://www.nowvideo.ch/video/' + video_id
+        embed_url = 'http://embed.nowvideo.ch/embed.php?v=' + video_id
        webpage = self._download_webpage(webpage_url, video_id)
+        embed_page = self._download_webpage(embed_url, video_id,
+            u'Downloading embed page')

        self.report_extraction(video_id)

@@ -28,7 +31,7 @@ class NowVideoIE(InfoExtractor):
            webpage, u'video title')

        video_key = self._search_regex(r'var fkzd="(.*)";',
-            webpage, u'video key')
+            embed_page, u'video key')

        api_call = "http://www.nowvideo.ch/api/player.api.php?file={0}&numOfErrors=0&cid=1&key={1}".format(video_id, video_key)
        api_response = self._download_webpage(api_call, video_id,
--- a/youtube_dl/extractor/redtube.py
+++ b/youtube_dl/extractor/redtube.py
@@ -10,7 +10,8 @@ class RedTubeIE(InfoExtractor):
        u'file': u'66418.mp4',
        u'md5': u'7b8c22b5e7098a3e1c09709df1126d2d',
        u'info_dict': {
-            u"title": u"Sucked on a toilet"
+            u"title": u"Sucked on a toilet",
+            u"age_limit": 18,
        }
    }

--- a/youtube_dl/extractor/rutube.py
+++ b/youtube_dl/extractor/rutube.py
@@ -0,0 +1,58 @@
+# encoding: utf-8
+import re
+import json
+
+from .common import InfoExtractor
+from ..utils import (
+    compat_urlparse,
+    compat_str,
+    ExtractorError,
+)
+
+
+class RutubeIE(InfoExtractor):
+    _VALID_URL = r'https?://rutube.ru/video/(?P<long_id>\w+)'
+
+    _TEST = {
+        u'url': u'http://rutube.ru/video/3eac3b4561676c17df9132a9a1e62e3e/',
+        u'file': u'3eac3b4561676c17df9132a9a1e62e3e.mp4',
+        u'info_dict': {
+            u'title': u'Раненный кенгуру забежал в аптеку',
+            u'uploader': u'NTDRussian',
+            u'uploader_id': u'29790',
+        },
+        u'params': {
+            # It requires ffmpeg (m3u8 download)
+            u'skip_download': True,
+        },
+    }
+
+    def _get_api_response(self, short_id, subpath):
+        api_url = 'http://rutube.ru/api/play/%s/%s/?format=json' % (subpath, short_id)  # subpath is e.g. 'options' or 'trackinfo'
+        response_json = self._download_webpage(api_url, short_id,
+            u'Downloading %s json' % subpath)
+        return json.loads(response_json)  # decoded JSON document
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        long_id = mobj.group('long_id')
+        webpage = self._download_webpage(url, long_id)
+        og_video = self._og_search_video_url(webpage)
+        short_id = compat_urlparse.urlparse(og_video).path[1:]  # URL path minus its first character (presumably the leading '/')
+        options = self._get_api_response(short_id, 'options')
+        trackinfo = self._get_api_response(short_id, 'trackinfo')
+        # Some videos don't have the author field
+        author = trackinfo.get('author') or {}
+        m3u8_url = trackinfo['video_balancer'].get('m3u8')  # only the m3u8 entry is used as the video URL
+        if m3u8_url is None:
+            raise ExtractorError(u'Couldn\'t find m3u8 manifest url')
+
+        return {
+            'id': trackinfo['id'],
+            'title': trackinfo['title'],
+            'url': m3u8_url,
+            'ext': 'mp4',
+            'thumbnail': options['thumbnail_url'],
+            'uploader': author.get('name'),
+            'uploader_id': compat_str(author['id']) if author else None,  # author is {} when the field is missing
+        }
--- a/youtube_dl/extractor/techtalks.py
+++ b/youtube_dl/extractor/techtalks.py
@@ -0,0 +1,65 @@
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+    get_element_by_attribute,
+    clean_html,
+)
+
+
+class TechTalksIE(InfoExtractor):
+    _VALID_URL = r'https?://techtalks\.tv/talks/[^/]*/(?P<id>\d+)/'
+
+    _TEST = {
+        u'url': u'http://techtalks.tv/talks/learning-topic-models-going-beyond-svd/57758/',
+        u'playlist': [
+            {
+                u'file': u'57758.flv',
+                u'info_dict': {
+                    u'title': u'Learning Topic Models --- Going beyond SVD',
+                },
+            },
+            {
+                u'file': u'57758-slides.flv',
+                u'info_dict': {
+                    u'title': u'Learning Topic Models --- Going beyond SVD',
+                },
+            },
+        ],
+        u'params': {
+            # rtmp download
+            u'skip_download': True,
+        },
+    }
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        talk_id = mobj.group('id')
+        webpage = self._download_webpage(url, talk_id)
+        rtmp_url = self._search_regex(r'netConnectionUrl: \'(.*?)\'', webpage,
+            u'rtmp url')
+        play_path = self._search_regex(r'href=\'(.*?)\' [^>]*id="flowplayer_presenter"',
+            webpage, u'presenter play path')
+        title = clean_html(get_element_by_attribute('class', 'title', webpage))
+        video_info = {
+                'id': talk_id,
+                'title': title,
+                'url': rtmp_url,
+                'play_path': play_path,
+                'ext': 'flv',
+            }
+        m_slides = re.search(r'<a class="slides" href=\'(.*?)\'', webpage)  # optional link to the slides stream
+        if m_slides is None:
+            return video_info  # no slides link found: only the presenter video
+        else:
+            return [
+                video_info,
+                # The slides video: same rtmp url and title, its own play path
+                {
+                    'id': talk_id + '-slides',
+                    'title': title,
+                    'url': rtmp_url,
+                    'play_path': m_slides.group(1),
+                    'ext': 'flv',
+                },
+            ]
--- a/youtube_dl/extractor/tudou.py
+++ b/youtube_dl/extractor/tudou.py
@@ -48,7 +48,8 @@ class TudouIE(InfoExtractor):
                'ie_key': 'Youku'
            }

-        title = self._search_regex(r",kw:['\"](.+?)[\"']", webpage, u'title')
+        title = self._search_regex(
+            r",kw:\s*['\"](.+?)[\"']", webpage, u'title')
        thumbnail_url = self._search_regex(
            r",pic:\s*[\"'](.+?)[\"']", webpage, u'thumbnail URL', fatal=False)

--- a/youtube_dl/extractor/videodetective.py
+++ b/youtube_dl/extractor/videodetective.py
@@ -16,7 +16,7 @@ class VideoDetectiveIE(InfoExtractor):
        u'info_dict': {
            u'title': u'KICK-ASS 2',
            u'description': u'md5:65ba37ad619165afac7d432eaded6013',
-            u'duration': 138,
+            u'duration': 135,
        },
    }

--- a/youtube_dl/extractor/vimeo.py
+++ b/youtube_dl/extractor/vimeo.py
@@ -1,3 +1,4 @@
+# encoding: utf-8
 import json
 import re
 import itertools
@@ -10,6 +11,7 @@ from ..utils import (
    clean_html,
    get_element_by_attribute,
    ExtractorError,
+    RegexNotFoundError,
    std_headers,
    unsmuggle_url,
 )
@@ -25,7 +27,7 @@ class VimeoIE(InfoExtractor):
        {
            u'url': u'http://vimeo.com/56015672',
            u'file': u'56015672.mp4',
-            u'md5': u'8879b6cc097e987f02484baf890129e5',
+            u'md5': u'ae7a1d8b183758a0506b0622f37dfa14',
            u'info_dict': {
                u"upload_date": u"20121220", 
                u"description": u"This is a test case for youtube-dl.\nFor more information, see github.com/rg3/youtube-dl\nTest chars: \u2605 \" ' \u5e78 / \\ \u00e4 \u21ad \U0001d550", 
@@ -54,7 +56,22 @@ class VimeoIE(InfoExtractor):
                u'title': u'Kathy Sierra: Building the minimum Badass User, Business of Software',
                u'uploader': u'The BLN & Business of Software',
            },
-        }
+        },
+        {
+            u'url': u'http://vimeo.com/68375962',
+            u'file': u'68375962.mp4',
+            u'md5': u'aaf896bdb7ddd6476df50007a0ac0ae7',
+            u'note': u'Video protected with password',
+            u'info_dict': {
+                u'title': u'youtube-dl password protected test video',
+                u'upload_date': u'20130614',
+                u'uploader_id': u'user18948128',
+                u'uploader': u'Jaime Marquínez Ferrándiz',
+            },
+            u'params': {
+                u'videopassword': u'youtube-dl',
+            },
+        },
    ]

    def _login(self):
@@ -129,18 +146,26 @@ class VimeoIE(InfoExtractor):

        # Extract the config JSON
        try:
-            config = self._search_regex([r' = {config:({.+?}),assets:', r'c=({.+?);'],
-                webpage, u'info section', flags=re.DOTALL)
-            config = json.loads(config)
-        except:
+            try:
+                config_url = self._html_search_regex(
+                    r' data-config-url="(.+?)"', webpage, u'config URL')
+                config_json = self._download_webpage(config_url, video_id)
+                config = json.loads(config_json)
+            except RegexNotFoundError:
+                # For pro videos or player.vimeo.com urls
+                config = self._search_regex([r' = {config:({.+?}),assets:', r'c=({.+?);'],
+                    webpage, u'info section', flags=re.DOTALL)
+                config = json.loads(config)
+        except Exception as e:
            if re.search('The creator of this video has not given you permission to embed it on this domain.', webpage):
                raise ExtractorError(u'The author has restricted the access to this video, try with the "--referer" option')

-            if re.search('If so please provide the correct password.', webpage):
+            if re.search('<form[^>]+?id="pw_form"', webpage) is not None:
                self._verify_video_password(url, video_id, webpage)
                return self._real_extract(url)
            else:
-                raise ExtractorError(u'Unable to extract info section')
+                raise ExtractorError(u'Unable to extract info section',
+                                     cause=e)

        # Extract title
        video_title = config["video"]["title"]
@@ -179,46 +204,45 @@ class VimeoIE(InfoExtractor):

        # Vimeo specific: extract video codec and quality information
        # First consider quality, then codecs, then take everything
-        # TODO bind to format param
-        codecs = [('h264', 'mp4'), ('vp8', 'flv'), ('vp6', 'flv')]
+        codecs = [('vp6', 'flv'), ('vp8', 'flv'), ('h264', 'mp4')]
        files = { 'hd': [], 'sd': [], 'other': []}
        config_files = config["video"].get("files") or config["request"].get("files")
        for codec_name, codec_extension in codecs:
-            if codec_name in config_files:
-                if 'hd' in config_files[codec_name]:
-                    files['hd'].append((codec_name, codec_extension, 'hd'))
-                elif 'sd' in config_files[codec_name]:
-                    files['sd'].append((codec_name, codec_extension, 'sd'))
+            for quality in config_files.get(codec_name, []):
+                format_id = '-'.join((codec_name, quality)).lower()
+                key = quality if quality in files else 'other'
+                video_url = None
+                if isinstance(config_files[codec_name], dict):
+                    file_info = config_files[codec_name][quality]
+                    video_url = file_info.get('url')
                else:
-                    files['other'].append((codec_name, codec_extension, config_files[codec_name][0]))
+                    file_info = {}
+                if video_url is None:
+                    video_url = "http://player.vimeo.com/play_redirect?clip_id=%s&sig=%s&time=%s&quality=%s&codecs=%s&type=moogaloop_local&embed_location=" \
+                        %(video_id, sig, timestamp, quality, codec_name.upper())

-        for quality in ('hd', 'sd', 'other'):
-            if len(files[quality]) > 0:
-                video_quality = files[quality][0][2]
-                video_codec = files[quality][0][0]
-                video_extension = files[quality][0][1]
-                self.to_screen(u'%s: Downloading %s file at %s quality' % (video_id, video_codec.upper(), video_quality))
-                break
-        else:
+                files[key].append({
+                    'ext': codec_extension,
+                    'url': video_url,
+                    'format_id': format_id,
+                    'width': file_info.get('width'),
+                    'height': file_info.get('height'),
+                })
+        formats = []
+        for key in ('other', 'sd', 'hd'):
+            formats += files[key]
+        if len(formats) == 0:
            raise ExtractorError(u'No known codec found')

-        video_url = None
-        if isinstance(config_files[video_codec], dict):
-            video_url = config_files[video_codec][video_quality].get("url")
-        if video_url is None:
-            video_url = "http://player.vimeo.com/play_redirect?clip_id=%s&sig=%s&time=%s&quality=%s&codecs=%s&type=moogaloop_local&embed_location=" \
-                        %(video_id, sig, timestamp, video_quality, video_codec.upper())
-
        return [{
            'id':       video_id,
-            'url':      video_url,
            'uploader': video_uploader,
            'uploader_id': video_uploader_id,
            'upload_date':  video_upload_date,
            'title':    video_title,
-            'ext':      video_extension,
            'thumbnail':    video_thumbnail,
            'description':  video_description,
+            'formats': formats,
        }]


--- a/youtube_dl/extractor/xhamster.py
+++ b/youtube_dl/extractor/xhamster.py
@@ -19,7 +19,8 @@ class XHamsterIE(InfoExtractor):
        u'info_dict': {
            u"upload_date": u"20121014", 
            u"uploader_id": u"Ruseful2011", 
-            u"title": u"FemaleAgent Shy beauty takes the bait"
+            u"title": u"FemaleAgent Shy beauty takes the bait",
+            u"age_limit": 18,
        }
    },
    {
@@ -27,9 +28,10 @@ class XHamsterIE(InfoExtractor):
        u'file': u'2221348.flv',
        u'md5': u'e767b9475de189320f691f49c679c4c7',
        u'info_dict': {
-            u"upload_date": u"20130914", 
-            u"uploader_id": u"jojo747400", 
-            u"title": u"Britney Spears  Sexy Booty"
+            u"upload_date": u"20130914",
+            u"uploader_id": u"jojo747400",
+            u"title": u"Britney Spears  Sexy Booty",
+            u"age_limit": 18,
        }
    }]

@@ -72,6 +74,8 @@ class XHamsterIE(InfoExtractor):
        video_thumbnail = self._search_regex(r'\'image\':\'(?P<thumbnail>[^\']+)\'',
            webpage, u'thumbnail', fatal=False)

+        age_limit = self._rta_search(webpage)
+
        return [{
            'id':       video_id,
            'url':      video_url,
@@ -80,5 +84,6 @@ class XHamsterIE(InfoExtractor):
            'description': video_description,
            'upload_date': video_upload_date,
            'uploader_id': video_uploader_id,
-            'thumbnail': video_thumbnail
+            'thumbnail': video_thumbnail,
+            'age_limit': age_limit,
        }]
--- a/youtube_dl/extractor/xnxx.py
+++ b/youtube_dl/extractor/xnxx.py
@@ -18,7 +18,8 @@ class XNXXIE(InfoExtractor):
        u'file': u'1135332.flv',
        u'md5': u'0831677e2b4761795f68d417e0b7b445',
        u'info_dict': {
-            u"title": u"lida \u00bb Naked Funny Actress  (5)"
+            u"title": u"lida \u00bb Naked Funny Actress  (5)",
+            u"age_limit": 18,
        }
    }

@@ -50,4 +51,5 @@ class XNXXIE(InfoExtractor):
            'ext': 'flv',
            'thumbnail': video_thumbnail,
            'description': None,
+            'age_limit': 18,
        }]
--- a/youtube_dl/extractor/xvideos.py
+++ b/youtube_dl/extractor/xvideos.py
@@ -13,7 +13,8 @@ class XVideosIE(InfoExtractor):
        u'file': u'939581.flv',
        u'md5': u'1d0c835822f0a71a7bf011855db929d0',
        u'info_dict': {
-            u"title": u"Funny Porns By >>>>S<<<<<< -1"
+            u"title": u"Funny Porns By >>>>S<<<<<< -1",
+            u"age_limit": 18,
        }
    }

@@ -46,6 +47,7 @@ class XVideosIE(InfoExtractor):
            'ext': 'flv',
            'thumbnail': video_thumbnail,
            'description': None,
+            'age_limit': 18,
        }

        return [info]
--- a/youtube_dl/extractor/youporn.py
+++ b/youtube_dl/extractor/youporn.py
@@ -26,7 +26,8 @@ class YouPornIE(InfoExtractor):
            u"upload_date": u"20101221", 
            u"description": u"Love & Sex Answers: http://bit.ly/DanAndJenn -- Is It Unhealthy To Masturbate Daily?", 
            u"uploader": u"Ask Dan And Jennifer", 
-            u"title": u"Sex Ed: Is It Safe To Masturbate Daily?"
+            u"title": u"Sex Ed: Is It Safe To Masturbate Daily?",
+            u"age_limit": 18,
        }
    }

--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@@ -236,11 +236,13 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
        '136': 'mp4',
        '137': 'mp4',
        '138': 'mp4',
-        '139': 'mp4',
-        '140': 'mp4',
-        '141': 'mp4',
        '160': 'mp4',

+        # Dash mp4 audio
+        '139': 'm4a',
+        '140': 'm4a',
+        '141': 'm4a',
+
        # Dash webm
        '171': 'webm',
        '172': 'webm',
@@ -1150,7 +1152,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
            list_page = self._download_webpage(list_url, video_id)
            caption_list = xml.etree.ElementTree.fromstring(list_page.encode('utf-8'))
            original_lang_node = caption_list.find('track')
-            if original_lang_node.attrib.get('kind') != 'asr' :
+            if original_lang_node is None or original_lang_node.attrib.get('kind') != 'asr' :
                self._downloader.report_warning(u'Video doesn\'t have automatic captions')
                return {}
            original_lang = original_lang_node.attrib['lang_code']
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@@ -572,6 +572,11 @@ class ExtractorError(Exception):
        return u''.join(traceback.format_tb(self.traceback))


+class RegexNotFoundError(ExtractorError):
+    """Error raised when a regex didn't match (e.g. by _search_regex when fatal)"""
+    pass
+
+
 class DownloadError(Exception):
    """Download Error exception.

@@ -947,6 +952,15 @@ def shell_quote(args):
    return ' '.join(map(pipes.quote, args))


+def takewhile_inclusive(pred, seq):
+    """ Like itertools.takewhile, but also yield the last evaluated element
+        (the first element e for which pred(e) is false) """
+    for e in seq:
+        yield e
+        if not pred(e):
+            return
+
+
 def smuggle_url(url, data):
    """ Pass additional data in a URL for internal use. """

--- a/youtube_dl/version.py
+++ b/youtube_dl/version.py
@@ -1,2 +1,2 @@

-__version__ = '2013.10.15'
+__version__ = '2013.10.23.2'
Author	SHA1	Message	Date
Philipp Hagemeister	1cf64ee468	release 2013.10.23.2	2013-10-23 18:38:09 +02:00
Jaime Marquínez Ferrándiz	cdec0190c4	[dailymotion] Extract all the available formats (closes #1028 )	2013-10-23 17:33:38 +02:00
Jaime Marquínez Ferrándiz	2450bcb28b	[nowvideo] Fix key extraction Extract it from the embed page	2013-10-23 17:00:33 +02:00
Jaime Marquínez Ferrándiz	3126050c0f	Hide the video password on verbose mode	2013-10-23 16:32:17 +02:00
Jaime Marquínez Ferrándiz	93b22c7828	[vimeo] fix the extraction for videos protected with password Added a test video.	2013-10-23 16:31:53 +02:00
Philipp Hagemeister	0a89b2852e	release 2013.10.23.1	2013-10-23 15:12:33 +02:00
Jaime Marquínez Ferrándiz	55b3e45bba	[vimeo] Fix pro videos and player.vimeo.com urls The old process can still be used for those videos. Added RegexNotFoundError, which is raised by _search_regex if it can't extract the info.	2013-10-23 14:38:03 +02:00
Philipp Hagemeister	365bcf6d97	Merge remote-tracking branch 'origin/master'	2013-10-23 11:40:46 +02:00
Philipp Hagemeister	71907db3ba	[vimeo] Fix normal videos (Fixes #1642 ) Vimeo Pro Videos are still broken	2013-10-23 11:38:53 +02:00
Philipp Hagemeister	6803655ced	Merge pull request #1622 from rbrito/fix-extension extractor: youtube: Set extension of AAC audio formats to m4a.	2013-10-22 15:16:26 -07:00
Philipp Hagemeister	df1c39ec5c	release 2013.10.23	2013-10-23 00:07:27 +02:00
Philipp Hagemeister	80f55a9511	release 2013.10.22	2013-10-22 22:35:13 +02:00
Philipp Hagemeister	7853cc5ae1	Merge remote-tracking branch 'origin/master' Conflicts: youtube_dl/YoutubeDL.py	2013-10-22 22:30:06 +02:00
Philipp Hagemeister	586a91b67f	Expand tilde in template (Fixes #1639 )	2013-10-22 22:28:26 +02:00
Jaime Marquínez Ferrándiz	b028e96144	[arte.tv:creative] Update the title of the test	2013-10-22 21:06:06 +02:00
Jaime Marquínez Ferrándiz	ce68b5907c	[nhl:videocenter] Fix playlist title extraction	2013-10-22 21:01:16 +02:00
Jaime Marquínez Ferrándiz	fe7e0c9825	Style fixes in YoutubeDL.py Fixed some of the problems reported by pep8	2013-10-22 14:49:34 +02:00
Jaime Marquínez Ferrándiz	12893efe01	Respect the download parameter in YoutubeDL.process_video_result if the extractor handle the format selection	2013-10-22 00:01:59 +02:00
Joshua Elsasser	a6387bfd3c	[vimeo] Implement the new format selection system (closes PR #996 ) Rebased and deleted some parts to use the new system instead of copying the one from YoutubeIE	2013-10-21 23:16:11 +02:00
Jaime Marquínez Ferrándiz	f6a54188c2	[youtube] Use 'node is None' when checking if the video has automatic captions It had stopped working and it reports a FutureWarning	2013-10-21 16:28:55 +02:00
Jaime Marquínez Ferrándiz	cbbd9a9c69	Fix the duration field for the VideoDetective and InternetVideoArchive tests Also remove the use of the old format system and the comment	2013-10-21 15:07:33 +02:00
Jaime Marquínez Ferrándiz	685a9cd2f1	[googleplus] Fix upload_date extraction	2013-10-21 15:00:21 +02:00
Jaime Marquínez Ferrándiz	182a107877	[arte] Set the format_note and the format_id fields (closes #1628 )	2013-10-21 14:42:30 +02:00
Jaime Marquínez Ferrándiz	8c51aa6506	The 'format' field now defaults to '{format_id} - {width}x{height}{format_note}' Following the YoutubeIE format. The 'format_note' gives additional info about the format, for example '3D' or 'DASH video'.	2013-10-21 14:42:06 +02:00
Jaime Marquínez Ferrándiz	3fd39e37f2	YoutubeDL: remove method that came from FileDownloader	2013-10-21 13:52:24 +02:00
Jaime Marquínez Ferrándiz	49e86983e7	Allow to use the extension for the format selection The best format with the extension is downloaded.	2013-10-21 13:31:55 +02:00
Jaime Marquínez Ferrándiz	a9c58ad945	Accept requested formats to be in the format 35/best (closes #1552 ) The format selection code is now an independent function.	2013-10-21 13:19:58 +02:00
Philipp Hagemeister	f8b45beacc	Merge remote-tracking branch 'rbrito/set-age' Conflicts: youtube_dl/extractor/xhamster.py	2013-10-19 21:16:14 +02:00
Philipp Hagemeister	9d92015d43	[xhamster] Add support for age_limit (Instead of #1627 )	2013-10-19 21:09:48 +02:00
Rogério Brito	50a6150ed9	extractor: Set age limit on some adult-related extractors. More age limit of videos for adult-related sites. Note that, for redtube, I explicitly left the variable containing the age limit, since the comment justifying the age limit is a good thing to have. That being said, I included the age limit field on the test, to better reflect what the information extractor does (even if it may not break the automated tests). Signed-off-by: Rogério Brito <rbrito@ime.usp.br>	2013-10-19 14:19:25 -03:00
Philipp Hagemeister	284acd57d6	Add an author email	2013-10-19 11:14:20 +02:00
Rogério Brito	8ed6b34477	extractor: Set age limit on some adult-related extractors. This is similar in spirit to what was done in commit `8e590a117f`. Signed-off-by: Rogério Brito <rbrito@ime.usp.br>	2013-10-18 19:32:37 -03:00
Rogério Brito	f6f1fc9286	extractor: youtube: Fix extension of dash formats. While we are at it, separate the audio formats from the video formats. Signed-off-by: Rogério Brito <rbrito@ime.usp.br>	2013-10-18 18:53:00 -03:00
Philipp Hagemeister	8e590a117f	[xnxx] Add age_limit	2013-10-18 23:35:17 +02:00
Philipp Hagemeister	d5594202aa	Simplify release process	2013-10-18 23:34:55 +02:00
Philipp Hagemeister	b186d949cf	release 2013.10.18.2	2013-10-18 23:22:54 +02:00
Philipp Hagemeister	3d2986063c	[bash-completion] Do not use dash in function name (Fixes #1623 )	2013-10-18 23:13:46 +02:00
Philipp Hagemeister	41fd7c7e60	Add new option --abort-on-error	2013-10-18 23:09:32 +02:00
Philipp Hagemeister	fdefe96bf2	Document %(format)s (#1612 )	2013-10-18 23:09:08 +02:00
Rogério Brito	16f36a6fc9	extractor: youtube: Set extension of AAC audio formats to m4a. This, in particular, eases downloading both audio and videos in DASH formats before muxing them, which alleviates the problem that I exposed on issue Furthermore, one may argue that this is, indeed, the case for correctness's sake. Signed-off-by: Rogério Brito <rbrito@ime.usp.br>	2013-10-18 17:50:55 -03:00
Philipp Hagemeister	cce722b79c	Add metavar to --cache-dir	2013-10-18 11:50:48 +02:00
Philipp Hagemeister	82697fb2ab	release 2013.10.18.1	2013-10-18 11:45:30 +02:00
Philipp Hagemeister	53c1d3ef49	Check for embedded YouTube player (Fixes #1616 )	2013-10-18 11:44:57 +02:00
Philipp Hagemeister	8e55e9abfc	release 2013.10.18	2013-10-18 11:17:21 +02:00
Philipp Hagemeister	7c58ef3275	[tudou] Fix title regex (Fixes #1614 )	2013-10-18 11:16:20 +02:00
Philipp Hagemeister	416a5efce7	fix typos	2013-10-18 00:49:45 +02:00
Philipp Hagemeister	f4d96df0f1	Extend #980 with --max-quality support	2013-10-18 00:46:35 +02:00
Philipp Hagemeister	5d254f776a	Fix test	2013-10-18 00:27:51 +02:00
Philipp Hagemeister	1c1218fefc	Merge remote-tracking branch 'jaimeMF/format_selection'	2013-10-18 00:17:03 +02:00
Jaime Marquínez Ferrándiz	d21ab29200	Add an extractor for techtalks.tv (closes #1606 )	2013-10-17 08:20:58 +02:00
Philipp Hagemeister	54ed626cf8	release 2013.10.17	2013-10-17 02:20:26 +02:00
Philipp Hagemeister	a733eb6c53	[youtube] Do not crash if caption info is missing altogether (Fixes #1610 )	2013-10-17 02:19:19 +02:00
Philipp Hagemeister	591454798d	[brightcove] Raise error if playlist is empty (#1608 )	2013-10-17 01:02:17 +02:00
Philipp Hagemeister	38604f1a4f	Merge remote-tracking branch 'origin/master'	2013-10-17 00:55:06 +02:00
Philipp Hagemeister	2d0efe70a6	[brightcove] Fix more broken XML (#1608 )	2013-10-17 00:46:11 +02:00
Jaime Marquínez Ferrándiz	bfd14b1b2f	Add an extractor for rutube.ru (closes #1136 ) It downloads with a m3u8 manifest, requires ffmpeg.	2013-10-16 16:57:40 +02:00
Jaime Marquínez Ferrándiz	76965512da	Fix the indentation of the Makefile It uses tabs, no spaces.	2013-10-15 23:15:15 +02:00
Jaime Marquínez Ferrándiz	996d1c3242	Don't include the test/testdata directory in the youtube-dl.tar.gz The last releases included big files that increased the size of the compressed file.	2013-10-15 23:08:52 +02:00
Jaime Marquínez Ferrándiz	8016c92297	Fix the default values of format_id and format	2013-10-11 16:34:49 +02:00
Jaime Marquínez Ferrándiz	e028d0d1e3	Implement the prefer_free_formats in YoutubeDL	2013-10-11 16:34:49 +02:00
Jaime Marquínez Ferrándiz	79819f58f2	Default 'format' field to {width}x{height} If width is None, use {height}p and if height is None, '???'	2013-10-11 16:34:49 +02:00
Jaime Marquínez Ferrándiz	6ff000b888	Do not handle format selection for IEs that already handle it	2013-10-11 16:34:48 +02:00
Jaime Marquínez Ferrándiz	99e206d508	Implement the max quality option in YoutubeDL	2013-10-11 16:34:48 +02:00
Jaime Marquínez Ferrándiz	dd82ffea0c	Implement format selection in YoutubeDL Now the IEs can set a formats field in the info_dict, with the formats ordered from worst to best quality. It's a list of dicts with the following fields: * Mandatory: url and ext * Optional: format and format_id The format_id is used for choosing which formats have to be downloaded. Now a video result is processed by the method process_video_result.	2013-10-11 16:34:48 +02:00