release 2013.12.16.4

[facebook] Recognize #! URLs (Fixes #1988)
Merge pull request #1987 from rzhxeo/blip
2013-12-16 21:10:18 +01:00 · 2013-12-16 21:10:06 +01:00 · 2013-12-16 11:28:34 -08:00 · 2013-12-16 20:08:23 +01:00 · 2013-12-16 09:04:36 -08:00 · 2013-12-16 20:14:28 +03:30
35 changed files with 930 additions and 180 deletions
--- a/README.md
+++ b/README.md
@@ -56,6 +56,10 @@ which means you can modify it, redistribute it or use it however you like.
    --date DATE                download only videos uploaded in this date
    --datebefore DATE          download only videos uploaded before this date
    --dateafter DATE           download only videos uploaded after this date
+    --min-views COUNT          Do not download any videos with less than COUNT
+                               views
+    --max-views COUNT          Do not download any videos with more than COUNT
+                               views
    --no-playlist              download only the currently playing video
    --age-limit YEARS          download only videos suitable for the given age
    --download-archive FILE    Download only videos not listed in the archive
@@ -127,6 +131,7 @@ which means you can modify it, redistribute it or use it however you like.
    --get-id                   simulate, quiet but print id
    --get-thumbnail            simulate, quiet but print thumbnail URL
    --get-description          simulate, quiet but print video description
+    --get-duration             simulate, quiet but print video length
    --get-filename             simulate, quiet but print output filename
    --get-format               simulate, quiet but print output format
    -j, --dump-json            simulate, quiet but print JSON information
--- a/test/test_YoutubeDL.py
+++ b/test/test_YoutubeDL.py
@@ -7,6 +7,7 @@ import unittest
 sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

 from test.helper import FakeYDL
+from youtube_dl import YoutubeDL


 class YDL(FakeYDL):
@@ -140,6 +141,20 @@ class TestFormatSelection(unittest.TestCase):
        self.assertEqual(test_dict['extractor'], 'Foo')
        self.assertEqual(test_dict['playlist'], 'funny videos')

+    def test_prepare_filename(self):
+        info = {
+            u'id': u'1234',
+            u'ext': u'mp4',
+            u'width': None,
+        }
+        def fname(templ):
+            ydl = YoutubeDL({'outtmpl': templ})
+            return ydl.prepare_filename(info)
+        self.assertEqual(fname(u'%(id)s.%(ext)s'), u'1234.mp4')
+        self.assertEqual(fname(u'%(id)s-%(width)s.%(ext)s'), u'1234-NA.mp4')
+        # Replace missing fields with 'NA'
+        self.assertEqual(fname(u'%(uploader_date)s-%(id)s.%(ext)s'), u'NA-1234.mp4')
+

 if __name__ == '__main__':
    unittest.main()
--- a/test/test_all_urls.py
+++ b/test/test_all_urls.py
@@ -10,6 +10,7 @@ sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
 from test.helper import get_testcases

 from youtube_dl.extractor import (
+    FacebookIE,
    gen_extractors,
    JustinTVIE,
    YoutubeIE,
@@ -87,12 +88,15 @@ class TestAllURLsMatching(unittest.TestCase):
        assertExtractId('http://www.youtube.com/watch?v=BaW_jenozKcsharePLED17F32AD9753930', 'BaW_jenozKc')
        assertExtractId('BaW_jenozKc', 'BaW_jenozKc')

+    def test_facebook_matching(self):
+        self.assertTrue(FacebookIE.suitable(u'https://www.facebook.com/Shiniknoh#!/photo.php?v=10153317450565268'))
+
    def test_no_duplicates(self):
        ies = gen_extractors()
        for tc in get_testcases():
            url = tc['url']
            for ie in ies:
-                if type(ie).__name__ in ['GenericIE', tc['name'] + 'IE']:
+                if type(ie).__name__ in ('GenericIE', tc['name'] + 'IE'):
                    self.assertTrue(ie.suitable(url), '%s should match URL %r' % (type(ie).__name__, url))
                else:
                    self.assertFalse(ie.suitable(url), '%s should not match URL %r' % (type(ie).__name__, url))
@@ -110,6 +114,9 @@ class TestAllURLsMatching(unittest.TestCase):
        self.assertMatch('http://vimeo.com/channels/tributes', ['vimeo:channel'])
        self.assertMatch('http://vimeo.com/user7108434', ['vimeo:user'])

+    # https://github.com/rg3/youtube-dl/issues/1930
+    def test_soundcloud_not_matching_sets(self):
+        self.assertMatch('http://soundcloud.com/floex/sets/gone-ep', ['soundcloud:set'])

 if __name__ == '__main__':
    unittest.main()
--- a/youtube_dl/YoutubeDL.py
+++ b/youtube_dl/YoutubeDL.py
@@ -3,6 +3,7 @@

 from __future__ import absolute_import

+import collections
 import errno
 import io
 import json
@@ -33,6 +34,7 @@ from .utils import (
    encodeFilename,
    ExtractorError,
    format_bytes,
+    formatSeconds,
    get_term_width,
    locked_file,
    make_HTTPS_handler,
@@ -93,6 +95,7 @@ class YoutubeDL(object):
    forcethumbnail:    Force printing thumbnail URL.
    forcedescription:  Force printing description.
    forcefilename:     Force printing final filename.
+    forceduration:     Force printing duration.
    forcejson:         Force printing info_dict as JSON.
    simulate:          Do not download the video files.
    format:            Video format code.
@@ -126,7 +129,16 @@ class YoutubeDL(object):
    noplaylist:        Download single video instead of a playlist if in doubt.
    age_limit:         An integer representing the user's age in years.
                       Unsuitable videos for the given age are skipped.
-    download_archive:   File name of a file where all downloads are recorded.
+    min_views:         An integer representing the minimum view count the video
+                       must have in order to not be skipped.
+                       Videos without view count information are always
+                       downloaded. None for no limit.
+    max_views:         An integer representing the maximum view count.
+                       Videos that are more popular than that are not
+                       downloaded.
+                       Videos without view count information are always
+                       downloaded. None for no limit.
+    download_archive:  File name of a file where all downloads are recorded.
                       Videos already present in the file are not downloaded
                       again.
    cookiefile:        File name where cookies should be read from and dumped to.
@@ -354,22 +366,6 @@ class YoutubeDL(object):
        error_message = u'%s %s' % (_msg_header, message)
        self.trouble(error_message, tb)

-    def report_writedescription(self, descfn):
-        """ Report that the description file is being written """
-        self.to_screen(u'[info] Writing video description to: ' + descfn)
-
-    def report_writesubtitles(self, sub_filename):
-        """ Report that the subtitles file is being written """
-        self.to_screen(u'[info] Writing video subtitles to: ' + sub_filename)
-
-    def report_writeinfojson(self, infofn):
-        """ Report that the metadata file has been written """
-        self.to_screen(u'[info] Video description metadata as JSON to: ' + infofn)
-
-    def report_writeannotations(self, annofn):
-        """ Report that the annotations file has been written. """
-        self.to_screen(u'[info] Writing video annotations to: ' + annofn)
-
    def report_file_already_downloaded(self, file_name):
        """Report file has already been fully downloaded."""
        try:
@@ -396,18 +392,17 @@ class YoutubeDL(object):
                template_dict['playlist_index'] = u'%05d' % template_dict['playlist_index']

            sanitize = lambda k, v: sanitize_filename(
-                u'NA' if v is None else compat_str(v),
+                compat_str(v),
                restricted=self.params.get('restrictfilenames'),
                is_id=(k == u'id'))
            template_dict = dict((k, sanitize(k, v))
-                                 for k, v in template_dict.items())
+                                 for k, v in template_dict.items()
+                                 if v is not None)
+            template_dict = collections.defaultdict(lambda: u'NA', template_dict)

            tmpl = os.path.expanduser(self.params['outtmpl'])
            filename = tmpl % template_dict
            return filename
-        except KeyError as err:
-            self.report_error(u'Erroneous output template')
-            return None
        except ValueError as err:
            self.report_error(u'Error in output template: ' + str(err) + u' (encoding: ' + repr(preferredencoding()) + ')')
            return None
@@ -415,13 +410,14 @@ class YoutubeDL(object):
    def _match_entry(self, info_dict):
        """ Returns None iff the file should be downloaded """

+        video_title = info_dict.get('title', info_dict.get('id', u'video'))
        if 'title' in info_dict:
            # This can happen when we're just evaluating the playlist
            title = info_dict['title']
            matchtitle = self.params.get('matchtitle', False)
            if matchtitle:
                if not re.search(matchtitle, title, re.IGNORECASE):
-                    return u'[download] "' + title + '" title did not match pattern "' + matchtitle + '"'
+                    return u'"' + title + '" title did not match pattern "' + matchtitle + '"'
            rejecttitle = self.params.get('rejecttitle', False)
            if rejecttitle:
                if re.search(rejecttitle, title, re.IGNORECASE):
@@ -430,14 +426,21 @@ class YoutubeDL(object):
        if date is not None:
            dateRange = self.params.get('daterange', DateRange())
            if date not in dateRange:
-                return u'[download] %s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
+                return u'%s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
+        view_count = info_dict.get('view_count', None)
+        if view_count is not None:
+            min_views = self.params.get('min_views')
+            if min_views is not None and view_count < min_views:
+                return u'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
+            max_views = self.params.get('max_views')
+            if max_views is not None and view_count > max_views:
+                return u'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
        age_limit = self.params.get('age_limit')
        if age_limit is not None:
            if age_limit < info_dict.get('age_limit', 0):
                return u'Skipping "' + title + '" because it is age restricted'
        if self.in_download_archive(info_dict):
-            return (u'%s has already been recorded in archive'
-                    % info_dict.get('title', info_dict.get('id', u'video')))
+            return u'%s has already been recorded in archive' % video_title
        return None

    @staticmethod
@@ -554,16 +557,16 @@ class YoutubeDL(object):

            n_all_entries = len(ie_result['entries'])
            playliststart = self.params.get('playliststart', 1) - 1
-            playlistend = self.params.get('playlistend', -1)
-
+            playlistend = self.params.get('playlistend', None)
+            # For backwards compatibility, interpret -1 as whole list
            if playlistend == -1:
-                entries = ie_result['entries'][playliststart:]
-            else:
-                entries = ie_result['entries'][playliststart:playlistend]
+                playlistend = None

+            entries = ie_result['entries'][playliststart:playlistend]
            n_entries = len(entries)

-            self.to_screen(u"[%s] playlist '%s': Collected %d video ids (downloading %d of them)" %
+            self.to_screen(
+                u"[%s] playlist '%s': Collected %d video ids (downloading %d of them)" %
                (ie_result['extractor'], playlist, n_all_entries, n_entries))

            for i, entry in enumerate(entries, 1):
@@ -748,6 +751,8 @@ class YoutubeDL(object):
            self.to_stdout(info_dict['description'])
        if self.params.get('forcefilename', False) and filename is not None:
            self.to_stdout(filename)
+        if self.params.get('forceduration', False) and info_dict.get('duration') is not None:
+            self.to_stdout(formatSeconds(info_dict['duration']))
        if self.params.get('forceformat', False):
            self.to_stdout(info_dict['format'])
        if self.params.get('forcejson', False):
@@ -770,28 +775,34 @@ class YoutubeDL(object):
            return

        if self.params.get('writedescription', False):
-            try:
-                descfn = filename + u'.description'
-                self.report_writedescription(descfn)
-                with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
-                    descfile.write(info_dict['description'])
-            except (KeyError, TypeError):
-                self.report_warning(u'There\'s no description to write.')
-            except (OSError, IOError):
-                self.report_error(u'Cannot write description file ' + descfn)
-                return
+            descfn = filename + u'.description'
+            if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(descfn)):
+                self.to_screen(u'[info] Video description is already present')
+            else:
+                try:
+                    self.to_screen(u'[info] Writing video description to: ' + descfn)
+                    with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
+                        descfile.write(info_dict['description'])
+                except (KeyError, TypeError):
+                    self.report_warning(u'There\'s no description to write.')
+                except (OSError, IOError):
+                    self.report_error(u'Cannot write description file ' + descfn)
+                    return

        if self.params.get('writeannotations', False):
-            try:
-                annofn = filename + u'.annotations.xml'
-                self.report_writeannotations(annofn)
-                with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
-                    annofile.write(info_dict['annotations'])
-            except (KeyError, TypeError):
-                self.report_warning(u'There are no annotations to write.')
-            except (OSError, IOError):
-                self.report_error(u'Cannot write annotations file: ' + annofn)
-                return
+            annofn = filename + u'.annotations.xml'
+            if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(annofn)):
+                self.to_screen(u'[info] Video annotations are already present')
+            else:
+                try:
+                    self.to_screen(u'[info] Writing video annotations to: ' + annofn)
+                    with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
+                        annofile.write(info_dict['annotations'])
+                except (KeyError, TypeError):
+                    self.report_warning(u'There are no annotations to write.')
+                except (OSError, IOError):
+                    self.report_error(u'Cannot write annotations file: ' + annofn)
+                    return

        subtitles_are_requested = any([self.params.get('writesubtitles', False),
                                       self.params.get('writeautomaticsub')])
@@ -807,38 +818,48 @@ class YoutubeDL(object):
                    continue
                try:
                    sub_filename = subtitles_filename(filename, sub_lang, sub_format)
-                    self.report_writesubtitles(sub_filename)
-                    with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8') as subfile:
-                            subfile.write(sub)
+                    if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(sub_filename)):
+                        self.to_screen(u'[info] Video subtitle %s.%s is already_present' % (sub_lang, sub_format))
+                    else:
+                        self.to_screen(u'[info] Writing video subtitles to: ' + sub_filename)
+                        with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8') as subfile:
+                                subfile.write(sub)
                except (OSError, IOError):
                    self.report_error(u'Cannot write subtitles file ' + descfn)
                    return

        if self.params.get('writeinfojson', False):
            infofn = os.path.splitext(filename)[0] + u'.info.json'
-            self.report_writeinfojson(infofn)
-            try:
-                json_info_dict = dict((k, v) for k, v in info_dict.items() if not k in ['urlhandle'])
-                write_json_file(json_info_dict, encodeFilename(infofn))
-            except (OSError, IOError):
-                self.report_error(u'Cannot write metadata to JSON file ' + infofn)
-                return
+            if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(infofn)):
+                self.to_screen(u'[info] Video description metadata is already present')
+            else:
+                self.to_screen(u'[info] Writing video description metadata as JSON to: ' + infofn)
+                try:
+                    json_info_dict = dict((k, v) for k, v in info_dict.items() if not k in ['urlhandle'])
+                    write_json_file(json_info_dict, encodeFilename(infofn))
+                except (OSError, IOError):
+                    self.report_error(u'Cannot write metadata to JSON file ' + infofn)
+                    return

        if self.params.get('writethumbnail', False):
            if info_dict.get('thumbnail') is not None:
                thumb_format = determine_ext(info_dict['thumbnail'], u'jpg')
-                thumb_filename = filename.rpartition('.')[0] + u'.' + thumb_format
-                self.to_screen(u'[%s] %s: Downloading thumbnail ...' %
-                               (info_dict['extractor'], info_dict['id']))
-                try:
-                    uf = compat_urllib_request.urlopen(info_dict['thumbnail'])
-                    with open(thumb_filename, 'wb') as thumbf:
-                        shutil.copyfileobj(uf, thumbf)
-                    self.to_screen(u'[%s] %s: Writing thumbnail to: %s' %
-                        (info_dict['extractor'], info_dict['id'], thumb_filename))
-                except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
-                    self.report_warning(u'Unable to download thumbnail "%s": %s' %
-                        (info_dict['thumbnail'], compat_str(err)))
+                thumb_filename = os.path.splitext(filename)[0] + u'.' + thumb_format
+                if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(thumb_filename)):
+                    self.to_screen(u'[%s] %s: Thumbnail is already present' %
+                                   (info_dict['extractor'], info_dict['id']))
+                else:
+                    self.to_screen(u'[%s] %s: Downloading thumbnail ...' %
+                                   (info_dict['extractor'], info_dict['id']))
+                    try:
+                        uf = compat_urllib_request.urlopen(info_dict['thumbnail'])
+                        with open(thumb_filename, 'wb') as thumbf:
+                            shutil.copyfileobj(uf, thumbf)
+                        self.to_screen(u'[%s] %s: Writing thumbnail to: %s' %
+                            (info_dict['extractor'], info_dict['id'], thumb_filename))
+                    except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
+                        self.report_warning(u'Unable to download thumbnail "%s": %s' %
+                            (info_dict['thumbnail'], compat_str(err)))

        if not self.params.get('skip_download', False):
            if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(filename)):
--- a/youtube_dl/__init__.py
+++ b/youtube_dl/__init__.py
@@ -37,6 +37,7 @@ __authors__  = (
    'Anton Larionov',
    'Takuya Tsuchida',
    'Sergey M.',
+    'Michael Orlitzky',
 )

 __license__ = 'Public Domain'
@@ -62,6 +63,7 @@ from .utils import (
    MaxDownloadsReached,
    preferredencoding,
    SameFileError,
+    setproctitle,
    std_headers,
    write_string,
 )
@@ -196,10 +198,14 @@ def parseOpts(overrideArguments=None):
        help=u'Work around terminals that lack bidirectional text support. Requires fribidi executable in PATH')


-    selection.add_option('--playlist-start',
-            dest='playliststart', metavar='NUMBER', help='playlist video to start at (default is %default)', default=1)
-    selection.add_option('--playlist-end',
-            dest='playlistend', metavar='NUMBER', help='playlist video to end at (default is last)', default=-1)
+    selection.add_option(
+        '--playlist-start',
+        dest='playliststart', metavar='NUMBER', default=1, type=int,
+        help='playlist video to start at (default is %default)')
+    selection.add_option(
+        '--playlist-end',
+        dest='playlistend', metavar='NUMBER', default=None, type=int,
+        help='playlist video to end at (default is last)')
    selection.add_option('--match-title', dest='matchtitle', metavar='REGEX',help='download only matching titles (regex or caseless sub-string)')
    selection.add_option('--reject-title', dest='rejecttitle', metavar='REGEX',help='skip download for matching titles (regex or caseless sub-string)')
    selection.add_option('--max-downloads', metavar='NUMBER',
@@ -210,6 +216,14 @@ def parseOpts(overrideArguments=None):
    selection.add_option('--date', metavar='DATE', dest='date', help='download only videos uploaded in this date', default=None)
    selection.add_option('--datebefore', metavar='DATE', dest='datebefore', help='download only videos uploaded before this date', default=None)
    selection.add_option('--dateafter', metavar='DATE', dest='dateafter', help='download only videos uploaded after this date', default=None)
+    selection.add_option(
+        '--min-views', metavar='COUNT', dest='min_views',
+        default=None, type=int,
+        help="Do not download any videos with less than COUNT views",)
+    selection.add_option(
+        '--max-views', metavar='COUNT', dest='max_views',
+        default=None, type=int,
+        help="Do not download any videos with more than COUNT views",)
    selection.add_option('--no-playlist', action='store_true', dest='noplaylist', help='download only the currently playing video', default=False)
    selection.add_option('--age-limit', metavar='YEARS', dest='age_limit',
                         help='download only videos suitable for the given age',
@@ -290,6 +304,9 @@ def parseOpts(overrideArguments=None):
    verbosity.add_option('--get-description',
            action='store_true', dest='getdescription',
            help='simulate, quiet but print video description', default=False)
+    verbosity.add_option('--get-duration',
+            action='store_true', dest='getduration',
+            help='simulate, quiet but print video length', default=False)
    verbosity.add_option('--get-filename',
            action='store_true', dest='getfilename',
            help='simulate, quiet but print output filename', default=False)
@@ -460,12 +477,15 @@ def parseOpts(overrideArguments=None):

    return parser, opts, args

+
 def _real_main(argv=None):
    # Compatibility fixes for Windows
    if sys.platform == 'win32':
        # https://github.com/rg3/youtube-dl/issues/820
        codecs.register(lambda name: codecs.lookup('utf-8') if name == 'cp65001' else None)

+    setproctitle(u'youtube-dl')
+
    parser, opts, args = parseOpts(argv)

    # Set user agent
@@ -560,18 +580,10 @@ def _real_main(argv=None):
        if numeric_buffersize is None:
            parser.error(u'invalid buffer size specified')
        opts.buffersize = numeric_buffersize
-    try:
-        opts.playliststart = int(opts.playliststart)
-        if opts.playliststart <= 0:
-            raise ValueError(u'Playlist start must be positive')
-    except (TypeError, ValueError):
-        parser.error(u'invalid playlist start number specified')
-    try:
-        opts.playlistend = int(opts.playlistend)
-        if opts.playlistend != -1 and (opts.playlistend <= 0 or opts.playlistend < opts.playliststart):
-            raise ValueError(u'Playlist end must be greater than playlist start')
-    except (TypeError, ValueError):
-        parser.error(u'invalid playlist end number specified')
+    if opts.playliststart <= 0:
+        raise ValueError(u'Playlist start must be positive')
+    if opts.playlistend not in (-1, None) and opts.playlistend < opts.playliststart:
+        raise ValueError(u'Playlist end must be greater than playlist start')
    if opts.extractaudio:
        if opts.audioformat not in ['best', 'aac', 'mp3', 'm4a', 'opus', 'vorbis', 'wav']:
            parser.error(u'invalid audio format specified')
@@ -604,27 +616,30 @@ def _real_main(argv=None):
            or (opts.useid and u'%(id)s.%(ext)s')
            or (opts.autonumber and u'%(autonumber)s-%(id)s.%(ext)s')
            or u'%(title)s-%(id)s.%(ext)s')
-    if '%(ext)s' not in outtmpl and opts.extractaudio:
+    if not os.path.splitext(outtmpl)[1] and opts.extractaudio:
        parser.error(u'Cannot download a video and extract audio into the same'
-                     u' file! Use "%%(ext)s" instead of %r' %
-                     determine_ext(outtmpl, u''))
+                     u' file! Use "{0}.%(ext)s" instead of "{0}" as the output'
+                     u' template'.format(outtmpl))
+
+    any_printing = opts.geturl or opts.gettitle or opts.getid or opts.getthumbnail or opts.getdescription or opts.getfilename or opts.getformat or opts.getduration or opts.dumpjson

    ydl_opts = {
        'usenetrc': opts.usenetrc,
        'username': opts.username,
        'password': opts.password,
        'videopassword': opts.videopassword,
-        'quiet': (opts.quiet or opts.geturl or opts.gettitle or opts.getid or opts.getthumbnail or opts.getdescription or opts.getfilename or opts.getformat or opts.dumpjson),
+        'quiet': (opts.quiet or any_printing),
        'forceurl': opts.geturl,
        'forcetitle': opts.gettitle,
        'forceid': opts.getid,
        'forcethumbnail': opts.getthumbnail,
        'forcedescription': opts.getdescription,
+        'forceduration': opts.getduration,
        'forcefilename': opts.getfilename,
        'forceformat': opts.getformat,
        'forcejson': opts.dumpjson,
        'simulate': opts.simulate,
-        'skip_download': (opts.skip_download or opts.simulate or opts.geturl or opts.gettitle or opts.getid or opts.getthumbnail or opts.getdescription or opts.getfilename or opts.getformat or opts.dumpjson),
+        'skip_download': (opts.skip_download or opts.simulate or any_printing),
        'format': opts.format,
        'format_limit': opts.format_limit,
        'listformats': opts.listformats,
@@ -668,6 +683,8 @@ def _real_main(argv=None):
        'keepvideo': opts.keepvideo,
        'min_filesize': opts.min_filesize,
        'max_filesize': opts.max_filesize,
+        'min_views': opts.min_views,
+        'max_views': opts.max_views,
        'daterange': date,
        'cachedir': opts.cachedir,
        'youtube_print_sig_code': opts.youtube_print_sig_code,
--- a/youtube_dl/extractor/__init__.py
+++ b/youtube_dl/extractor/__init__.py
@@ -13,6 +13,7 @@ from .arte import (
 from .auengine import AUEngineIE
 from .bambuser import BambuserIE, BambuserChannelIE
 from .bandcamp import BandcampIE, BandcampAlbumIE
+from .blinkx import BlinkxIE
 from .bliptv import BlipTVIE, BlipTVUserIE
 from .bloomberg import BloombergIE
 from .breakcom import BreakIE
@@ -20,6 +21,8 @@ from .brightcove import BrightcoveIE
 from .c56 import C56IE
 from .canalplus import CanalplusIE
 from .canalc2 import Canalc2IE
+from .cbs import CBSIE
+from .channel9 import Channel9IE
 from .cinemassacre import CinemassacreIE
 from .clipfish import ClipfishIE
 from .clipsyndicate import ClipsyndicateIE
@@ -87,6 +90,7 @@ from .kickstarter import KickStarterIE
 from .keek import KeekIE
 from .liveleak import LiveLeakIE
 from .livestream import LivestreamIE, LivestreamOriginalIE
+from .mdr import MDRIE
 from .metacafe import MetacafeIE
 from .metacritic import MetacriticIE
 from .mit import TechTVMITIE, MITIE
@@ -100,6 +104,7 @@ from .myvideo import MyVideoIE
 from .naver import NaverIE
 from .nba import NBAIE
 from .nbc import NBCNewsIE
+from .ndtv import NDTVIE
 from .newgrounds import NewgroundsIE
 from .nhl import NHLIE, NHLVideocenterIE
 from .niconico import NiconicoIE
@@ -110,6 +115,7 @@ from .orf import ORFIE
 from .pbs import PBSIE
 from .photobucket import PhotobucketIE
 from .podomatic import PodomaticIE
+from .pornhd import PornHdIE
 from .pornhub import PornHubIE
 from .pornotube import PornotubeIE
 from .pyvideo import PyvideoIE
--- a/youtube_dl/extractor/appletrailers.py
+++ b/youtube_dl/extractor/appletrailers.py
@@ -1,5 +1,4 @@
 import re
-import xml.etree.ElementTree
 import json

 from .common import InfoExtractor
@@ -65,18 +64,18 @@ class AppleTrailersIE(InfoExtractor):
        uploader_id = mobj.group('company')

        playlist_url = compat_urlparse.urljoin(url, u'includes/playlists/itunes.inc')
-        playlist_snippet = self._download_webpage(playlist_url, movie)
-        playlist_cleaned = re.sub(r'(?s)<script[^<]*?>.*?</script>', u'', playlist_snippet)
-        playlist_cleaned = re.sub(r'<img ([^<]*?)>', r'<img \1/>', playlist_cleaned)
-        # The ' in the onClick attributes are not escaped, it couldn't be parsed
-        # with xml.etree.ElementTree.fromstring
-        # like: http://trailers.apple.com/trailers/wb/gravity/
-        def _clean_json(m):
-            return u'iTunes.playURL(%s);' % m.group(1).replace('\'', '&#39;')
-        playlist_cleaned = re.sub(self._JSON_RE, _clean_json, playlist_cleaned)
-        playlist_html = u'<html>' + playlist_cleaned + u'</html>'
+        def fix_html(s):
+            s = re.sub(r'(?s)<script[^<]*?>.*?</script>', u'', s)
+            s = re.sub(r'<img ([^<]*?)>', r'<img \1/>', s)
+            # The ' in the onClick attributes are not escaped, so the page can't be
+            # parsed as XML as-is, e.g. http://trailers.apple.com/trailers/wb/gravity/
+            def _clean_json(m):
+                return u'iTunes.playURL(%s);' % m.group(1).replace('\'', '&#39;')
+            s = re.sub(self._JSON_RE, _clean_json, s)
+            s = u'<html>' + s + u'</html>'
+            return s
+        doc = self._download_xml(playlist_url, movie, transform_source=fix_html)

-        doc = xml.etree.ElementTree.fromstring(playlist_html)
        playlist = []
        for li in doc.findall('./div/ul/li'):
            on_click = li.find('.//a').attrib['onClick']
--- a/youtube_dl/extractor/blinkx.py
+++ b/youtube_dl/extractor/blinkx.py
@@ -0,0 +1,99 @@
+import datetime
+import json
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+    remove_start,
+)
+
+
+class BlinkxIE(InfoExtractor):
+    """Extractor for blinkx.com/ce/ video pages and the blinkx:<id> shorthand."""
+
+    # The "www." prefix is optional; the id runs up to the first '?'.
+    _VALID_URL = r'^(?:https?://(?:www\.)?blinkx\.com/ce/|blinkx:)(?P<id>[^?]+)'
+    IE_NAME = u'blinkx'
+
+    _TEST = {
+        u'url': u'http://www.blinkx.com/ce/8aQUy7GVFYgFzpKhT0oqsilwOGFRVXk3R1ZGWWdGenBLaFQwb3FzaWx3OGFRVXk3R1ZGWWdGenB',
+        u'file': u'8aQUy7GV.mp4',
+        u'md5': u'2e9a07364af40163a908edbf10bb2492',
+        u'info_dict': {
+            u"title": u"Police Car Rolls Away",
+            u"uploader": u"stupidvideos.com",
+            u"upload_date": u"20131215",
+            u"description": u"A police car gently rolls away from a fight. Maybe it felt weird being around a confrontation and just had to get out of there!",
+            u"duration": 14.886,
+            u"thumbnails": [{
+                "width": 100,
+                "height": 76,
+                "url": "http://cdn.blinkx.com/stream/b/41/StupidVideos/20131215/1873969261/1873969261_tn_0.jpg",
+            }],
+        },
+    }
+
+    def _real_extract(self, url):
+        """Query the play_video API and build the info dictionary.
+
+        The first 8 characters of the id taken from the URL serve as the
+        short 'id'; the complete id is returned under 'fullid'.
+        """
+        mobj = re.match(self._VALID_URL, url)
+        video_id = mobj.group('id')
+        display_id = video_id[:8]
+
+        api_url = (u'https://apib4.blinkx.com/api.php?action=play_video&' +
+                   u'video=%s' % video_id)
+        data_json = self._download_webpage(api_url, display_id)
+        data = json.loads(data_json)['api']['results'][0]
+        # NOTE(review): fromtimestamp() converts using the local timezone, so
+        # the resulting date can depend on the host's settings.
+        dt = datetime.datetime.fromtimestamp(data['pubdate_epoch'])
+        upload_date = dt.strftime('%Y%m%d')
+
+        duration = None
+        thumbnails = []
+        formats = []
+        # A media entry is a thumbnail ('jpg'), an 'original' entry (only its
+        # duration 'd' is read) or a downloadable 'flv'/'mp4' format.
+        for media in data['media']:
+            if media['type'] == 'jpg':
+                thumbnails.append({
+                    'url': media['link'],
+                    'width': int(media['w']),
+                    'height': int(media['h']),
+                })
+            elif media['type'] == 'original':
+                duration = media['d']
+            elif media['type'] in ('flv', 'mp4'):
+                vcodec = remove_start(media['vcodec'], 'ff')
+                acodec = remove_start(media['acodec'], 'ff')
+                format_id = (u'%s-%sk-%s' %
+                             (vcodec,
+                              (int(media['vbr']) + int(media['abr'])) // 1000,
+                              media['w']))
+                formats.append({
+                    'format_id': format_id,
+                    'url': media['link'],
+                    'vcodec': vcodec,
+                    'acodec': acodec,
+                    'abr': int(media['abr']) // 1000,
+                    'vbr': int(media['vbr']) // 1000,
+                    'width': int(media['w']),
+                    'height': int(media['h']),
+                })
+        # Ascending by width, then video and audio bitrate (smallest first)
+        formats.sort(key=lambda f: (f['width'], f['vbr'], f['abr']))
+
+        return {
+            'id': display_id,
+            'fullid': video_id,
+            'title': data['title'],
+            'formats': formats,
+            'uploader': data['channel_name'],
+            'upload_date': upload_date,
+            'description': data.get('description'),
+            'thumbnails': thumbnails,
+            'duration': duration,
+        }
--- a/youtube_dl/extractor/brightcove.py
+++ b/youtube_dl/extractor/brightcove.py
@@ -55,6 +55,18 @@ class BrightcoveIE(InfoExtractor):
                u'uploader': u'Mashable',
            },
        },
+        {
+            # test that the default referer works
+            # from http://national.ballet.ca/interact/video/Lost_in_Motion_II/
+            u'url': u'http://link.brightcove.com/services/player/bcpid756015033001?bckey=AQ~~,AAAApYJi_Ck~,GxhXCegT1Dp39ilhXuxMJxasUhVNZiil&bctid=2878862109001',
+            u'info_dict': {
+                u'id': u'2878862109001',
+                u'ext': u'mp4',
+                u'title': u'Lost in Motion II',
+                u'description': u'md5:363109c02998fee92ec02211bd8000df',
+                u'uploader': u'National Ballet of Canada',
+            },
+        },
    ]

    @classmethod
@@ -118,17 +130,21 @@ class BrightcoveIE(InfoExtractor):

        videoPlayer = query.get('@videoPlayer')
        if videoPlayer:
-            return self._get_video_info(videoPlayer[0], query_str, query)
+            return self._get_video_info(videoPlayer[0], query_str, query,
+                # We set the original url as the default 'Referer' header
+                referer=url)
        else:
            player_key = query['playerKey']
            return self._get_playlist_info(player_key[0])

-    def _get_video_info(self, video_id, query_str, query):
+    def _get_video_info(self, video_id, query_str, query, referer=None):
        request_url = self._FEDERATED_URL_TEMPLATE % query_str
        req = compat_urllib_request.Request(request_url)
        linkBase = query.get('linkBaseURL')
        if linkBase is not None:
-            req.add_header('Referer', linkBase[0])
+            referer = linkBase[0]
+        if referer is not None:
+            req.add_header('Referer', referer)
        webpage = self._download_webpage(req, video_id)

        self.report_extraction(video_id)
--- a/youtube_dl/extractor/cbs.py
+++ b/youtube_dl/extractor/cbs.py
@@ -0,0 +1,32 @@
+import re
+
+from .common import InfoExtractor
+
+
+class CBSIE(InfoExtractor):
+    """Resolve cbs.com show video pages to a "theplatform:<id>" URL result."""
+
+    _VALID_URL = r'https?://(?:www\.)?cbs\.com/shows/[^/]+/video/(?P<id>[^/]+)/.*'
+
+    _TEST = {
+        u'url': u'http://www.cbs.com/shows/garth-brooks/video/_u7W953k6la293J7EPTd9oHkSPs6Xn6_/connect-chat-feat-garth-brooks/',
+        u'file': u'4JUVEwq3wUT7.flv',
+        u'info_dict': {
+            u'title': u'Connect Chat feat. Garth Brooks',
+            u'description': u'Connect with country music singer Garth Brooks, as he chats with fans on Wednesday November 27, 2013. Be sure to tune in to Garth Brooks: Live from Las Vegas, Friday November 29, at 9/8c on CBS!',
+            u'duration': 1495,
+        },
+        u'params': {
+            # rtmp download
+            u'skip_download': True,
+        },
+    }
+
+    def _real_extract(self, url):
+        """Find the real video id in the page and hand it over as a URL result."""
+        page_id = re.match(self._VALID_URL, url).group('id')
+        html = self._download_webpage(url, page_id)
+        # The id is read from a "video.settings.pid = '...';" assignment
+        pid_pattern = r"video\.settings\.pid\s*=\s*'([^']+)';"
+        platform_id = self._search_regex(pid_pattern, html, u'real video ID')
+        return self.url_result(u'theplatform:%s' % platform_id)
--- a/youtube_dl/extractor/channel9.py
+++ b/youtube_dl/extractor/channel9.py
@@ -0,0 +1,293 @@
+# encoding: utf-8
+
+import re
+
+from .common import InfoExtractor
+from ..utils import ExtractorError
+
+class Channel9IE(InfoExtractor):
+    '''
+    Common extractor for channel9.msdn.com.
+
+    The type of the provided URL (video or playlist) is determined from the
+    Search.PageType meta tag of the web page rather than from the URL itself,
+    since the URL alone does not always reveal it.
+    '''
+    IE_DESC = u'Channel 9'
+    IE_NAME = u'channel9'
+    # The content path is captured without a trailing slash, query string or
+    # fragment, since it is reused as the video id and to build the RSS URL.
+    _VALID_URL = r'^https?://(?:www\.)?channel9\.msdn\.com/(?P<contentpath>.+?)/?(?:[?#]|$)'
+
+    _TESTS = [
+        {
+            u'url': u'http://channel9.msdn.com/Events/TechEd/Australia/2013/KOS002',
+            u'file': u'Events_TechEd_Australia_2013_KOS002.mp4',
+            u'md5': u'bbd75296ba47916b754e73c3a4bbdf10',
+            u'info_dict': {
+                u'title': u'Developer Kick-Off Session: Stuff We Love',
+                u'description': u'md5:c08d72240b7c87fcecafe2692f80e35f',
+                u'duration': 4576,
+                u'thumbnail': u'http://media.ch9.ms/ch9/9d51/03902f2d-fc97-4d3c-b195-0bfe15a19d51/KOS002_220.jpg',
+                u'session_code': u'KOS002',
+                u'session_day': u'Day 1',
+                u'session_room': u'Arena 1A',
+                u'session_speakers': [ u'Ed Blankenship', u'Andrew Coates', u'Brady Gaster', u'Patrick Klug', u'Mads Kristensen' ],
+            },
+        },
+        {
+            u'url': u'http://channel9.msdn.com/posts/Self-service-BI-with-Power-BI-nuclear-testing',
+            u'file': u'posts_Self-service-BI-with-Power-BI-nuclear-testing.mp4',
+            u'md5': u'b43ee4529d111bc37ba7ee4f34813e68',
+            u'info_dict': {
+                u'title': u'Self-service BI with Power BI - nuclear testing',
+                u'description': u'md5:d1e6ecaafa7fb52a2cacdf9599829f5b',
+                u'duration': 1540,
+                u'thumbnail': u'http://media.ch9.ms/ch9/87e1/0300391f-a455-4c72-bec3-4422f19287e1/selfservicenuk_512.jpg',
+                u'authors': [ u'Mike Wilmot' ],
+            },
+        }
+    ]
+
+    _RSS_URL = 'http://channel9.msdn.com/%s/RSS'
+
+    # Sorted by quality
+    _known_formats = ['MP3', 'MP4', 'Mid Quality WMV', 'Mid Quality MP4', 'High Quality WMV', 'High Quality MP4']
+
+    def _restore_bytes(self, formatted_size):
+        '''Convert a human readable size such as "1.5 MB" to a byte count.
+
+        Returns 0 if the text is empty, unparsable or uses an unknown unit.
+        '''
+        if not formatted_size:
+            return 0
+        m = re.match(r'^(?P<size>\d+(?:\.\d+)?)\s+(?P<units>[a-zA-Z]+)', formatted_size)
+        if not m:
+            return 0
+        units = m.group('units')
+        try:
+            # The position in the list is the power of 1024 to multiply by
+            exponent = [u'B', u'KB', u'MB', u'GB', u'TB', u'PB', u'EB', u'ZB', u'YB'].index(units.upper())
+        except ValueError:
+            return 0
+        size = float(m.group('size'))
+        return int(size * (1024 ** exponent))
+
+    def _formats_from_html(self, html):
+        '''Return the known download formats found in html, ordered like _known_formats.'''
+        FORMAT_REGEX = r'''(?x)
+            <a\s+href="(?P<url>[^"]+)">(?P<quality>[^<]+)</a>\s*
+            <span\s+class="usage">\((?P<note>[^\)]+)\)</span>\s*
+            (?:<div\s+class="popup\s+rounded">\s*
+            <h3>File\s+size</h3>\s*(?P<filesize>.*?)\s*
+            </div>)?                                                # File size part may be missing
+        '''
+        # Extract known formats
+        formats = [{
+            'url': x.group('url'),
+            'format_id': x.group('quality'),
+            'format_note': x.group('note'),
+            'format': '%s (%s)' % (x.group('quality'), x.group('note')),
+            'filesize': self._restore_bytes(x.group('filesize')),  # File size is approximate
+        } for x in re.finditer(FORMAT_REGEX, html) if x.group('quality') in self._known_formats]
+        # Sort according to known formats list
+        formats.sort(key=lambda fmt: self._known_formats.index(fmt['format_id']))
+        return formats
+
+    def _extract_title(self, html):
+        '''Return the "title" meta value, else the og title without its " (Channel 9)" suffix.'''
+        title = self._html_search_meta(u'title', html, u'title')
+        if title is None:
+            title = self._og_search_title(html)
+            TITLE_SUFFIX = u' (Channel 9)'
+            if title is not None and title.endswith(TITLE_SUFFIX):
+                title = title[:-len(TITLE_SUFFIX)]
+        return title
+
+    def _extract_description(self, html):
+        '''Return the contents of the "entry-body" div, else the "description" meta value.'''
+        DESCRIPTION_REGEX = r'''(?sx)
+            <div\s+class="entry-content">\s*
+            <div\s+id="entry-body">\s*
+            (?P<description>.+?)\s*
+            </div>\s*
+            </div>
+        '''
+        m = re.search(DESCRIPTION_REGEX, html)
+        if m is not None:
+            return m.group('description')
+        return self._html_search_meta(u'description', html, u'description')
+
+    def _extract_duration(self, html):
+        '''Return the data-video_duration attribute (HH:MM:SS) in seconds, or None.'''
+        m = re.search(r'data-video_duration="(?P<hours>\d{2}):(?P<minutes>\d{2}):(?P<seconds>\d{2})"', html)
+        return ((int(m.group('hours')) * 60 * 60) + (int(m.group('minutes')) * 60) + int(m.group('seconds'))) if m else None
+
+    def _extract_slides(self, html):
+        '''Return the URL of the "Slides" link, or None.'''
+        m = re.search(r'<a href="(?P<slidesurl>[^"]+)" class="slides">Slides</a>', html)
+        return m.group('slidesurl') if m is not None else None
+
+    def _extract_zip(self, html):
+        '''Return the URL of the "Zip" link, or None.'''
+        m = re.search(r'<a href="(?P<zipurl>[^"]+)" class="zip">Zip</a>', html)
+        return m.group('zipurl') if m is not None else None
+
+    def _extract_avg_rating(self, html):
+        '''Return the average rating as a float, or 0 if it is not found.'''
+        m = re.search(r'<p class="avg-rating">Avg Rating: <span>(?P<avgrating>[^<]+)</span></p>', html)
+        return float(m.group('avgrating')) if m is not None else 0
+
+    def _extract_rating_count(self, html):
+        '''Return the number of ratings, or 0 if it is not found.'''
+        m = re.search(r'<div class="rating-count">\((?P<ratingcount>[^<]+)\)</div>', html)
+        return self._fix_count(m.group('ratingcount')) if m is not None else 0
+
+    def _extract_view_count(self, html):
+        '''Return the number of views, or 0 if it is not found.'''
+        m = re.search(r'<li class="views">\s*<span class="count">(?P<viewcount>[^<]+)</span> Views\s*</li>', html)
+        return self._fix_count(m.group('viewcount')) if m is not None else 0
+
+    def _extract_comment_count(self, html):
+        '''Return the number of comments, or 0 if it is not found.'''
+        m = re.search(r'<li class="comments">\s*<a href="#comments">\s*<span class="count">(?P<commentcount>[^<]+)</span> Comments\s*</a>\s*</li>', html)
+        return self._fix_count(m.group('commentcount')) if m is not None else 0
+
+    def _fix_count(self, count):
+        '''Remove the commas from a count and convert it to int; None stays None.'''
+        return int(str(count).replace(',', '')) if count is not None else None
+
+    def _extract_authors(self, html):
+        '''Return the names linked to /Niners/ in the first "author" item, or None.'''
+        m = re.search(r'(?s)<li class="author">(.*?)</li>', html)
+        if m is None:
+            return None
+        return re.findall(r'<a href="/Niners/[^"]+">([^<]+)</a>', m.group(1))
+
+    def _extract_session_code(self, html):
+        '''Return the text of the "code" list item, or None.'''
+        m = re.search(r'<li class="code">\s*(?P<code>.+?)\s*</li>', html)
+        return m.group('code') if m is not None else None
+
+    def _extract_session_day(self, html):
+        '''Return the link text of the "day" list item, or None.'''
+        m = re.search(r'<li class="day">\s*<a href="/Events/[^"]+">(?P<day>[^<]+)</a>\s*</li>', html)
+        return m.group('day') if m is not None else None
+
+    def _extract_session_room(self, html):
+        '''Return the text of the "room" list item, or None.'''
+        m = re.search(r'<li class="room">\s*(?P<room>.+?)\s*</li>', html)
+        return m.group('room') if m is not None else None
+
+    def _extract_session_speakers(self, html):
+        '''Return the texts of all /Events/Speakers/ links (possibly an empty list).'''
+        return re.findall(r'<a href="/Events/Speakers/[^"]+">([^<]+)</a>', html)
+
+    def _extract_content(self, html, content_path):
+        '''Build one info dictionary per downloadable item of a page.
+
+        Slides, zip archive and recording each become a separate entry
+        that shares the same metadata. Returns None, after reporting a
+        warning, when none of them is available.
+        '''
+        # Look for downloadable content
+        formats = self._formats_from_html(html)
+        slides = self._extract_slides(html)
+        zip_ = self._extract_zip(html)
+
+        # Nothing to download
+        if not formats and slides is None and zip_ is None:
+            self._downloader.report_warning(u'None of recording, slides or zip are available for %s' % content_path)
+            return
+
+        # Extract meta
+        title = self._extract_title(html)
+        common = {
+            '_type': 'video',
+            'id': content_path,
+            'description': self._extract_description(html),
+            'thumbnail': self._og_search_thumbnail(html),
+            'duration': self._extract_duration(html),
+            'avg_rating': self._extract_avg_rating(html),
+            'rating_count': self._extract_rating_count(html),
+            'view_count': self._extract_view_count(html),
+            'comment_count': self._extract_comment_count(html),
+        }
+
+        result = []
+
+        if slides is not None:
+            d = common.copy()
+            d.update({'title': title + '-Slides', 'url': slides})
+            result.append(d)
+
+        if zip_ is not None:
+            d = common.copy()
+            d.update({'title': title + '-Zip', 'url': zip_})
+            result.append(d)
+
+        if formats:
+            d = common.copy()
+            d.update({'title': title, 'formats': formats})
+            result.append(d)
+
+        return result
+
+    def _extract_entry_item(self, html, content_path):
+        '''Extract the contents of an Entry.Item page and attach the authors to each.'''
+        contents = self._extract_content(html, content_path)
+        if contents is None:
+            return contents
+
+        authors = self._extract_authors(html)
+
+        for content in contents:
+            content['authors'] = authors
+
+        return contents
+
+    def _extract_session(self, html, content_path):
+        '''Extract the contents of a Session page and attach the session metadata to each.'''
+        contents = self._extract_content(html, content_path)
+        if contents is None:
+            return contents
+
+        session_meta = {'session_code': self._extract_session_code(html),
+                        'session_day': self._extract_session_day(html),
+                        'session_room': self._extract_session_room(html),
+                        'session_speakers': self._extract_session_speakers(html),
+                        }
+
+        for content in contents:
+            content.update(session_meta)
+
+        return contents
+
+    def _extract_list(self, content_path):
+        '''Return a playlist made of the item links in the RSS feed of content_path.'''
+        rss = self._download_xml(self._RSS_URL % content_path, content_path, u'Downloading RSS')
+        entries = [self.url_result(session_url.text, 'Channel9')
+                   for session_url in rss.findall('./channel/item/link')]
+        title_text = rss.find('./channel/title').text
+        return self.playlist_result(entries, content_path, title_text)
+
+    def _real_extract(self, url):
+        '''Download the page and dispatch on its Search.PageType meta tag.'''
+        mobj = re.match(self._VALID_URL, url)
+        content_path = mobj.group('contentpath')
+
+        webpage = self._download_webpage(url, content_path, u'Downloading web page')
+
+        page_type_m = re.search(r'<meta name="Search.PageType" content="(?P<pagetype>[^"]+)"/>', webpage)
+        if page_type_m is None:
+            raise ExtractorError(u'Search.PageType not found, don\'t know how to process this page', expected=True)
+
+        page_type = page_type_m.group('pagetype')
+        if page_type == 'List':         # List page, may contain list of 'item'-like objects
+            return self._extract_list(content_path)
+        elif page_type == 'Entry.Item': # Any 'item'-like page, may contain downloadable content
+            return self._extract_entry_item(webpage, content_path)
+        elif page_type == 'Session':    # Event session page, may contain downloadable content
+            return self._extract_session(webpage, content_path)
+        else:
+            raise ExtractorError(u'Unexpected Search.PageType %s' % page_type, expected=True)
--- a/youtube_dl/extractor/clipsyndicate.py
+++ b/youtube_dl/extractor/clipsyndicate.py
@@ -1,9 +1,9 @@
 import re
-import xml.etree.ElementTree

 from .common import InfoExtractor
 from ..utils import (
    find_xpath_attr,
+    fix_xml_all_ampersand,
 )


@@ -30,12 +30,10 @@ class ClipsyndicateIE(InfoExtractor):
        # it includes a required token
        flvars = self._search_regex(r'flvars: "(.*?)"', js_player, u'flvars')

-        playlist_page = self._download_webpage(
+        pdoc = self._download_xml(
            'http://eplayer.clipsyndicate.com/osmf/playlist?%s' % flvars,
-            video_id, u'Downloading video info') 
-        # Fix broken xml
-        playlist_page = re.sub('&', '&amp;', playlist_page)
-        pdoc = xml.etree.ElementTree.fromstring(playlist_page.encode('utf-8'))
+            video_id, u'Downloading video info',
+            transform_source=fix_xml_all_ampersand) 

        track_doc = pdoc.find('trackList/track')
        def find_param(name):
--- a/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@@ -34,15 +34,39 @@ class InfoExtractor(object):
    The dictionaries must include the following fields:

    id:             Video identifier.
-    url:            Final video URL.
    title:          Video title, unescaped.
-    ext:            Video filename extension.

-    Instead of url and ext, formats can also specified.
+    Additionally, it must contain either a formats entry or url and ext:
+
+    formats:        A list of dictionaries for each format available, it must
+                    be ordered from worst to best quality. Potential fields:
+                    * url        Mandatory. The URL of the video file
+                    * ext        Will be calculated from url if missing
+                    * format     A human-readable description of the format
+                                 ("mp4 container with h264/opus").
+                                 Calculated from the format_id, width, height,
+                                 and format_note fields if missing.
+                    * format_id  A short description of the format
+                                 ("mp4_h264_opus" or "19")
+                    * format_note Additional info about the format
+                                 ("3D" or "DASH video")
+                    * width      Width of the video, if known
+                    * height     Height of the video, if known
+                    * abr        Average audio bitrate in KBit/s
+                    * acodec     Name of the audio codec in use
+                    * vbr        Average video bitrate in KBit/s
+                    * vcodec     Name of the video codec in use
+                    * filesize   The number of bytes, if known in advance
+                    * player_url SWF Player URL (used for rtmpdump).
+    url:            Final video URL.
+    ext:            Video filename extension.
+    format:         The video format, defaults to ext (used for --get-format)
+    player_url:     SWF Player URL (used for rtmpdump).
+    urlhandle:      [internal] The urlHandle to be used to download the file,
+                    like returned by urllib.request.urlopen

    The following fields are optional:

-    format:         The video format, defaults to ext (used for --get-format)
    thumbnails:     A list of dictionaries (with the entries "resolution" and
                    "url") for the varying thumbnails
    thumbnail:      Full URL to a video thumbnail image.
@@ -51,35 +75,14 @@ class InfoExtractor(object):
    upload_date:    Video upload date (YYYYMMDD).
    uploader_id:    Nickname or id of the video uploader.
    location:       Physical location of the video.
-    player_url:     SWF Player URL (used for rtmpdump).
    subtitles:      The subtitle file contents as a dictionary in the format
                    {language: subtitles}.
+    duration:       Length of the video in seconds, as an integer.
    view_count:     How many users have watched the video on the platform.
    like_count:     Number of positive ratings of the video
    dislike_count:  Number of negative ratings of the video
    comment_count:  Number of comments on the video
-    urlhandle:      [internal] The urlHandle to be used to download the file,
-                    like returned by urllib.request.urlopen
    age_limit:      Age restriction for the video, as an integer (years)
-    formats:        A list of dictionaries for each format available, it must
-                    be ordered from worst to best quality. Potential fields:
-                    * url       Mandatory. The URL of the video file
-                    * ext       Will be calculated from url if missing
-                    * format    A human-readable description of the format
-                                ("mp4 container with h264/opus").
-                                Calculated from the format_id, width, height.
-                                and format_note fields if missing.
-                    * format_id A short description of the format
-                                ("mp4_h264_opus" or "19")
-                    * format_note Additional info about the format
-                                ("3D" or "DASH video")
-                    * width     Width of the video, if known
-                    * height    Height of the video, if known
-                    * abr       Average audio bitrate in KBit/s
-                    * acodec    Name of the audio codec in use
-                    * vbr       Average video bitrate in KBit/s
-                    * vcodec    Name of the video codec in use
-                    * filesize  The number of bytes, if known in advance
    webpage_url:    The url to the video webpage, if given to youtube-dl it
                    should allow to get the same result again. (It will be set
                    by YoutubeDL if it's missing)
@@ -230,9 +233,12 @@ class InfoExtractor(object):
            return content

    def _download_xml(self, url_or_request, video_id,
-                      note=u'Downloading XML', errnote=u'Unable to download XML'):
+                      note=u'Downloading XML', errnote=u'Unable to download XML',
+                      transform_source=None):
        """Return the xml as an xml.etree.ElementTree.Element"""
        xml_string = self._download_webpage(url_or_request, video_id, note, errnote)
+        if transform_source:
+            xml_string = transform_source(xml_string)
        return xml.etree.ElementTree.fromstring(xml_string.encode('utf-8'))

    def to_screen(self, msg):
--- a/youtube_dl/extractor/dailymotion.py
+++ b/youtube_dl/extractor/dailymotion.py
@@ -28,7 +28,7 @@ class DailymotionBaseInfoExtractor(InfoExtractor):
 class DailymotionIE(DailymotionBaseInfoExtractor, SubtitlesInfoExtractor):
    """Information Extractor for Dailymotion"""

-    _VALID_URL = r'(?i)(?:https?://)?(?:www\.)?dailymotion\.[a-z]{2,3}/(?:embed/)?video/([^/]+)'
+    _VALID_URL = r'(?i)(?:https?://)?(?:(www|touch)\.)?dailymotion\.[a-z]{2,3}/(?:(embed|#)/)?video/(?P<id>[^/?_]+)'
    IE_NAME = u'dailymotion'

    _FORMATS = [
@@ -81,7 +81,7 @@ class DailymotionIE(DailymotionBaseInfoExtractor, SubtitlesInfoExtractor):
        # Extract id and simplified title from URL
        mobj = re.match(self._VALID_URL, url)

-        video_id = mobj.group(1).split('_')[0].split('?')[0]
+        video_id = mobj.group('id')

        url = 'http://www.dailymotion.com/video/%s' % video_id

@@ -101,10 +101,6 @@ class DailymotionIE(DailymotionBaseInfoExtractor, SubtitlesInfoExtractor):
            self.to_screen(u'Vevo video detected: %s' % vevo_id)
            return self.url_result(u'vevo:%s' % vevo_id, ie='Vevo')

-        video_uploader = self._search_regex([r'(?im)<span class="owner[^\"]+?">[^<]+?<a [^>]+?>([^<]+?)</a>',
-                                             # Looking for official user
-                                             r'<(?:span|a) .*?rel="author".*?>([^<]+?)</'],
-                                            webpage, 'video uploader', fatal=False)
        age_limit = self._rta_search(webpage)

        video_upload_date = None
@@ -147,13 +143,15 @@ class DailymotionIE(DailymotionBaseInfoExtractor, SubtitlesInfoExtractor):
            self._list_available_subtitles(video_id, webpage)
            return

-        view_count = str_to_int(self._search_regex(
-            r'video_views_value[^>]+>([\d\.,]+)<', webpage, u'view count'))
+        view_count = self._search_regex(
+            r'video_views_count[^>]+>\s+([\d\.,]+)', webpage, u'view count', fatal=False)
+        if view_count is not None:
+            view_count = str_to_int(view_count)

        return {
            'id':       video_id,
            'formats': formats,
-            'uploader': video_uploader,
+            'uploader': info['owner_screenname'],
            'upload_date':  video_upload_date,
            'title':    self._og_search_title(webpage),
            'subtitles':    video_subtitles,
--- a/youtube_dl/extractor/daum.py
+++ b/youtube_dl/extractor/daum.py
@@ -9,7 +9,7 @@ from ..utils import (


 class DaumIE(InfoExtractor):
-    _VALID_URL = r'https?://tvpot\.daum\.net/.*?clipid=(?P<id>\d+)'
+    _VALID_URL = r'https?://(?:m\.)?tvpot\.daum\.net/.*?clipid=(?P<id>\d+)'
    IE_NAME = u'daum.net'

    _TEST = {
--- a/youtube_dl/extractor/facebook.py
+++ b/youtube_dl/extractor/facebook.py
@@ -17,7 +17,7 @@ from ..utils import (
 class FacebookIE(InfoExtractor):
    """Information Extractor for Facebook"""

-    _VALID_URL = r'^(?:https?://)?(?:\w+\.)?facebook\.com/(?:video/video|photo)\.php\?(?:.*?)v=(?P<ID>\d+)(?:.*)'
+    _VALID_URL = r'^(?:https?://)?(?:\w+\.)?facebook\.com/(?:[^#?]*#!/)?(?:video/video|photo)\.php\?(?:.*?)v=(?P<ID>\d+)(?:.*)'
    _LOGIN_URL = 'https://www.facebook.com/login.php?next=http%3A%2F%2Ffacebook.com%2Fhome.php&login_attempt=1'
    _CHECKPOINT_URL = 'https://www.facebook.com/checkpoint/?next=http%3A%2F%2Ffacebook.com%2Fhome.php&_fb_noscript=1'
    _NETRC_MACHINE = 'facebook'
@@ -27,7 +27,7 @@ class FacebookIE(InfoExtractor):
        u'file': u'120708114770723.mp4',
        u'md5': u'48975a41ccc4b7a581abd68651c1a5a8',
        u'info_dict': {
-            u"duration": 279, 
+            u"duration": 279,
            u"title": u"PEOPLE ARE AWESOME 2013"
        }
    }
--- a/youtube_dl/extractor/generic.py
+++ b/youtube_dl/extractor/generic.py
@@ -222,6 +222,18 @@ class GenericIE(InfoExtractor):
                'id': video_id,
            }

+        # Look for embedded blip.tv player
+        mobj = re.search(r'<meta\s[^>]*https?://api.blip.tv/\w+/redirect/\w+/(\d+)', webpage)
+        if mobj:
+            return self.url_result('http://blip.tv/seo/-'+mobj.group(1), 'BlipTV')
+        mobj = re.search(r'<(?:iframe|embed|object)\s[^>]*https?://(?:\w+\.)?blip.tv/(?:play/|api\.swf#)([a-zA-Z0-9]+)', webpage)
+        if mobj:
+            player_url = 'http://blip.tv/play/%s.x?p=1' % mobj.group(1)
+            player_page = self._download_webpage(player_url, mobj.group(1))
+            blip_video_id = self._search_regex(r'data-episode-id="(\d+)', player_page, u'blip_video_id', fatal=False)
+            if blip_video_id:
+                return self.url_result('http://blip.tv/seo/-'+blip_video_id, 'BlipTV')
+
        # Look for Bandcamp pages with custom domain
        mobj = re.search(r'<meta property="og:url"[^>]*?content="(.*?bandcamp\.com.*?)"', webpage)
        if mobj is not None:
--- a/youtube_dl/extractor/ign.py
+++ b/youtube_dl/extractor/ign.py
@@ -44,7 +44,7 @@ class IGNIE(InfoExtractor):
                {
                    u'file': u'638672ee848ae4ff108df2a296418ee2.mp4',
                    u'info_dict': {
-                        u'title': u'GTA 5\'s Twisted Beauty in Super Slow Motion',
+                        u'title': u'26 Twisted Moments from GTA 5 in Slow Motion',
                        u'description': u'The twisted beauty of GTA 5 in stunning slow motion.',
                    },
                },
--- a/youtube_dl/extractor/mdr.py
+++ b/youtube_dl/extractor/mdr.py
@@ -0,0 +1,88 @@
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+    ExtractorError,
+)
+
+
+class MDRIE(InfoExtractor):
+    """Extractor for video and audio pages of the mdr.de Mediathek."""
+
+    _VALID_URL = r'^(?P<domain>(?:https?://)?(?:www\.)?mdr\.de)/mediathek/(?:.*)/(?P<type>video|audio)(?P<video_id>[^/_]+)_.*'
+
+    _TESTS = [{
+        u'url': u'http://www.mdr.de/mediathek/themen/nachrichten/video165624_zc-c5c7de76_zs-3795826d.html',
+        u'file': u'165624.mp4',
+        u'md5': u'ae785f36ecbf2f19b42edf1bc9c85815',
+        u'info_dict': {
+            u"title": u"MDR aktuell Eins30 09.12.2013, 22:48 Uhr"
+        },
+    },
+    {
+        u'url': u'http://www.mdr.de/mediathek/radio/mdr1-radio-sachsen/audio718370_zc-67b21197_zs-1b9b2483.html',
+        u'file': u'718370.mp3',
+        u'md5': u'a9d21345a234c7b45dee612f290fd8d7',
+        u'info_dict': {
+            u"title": u"MDR 1 RADIO SACHSEN 10.12.2013, 05:00 Uhr"
+        },
+    }]
+
+    def _real_extract(self, url):
+        """Read title and XML location from the page, then list the assets.
+
+        Raises ExtractorError if no asset offers a progressive download URL.
+        """
+        m = re.match(self._VALID_URL, url)
+        video_id = m.group('video_id')
+        domain = m.group('domain')
+
+        # determine title and media streams from webpage
+        html = self._download_webpage(url, video_id)
+
+        title = self._html_search_regex(r'<h2>(.*?)</h2>', html, u'title')
+        xmlurl = self._search_regex(
+            r'(/mediathek/(?:.+)/(?:video|audio)[0-9]+-avCustom.xml)', html, u'XML URL')
+
+        doc = self._download_xml(domain + xmlurl, video_id)
+        formats = []
+        for a in doc.findall('./assets/asset'):
+            url_el = a.find('.//progressiveDownloadUrl')
+            if url_el is None:
+                continue
+            abr = int(a.find('bitrateAudio').text) // 1000
+            media_type = a.find('mediaType').text
+            fmt = {
+                'abr': abr,
+                'filesize': int(a.find('fileSize').text),
+                'url': url_el.text,
+            }
+
+            vbr_el = a.find('bitrateVideo')
+            if vbr_el is None:
+                # No video bitrate element: treat the asset as audio-only
+                fmt.update({
+                    'vcodec': 'none',
+                    'format_id': u'%s-%d' % (media_type, abr),
+                })
+            else:
+                vbr = int(vbr_el.text) // 1000
+                fmt.update({
+                    'vbr': vbr,
+                    'width': int(a.find('frameWidth').text),
+                    'height': int(a.find('frameHeight').text),
+                    'format_id': u'%s-%d' % (media_type, vbr),
+                })
+            formats.append(fmt)
+        if not formats:
+            raise ExtractorError(u'Could not find any valid formats')
+        # Audio-only entries have no 'vbr'; rank them as -1 (below any video
+        # bitrate) so the key never mixes None with ints, which Python 3
+        # refuses to compare.
+        formats.sort(key=lambda f: (f.get('vbr', -1), f['abr']))
+
+        return {
+            'id': video_id,
+            'title': title,
+            'formats': formats,
+        }
--- a/youtube_dl/extractor/metacritic.py
+++ b/youtube_dl/extractor/metacritic.py
@@ -1,8 +1,10 @@
 import re
-import xml.etree.ElementTree
 import operator

 from .common import InfoExtractor
+from ..utils import (
+    fix_xml_all_ampersand,
+)


 class MetacriticIE(InfoExtractor):
@@ -23,9 +25,8 @@ class MetacriticIE(InfoExtractor):
        video_id = mobj.group('id')
        webpage = self._download_webpage(url, video_id)
        # The xml is not well formatted, there are raw '&'
-        info_xml = self._download_webpage('http://www.metacritic.com/video_data?video=' + video_id,
-            video_id, u'Downloading info xml').replace('&', '&amp;')
-        info = xml.etree.ElementTree.fromstring(info_xml.encode('utf-8'))
+        info = self._download_xml('http://www.metacritic.com/video_data?video=' + video_id,
+            video_id, u'Downloading info xml', transform_source=fix_xml_all_ampersand)

        clip = next(c for c in info.findall('playList/clip') if c.find('id').text == video_id)
        formats = []
--- a/youtube_dl/extractor/mixcloud.py
+++ b/youtube_dl/extractor/mixcloud.py
@@ -37,6 +37,9 @@ class MixcloudIE(InfoExtractor):

        return None

+    def _get_url(self, template_url):
+        """Return what check_urls reports for the candidates of template_url.
+
+        template_url is %-formatted with each integer from 0 to 29 in turn
+        and the resulting URLs are handed to check_urls lazily.
+        """
+        candidates = (template_url % index for index in range(30))
+        return self.check_urls(candidates)
+
    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)

@@ -52,13 +55,18 @@ class MixcloudIE(InfoExtractor):
        preview_url = self._search_regex(r'data-preview-url="(.+?)"', webpage, u'preview url')
        song_url = preview_url.replace('/previews/', '/cloudcasts/originals/')
        template_url = re.sub(r'(stream\d*)', 'stream%d', song_url)
-        final_song_url = self.check_urls(template_url % i for i in range(30))
+        final_song_url = self._get_url(template_url)
+        if final_song_url is None:
+            self.to_screen('Trying with m4a extension')
+            template_url = template_url.replace('.mp3', '.m4a').replace('originals/', 'm4a/64/')
+            final_song_url = self._get_url(template_url)
+        if final_song_url is None:
+            raise ExtractorError(u'Unable to extract track url')

        return {
            'id': track_id,
            'title': info['name'],
            'url': final_song_url,
-            'ext': 'mp3',
            'description': info.get('description'),
            'thumbnail': info['pictures'].get('extra_large'),
            'uploader': info['user']['name'],
--- a/youtube_dl/extractor/mtv.py
+++ b/youtube_dl/extractor/mtv.py
@@ -82,8 +82,13 @@ class MTVServicesInfoExtractor(InfoExtractor):
    def _get_videos_info(self, uri):
        video_id = self._id_from_uri(uri)
        data = compat_urllib_parse.urlencode({'uri': uri})
-        idoc = self._download_xml(self._FEED_URL +'?' + data, video_id,
-                                         u'Downloading info')
+
+        def fix_ampersand(s):
+            """ Fix unencoded ampersand in XML """
+            return s.replace(u'& ', '&amp; ')
+        idoc = self._download_xml(
+            self._FEED_URL + '?' + data, video_id,
+            u'Downloading info', transform_source=fix_ampersand)
        return [self._get_video_info(item) for item in idoc.findall('.//item')]


--- a/youtube_dl/extractor/naver.py
+++ b/youtube_dl/extractor/naver.py
@@ -9,7 +9,7 @@ from ..utils import (


 class NaverIE(InfoExtractor):
-    _VALID_URL = r'https?://tvcast\.naver\.com/v/(?P<id>\d+)'
+    _VALID_URL = r'https?://(?:m\.)?tvcast\.naver\.com/v/(?P<id>\d+)'

    _TEST = {
        u'url': u'http://tvcast.naver.com/v/81652',
--- a/youtube_dl/extractor/ndtv.py
+++ b/youtube_dl/extractor/ndtv.py
@@ -0,0 +1,64 @@
+import re
+
+from .common import InfoExtractor
+from ..utils import month_by_name
+
+
+class NDTVIE(InfoExtractor):
+    """Information extractor for ndtv.com video player pages."""
+    _VALID_URL = r'^https?://(?:www\.)?ndtv\.com/video/player/[^/]*/[^/]*/(?P<id>[a-z0-9]+)'
+
+    _TEST = {
+        u"url": u"http://www.ndtv.com/video/player/news/ndtv-exclusive-don-t-need-character-certificate-from-rahul-gandhi-says-arvind-kejriwal/300710",
+        u"file": u"300710.mp4",
+        u"md5": u"39f992dbe5fb531c395d8bbedb1e5e88",
+        u"info_dict": {
+            u"title": u"NDTV exclusive: Don't need character certificate from Rahul Gandhi, says Arvind Kejriwal",
+            u"description": u"In an exclusive interview to NDTV, Aam Aadmi Party's Arvind Kejriwal says it makes no difference to him that Rahul Gandhi said the Congress needs to learn from his party.",
+            u"upload_date": u"20131208",
+            u"duration": 1327,
+            u"thumbnail": u"http://i.ndtvimg.com/video/images/vod/medium/2013-12/big_300710_1386518307.jpg",
+        },
+    }
+
+    def _real_extract(self, url):
+        """Build the info dict for a single NDTV video page.
+
+        The media URL is derived from the ``__filename`` value embedded in
+        the page; duration and upload date are optional and left as None
+        when the page does not provide them.
+        """
+        mobj = re.match(self._VALID_URL, url)
+        video_id = mobj.group('id')
+
+        webpage = self._download_webpage(url, video_id)
+
+        filename = self._search_regex(
+            r"__filename='([^']+)'", webpage, u'video filename')
+        video_url = (u'http://bitcast-b.bitgravity.com/ndtvod/23372/ndtv/%s' %
+                     filename)
+
+        # Keep the duration in its own variable; a chained assignment here
+        # used to overwrite filename with the duration string.
+        duration_str = self._search_regex(
+            r"__duration='([^']+)'", webpage, u'duration', fatal=False)
+        duration = None if duration_str is None else int(duration_str)
+
+        date_m = re.search(r'''(?x)
+            <p\s+class="vod_dateline">\s*
+                Published\s+On:\s*
+                (?P<monthname>[A-Za-z]+)\s+(?P<day>[0-9]+),\s*(?P<year>[0-9]+)
+            ''', webpage)
+        # The upload date is optional metadata: a page without a dateline
+        # yields upload_date None instead of aborting the extraction.
+        upload_date = None
+        if date_m is not None:
+            month = month_by_name(date_m.group('monthname'))
+            if month is not None:
+                upload_date = '%s%02d%02d' % (
+                    date_m.group('year'), month, int(date_m.group('day')))
+
+        description = self._og_search_description(webpage)
+        READ_MORE = u' (Read more)'
+        # NOTE(review): the None guard assumes the og helper can return None
+        # when the property is missing - verify against common.py
+        if description is not None and description.endswith(READ_MORE):
+            description = description[:-len(READ_MORE)]
+
+        return {
+            'id': video_id,
+            'url': video_url,
+            'title': self._og_search_title(webpage),
+            'description': description,
+            'thumbnail': self._og_search_thumbnail(webpage),
+            'duration': duration,
+            'upload_date': upload_date,
+        }
--- a/youtube_dl/extractor/pornhd.py
+++ b/youtube_dl/extractor/pornhd.py
@@ -0,0 +1,38 @@
+import re
+
+from .common import InfoExtractor
+from ..utils import compat_urllib_parse
+
+
+class PornHdIE(InfoExtractor):
+    """Information extractor for pornhd.com video pages."""
+    _VALID_URL = r'(?:http://)?(?:www\.)?pornhd\.com/videos/(?P<video_id>[0-9]+)/(?P<video_title>.+)'
+    _TEST = {
+        u'url': u'http://www.pornhd.com/videos/1962/sierra-day-gets-his-cum-all-over-herself-hd-porn-video',
+        u'file': u'1962.flv',
+        u'md5': u'35272469887dca97abd30abecc6cdf75',
+        u'info_dict': {
+            u"title": u"sierra-day-gets-his-cum-all-over-herself-hd-porn-video",
+            u"age_limit": 18,
+        }
+    }
+
+    def _real_extract(self, url):
+        """Build the info dict for one video page.
+
+        Id and title are taken from the URL itself; the media URL is read
+        from the '&hd=' value found in the downloaded page.
+        """
+        url_match = re.match(self._VALID_URL, url)
+        video_id, video_title = url_match.group('video_id', 'video_title')
+
+        page = self._download_webpage(url, video_id)
+
+        # The value is unquoted before use as the download URL
+        quoted_url = self._html_search_regex(
+            r'&hd=(http.+?)&', page, u'video URL')
+
+        return {
+            'id': video_id,
+            'url': compat_urllib_parse.unquote(quoted_url),
+            'ext': 'flv',
+            'title': video_title,
+            'age_limit': 18,
+        }
--- a/youtube_dl/extractor/pornhub.py
+++ b/youtube_dl/extractor/pornhub.py
@@ -12,7 +12,7 @@ from ..aes import (
 )

 class PornHubIE(InfoExtractor):
-    _VALID_URL = r'^(?:https?://)?(?:www\.)?(?P<url>pornhub\.com/view_video\.php\?viewkey=(?P<videoid>[0-9]+))'
+    _VALID_URL = r'^(?:https?://)?(?:www\.)?(?P<url>pornhub\.com/view_video\.php\?viewkey=(?P<videoid>[0-9a-f]+))'
    _TEST = {
        u'url': u'http://www.pornhub.com/view_video.php?viewkey=648719015',
        u'file': u'648719015.mp4',
--- a/youtube_dl/extractor/rtlnow.py
+++ b/youtube_dl/extractor/rtlnow.py
@@ -7,14 +7,15 @@ from ..utils import (
    ExtractorError,
 )

+
 class RTLnowIE(InfoExtractor):
    """Information Extractor for RTL NOW, RTL2 NOW, RTL NITRO, SUPER RTL NOW, VOX NOW and n-tv NOW"""
-    _VALID_URL = r'(?:http://)?(?P<url>(?P<base_url>rtl-now\.rtl\.de/|rtl2now\.rtl2\.de/|(?:www\.)?voxnow\.de/|(?:www\.)?rtlnitronow\.de/|(?:www\.)?superrtlnow\.de/|(?:www\.)?n-tvnow\.de/)[a-zA-Z0-9-]+/[a-zA-Z0-9-]+\.php\?(?:container_id|film_id)=(?P<video_id>[0-9]+)&player=1(?:&season=[0-9]+)?(?:&.*)?)'
+    _VALID_URL = r'(?:http://)?(?P<url>(?P<base_url>rtl-now\.rtl\.de|rtl2now\.rtl2\.de|(?:www\.)?voxnow\.de|(?:www\.)?rtlnitronow\.de|(?:www\.)?superrtlnow\.de|(?:www\.)?n-tvnow\.de)/+[a-zA-Z0-9-]+/[a-zA-Z0-9-]+\.php\?(?:container_id|film_id)=(?P<video_id>[0-9]+)&player=1(?:&season=[0-9]+)?(?:&.*)?)'
    _TESTS = [{
        u'url': u'http://rtl-now.rtl.de/ahornallee/folge-1.php?film_id=90419&player=1&season=1',
        u'file': u'90419.flv',
        u'info_dict': {
-            u'upload_date': u'20070416', 
+            u'upload_date': u'20070416',
            u'title': u'Ahornallee - Folge 1 - Der Einzug',
            u'description': u'Folge 1 - Der Einzug',
        },
--- a/youtube_dl/extractor/soundcloud.py
+++ b/youtube_dl/extractor/soundcloud.py
@@ -25,7 +25,8 @@ class SoundcloudIE(InfoExtractor):

    _VALID_URL = r'''^(?:https?://)?
                    (?:(?:(?:www\.)?soundcloud\.com/
-                            (?P<uploader>[\w\d-]+)/(?P<title>[\w\d-]+)/?
+                            (?P<uploader>[\w\d-]+)/
+                            (?!sets/)(?P<title>[\w\d-]+)/?
                            (?P<token>[^?]+?)?(?:[?].*)?$)
                       |(?:api\.soundcloud\.com/tracks/(?P<track_id>\d+))
                       |(?P<widget>w\.soundcloud\.com/player/?.*?url=.*)
@@ -72,6 +73,19 @@ class SoundcloudIE(InfoExtractor):
                u'upload_date': u'20131209',
            },
        },
+        # downloadable song
+        {
+            u'url': u'https://soundcloud.com/simgretina/just-your-problem-baby-1',
+            u'md5': u'56a8b69568acaa967b4c49f9d1d52d19',
+            u'info_dict': {
+                u'id': u'105614606',
+                u'ext': u'wav',
+                u'title': u'Just Your Problem Baby (Acapella)',
+                u'description': u'Vocals',
+                u'uploader': u'Sim Gretina',
+                u'upload_date': u'20130815',
+            },
+        },
    ]

    _CLIENT_ID = 'b45b1aa10f1ac2941910a7f0d10f8e28'
@@ -98,7 +112,7 @@ class SoundcloudIE(InfoExtractor):
        thumbnail = info['artwork_url']
        if thumbnail is not None:
            thumbnail = thumbnail.replace('-large', '-t500x500')
-        ext = info.get('original_format', u'mp3')
+        ext = u'mp3'
        result = {
            'id': track_id,
            'uploader': info['user']['username'],
@@ -114,7 +128,7 @@ class SoundcloudIE(InfoExtractor):
                    track_id, self._CLIENT_ID))
            result['formats'] = [{
                'format_id': 'download',
-                'ext': ext,
+                'ext': info.get('original_format', u'mp3'),
                'url': format_url,
                'vcodec': 'none',
            }]
--- a/youtube_dl/extractor/theplatform.py
+++ b/youtube_dl/extractor/theplatform.py
@@ -3,6 +3,7 @@ import json

 from .common import InfoExtractor
 from ..utils import (
+    ExtractorError,
    xpath_with_ns,
 )

@@ -32,6 +33,17 @@ class ThePlatformIE(InfoExtractor):
        smil_url = ('http://link.theplatform.com/s/dJ5BDC/{0}/meta.smil?'
            'format=smil&mbr=true'.format(video_id))
        meta = self._download_xml(smil_url, video_id)
+
+        try:
+            error_msg = next(
+                n.attrib['abstract']
+                for n in meta.findall(_x('.//smil:ref'))
+                if n.attrib.get('title') == u'Geographic Restriction')
+        except StopIteration:
+            pass
+        else:
+            raise ExtractorError(error_msg, expected=True)
+
        info_url = 'http://link.theplatform.com/s/dJ5BDC/{0}?format=preview'.format(video_id)
        info_json = self._download_webpage(info_url, video_id)
        info = json.loads(info_json)
--- a/youtube_dl/extractor/videopremium.py
+++ b/youtube_dl/extractor/videopremium.py
@@ -15,6 +15,7 @@ class VideoPremiumIE(InfoExtractor):
        u'params': {
            u'skip_download': True,
        },
+        u'skip': u'Test file has been deleted.',
    }

    def _real_extract(self, url):
--- a/youtube_dl/extractor/vimeo.py
+++ b/youtube_dl/extractor/vimeo.py
@@ -115,7 +115,7 @@ class VimeoIE(InfoExtractor):
    def _real_initialize(self):
        self._login()

-    def _real_extract(self, url, new_video=True):
+    def _real_extract(self, url):
        url, data = unsmuggle_url(url)
        headers = std_headers
        if data is not None:
@@ -151,8 +151,14 @@ class VimeoIE(InfoExtractor):
                config = json.loads(config_json)
            except RegexNotFoundError:
                # For pro videos or player.vimeo.com urls
-                config = self._search_regex([r' = {config:({.+?}),assets:', r'(?:c|b)=({.+?});'],
-                    webpage, u'info section', flags=re.DOTALL)
+                # We try to find out to which variable is assigned the config dic
+                m_variable_name = re.search('(\w)\.video\.id', webpage)
+                if m_variable_name is not None:
+                    config_re = r'%s=({.+?});' % re.escape(m_variable_name.group(1))
+                else:
+                    config_re = [r' = {config:({.+?}),assets:', r'(?:[abc])=({.+?});']
+                config = self._search_regex(config_re, webpage, u'info section',
+                    flags=re.DOTALL)
                config = json.loads(config)
        except Exception as e:
            if re.search('The creator of this video has not given you permission to embed it on this domain.', webpage):
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@@ -1377,9 +1377,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):

        if 'length_seconds' not in video_info:
            self._downloader.report_warning(u'unable to extract video duration')
-            video_duration = ''
+            video_duration = None
        else:
-            video_duration = compat_urllib_parse.unquote_plus(video_info['length_seconds'][0])
+            video_duration = int(compat_urllib_parse.unquote_plus(video_info['length_seconds'][0]))

        # annotations
        video_annotations = None
--- a/youtube_dl/extractor/zdf.py
+++ b/youtube_dl/extractor/zdf.py
@@ -73,14 +73,14 @@ class ZDFIE(InfoExtractor):
            try:
                proto_pref = -PROTO_ORDER.index(format_m.group('proto'))
            except ValueError:
-                proto_pref = 999
+                proto_pref = -999

            quality = fnode.find('./quality').text
            QUALITY_ORDER = ['veryhigh', '300', 'high', 'med', 'low']
            try:
                quality_pref = -QUALITY_ORDER.index(quality)
            except ValueError:
-                quality_pref = 999
+                quality_pref = -999

            abr = int(fnode.find('./audioBitrate').text) // 1000
            vbr = int(fnode.find('./videoBitrate').text) // 1000
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@@ -1,6 +1,7 @@
 #!/usr/bin/env python
 # -*- coding: utf-8 -*-

+import ctypes
 import datetime
 import email.utils
 import errno
@@ -1045,3 +1046,41 @@ def get_term_width():
    except:
        pass
    return None
+
+
+def month_by_name(name):
+    """ Map an English month name to its number (1-12), None if unknown.
+
+    The comparison is exact (case-sensitive) and independent of the locale.
+    """
+
+    english_names = (
+        u'January', u'February', u'March', u'April', u'May', u'June',
+        u'July', u'August', u'September', u'October', u'November', u'December')
+    for number, month in enumerate(english_names, 1):
+        if month == name:
+            return number
+    return None
+
+
+def fix_xml_all_ampersand(xml_str):
+    """Escape every '&' in xml_str as '&amp;'.
+
+    Ampersands that already start an entity are escaped as well.
+    """
+    pieces = xml_str.split(u'&')
+    return u'&amp;'.join(pieces)
+
+
+def setproctitle(title):
+    """Set the name of the current process via prctl() of libc.so.6.
+
+    title must be a unicode string; it is passed on UTF-8 encoded.
+    Nothing happens when libc.so.6 cannot be loaded or has no prctl symbol.
+    """
+    assert isinstance(title, type(u''))
+    try:
+        libc = ctypes.cdll.LoadLibrary("libc.so.6")
+    except OSError:
+        return
+    # Size the buffer from the encoded bytes rather than the character
+    # count: non-ASCII characters need several bytes in UTF-8 and assigning
+    # them to a buffer of len(title) + 1 bytes makes ctypes raise ValueError.
+    title_bytes = title.encode('utf-8')
+    buf = ctypes.create_string_buffer(len(title_bytes) + 1)
+    buf.value = title_bytes
+    PR_SET_NAME = 15  # prctl option number, see prctl(2)
+    try:
+        libc.prctl(PR_SET_NAME, ctypes.byref(buf), 0, 0, 0)
+    except AttributeError:
+        return  # Strange libc, just skip this
+
+
+def remove_start(s, start):
+    """Return s without its leading start; s itself if it has no such prefix."""
+    return s[len(start):] if s.startswith(start) else s
--- a/youtube_dl/version.py
+++ b/youtube_dl/version.py
@@ -1,2 +1,2 @@

-__version__ = '2013.12.09.2'
+__version__ = '2013.12.16.4'
Author	SHA1	Message	Date
Philipp Hagemeister	24050dd11c	release 2013.12.16.4	2013-12-16 21:10:18 +01:00
Philipp Hagemeister	8c8e3eec79	[facebook] Recognize #! URLs (Fixes #1988 )	2013-12-16 21:10:06 +01:00
Philipp Hagemeister	7ebc9dee69	Merge pull request #1987 from rzhxeo/blip [GenericIE] Add support for embedded blip.tv	2013-12-16 11:28:34 -08:00
rzhxeo	ee3e63e477	[GenericIE] Add support for embedded blip.tv	2013-12-16 20:08:23 +01:00
Philipp Hagemeister	e9c424c144	Merge pull request #1984 from alimirjamali/patch-1 Incorrect variable is used to check whether thumbnail exists	2013-12-16 09:04:36 -08:00
alimirjamali	0a9ce268ba	Incorrect variable is used to check whether thumbnail exists Dear @phihag I believe in line 848, the correct variable to check is 'thumb_filename' rather than 'infofn' Kindly advise Mit freundlichen Gruessen Ali	2013-12-16 20:14:28 +03:30
Philipp Hagemeister	4b2da48ea7	release 2013.12.16.3	2013-12-16 14:44:29 +01:00
Philipp Hagemeister	e64eaaa97d	Fix execution under Python 3	2013-12-16 14:44:17 +01:00
Philipp Hagemeister	780603027f	[videopremium] Skip test	2013-12-16 14:42:07 +01:00
Philipp Hagemeister	00902cd601	release 2013.12.16.2	2013-12-16 14:13:51 +01:00
Philipp Hagemeister	d67b0b1596	Reorder info_dict documentation	2013-12-16 14:13:40 +01:00
Philipp Hagemeister	d7dda16888	[blinkx] Add extractor (Fixes #1972 )	2013-12-16 13:56:30 +01:00
Philipp Hagemeister	a19fd00cc4	Simplify --playlist-start / --playlist-end interface	2013-12-16 13:16:20 +01:00
Philipp Hagemeister	d66152a898	[ndtv] Remove unused imports	2013-12-16 08:16:38 +01:00
Philipp Hagemeister	8c5f0c9fbc	[mdr] Clean up	2013-12-16 08:16:11 +01:00
Philipp Hagemeister	6888a874a1	release 2013.12.16.1	2013-12-16 05:45:15 +01:00
Philipp Hagemeister	09dacfa57f	[mdr] Simplify	2013-12-16 05:44:34 +01:00
Philipp Hagemeister	b2ae513586	Merge remote-tracking branch 'mc2avr/master'	2013-12-16 05:14:03 +01:00
Philipp Hagemeister	e4a0489f6e	Merge remote-tracking branch 'dstftw/channel9' Conflicts: youtube_dl/extractor/__init__.py	2013-12-16 05:14:00 +01:00
Philipp Hagemeister	b83be81d27	Credit @mjorlitzky for pornhd (#1961 )	2013-12-16 05:11:19 +01:00
Philipp Hagemeister	6f5dcd4eee	[pornhd] Simplify	2013-12-16 05:10:42 +01:00
Philipp Hagemeister	1bb2fc98e0	Merge remote-tracking branch 'mjorlitzky/master'	2013-12-16 05:07:58 +01:00
Philipp Hagemeister	e3946f989e	Set process title to youtube-dl This allows killing all youtube-dl processes with killall youtube-dl, and shows up nicer in some programs.	2013-12-16 05:04:55 +01:00
Philipp Hagemeister	8863d0de91	release 2013.12.16	2013-12-16 04:45:32 +01:00
Philipp Hagemeister	7b6fefc9d4	Apply --no-overwrites for --write-* files as well (Fixes #1980 )	2013-12-16 04:39:13 +01:00
Philipp Hagemeister	525ef9227f	Add --get-duration (Fixes #859 )	2013-12-16 04:15:10 +01:00
Philipp Hagemeister	c0ba0f4859	Document duration field	2013-12-16 04:09:43 +01:00
Philipp Hagemeister	b466b7029d	[youtube] Make duration an integer or None	2013-12-16 04:09:05 +01:00
Philipp Hagemeister	fa3ae234e0	[cbs] Add extractor (Fixes #1977 )	2013-12-16 03:53:43 +01:00
Philipp Hagemeister	48462108f3	[theplatform] Fix geographic restriction check	2013-12-16 03:43:45 +01:00
Philipp Hagemeister	f8b56e95b8	[theplatform] Detect geoblocked content	2013-12-16 03:34:46 +01:00
Philipp Hagemeister	5fe18bdbde	Add --min-views / --max-views (Fixes #1979 )	2013-12-16 03:09:49 +01:00
Jaime Marquínez Ferrándiz	dca02c80bc	Fix detection of the extension if the 'extractaudio' is given and improve the error message (#1969 ) Using 'foo.mp4' shouldn't raise an error. If 'foo' is given suggest using 'foo.%(ext)s' for the template	2013-12-15 11:42:38 +01:00
Jaime Marquínez Ferrándiz	9ee859b683	[daylimotion] Add support for urls from the mobile site (fixes #1953 ) It uses the 'touch' subdomain and adds a '#' before 'video'	2013-12-14 14:20:12 +01:00
Michael Orlitzky	8e05c870b4	Add support for pornhd.com.	2013-12-13 22:24:32 -05:00
Jaime Marquínez Ferrándiz	5d574e143f	[ign] Update one of test video's title	2013-12-13 17:04:40 +01:00
Philipp Hagemeister	2a203a6cda	Merge pull request #1956 from dstftw/master Fix typo in month name	2013-12-13 07:41:34 -08:00
dst	dadb8184e4	Fix typo in month name	2013-12-13 22:27:37 +07:00
Jaime Marquínez Ferrándiz	7a563df90a	[daum] Recognize mobile urls (#1952 )	2013-12-12 13:05:38 +01:00
Jaime Marquínez Ferrándiz	24b173fa5c	[naver] Recognize mobile urls (fixes #1951 )	2013-12-12 13:04:02 +01:00
dst	9b17ba0fa5	[channel9] Fix test description md5	2013-12-12 16:10:17 +07:00
dst	211f555d4c	[channel9] Missing import in __init__	2013-12-12 15:55:31 +07:00
dst	4d2ebb6bd7	[channel9] Cleanup	2013-12-12 15:19:23 +07:00
dst	df53747436	[channel9] Initial implementation (#1885 )	2013-12-12 15:13:45 +07:00
Philipp Hagemeister	f2c36ee43e	release 2013.12.11.2	2013-12-11 09:22:25 +01:00
Philipp Hagemeister	00381b4ccb	[pornhub] Fix URL regexp	2013-12-11 09:22:08 +01:00
Philipp Hagemeister	fca1ef19c1	release 2013.12.11.1	2013-12-11 08:54:54 +01:00
Philipp Hagemeister	357ddadbf5	Fix thumbnail filename determination (Fixes #1945 )	2013-12-11 08:54:48 +01:00
Philipp Hagemeister	08d03235f9	release 2013.12.11	2013-12-11 08:45:51 +01:00
Jaime Marquínez Ferrándiz	1825836235	Use `_download_xml` in more extractors	2013-12-10 21:03:53 +01:00
Jaime Marquínez Ferrándiz	a0088bdf93	[vimeo] Fix unused argument of the `_real_extract` method	2013-12-10 20:43:16 +01:00
Jaime Marquínez Ferrándiz	48ad51b243	[vimeo] Fix the extraction for some 'player' or 'pro' videos The variable the config dict is assigned to can change, now we try to detect it or fallback to a, b or c	2013-12-10 20:28:12 +01:00
Jaime Marquínez Ferrándiz	5458b4cefb	[dailymotion] Fix view count extraction and make it non fatal (fixes #1940 )	2013-12-10 19:47:00 +01:00
Jaime Marquínez Ferrándiz	7c86cd5ab1	[dailymotion] Fix uploader extraction Now it looks directly in the info dictionary	2013-12-10 19:44:16 +01:00
mc2avr	df1d7da2af	add MDRIE	2013-12-10 18:40:50 +01:00
Jaime Marquínez Ferrándiz	cbfc470228	[mixcloud] Try to get the m4a url if the mp3 url fails to download (fixes #1939 )	2013-12-10 13:42:41 +01:00
Jaime Marquínez Ferrándiz	f67ca84d4a	[soundcloud] Fix the extension for 'downloadable' songs In this case the 'original_format' field must be used.	2013-12-10 13:04:21 +01:00
Philipp Hagemeister	e2b38da931	[mtv] Fixup incorrectly encoded XML documents	2013-12-10 12:45:22 +01:00
Philipp Hagemeister	a30a60d8eb	release 2013.12.10	2013-12-10 11:54:59 +01:00
Philipp Hagemeister	5a3ea17c94	[zdf] Correct order of unknown formats (#1936 )	2013-12-10 11:52:10 +01:00
Philipp Hagemeister	475700acfe	[soundcloud] Do not mistake original_format for ext (Fixes #1934 )	2013-12-10 11:45:13 +01:00
Philipp Hagemeister	45598aab08	[YoutubeDL] Simplify filename preparation	2013-12-10 11:23:35 +01:00
Jaime Marquínez Ferrándiz	26e6393134	Set 'NA' as the default value for missing fields in the output template (fixes #1931 ) Remove the `except KeyError` clause, it won't get raised anymore	2013-12-09 22:00:42 +01:00
Philipp Hagemeister	49929a20a7	release 2013.12.09.4	2013-12-09 20:05:27 +01:00
Philipp Hagemeister	f8bd0194a7	Remove superfluous spaces	2013-12-09 20:05:10 +01:00
Jaime Marquínez Ferrándiz	77526143e7	[brightcove] Use the original url (usually the player) as the default referer (fixes #1929 )	2013-12-09 20:01:43 +01:00
Philipp Hagemeister	4ff50ef846	[soundcloud] Do not match sets (Fixes #1930 )	2013-12-09 19:57:00 +01:00
Philipp Hagemeister	caefb1de87	[ndtv] Add extractor (Fixes #1924 )	2013-12-09 19:44:33 +01:00
Philipp Hagemeister	1e1f84dac9	release 2013.12.09.3	2013-12-09 18:56:17 +01:00
Philipp Hagemeister	1d87e3a1c6	[rtlnow] Allow double slashes after domain name (Fixes #1928 )	2013-12-09 18:56:05 +01:00