release 2014.11.23.1

Provide guidance when called with a YouTube ID starting with a dash.
Reported at https://news.ycombinator.com/item?id=8648121
2014-11-23 10:51:16 +01:00 · 2014-11-23 10:51:09 +01:00 · 2014-11-23 10:12:35 +01:00 · 2014-11-23 10:11:52 +01:00 · 2014-11-23 10:11:04 +01:00 · 2014-11-23 10:09:45 +01:00
25 changed files with 369 additions and 114 deletions
--- a/2
+++ b/2
@@ -82,3 +82,5 @@ Xavier Beynon
 Gabriel Schubiner
 xantares
 Jan Matějka
+Mauroy Sébastien
+William Sewell
--- a/test/test_utils.py
+++ b/test/test_utils.py
@@ -47,6 +47,7 @@ from youtube_dl.utils import (
    js_to_json,
    get_filesystem_encoding,
    intlist_to_bytes,
+    args_to_str,
 )


@@ -361,5 +362,11 @@ class TestUtil(unittest.TestCase):
            intlist_to_bytes([0, 1, 127, 128, 255]),
            b'\x00\x01\x7f\x80\xff')

+    def test_args_to_str(self):
+        self.assertEqual(
+            args_to_str(['foo', 'ba/r', '-baz', '2 be', '']),
+            'foo ba/r -baz \'2 be\' \'\''
+        )
+
 if __name__ == '__main__':
    unittest.main()
--- a/youtube_dl/YoutubeDL.py
+++ b/youtube_dl/YoutubeDL.py
@@ -29,6 +29,7 @@ from .compat import (
    compat_str,
    compat_urllib_error,
    compat_urllib_request,
+    shlex_quote,
 )
 from .utils import (
    escape_url,
@@ -60,6 +61,7 @@ from .utils import (
    write_string,
    YoutubeDLHandler,
    prepend_extension,
+    args_to_str,
 )
 from .cache import Cache
 from .extractor import get_info_extractor, gen_extractors
@@ -253,6 +255,22 @@ class YoutubeDL(object):
            self.print_debug_header()
            self.add_default_info_extractors()

+    def warn_if_short_id(self, argv):
+        # short YouTube ID starting with dash?
+        idxs = [
+            i for i, a in enumerate(argv)
+            if re.match(r'^-[0-9A-Za-z_-]{10}$', a)]
+        if idxs:
+            correct_argv = (
+                ['youtube-dl'] +
+                [a for i, a in enumerate(argv) if i not in idxs] +
+                ['--'] + [argv[i] for i in idxs]
+            )
+            self.report_warning(
+                'Long argument string detected. '
+                'Use -- to separate parameters and URLs, like this:\n%s\n' %
+                args_to_str(correct_argv))
+
    def add_info_extractor(self, ie):
        """Add an InfoExtractor object to the end of the list."""
        self._ies.append(ie)
@@ -624,7 +642,7 @@ class YoutubeDL(object):

            return self.process_ie_result(
                new_result, download=download, extra_info=extra_info)
-        elif result_type == 'playlist':
+        elif result_type == 'playlist' or result_type == 'multi_video':
            # We process each entry in the playlist
            playlist = ie_result.get('title', None) or ie_result.get('id', None)
            self.to_screen('[download] Downloading playlist: %s' % playlist)
@@ -679,6 +697,9 @@ class YoutubeDL(object):
            ie_result['entries'] = playlist_results
            return ie_result
        elif result_type == 'compat_list':
+            self.report_warning(
+                'Extractor %s returned a compat_list result. '
+                'It needs to be updated.' % ie_result.get('extractor'))
            def _fixup(r):
                self.add_extra_info(r,
                    {
@@ -1407,3 +1428,4 @@ class YoutubeDL(object):
        if encoding is None:
            encoding = preferredencoding()
        return encoding
+
--- a/youtube_dl/init.py
+++ b/youtube_dl/init.py
@@ -334,11 +334,12 @@ def _real_main(argv=None):

        # Maybe do nothing
        if (len(all_urls) < 1) and (opts.load_info_filename is None):
-            if not (opts.update_self or opts.rm_cachedir):
-                parser.error('you must provide at least one URL')
-            else:
+            if opts.update_self or opts.rm_cachedir:
                sys.exit()

+            ydl.warn_if_short_id(sys.argv[1:] if argv is None else argv)
+            parser.error('you must provide at least one URL')
+
        try:
            if opts.load_info_filename is not None:
                retcode = ydl.download_with_info_file(opts.load_info_filename)
--- a/youtube_dl/compat.py
+++ b/youtube_dl/compat.py
@@ -3,6 +3,7 @@ from __future__ import unicode_literals
 import getpass
 import optparse
 import os
+import re
 import subprocess
 import sys

@@ -174,7 +175,10 @@ try:
    from shlex import quote as shlex_quote
 except ImportError:  # Python < 3.3
    def shlex_quote(s):
-        return "'" + s.replace("'", "'\"'\"'") + "'"
+        if re.match(r'^[-_\w./]+$', s):
+            return s
+        else:
+            return "'" + s.replace("'", "'\"'\"'") + "'"


 def compat_ord(c):
@@ -306,7 +310,7 @@ def workaround_optparse_bug9161():
    og = optparse.OptionGroup(op, 'foo')
    try:
        og.add_option('-t')
-    except TypeError as te:
+    except TypeError:
        real_add_option = optparse.OptionGroup.add_option

        def _compat_add_option(self, *args, **kwargs):
--- a/youtube_dl/extractor/init.py
+++ b/youtube_dl/extractor/init.py
@@ -115,6 +115,7 @@ from .fktv import (
    FKTVPosteckeIE,
 )
 from .flickr import FlickrIE
+from .folketinget import FolketingetIE
 from .fourtube import FourTubeIE
 from .franceculture import FranceCultureIE
 from .franceinter import FranceInterIE
@@ -379,6 +380,7 @@ from .teachingchannel import TeachingChannelIE
 from .teamcoco import TeamcocoIE
 from .techtalks import TechTalksIE
 from .ted import TEDIE
+from .telebruxelles import TeleBruxellesIE
 from .telecinco import TelecincoIE
 from .telemb import TeleMBIE
 from .tenplay import TenPlayIE
@@ -452,7 +454,10 @@ from .vine import (
    VineUserIE,
 )
 from .viki import VikiIE
-from .vk import VKIE
+from .vk import (
+    VKIE,
+    VKUserVideosIE,
+)
 from .vodlocker import VodlockerIE
 from .vporn import VpornIE
 from .vrt import VRTIE
--- a/youtube_dl/extractor/arte.py
+++ b/youtube_dl/extractor/arte.py
@@ -5,7 +5,6 @@ import re

 from .common import InfoExtractor
 from ..utils import (
-    ExtractorError,
    find_xpath_attr,
    unified_strdate,
    get_element_by_id,
--- a/youtube_dl/extractor/bliptv.py
+++ b/youtube_dl/extractor/bliptv.py
@@ -166,9 +166,17 @@ class BlipTVIE(SubtitlesInfoExtractor):


 class BlipTVUserIE(InfoExtractor):
-    _VALID_URL = r'(?:(?:(?:https?://)?(?:\w+\.)?blip\.tv/)|bliptvuser:)(?!api\.swf)([^/]+)/*$'
+    _VALID_URL = r'(?:(?:https?://(?:\w+\.)?blip\.tv/)|bliptvuser:)(?!api\.swf)([^/]+)/*$'
    _PAGE_SIZE = 12
    IE_NAME = 'blip.tv:user'
+    _TEST = {
+        'url': 'http://blip.tv/actone',
+        'info_dict': {
+            'id': 'actone',
+            'title': 'Act One: The Series',
+        },
+        'playlist_count': 5,
+    }

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
@@ -179,6 +187,7 @@ class BlipTVUserIE(InfoExtractor):
        page = self._download_webpage(url, username, 'Downloading user page')
        mobj = re.search(r'data-users-id="([^"]+)"', page)
        page_base = page_base % mobj.group(1)
+        title = self._og_search_title(page)

        # Download video ids using BlipTV Ajax calls. Result size per
        # query is limited (currently to 12 videos) so we need to query
@@ -215,4 +224,5 @@ class BlipTVUserIE(InfoExtractor):

        urls = ['http://blip.tv/%s' % video_id for video_id in video_ids]
        url_entries = [self.url_result(vurl, 'BlipTV') for vurl in urls]
-        return [self.playlist_result(url_entries, playlist_title=username)]
+        return self.playlist_result(
+            url_entries, playlist_title=title, playlist_id=username)
--- a/youtube_dl/extractor/brightcove.py
+++ b/youtube_dl/extractor/brightcove.py
@@ -111,6 +111,8 @@ class BrightcoveIE(InfoExtractor):
                            lambda m: m.group(1) + '/>', object_str)
        # Fix up some stupid XML, see https://github.com/rg3/youtube-dl/issues/1608
        object_str = object_str.replace('<--', '<!--')
+        # remove namespace to simplify extraction
+        object_str = re.sub(r'(<object[^>]*)(xmlns=".*?")', r'\1', object_str)
        object_str = fix_xml_ampersands(object_str)

        object_doc = xml.etree.ElementTree.fromstring(object_str.encode('utf-8'))
@@ -219,7 +221,7 @@ class BrightcoveIE(InfoExtractor):
        webpage = self._download_webpage(req, video_id)

        error_msg = self._html_search_regex(
-            r"<h1>We're sorry.</h1>\s*<p>(.*?)</p>", webpage,
+            r"<h1>We're sorry.</h1>([\s\n]*<p>.*?</p>)+", webpage,
            'error message', default=None)
        if error_msg is not None:
            raise ExtractorError(
--- a/youtube_dl/extractor/comedycentral.py
+++ b/youtube_dl/extractor/comedycentral.py
@@ -2,7 +2,6 @@ from __future__ import unicode_literals

 import re

-from .common import InfoExtractor
 from .mtv import MTVServicesInfoExtractor
 from ..utils import (
    compat_str,
@@ -110,9 +109,7 @@ class ComedyCentralShowsIE(MTVServicesInfoExtractor):
    }

    def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url, re.VERBOSE)
-        if mobj is None:
-            raise ExtractorError('Invalid URL: %s' % url)
+        mobj = re.match(self._VALID_URL, url)

        if mobj.group('shortname'):
            if mobj.group('shortname') in ('tds', 'thedailyshow'):
--- a/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@@ -43,7 +43,11 @@ class InfoExtractor(object):
    information possibly downloading the video to the file system, among
    other possible outcomes.

-    The dictionaries must include the following fields:
+    The type field determines the type of the result.
+    By far the most common value (and the default if _type is missing) is
+    "video", which indicates a single video.
+
+    For a video, the dictionaries must include the following fields:

    id:             Video identifier.
    title:          Video title, unescaped.
@@ -151,6 +155,38 @@ class InfoExtractor(object):

    Unless mentioned otherwise, None is equivalent to absence of information.

+
+    _type "playlist" indicates multiple videos.
+    There must be a key "entries", which is a list or a PagedList object, each
+    element of which is a valid dictionary under this specification.
+
+    Additionally, playlists can have "title" and "id" attributes with the same
+    semantics as videos (see above).
+
+
+    _type "multi_video" indicates that there are multiple videos that
+    form a single show, for example multiple acts of an opera or TV episode.
+    It must have an entries key like a playlist and contain all the keys
+    required for a video at the same time.
+
+
+    _type "url" indicates that the video must be extracted from another
+    location, possibly by a different extractor. Its only required key is:
+    "url" - the next URL to extract.
+
+    Additionally, it may have properties believed to be identical to the
+    resolved entity, for example "title" if the title of the referred video is
+    known ahead of time.
+
+
+    _type "url_transparent" entities have the same specification as "url", but
+    indicate that the given additional information is more precise than the one
+    associated with the resolved URL.
+    This is useful when a site employs a video service that hosts the video and
+    its technical metadata, but that video service does not embed a useful
+    title, description etc.
+
+
    Subclasses of this one should re-define the _real_initialize() and
    _real_extract() methods and define a _VALID_URL regexp.
    Probably, they should also be added to the list of extractors.
--- a/youtube_dl/extractor/folketinget.py
+++ b/youtube_dl/extractor/folketinget.py
@@ -0,0 +1,75 @@
+# -*- coding: utf-8 -*-
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..compat import compat_parse_qs
+from ..utils import (
+    int_or_none,
+    parse_duration,
+    parse_iso8601,
+    xpath_text,
+)
+
+
+class FolketingetIE(InfoExtractor):
+    IE_DESC = 'Folketinget (ft.dk; Danish parliament)'
+    _VALID_URL = r'https?://(?:www\.)?ft\.dk/webtv/video/[^?#]*?\.(?P<id>[0-9]+)\.aspx'
+    _TEST = {
+        'url': 'http://www.ft.dk/webtv/video/20141/eru/td.1165642.aspx?as=1#player',
+        'info_dict': {
+            'id': '1165642',
+            'ext': 'mp4',
+            'title': 'Åbent samråd i Erhvervsudvalget',
+            'description': 'Åbent samråd med erhvervs- og vækstministeren om regeringens politik på teleområdet',
+            'view_count': int,
+            'width': 768,
+            'height': 432,
+            'tbr': 928000,
+            'timestamp': 1416493800,
+            'upload_date': '20141120',
+            'duration': 3960,
+        },
+        'params': {
+            'skip_download': 'rtmpdump required',
+        }
+    }
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+        webpage = self._download_webpage(url, video_id)
+
+        title = self._og_search_title(webpage)
+        description = self._html_search_regex(
+            r'(?s)<div class="video-item-agenda"[^>]*>(.*?)<',
+            webpage, 'description', fatal=False)
+
+        player_params = compat_parse_qs(self._search_regex(
+            r'<embed src="http://ft\.arkena\.tv/flash/ftplayer\.swf\?([^"]+)"',
+            webpage, 'player params'))
+        xml_url = player_params['xml'][0]
+        doc = self._download_xml(xml_url, video_id)
+
+        timestamp = parse_iso8601(xpath_text(doc, './/date'))
+        duration = parse_duration(xpath_text(doc, './/duration'))
+        width = int_or_none(xpath_text(doc, './/width'))
+        height = int_or_none(xpath_text(doc, './/height'))
+        view_count = int_or_none(xpath_text(doc, './/views'))
+
+        formats = [{
+            'format_id': n.attrib['bitrate'],
+            'url': xpath_text(n, './url', fatal=True),
+            'tbr': int_or_none(n.attrib['bitrate']),
+        } for n in doc.findall('.//streams/stream')]
+        self._sort_formats(formats)
+
+        return {
+            'id': video_id,
+            'title': title,
+            'formats': formats,
+            'description': description,
+            'timestamp': timestamp,
+            'width': width,
+            'height': height,
+            'duration': duration,
+            'view_count': view_count,
+        }
--- a/youtube_dl/extractor/generic.py
+++ b/youtube_dl/extractor/generic.py
@@ -979,7 +979,7 @@ class GenericIE(InfoExtractor):
                found = filter_video(re.findall(r'<meta.*?property="og:video".*?content="(.*?)"', webpage))
        if not found:
            # HTML5 video
-            found = re.findall(r'(?s)<video[^<]*(?:>.*?<source[^>]*)?\s+src="([^"]+)"', webpage)
+            found = re.findall(r'(?s)<video[^<]*(?:>.*?<source[^>]*)?\s+src=["\'](.*?)["\']', webpage)
        if not found:
            found = re.search(
                r'(?i)<meta\s+(?=(?:[a-z-]+="[^"]+"\s+)*http-equiv="refresh")'
--- a/youtube_dl/extractor/goldenmoustache.py
+++ b/youtube_dl/extractor/goldenmoustache.py
@@ -1,9 +1,7 @@
 from __future__ import unicode_literals

-import re
 from .common import InfoExtractor
 from ..utils import (
-    parse_duration,
    int_or_none,
 )

--- a/youtube_dl/extractor/mtv.py
+++ b/youtube_dl/extractor/mtv.py
@@ -145,7 +145,8 @@ class MTVServicesInfoExtractor(InfoExtractor):
        idoc = self._download_xml(
            feed_url + '?' + data, video_id,
            'Downloading info', transform_source=fix_xml_ampersands)
-        return [self._get_video_info(item) for item in idoc.findall('.//item')]
+        return self.playlist_result(
+            [self._get_video_info(item) for item in idoc.findall('.//item')])

    def _real_extract(self, url):
        title = url_basename(url)
--- a/youtube_dl/extractor/rtlnl.py
+++ b/youtube_dl/extractor/rtlnl.py
@@ -28,9 +28,8 @@ class RtlXlIE(InfoExtractor):
        mobj = re.match(self._VALID_URL, url)
        uuid = mobj.group('uuid')

-        # Use m3u8 streams (see https://github.com/rg3/youtube-dl/issues/4118)
        info = self._download_json(
-            'http://www.rtl.nl/system/s4m/vfd/version=2/uuid=%s/d=pc/fmt=adaptive/' % uuid,
+            'http://www.rtl.nl/system/s4m/vfd/version=2/uuid=%s/fmt=flash/' % uuid,
            uuid)

        material = info['material'][0]
@@ -39,12 +38,13 @@ class RtlXlIE(InfoExtractor):
        progname = info['abstracts'][0]['name']
        subtitle = material['title'] or info['episodes'][0]['name']

-        videopath = material['videopath']
+        # Use unencrypted m3u8 streams (See https://github.com/rg3/youtube-dl/issues/4118)
+        videopath = material['videopath'].replace('.f4m', '.m3u8')
        m3u8_url = 'http://manifest.us.rtl.nl' + videopath

        formats = self._extract_m3u8_formats(m3u8_url, uuid, ext='mp4')

-        video_urlpart = videopath.split('/adaptive/')[1][:-4]
+        video_urlpart = videopath.split('/flash/')[1][:-4]
        PG_URL_TEMPLATE = 'http://pg.us.rtl.nl/rtlxl/network/%s/progressive/%s.mp4'

        formats.extend([
--- a/youtube_dl/extractor/stanfordoc.py
+++ b/youtube_dl/extractor/stanfordoc.py
@@ -1,3 +1,5 @@
+from __future__ import unicode_literals
+
 import re

 from .common import InfoExtractor
@@ -9,24 +11,23 @@ from ..utils import (


 class StanfordOpenClassroomIE(InfoExtractor):
-    IE_NAME = u'stanfordoc'
-    IE_DESC = u'Stanford Open ClassRoom'
-    _VALID_URL = r'^(?:https?://)?openclassroom\.stanford\.edu(?P<path>/?|(/MainFolder/(?:HomePage|CoursePage|VideoPage)\.php([?]course=(?P<course>[^&]+)(&video=(?P<video>[^&]+))?(&.*)?)?))$'
+    IE_NAME = 'stanfordoc'
+    IE_DESC = 'Stanford Open ClassRoom'
+    _VALID_URL = r'https?://openclassroom\.stanford\.edu(?P<path>/?|(/MainFolder/(?:HomePage|CoursePage|VideoPage)\.php([?]course=(?P<course>[^&]+)(&video=(?P<video>[^&]+))?(&.*)?)?))$'
    _TEST = {
-        u'url': u'http://openclassroom.stanford.edu/MainFolder/VideoPage.php?course=PracticalUnix&video=intro-environment&speed=100',
-        u'file': u'PracticalUnix_intro-environment.mp4',
-        u'md5': u'544a9468546059d4e80d76265b0443b8',
-        u'info_dict': {
-            u"title": u"Intro Environment"
+        'url': 'http://openclassroom.stanford.edu/MainFolder/VideoPage.php?course=PracticalUnix&video=intro-environment&speed=100',
+        'md5': '544a9468546059d4e80d76265b0443b8',
+        'info_dict': {
+            'id': 'PracticalUnix_intro-environment',
+            'ext': 'mp4',
+            'title': 'Intro Environment',
        }
    }

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
-        if mobj is None:
-            raise ExtractorError(u'Invalid URL: %s' % url)

-        if mobj.group('course') and mobj.group('video'): # A specific video
+        if mobj.group('course') and mobj.group('video'):  # A specific video
            course = mobj.group('course')
            video = mobj.group('video')
            info = {
@@ -35,7 +36,6 @@ class StanfordOpenClassroomIE(InfoExtractor):
                'upload_date': None,
            }

-            self.report_extraction(info['id'])
            baseUrl = 'http://openclassroom.stanford.edu/MainFolder/courses/' + course + '/videos/'
            xmlUrl = baseUrl + video + '.xml'
            mdoc = self._download_xml(xmlUrl, info['id'])
@@ -43,63 +43,49 @@ class StanfordOpenClassroomIE(InfoExtractor):
                info['title'] = mdoc.findall('./title')[0].text
                info['url'] = baseUrl + mdoc.findall('./videoFile')[0].text
            except IndexError:
-                raise ExtractorError(u'Invalid metadata XML file')
-            info['ext'] = info['url'].rpartition('.')[2]
-            return [info]
-        elif mobj.group('course'): # A course page
+                raise ExtractorError('Invalid metadata XML file')
+            return info
+        elif mobj.group('course'):  # A course page
            course = mobj.group('course')
            info = {
                'id': course,
-                'type': 'playlist',
+                '_type': 'playlist',
                'uploader': None,
                'upload_date': None,
            }

-            coursepage = self._download_webpage(url, info['id'],
-                                        note='Downloading course info page',
-                                        errnote='Unable to download course info page')
+            coursepage = self._download_webpage(
+                url, info['id'],
+                note='Downloading course info page',
+                errnote='Unable to download course info page')

-            info['title'] = self._html_search_regex('<h1>([^<]+)</h1>', coursepage, 'title', default=info['id'])
+            info['title'] = self._html_search_regex(
+                r'<h1>([^<]+)</h1>', coursepage, 'title', default=info['id'])

-            info['description'] = self._html_search_regex('<description>([^<]+)</description>',
-                coursepage, u'description', fatal=False)
+            info['description'] = self._html_search_regex(
+                r'(?s)<description>([^<]+)</description>',
+                coursepage, 'description', fatal=False)

            links = orderedSet(re.findall('<a href="(VideoPage.php\?[^"]+)">', coursepage))
-            info['list'] = [
-                {
-                    'type': 'reference',
-                    'url': 'http://openclassroom.stanford.edu/MainFolder/' + unescapeHTML(vpage),
-                }
-                    for vpage in links]
-            results = []
-            for entry in info['list']:
-                assert entry['type'] == 'reference'
-                results += self.extract(entry['url'])
-            return results
-        else: # Root page
+            info['entries'] = [self.url_result(
+                'http://openclassroom.stanford.edu/MainFolder/%s' % unescapeHTML(l)
+            ) for l in links]
+            return info
+        else:  # Root page
            info = {
                'id': 'Stanford OpenClassroom',
-                'type': 'playlist',
+                '_type': 'playlist',
                'uploader': None,
                'upload_date': None,
            }
+            info['title'] = info['id']

            rootURL = 'http://openclassroom.stanford.edu/MainFolder/HomePage.php'
            rootpage = self._download_webpage(rootURL, info['id'],
-                errnote=u'Unable to download course info page')
-
-            info['title'] = info['id']
+                errnote='Unable to download course info page')

            links = orderedSet(re.findall('<a href="(CoursePage.php\?[^"]+)">', rootpage))
-            info['list'] = [
-                {
-                    'type': 'reference',
-                    'url': 'http://openclassroom.stanford.edu/MainFolder/' + unescapeHTML(cpage),
-                }
-                    for cpage in links]
-
-            results = []
-            for entry in info['list']:
-                assert entry['type'] == 'reference'
-                results += self.extract(entry['url'])
-            return results
+            info['entries'] = [self.url_result(
+                'http://openclassroom.stanford.edu/MainFolder/%s' % unescapeHTML(l)
+            ) for l in links]
+            return info
--- a/youtube_dl/extractor/sztvhu.py
+++ b/youtube_dl/extractor/sztvhu.py
@@ -1,27 +1,24 @@
 # -*- coding: utf-8 -*-
-
-import re
+from __future__ import unicode_literals

 from .common import InfoExtractor
-from ..utils import determine_ext


 class SztvHuIE(InfoExtractor):
-    _VALID_URL = r'(?:http://)?(?:(?:www\.)?sztv\.hu|www\.tvszombathely\.hu)/(?:[^/]+)/.+-(?P<id>[0-9]+)'
+    _VALID_URL = r'http://(?:(?:www\.)?sztv\.hu|www\.tvszombathely\.hu)/(?:[^/]+)/.+-(?P<id>[0-9]+)'
    _TEST = {
-        u'url': u'http://sztv.hu/hirek/cserkeszek-nepszerusitettek-a-kornyezettudatos-eletmodot-a-savaria-teren-20130909',
-        u'file': u'20130909.mp4',
-        u'md5': u'a6df607b11fb07d0e9f2ad94613375cb',
-        u'info_dict': {
-            u"title": u"Cserkészek népszerűsítették a környezettudatos életmódot a Savaria téren",
-            u"description": u'A zöld nap játékos ismeretterjesztő programjait a Magyar Cserkész Szövetség szervezte, akik az ország nyolc városában adják át tudásukat az érdeklődőknek. A PET...',
+        'url': 'http://sztv.hu/hirek/cserkeszek-nepszerusitettek-a-kornyezettudatos-eletmodot-a-savaria-teren-20130909',
+        'md5': 'a6df607b11fb07d0e9f2ad94613375cb',
+        'info_dict': {
+            'id': '20130909',
+            'ext': 'mp4',
+            'title': 'Cserkészek népszerűsítették a környezettudatos életmódot a Savaria téren',
+            'description': 'A zöld nap játékos ismeretterjesztő programjait a Magyar Cserkész Szövetség szervezte, akik az ország nyolc városában adják át tudásukat az érdeklődőknek. A PET...',
        },
-        u'skip': u'Service temporarily disabled as of 2013-11-20'
    }

    def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group('id')
+        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)
        video_file = self._search_regex(
            r'file: "...:(.*?)",', webpage, 'video file')
@@ -39,7 +36,6 @@ class SztvHuIE(InfoExtractor):
            'id': video_id,
            'url': video_url,
            'title': title,
-            'ext': determine_ext(video_url),
            'description': description,
            'thumbnail': thumbnail,
        }
--- a/youtube_dl/extractor/telebruxelles.py
+++ b/youtube_dl/extractor/telebruxelles.py
@@ -0,0 +1,60 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+
+
+class TeleBruxellesIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?telebruxelles\.be/(news|sport|dernier-jt)/?(?P<id>[^/#?]+)'
+    _TESTS = [{
+        'url': 'http://www.telebruxelles.be/news/auditions-devant-parlement-francken-galant-tres-attendus/',
+        'md5': '59439e568c9ee42fb77588b2096b214f',
+        'info_dict': {
+            'id': '11942',
+            'display_id': 'auditions-devant-parlement-francken-galant-tres-attendus',
+            'ext': 'flv',
+            'title': 'Parlement : Francken et Galant répondent aux interpellations de l’opposition',
+            'description': 're:Les auditions des ministres se poursuivent*'
+        },
+        'params': {
+            'skip_download': 'requires rtmpdump'
+        },
+    }, {
+        'url': 'http://www.telebruxelles.be/sport/basket-brussels-bat-mons-80-74/',
+        'md5': '181d3fbdcf20b909309e5aef5c6c6047',
+        'info_dict': {
+            'id': '10091',
+            'display_id': 'basket-brussels-bat-mons-80-74',
+            'ext': 'flv',
+            'title': 'Basket : le Brussels bat Mons 80-74',
+            'description': 're:^Ils l\u2019on fait ! En basket, le B*',
+        },
+        'params': {
+            'skip_download': 'requires rtmpdump'
+        },
+    }]
+
+    def _real_extract(self, url):
+        display_id = self._match_id(url)
+        webpage = self._download_webpage(url, display_id)
+
+        article_id = self._html_search_regex(
+            r"<article id=\"post-(\d+)\"", webpage, 'article ID')
+        title = self._html_search_regex(
+            r'<h1 class=\"entry-title\">(.*?)</h1>', webpage, 'title')
+        description = self._og_search_description(webpage)
+
+        rtmp_url = self._html_search_regex(
+            r"file: \"(rtmp://\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}:\d{1,5}/vod/mp4:\" \+ \"\w+\" \+ \".mp4)\"",
+            webpage, 'RTMP url')
+        rtmp_url = rtmp_url.replace("\" + \"", "")
+
+        return {
+            'id': article_id,
+            'display_id': display_id,
+            'title': title,
+            'description': description,
+            'url': rtmp_url,
+            'ext': 'flv',
+            'rtmp_live': True  # if rtmpdump is not called with "--live" argument, the download is blocked and cannot be completed
+        }
--- a/youtube_dl/extractor/vh1.py
+++ b/youtube_dl/extractor/vh1.py
@@ -121,4 +121,7 @@ class VH1IE(MTVIE):
        idoc = self._download_xml(
            doc_url, video_id,
            'Downloading info', transform_source=fix_xml_ampersands)
-        return [self._get_video_info(item) for item in idoc.findall('.//item')]
+        return self.playlist_result(
+            [self._get_video_info(item) for item in idoc.findall('.//item')],
+            playlist_id=video_id,
+        )
--- a/youtube_dl/extractor/vk.py
+++ b/youtube_dl/extractor/vk.py
@@ -11,12 +11,13 @@ from ..utils import (
    compat_urllib_parse,
    compat_str,
    unescapeHTML,
-)
+    unified_strdate,
+    orderedSet)


 class VKIE(InfoExtractor):
    IE_NAME = 'vk.com'
-    _VALID_URL = r'https?://(?:m\.)?vk\.com/(?:video_ext\.php\?.*?\boid=(?P<oid>-?\d+).*?\bid=(?P<id>\d+)|(?:.+?\?.*?z=)?video(?P<videoid>.*?)(?:\?|%2F|$))'
+    _VALID_URL = r'https?://(?:m\.)?vk\.com/(?:video_ext\.php\?.*?\boid=(?P<oid>-?\d+).*?\bid=(?P<id>\d+)|(?:.+?\?.*?z=)?video(?P<videoid>[^s].*?)(?:\?|%2F|$))'
    _NETRC_MACHINE = 'vk'

    _TESTS = [
@@ -29,17 +30,19 @@ class VKIE(InfoExtractor):
                'title': 'ProtivoGunz - Хуёвая песня',
                'uploader': 're:Noize MC.*',
                'duration': 195,
+                'upload_date': '20120212',
            },
        },
        {
-            'url': 'http://vk.com/video4643923_163339118',
-            'md5': 'f79bccb5cd182b1f43502ca5685b2b36',
+            'url': 'http://vk.com/video205387401_165548505',
+            'md5': '6c0aeb2e90396ba97035b9cbde548700',
            'info_dict': {
-                'id': '163339118',
+                'id': '165548505',
                'ext': 'mp4',
-                'uploader': 'Elya Iskhakova',
-                'title': 'Dream Theater - Hollow Years Live at Budokan 720*',
-                'duration': 558,
+                'uploader': 'Tom Cruise',
+                'title': 'No name',
+                'duration': 9,
+                'upload_date': '20130721'
            }
        },
        {
@@ -52,9 +55,12 @@ class VKIE(InfoExtractor):
                'uploader': 'Vladimir Gavrin',
                'title': 'Lin Dan',
                'duration': 101,
+                'upload_date': '20120730',
            }
        },
        {
+            # VIDEO NOW REMOVED
+            # please update if you find a video whose URL follows the same pattern
            'url': 'http://vk.com/video-8871596_164049491',
            'md5': 'a590bcaf3d543576c9bd162812387666',
            'note': 'Only available for registered users',
@@ -64,18 +70,7 @@ class VKIE(InfoExtractor):
                'uploader': 'Триллеры',
                'title': '► Бойцовский клуб / Fight Club 1999 [HD 720]',
                'duration': 8352,
-            },
-            'skip': 'Requires vk account credentials',
-        },
-        {
-            'url': 'http://vk.com/feed?z=video-43215063_166094326%2Fbb50cacd3177146d7a',
-            'md5': 'd82c22e449f036282d1d3f7f4d276869',
-            'info_dict': {
-                'id': '166094326',
-                'ext': 'mp4',
-                'uploader': 'Киномания - лучшее из мира кино',
-                'title': 'Запах женщины (1992)',
-                'duration': 9392,
+                'upload_date': '20121218'
            },
            'skip': 'Requires vk account credentials',
        },
@@ -88,6 +83,7 @@ class VKIE(InfoExtractor):
                'uploader': 'Киномания - лучшее из мира кино',
                'title': ' ',
                'duration': 7291,
+                'upload_date': '20140328',
            },
            'skip': 'Requires vk account credentials',
        },
@@ -100,9 +96,15 @@ class VKIE(InfoExtractor):
                'ext': 'mp4',
                'title': 'Книга Илая',
                'duration': 6771,
+                'upload_date': '20140626',
            },
            'skip': 'Only works from Russia',
        },
+        {
+            # removed video, just testing that we match the pattern
+            'url': 'http://vk.com/feed?z=video-43215063_166094326%2Fbb50cacd3177146d7a',
+            'only_matching': True,
+        },
    ]

    def _login(self):
@@ -169,6 +171,13 @@ class VKIE(InfoExtractor):
        data_json = self._search_regex(r'var vars = ({.*?});', info_page, 'vars')
        data = json.loads(data_json)

+        # Extract upload date
+        upload_date = None
+        mobj = re.search(r'id="mv_date_wrap".*?Added ([a-zA-Z]+ [0-9]+), ([0-9]+) at', info_page)
+        if mobj is not None:
+            x = mobj.group(1) + ' ' + mobj.group(2)
+            upload_date = unified_strdate(mobj.group(1) + ' ' + mobj.group(2))
+
        formats = [{
            'format_id': k,
            'url': v,
@@ -183,5 +192,28 @@ class VKIE(InfoExtractor):
            'title': unescapeHTML(data['md_title']),
            'thumbnail': data.get('jpg'),
            'uploader': data.get('md_author'),
-            'duration': data.get('duration')
+            'duration': data.get('duration'),
+            'upload_date': upload_date,
        }
+
+
+class VKUserVideosIE(InfoExtractor):
+    IE_NAME = 'vk.com:user-videos'
+    IE_DESC = 'vk.com:All of a user\'s videos'
+    _VALID_URL = r'https?://vk\.com/videos(?P<id>[0-9]+)(?:m\?.*)?'
+    _TEMPLATE_URL = 'https://vk.com/videos'
+    _TEST = {
+        'url': 'http://vk.com/videos205387401',
+        'playlist_mincount': 4,
+    }
+
+    def _real_extract(self, url):
+        page_id = self._match_id(url)
+        page = self._download_webpage(url, page_id)
+        video_ids = orderedSet(
+            m.group(1) for m in re.finditer(r'href="/video([0-9_]+)"', page))
+        url_entries = [
+            self.url_result(
+                'http://vk.com/video' + video_id, 'VK', video_id=video_id)
+            for video_id in video_ids]
+        return self.playlist_result(url_entries, page_id)
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@@ -406,6 +406,19 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
                'format': '141',
            },
        },
+        # Controversy video
+        {
+            'url': 'https://www.youtube.com/watch?v=T4XJQO3qol8',
+            'info_dict': {
+                'id': 'T4XJQO3qol8',
+                'ext': 'mp4',
+                'upload_date': '20100909',
+                'uploader': 'The Amazing Atheist',
+                'uploader_id': 'TheAmazingAtheist',
+                'title': 'Burning Everyone\'s Koran',
+                'description': 'SUBSCRIBE: http://www.youtube.com/saturninefilms\n\nEven Obama has taken a stand against freedom on this issue: http://www.huffingtonpost.com/2010/09/09/obama-gma-interview-quran_n_710282.html',
+            }
+        }
    ]

    def __init__(self, *args, **kwargs):
@@ -666,7 +679,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
        video_id = self.extract_id(url)

        # Get video webpage
-        url = proto + '://www.youtube.com/watch?v=%s&gl=US&hl=en&has_verified=1' % video_id
+        url = proto + '://www.youtube.com/watch?v=%s&gl=US&hl=en&has_verified=1&bpctr=9999999999' % video_id
        pref_cookies = [
            c for c in self._downloader.cookiejar
            if c.domain == '.youtube.com' and c.name == 'PREF']
--- a/youtube_dl/jsinterp.py
+++ b/youtube_dl/jsinterp.py
@@ -61,7 +61,7 @@ class JSInterpreter(object):
            pass

        m = re.match(
-            r'^(?P<var>[a-zA-Z0-9_]+)\.(?P<member>[^(]+)(?:\(+(?P<args>[^()]*)\))?$',
+            r'^(?P<var>[$a-zA-Z0-9_]+)\.(?P<member>[^(]+)(?:\(+(?P<args>[^()]*)\))?$',
            expr)
        if m:
            variable = m.group('var')
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@@ -41,6 +41,7 @@ from .compat import (
    compat_urllib_parse_urlparse,
    compat_urllib_request,
    compat_urlparse,
+    shlex_quote,
 )


@@ -1433,3 +1434,8 @@ def ytdl_is_updateable():
    from zipimport import zipimporter

    return isinstance(globals().get('__loader__'), zipimporter) or hasattr(sys, 'frozen')
+
+
+def args_to_str(args):
+    # Get a short string representation for a subprocess command
+    return ' '.join(shlex_quote(a) for a in args)
--- a/youtube_dl/version.py
+++ b/youtube_dl/version.py
@@ -1,2 +1,2 @@

-__version__ = '2014.11.20.1'
+__version__ = '2014.11.23.1'
Author	SHA1	Message	Date
Philipp Hagemeister	835a22ef3f	release 2014.11.23.1	2014-11-23 10:51:16 +01:00
Philipp Hagemeister	7d4111ed14	Provide guidance when called with a YouTube ID starting with a dash. Reported at https://news.ycombinator.com/item?id=8648121	2014-11-23 10:51:09 +01:00
Philipp Hagemeister	d37cab2a9d	Credit @WillSewell for vk:user (#4233)	2014-11-23 10:12:35 +01:00
Philipp Hagemeister	d16abf434a	[vk] Some PEP8 love	2014-11-23 10:11:52 +01:00
Philipp Hagemeister	a8363f3ab7	[vk] Clarify test	2014-11-23 10:11:04 +01:00
Philipp Hagemeister	010cd3a3ee	Merge remote-tracking branch 'WillSewell/vk-playlists'	2014-11-23 10:09:45 +01:00
Philipp Hagemeister	b9042def9d	release 2014.11.23	2014-11-23 09:59:42 +01:00
Philipp Hagemeister	aa79ac0c82	[youtube] Support controversy videos (Fixes #4275)	2014-11-23 09:59:02 +01:00
Philipp Hagemeister	88125905cf	Credit @nulloz for telebruxelles (#4270)	2014-11-23 09:49:23 +01:00
Philipp Hagemeister	dd60be2bf9	[telebruxelles] Simplify (#4270)	2014-11-23 09:44:42 +01:00
Philipp Hagemeister	119b3caa46	Merge remote-tracking branch 'nulloz/telebruxelles'	2014-11-23 09:38:18 +01:00
Naglis Jonaitis	49f0da7ae1	[rtlxl] Use unencrypted m3u8 streams (#4115)	2014-11-22 21:06:45 +02:00
nulloz	2cead7e7bc	telebruxelles Add new extractor	2014-11-22 13:34:29 +01:00
Will Sewell	9262867e86	[vk.com] Added newline at the end of the file.	2014-11-21 23:25:05 +00:00
Will Sewell	b9272e8f8f	[vk.com] Removed redundant log message -- this information is already being logged.	2014-11-21 23:22:52 +00:00
Will Sewell	021a0db8f7	[vk.com] Simplified the page_id acquisition by using the id matched in the URL earlier on.	2014-11-21 23:22:44 +00:00
Will Sewell	e1e8b6897b	[vk.com] Updated the extract_videos_from_page function with a much simpler 1-liner.	2014-11-21 23:16:12 +00:00
Will Sewell	53d1cd1f77	[vk.com] Updated the _VALID_URL regex for the playlist IE. Removed optional m, and named the id group.	2014-11-21 23:03:31 +00:00
Will Sewell	cad985ab4d	[vk.com] Updated the description to include vk.com.	2014-11-21 23:00:43 +00:00
Will Sewell	c52331f30c	[vk.com] Updated a test video that has been removed, and added a comment for others to update two other test videos that are also now removed.	2014-11-21 23:00:33 +00:00
Will Sewell	42e1ff8665	[vk.com] Added upload_date variable to the test cases that still work.	2014-11-21 23:00:17 +00:00
Philipp Hagemeister	2c64b8ba63	release 2014.11.21.1	2014-11-21 22:47:23 +01:00
Philipp Hagemeister	42e12102a9	[YoutubeDL] Fix multi_video check	2014-11-21 22:39:57 +01:00
Philipp Hagemeister	6127693ed9	[folketinget] Add extractor (Fixes #4262)	2014-11-21 22:36:24 +01:00
Philipp Hagemeister	71069d2157	[sztv] Remove useless determine_ext call	2014-11-21 22:03:29 +01:00
Philipp Hagemeister	f3391db889	[sztvhu] Modernize	2014-11-21 22:02:16 +01:00
Philipp Hagemeister	9b32eca3ce	[generic] Add support for single quotes in HTML5 videos (Fixes #4265)	2014-11-21 22:01:25 +01:00
Philipp Hagemeister	ec06f0f610	release 2014.11.21	2014-11-21 10:41:18 +01:00
Philipp Hagemeister	e6c9c8f6ee	Merge pull request #4261 from tinybug/patch-4 Update jsinterp.py	2014-11-21 10:41:02 +01:00
tinybug	85b9275517	Update jsinterp.py http://s.ytimg.com/yts/jsbin/html5player-zh_HK-vfl1NK6PR/html5player.js fix raise ExtractorError	2014-11-21 17:09:22 +08:00
Philipp Hagemeister	dfd5313afd	[YoutubeDL] Support new _type multi_video	2014-11-21 00:25:46 +01:00
Philipp Hagemeister	be53e2a737	[blip.tv:user] Modernize and add a test	2014-11-21 00:25:13 +01:00
Philipp Hagemeister	a1c68b9ef2	Merge remote-tracking branch 'origin/master'	2014-11-21 00:17:58 +01:00
Jaime Marquínez Ferrándiz	4d46c1c68c	[brightcove] Improve error message detection (#4256)	2014-11-20 18:44:54 +01:00
Jaime Marquínez Ferrándiz	d6f714f321	[brightcove] Remove the namespace from the BrightcoveExperience html object	2014-11-20 18:37:08 +01:00
Philipp Hagemeister	8569f3d629	[vh1] Modernize	2014-11-20 16:51:33 +01:00
Philipp Hagemeister	fed5d03260	[extractor/common] Document _type values (Motivated by #4254)	2014-11-20 16:47:59 +01:00
Philipp Hagemeister	6adeffa7c6	[comedycentral] Modernize	2014-11-20 16:36:53 +01:00
Philipp Hagemeister	b244b5c3f9	remove unused imports	2014-11-20 16:36:13 +01:00
Philipp Hagemeister	f42c190769	[stanfordoc] Modernize	2014-11-20 16:34:54 +01:00
Philipp Hagemeister	c9bf41145f	[YoutubeDL] Warn if an extractor returns compat_list	2014-11-20 16:29:31 +01:00
Philipp Hagemeister	5239075bb6	[mtv] Return a proper playlist result (#4254)	2014-11-20 16:25:19 +01:00
Will Sewell	02a12f9fe6	[vk] date_added is now extracted from the video page.	2014-11-18 20:19:56 +00:00
Will Sewell	6fcd6e0e21	[vk] Updated the regex for matching user video pages. It now matches optional URL parameters too.	2014-11-18 19:34:12 +00:00
Will Sewell	469d4c8968	[vk] Added a new information extractor for pages that are a list of a user\'s videos on vk.com. It works in a same way to playlist style pages for the YT information extractors.	2014-11-17 17:53:34 -05:00