Compare commits


46 Commits

Author SHA1 Message Date
Philipp Hagemeister
11addc50ff release 2015.08.23 2015-08-23 23:52:47 +02:00
Sergey M․
e4df2f98cc [yandexmusic:track] Eliminate base class 2015-08-24 00:36:54 +06:00
Sergey M․
e7c14660d3 [yandexmusic] Defer link resolve till actual download time to prevent link expiry (Closes #6650) 2015-08-24 00:36:24 +06:00
Sergey M․
90076b6172 [spankwire] Preserve old uploader pattern 2015-08-23 22:33:26 +06:00
Sergey M․
28b83495d8 [spankwire] Simplify 2015-08-23 22:32:44 +06:00
Sergey M․
551c7837ac [spankwire] Simplify and properly format 2015-08-23 22:32:20 +06:00
clauderains
59e6acc757 [spankwire] Support new cdn video url format 2015-08-23 22:21:59 +06:00
clauderains
9990c960f2 [spankwire] Fixed uploader_id field extraction so that test case passes 2015-08-23 22:21:54 +06:00
Sergey M․
2006a06eff [kontrtube] Fix extraction (Closes #6644) 2015-08-23 21:43:28 +06:00
Sergey M․
2b6bda1ed8 [pluralsight] Do not yet rely on userMayViewClip 2015-08-23 11:21:56 +06:00
Sergey M․
468083d2f5 [pluralsight] Remove unused const 2015-08-23 10:44:10 +06:00
Sergey M․
483fc223bb [pluralsight] Add extractor (Closes #6090) 2015-08-23 10:42:34 +06:00
Sergey M․
66ce97024d [soundcloud:user] Update tests 2015-08-22 06:30:00 +06:00
Jaime Marquínez Ferrándiz
8c97f81943 [common] Follow convention of using 'cls' in classmethods 2015-08-21 11:35:51 +02:00
Yen Chi Hsuan
d7c1630570 [rtl2] Remove MD5 checksums 2015-08-21 13:21:21 +08:00
Yen Chi Hsuan
5e1a5ac8de [rtl2] Fix extraction for test_RTL2_1 2015-08-21 13:20:51 +08:00
Yen Chi Hsuan
9eb4ab6ad9 [rtl2] Remove an unused line 2015-08-21 13:04:25 +08:00
Yen Chi Hsuan
4932a817a0 [rtl2] Add skip_download for test 2015-08-21 13:00:08 +08:00
Sergey M․
5d003e29b1 [rtp] Add skip_download for test 2015-08-21 08:56:05 +06:00
Sergey M․
dc95bd503e [folketinget] Add skip_download for test 2015-08-21 08:54:28 +06:00
Yen Chi Hsuan
f738dd7b7c [common] Remove debugging codes 2015-08-21 01:43:22 +08:00
Yen Chi Hsuan
f908b74fa3 [test/subtitles] Add test for ThePlatformFeedIE 2015-08-21 01:38:57 +08:00
Yen Chi Hsuan
c687ac745b [theplatform] Use subtitles from SMIL, too 2015-08-21 01:37:43 +08:00
Yen Chi Hsuan
912e0b7e46 [common] Add _merge_subtitles() 2015-08-21 01:37:07 +08:00
Yen Chi Hsuan
03bc7237ad [common] _parse_smil_subtitles: accept lang as the subtitle language 2015-08-20 23:18:58 +08:00
Yen Chi Hsuan
dd565ac1ad [theplatform] Use _download_json 2015-08-20 03:07:04 +08:00
Sergey M․
5cdefc4625 [extractor/common] Add more subtitle mime types for guess when ext is missing 2015-08-20 01:02:50 +06:00
Sergey M․
ce00af8767 [extractor/common] Add default subtitles lang 2015-08-20 00:56:17 +06:00
Yen Chi Hsuan
51047444aa Merge branch 'master' into HEAD 2015-08-20 01:56:08 +08:00
Yen Chi Hsuan
aa6cd05ed8 [theplatform] Fix Python 2: declare coding 2015-08-20 01:47:55 +08:00
Yen Chi Hsuan
dac14bf311 [nbc] Add MSNBCIE 2015-08-20 01:41:18 +08:00
Yen Chi Hsuan
05fe2594e4 [theplatform] Support URLs with 'guid=' 2015-08-20 01:38:39 +08:00
Yen Chi Hsuan
26e1c3514f [theplatform] Add ThePlatformFeedIE 2015-08-20 01:24:32 +08:00
Sergey M․
22c83245c5 [mwave] Improve 2015-08-19 23:07:41 +06:00
ping
7900aede14 [mwave] New extractor for mwave.interest.me 2015-08-19 22:40:40 +06:00
Yen Chi Hsuan
f877c6ae5a [theplatform] Use InfoExtractor._parse_smil_formats() 2015-08-19 23:11:25 +08:00
Sergey M․
ca681f7041 [videobam] Remove extractor
videobam.com redirects to sendvid.com now
2015-08-19 20:52:36 +06:00
Sergey M․
a01da8bbf8 [crunchyroll] Workaround fplive.net rtmp URLs (Closes #5881) 2015-08-18 23:02:57 +06:00
Sergey M․
f3a65d9636 [travis] Move to new infrastructure
We don't use rtmpdump in tests anyway
2015-08-18 21:10:52 +06:00
Sergey M․
559f4c550f [playtvak] PEP 8 2015-08-18 20:27:58 +06:00
Sergey M․
03c635a4b5 [twitch] Fix login (Closes #6599) 2015-08-18 20:26:45 +06:00
Sergey M․
34a4cd0a34 [telecinco] Relax _VALID_URL (Closes #6601) 2015-08-18 20:02:56 +06:00
Philipp Hagemeister
3b9b32f404 [libsyn] Strip options from player URL 2015-08-18 13:02:41 +02:00
Sergey M․
9c724a9802 [twitch:vod] Add test for #6585 2015-08-17 20:23:52 +06:00
Sergey M․
7a6e8a1b17 [twitch] Make more robust 2015-08-17 20:20:04 +06:00
Sergey M․
369c12e038 [twitch] Allow untitled videos (Closes #6585) 2015-08-17 20:16:43 +06:00
23 changed files with 728 additions and 289 deletions

View File: .travis.yml

@@ -5,9 +5,7 @@ python:
   - "3.2"
   - "3.3"
   - "3.4"
-before_install:
-  - sudo apt-get update -qq
-  - sudo apt-get install -yqq rtmpdump
+sudo: false
 script: nosetests test --verbose
 notifications:
   email:

View File: docs/supportedsites.md

@@ -301,6 +301,7 @@
 - **Moviezine**
 - **movshare**: MovShare
 - **MPORA**
+- **MSNBC**
 - **MTV**
 - **mtviggy.com**
 - **mtvservices:embedded**
@@ -308,6 +309,7 @@
 - **MusicPlayOn**
 - **MusicVault**
 - **muzu.tv**
+- **Mwave**
 - **MySpace**
 - **MySpace:album**
 - **MySpass**
@@ -392,6 +394,8 @@
 - **Playtvak**: Playtvak.cz, iDNES.cz and Lidovky.cz
 - **Playvid**
 - **Playwire**
+- **pluralsight**
+- **pluralsight:course**
 - **plus.google**: Google Plus
 - **pluzz.francetv.fr**
 - **podomatic**
@@ -534,6 +538,7 @@
 - **TF1**
 - **TheOnion**
 - **ThePlatform**
+- **ThePlatformFeed**
 - **TheSixtyOne**
 - **ThisAmericanLife**
 - **ThisAV**
@@ -599,7 +604,6 @@
 - **Viddler**
 - **video.google:search**: Google Video search
 - **video.mit.edu**
-- **VideoBam**
 - **VideoDetective**
 - **videofy.me**
 - **videolectures.net**

View File: test/test_subtitles.py

@@ -25,6 +25,7 @@ from youtube_dl.extractor import (
     RaiIE,
     VikiIE,
     ThePlatformIE,
+    ThePlatformFeedIE,
     RTVEALaCartaIE,
     FunnyOrDieIE,
 )
@@ -307,6 +308,18 @@ class TestThePlatformSubtitles(BaseTestSubtitles):
         self.assertEqual(md5(subtitles['en']), '97e7670cbae3c4d26ae8bcc7fdd78d4b')
+
+
+class TestThePlatformFeedSubtitles(BaseTestSubtitles):
+    url = 'http://feed.theplatform.com/f/7wvmTC/msnbc_video-p-test?form=json&pretty=true&range=-40&byGuid=n_hardball_5biden_140207'
+    IE = ThePlatformFeedIE
+
+    def test_allsubtitles(self):
+        self.DL.params['writesubtitles'] = True
+        self.DL.params['allsubtitles'] = True
+        subtitles = self.getSubtitles()
+        self.assertEqual(set(subtitles.keys()), set(['en']))
+        self.assertEqual(md5(subtitles['en']), '48649a22e82b2da21c9a67a395eedade')
+
 class TestRtveSubtitles(BaseTestSubtitles):
     url = 'http://www.rtve.es/alacarta/videos/los-misterios-de-laura/misterios-laura-capitulo-32-misterio-del-numero-17-2-parte/2428621/'
     IE = RTVEALaCartaIE

View File: youtube_dl/extractor/__init__.py

@@ -345,6 +345,7 @@ from .muenchentv import MuenchenTVIE
 from .musicplayon import MusicPlayOnIE
 from .musicvault import MusicVaultIE
 from .muzu import MuzuTVIE
+from .mwave import MwaveIE
 from .myspace import MySpaceIE, MySpaceAlbumIE
 from .myspass import MySpassIE
 from .myvi import MyviIE
@@ -358,6 +359,7 @@ from .nbc import (
     NBCNewsIE,
     NBCSportsIE,
     NBCSportsVPlayerIE,
+    MSNBCIE,
 )
 from .ndr import (
     NDRIE,
@@ -452,6 +454,10 @@ from .playfm import PlayFMIE
 from .playtvak import PlaytvakIE
 from .playvid import PlayvidIE
 from .playwire import PlaywireIE
+from .pluralsight import (
+    PluralsightIE,
+    PluralsightCourseIE,
+)
 from .podomatic import PodomaticIE
 from .porn91 import Porn91IE
 from .pornhd import PornHdIE
@@ -612,7 +618,10 @@ from .testurl import TestURLIE
 from .testtube import TestTubeIE
 from .tf1 import TF1IE
 from .theonion import TheOnionIE
-from .theplatform import ThePlatformIE
+from .theplatform import (
+    ThePlatformIE,
+    ThePlatformFeedIE,
+)
 from .thesixtyone import TheSixtyOneIE
 from .thisamericanlife import ThisAmericanLifeIE
 from .thisav import ThisAVIE
@@ -696,7 +705,6 @@ from .vgtv import (
 from .vh1 import VH1IE
 from .vice import ViceIE
 from .viddler import ViddlerIE
-from .videobam import VideoBamIE
 from .videodetective import VideoDetectiveIE
 from .videolecturesnet import VideoLecturesNetIE
 from .videofyme import VideofyMeIE

View File: youtube_dl/extractor/common.py

@@ -1052,7 +1052,7 @@ class InfoExtractor(object):
         return self._search_regex(
             r'(?i)^{([^}]+)?}smil$', smil.tag, 'namespace', default=None)
 
-    def _parse_smil_formats(self, smil, smil_url, video_id, namespace=None, f4m_params=None):
+    def _parse_smil_formats(self, smil, smil_url, video_id, namespace=None, f4m_params=None, transform_rtmp_url=None):
         base = smil_url
         for meta in smil.findall(self._xpath_ns('./head/meta', namespace)):
             b = meta.get('base') or meta.get('httpBase')
@@ -1091,6 +1091,12 @@
                     'width': width,
                     'height': height,
                 })
+                if transform_rtmp_url:
+                    streamer, src = transform_rtmp_url(streamer, src)
+                    formats[-1].update({
+                        'url': streamer,
+                        'play_path': src,
+                    })
                 continue
 
             src_url = src if src.startswith('http') else compat_urlparse.urljoin(base, src)
@@ -1129,7 +1135,7 @@
 
         return formats
 
-    def _parse_smil_subtitles(self, smil, namespace=None):
+    def _parse_smil_subtitles(self, smil, namespace=None, subtitles_lang='en'):
         subtitles = {}
         for num, textstream in enumerate(smil.findall(self._xpath_ns('.//textstream', namespace))):
             src = textstream.get('src')
@@ -1138,9 +1144,14 @@
             ext = textstream.get('ext') or determine_ext(src)
             if not ext:
                 type_ = textstream.get('type')
-                if type_ == 'text/srt':
-                    ext = 'srt'
-            lang = textstream.get('systemLanguage') or textstream.get('systemLanguageName')
+                SUBTITLES_TYPES = {
+                    'text/vtt': 'vtt',
+                    'text/srt': 'srt',
+                    'application/smptett+xml': 'tt',
+                }
+                if type_ in SUBTITLES_TYPES:
+                    ext = SUBTITLES_TYPES[type_]
+            lang = textstream.get('systemLanguage') or textstream.get('systemLanguageName') or textstream.get('lang') or subtitles_lang
             subtitles.setdefault(lang, []).append({
                 'url': src,
                 'ext': ext,
@@ -1268,6 +1279,23 @@
     def _get_subtitles(self, *args, **kwargs):
         raise NotImplementedError("This method must be implemented by subclasses")
 
+    @staticmethod
+    def _merge_subtitle_items(subtitle_list1, subtitle_list2):
+        """ Merge subtitle items for one language. Items with duplicated URLs
+        will be dropped. """
+        list1_urls = set([item['url'] for item in subtitle_list1])
+        ret = list(subtitle_list1)
+        ret.extend([item for item in subtitle_list2 if item['url'] not in list1_urls])
+        return ret
+
+    @classmethod
+    def _merge_subtitles(cls, subtitle_dict1, subtitle_dict2):
+        """ Merge two subtitle dictionaries, language by language. """
+        ret = dict(subtitle_dict1)
+        for lang in subtitle_dict2:
+            ret[lang] = cls._merge_subtitle_items(subtitle_dict1.get(lang, []), subtitle_dict2[lang])
+        return ret
+
     def extract_automatic_captions(self, *args, **kwargs):
         if (self._downloader.params.get('writeautomaticsub', False) or
                 self._downloader.params.get('listsubtitles')):
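A quick illustration of the two merge helpers added above (a standalone sketch; the subtitle data is made up, only `_merge_subtitles()`/`_merge_subtitle_items()` come from the diff):

```python
from youtube_dl.extractor.common import InfoExtractor

# Hypothetical subtitle dicts: one parsed from SMIL, one from a metadata API.
smil_subs = {'en': [{'url': 'http://example.com/subs/a.srt', 'ext': 'srt'}]}
api_subs = {
    'en': [
        {'url': 'http://example.com/subs/a.srt', 'ext': 'srt'},  # duplicate URL, dropped
        {'url': 'http://example.com/subs/a.vtt', 'ext': 'vtt'},
    ],
    'de': [{'url': 'http://example.com/subs/b.ttml', 'ext': 'ttml'}],
}

merged = InfoExtractor._merge_subtitles(smil_subs, api_subs)
# merged['en'] keeps a.srt once and gains a.vtt; merged['de'] is taken as-is.
```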

View File: youtube_dl/extractor/crunchyroll.py

@@ -14,11 +14,13 @@ from ..compat import (
     compat_urllib_parse,
     compat_urllib_parse_unquote,
     compat_urllib_request,
+    compat_urlparse,
 )
 from ..utils import (
     ExtractorError,
     bytes_to_intlist,
     intlist_to_bytes,
+    remove_end,
     unified_strdate,
     urlencode_postdata,
 )
@@ -279,6 +281,20 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
             stream_info = streamdata.find('./{default}preload/stream_info')
             video_url = stream_info.find('./host').text
             video_play_path = stream_info.find('./file').text
+
+            if '.fplive.net/' in video_url:
+                video_url = re.sub(r'^rtmpe?://', 'http://', video_url.strip())
+                parsed_video_url = compat_urlparse.urlparse(video_url)
+                direct_video_url = compat_urlparse.urlunparse(parsed_video_url._replace(
+                    netloc='v.lvlt.crcdn.net',
+                    path='%s/%s' % (remove_end(parsed_video_url.path, '/'), video_play_path.split(':')[-1])))
+                if self._is_valid_url(direct_video_url, video_id, video_format):
+                    formats.append({
+                        'url': direct_video_url,
+                        'format_id': video_format,
+                    })
+                    continue
+
             formats.append({
                 'url': video_url,
                 'play_path': video_play_path,
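The workaround above rewrites an fplive.net RTMP host plus play path into a direct HTTP URL before falling back to RTMP. The same transformation in isolation (host and play path below are illustrative; the stdlib `urllib.parse` stands in for youtube-dl's compat wrappers):

```python
import re
from urllib.parse import urlparse, urlunparse

def fplive_to_direct_url(video_url, video_play_path):
    # rtmp(e)://...fplive.net/<app>/ -> http://v.lvlt.crcdn.net/<app>/<file>
    video_url = re.sub(r'^rtmpe?://', 'http://', video_url.strip())
    parsed = urlparse(video_url)
    path = parsed.path[:-1] if parsed.path.endswith('/') else parsed.path
    return urlunparse(parsed._replace(
        netloc='v.lvlt.crcdn.net',
        path='%s/%s' % (path, video_play_path.split(':')[-1])))

print(fplive_to_direct_url(
    'rtmpe://cp12345.edgefcs.net.fplive.net/cp12345/',  # illustrative host
    'mp4:c1/somefile_1080.mp4'))                        # illustrative play path
# -> http://v.lvlt.crcdn.net/cp12345/c1/somefile_1080.mp4
```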

View File: youtube_dl/extractor/folketinget.py

@@ -30,6 +30,10 @@ class FolketingetIE(InfoExtractor):
             'upload_date': '20141120',
             'duration': 3960,
         },
+        'params': {
+            # rtmp download
+            'skip_download': True,
+        },
     }
 
     def _real_extract(self, url):

View File: youtube_dl/extractor/kontrtube.py

@@ -4,7 +4,10 @@ from __future__ import unicode_literals
 import re
 
 from .common import InfoExtractor
-from ..utils import int_or_none
+from ..utils import (
+    int_or_none,
+    parse_duration,
+)
 
 
 class KontrTubeIE(InfoExtractor):
@@ -34,33 +37,28 @@ class KontrTubeIE(InfoExtractor):
         webpage = self._download_webpage(
             url, display_id, 'Downloading page')
 
-        video_url = self._html_search_regex(
+        video_url = self._search_regex(
             r"video_url\s*:\s*'(.+?)/?',", webpage, 'video URL')
-        thumbnail = self._html_search_regex(
-            r"preview_url\s*:\s*'(.+?)/?',", webpage, 'video thumbnail', fatal=False)
+        thumbnail = self._search_regex(
+            r"preview_url\s*:\s*'(.+?)/?',", webpage, 'thumbnail', fatal=False)
         title = self._html_search_regex(
-            r'<title>(.+?)</title>', webpage, 'video title')
+            r'(?s)<h2>(.+?)</h2>', webpage, 'title')
         description = self._html_search_meta(
-            'description', webpage, 'video description')
+            'description', webpage, 'description')
 
-        mobj = re.search(
-            r'<div class="col_2">Длительность: <span>(?P<minutes>\d+)м:(?P<seconds>\d+)с</span></div>',
-            webpage)
-        duration = int(mobj.group('minutes')) * 60 + int(mobj.group('seconds')) if mobj else None
+        duration = self._search_regex(
+            r'Длительность: <em>([^<]+)</em>', webpage, 'duration', fatal=False)
+        if duration:
+            duration = parse_duration(duration.replace('мин', 'min').replace('сек', 'sec'))
 
-        view_count = self._html_search_regex(
-            r'<div class="col_2">Просмотров: <span>(\d+)</span></div>',
+        view_count = self._search_regex(
+            r'Просмотров: <em>([^<]+)</em>',
             webpage, 'view count', fatal=False)
+        if view_count:
+            view_count = int_or_none(view_count.replace(' ', ''))
 
-        comment_count = None
-        comment_str = self._html_search_regex(
-            r'Комментарии: <span>([^<]+)</span>', webpage, 'comment count', fatal=False)
-        if comment_str.startswith('комментариев нет'):
-            comment_count = 0
-        else:
-            mobj = re.search(r'\d+ из (?P<total>\d+) комментариев', comment_str)
-            if mobj:
-                comment_count = mobj.group('total')
+        comment_count = int_or_none(self._search_regex(
+            r'Комментарии \((\d+)\)<', webpage, 'comment count', fatal=False))
 
         return {
             'id': video_id,
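The duration fix relies on `parse_duration()` understanding English unit names, so the Russian units are mapped first. A minimal check (the raw string is an illustrative page value):

```python
from youtube_dl.utils import parse_duration

raw = '4 мин 41 сек'  # illustrative value scraped from the page
duration = parse_duration(raw.replace('мин', 'min').replace('сек', 'sec'))
print(duration)  # 281.0 (seconds)
```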

View File: youtube_dl/extractor/libsyn.py

@@ -8,9 +8,9 @@ from ..utils import unified_strdate
 
 class LibsynIE(InfoExtractor):
-    _VALID_URL = r'https?://html5-player\.libsyn\.com/embed/episode/id/(?P<id>[0-9]+)'
+    _VALID_URL = r'(?P<mainurl>https?://html5-player\.libsyn\.com/embed/episode/id/(?P<id>[0-9]+))'
 
-    _TEST = {
+    _TESTS = [{
         'url': 'http://html5-player.libsyn.com/embed/episode/id/3377616/',
         'md5': '443360ee1b58007bc3dcf09b41d093bb',
         'info_dict': {
@@ -19,12 +19,24 @@ class LibsynIE(InfoExtractor):
             'title': "The Daily Show Podcast without Jon Stewart - Episode 12: Bassem Youssef: Egypt's Jon Stewart",
             'description': 'md5:601cb790edd05908957dae8aaa866465',
             'upload_date': '20150220',
+            'thumbnail': 're:^https?://.*',
         },
-    }
+    }, {
+        'url': 'https://html5-player.libsyn.com/embed/episode/id/3727166/height/75/width/200/theme/standard/direction/no/autoplay/no/autonext/no/thumbnail/no/preload/no/no_addthis/no/',
+        'md5': '6c5cb21acd622d754d3b1a92b582ce42',
+        'info_dict': {
+            'id': '3727166',
+            'ext': 'mp3',
+            'title': 'Clients From Hell Podcast - How a Sex Toy Company Kickstarted my Freelance Career',
+            'upload_date': '20150818',
+            'thumbnail': 're:^https?://.*',
+        }
+    }]
 
     def _real_extract(self, url):
-        video_id = self._match_id(url)
+        m = re.match(self._VALID_URL, url)
+        video_id = m.group('id')
+        url = m.group('mainurl')
         webpage = self._download_webpage(url, video_id)
 
         formats = [{
@@ -32,20 +44,18 @@ class LibsynIE(InfoExtractor):
         } for media_url in set(re.findall('var\s+mediaURL(?:Libsyn)?\s*=\s*"([^"]+)"', webpage))]
 
         podcast_title = self._search_regex(
-            r'<h2>([^<]+)</h2>', webpage, 'title')
+            r'<h2>([^<]+)</h2>', webpage, 'podcast title', default=None)
         episode_title = self._search_regex(
-            r'<h3>([^<]+)</h3>', webpage, 'title', default=None)
+            r'(?:<div class="episode-title">|<h3>)([^<]+)</', webpage, 'episode title')
 
         title = '%s - %s' % (podcast_title, episode_title) if podcast_title else episode_title
 
         description = self._html_search_regex(
             r'<div id="info_text_body">(.+?)</div>', webpage,
-            'description', fatal=False)
+            'description', default=None)
         thumbnail = self._search_regex(
             r'<img[^>]+class="info-show-icon"[^>]+src="([^"]+)"',
             webpage, 'thumbnail', fatal=False)
         release_date = unified_strdate(self._search_regex(
             r'<div class="release_date">Released: ([^<]+)<', webpage, 'release date', fatal=False))

View File: youtube_dl/extractor/mwave.py

@@ -0,0 +1,58 @@
from __future__ import unicode_literals
from .common import InfoExtractor
from ..compat import compat_str
from ..utils import (
int_or_none,
parse_duration,
)
class MwaveIE(InfoExtractor):
_VALID_URL = r'https?://mwave\.interest\.me/mnettv/videodetail\.m\?searchVideoDetailVO\.clip_id=(?P<id>[0-9]+)'
_TEST = {
'url': 'http://mwave.interest.me/mnettv/videodetail.m?searchVideoDetailVO.clip_id=168859',
'md5': 'c930e27b7720aaa3c9d0018dfc8ff6cc',
'info_dict': {
'id': '168859',
'ext': 'flv',
'title': '[M COUNTDOWN] SISTAR - SHAKE IT',
'thumbnail': 're:^https?://.*\.jpg$',
'uploader': 'M COUNTDOWN',
'duration': 206,
'view_count': int,
}
}
def _real_extract(self, url):
video_id = self._match_id(url)
vod_info = self._download_json(
'http://mwave.interest.me/onair/vod_info.m?vodtype=CL&sectorid=&endinfo=Y&id=%s' % video_id,
video_id, 'Download vod JSON')
formats = []
for num, cdn_info in enumerate(vod_info['cdn']):
stream_url = cdn_info.get('url')
if not stream_url:
continue
stream_name = cdn_info.get('name') or compat_str(num)
f4m_stream = self._download_json(
stream_url, video_id,
'Download %s stream JSON' % stream_name)
f4m_url = f4m_stream.get('fileurl')
if not f4m_url:
continue
formats.extend(
self._extract_f4m_formats(f4m_url + '&hdcore=3.0.3', video_id, f4m_id=stream_name))
self._sort_formats(formats)
return {
'id': video_id,
'title': vod_info['title'],
'thumbnail': vod_info.get('cover'),
'uploader': vod_info.get('program_title'),
'duration': parse_duration(vod_info.get('time')),
'view_count': int_or_none(vod_info.get('hit')),
'formats': formats,
}
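A quick way to exercise the new extractor through the embedding API (`YoutubeDL` and `extract_info` are the standard entry points; the URL is the test URL from above):

```python
import youtube_dl

ydl = youtube_dl.YoutubeDL()
# download=False: resolve metadata and formats only, don't fetch the video.
info = ydl.extract_info(
    'http://mwave.interest.me/mnettv/videodetail.m?searchVideoDetailVO.clip_id=168859',
    download=False)
print(info['title'], info.get('duration'), len(info['formats']))
```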

View File: youtube_dl/extractor/nbc.py

@@ -236,3 +236,28 @@ class NBCNewsIE(InfoExtractor):
                 'url': info['videoAssets'][-1]['publicUrl'],
                 'ie_key': 'ThePlatform',
             }
+
+
+class MSNBCIE(InfoExtractor):
+    # https URLs redirect to corresponding http ones
+    _VALID_URL = r'http://www\.msnbc\.com/[^/]+/watch/(?P<id>[^/]+)'
+    _TEST = {
+        'url': 'http://www.msnbc.com/all-in-with-chris-hayes/watch/the-chaotic-gop-immigration-vote-314487875924',
+        'md5': '6d236bf4f3dddc226633ce6e2c3f814d',
+        'info_dict': {
+            'id': 'n_hayes_Aimm_140207_272214',
+            'ext': 'mp4',
+            'title': 'The chaotic GOP immigration vote',
+            'description': 'The Republican House votes on a border bill that has no chance of getting through the Senate or signed by the President and is drawing criticism from all sides.',
+            'thumbnail': 're:^https?://.*\.jpg$',
+            'timestamp': 1406937606,
+            'upload_date': '20140802',
+            'categories': ['MSNBC/Topics/Franchise/Best of last night', 'MSNBC/Topics/General/Congress'],
+        },
+    }
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+        webpage = self._download_webpage(url, video_id)
+        embed_url = self._html_search_meta('embedURL', webpage)
+        return self.url_result(embed_url)

View File: youtube_dl/extractor/playtvak.py

@@ -106,7 +106,7 @@ class PlaytvakIE(InfoExtractor):
         })
 
         info_url = compat_urlparse.urlunparse(
-            parsed_url._replace(query = compat_urllib_parse.urlencode(qs, True)))
+            parsed_url._replace(query=compat_urllib_parse.urlencode(qs, True)))
 
         json_info = self._download_json(
             info_url, video_id,

View File: youtube_dl/extractor/pluralsight.py

@@ -0,0 +1,209 @@
from __future__ import unicode_literals
import re
import json
from .common import InfoExtractor
from ..compat import (
compat_str,
compat_urllib_parse,
compat_urllib_request,
compat_urlparse,
)
from ..utils import (
ExtractorError,
int_or_none,
parse_duration,
)
class PluralsightIE(InfoExtractor):
IE_NAME = 'pluralsight'
_VALID_URL = r'https?://(?:www\.)?pluralsight\.com/training/player\?author=(?P<author>[^&]+)&name=(?P<name>[^&]+)(?:&mode=live)?&clip=(?P<clip>\d+)&course=(?P<course>[^&]+)'
_LOGIN_URL = 'https://www.pluralsight.com/id/'
_NETRC_MACHINE = 'pluralsight'
_TEST = {
'url': 'http://www.pluralsight.com/training/player?author=mike-mckeown&name=hosting-sql-server-windows-azure-iaas-m7-mgmt&mode=live&clip=3&course=hosting-sql-server-windows-azure-iaas',
'md5': '4d458cf5cf4c593788672419a8dd4cf8',
'info_dict': {
'id': 'hosting-sql-server-windows-azure-iaas-m7-mgmt-04',
'ext': 'mp4',
'title': 'Management of SQL Server - Demo Monitoring',
'duration': 338,
},
'skip': 'Requires pluralsight account credentials',
}
def _real_initialize(self):
self._login()
def _login(self):
(username, password) = self._get_login_info()
if username is None:
raise ExtractorError(
'Pluralsight account is required, use --username and --password options to provide account credentials.',
expected=True)
login_page = self._download_webpage(
self._LOGIN_URL, None, 'Downloading login page')
login_form = self._hidden_inputs(login_page)
login_form.update({
'Username': username.encode('utf-8'),
'Password': password.encode('utf-8'),
})
post_url = self._search_regex(
r'<form[^>]+action=(["\'])(?P<url>.+?)\1', login_page,
'post url', default=self._LOGIN_URL, group='url')
if not post_url.startswith('http'):
post_url = compat_urlparse.urljoin(self._LOGIN_URL, post_url)
request = compat_urllib_request.Request(
post_url, compat_urllib_parse.urlencode(login_form).encode('utf-8'))
request.add_header('Content-Type', 'application/x-www-form-urlencoded')
response = self._download_webpage(
request, None, 'Logging in as %s' % username)
error = self._search_regex(
r'<span[^>]+class="field-validation-error"[^>]*>([^<]+)</span>',
response, 'error message', default=None)
if error:
raise ExtractorError('Unable to login: %s' % error, expected=True)
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
author = mobj.group('author')
name = mobj.group('name')
clip_id = mobj.group('clip')
course = mobj.group('course')
display_id = '%s-%s' % (name, clip_id)
webpage = self._download_webpage(url, display_id)
collection = self._parse_json(
self._search_regex(
r'moduleCollection\s*:\s*new\s+ModuleCollection\((\[.+?\])\s*,\s*\$rootScope\)',
webpage, 'modules'),
display_id)
module, clip = None, None
for module_ in collection:
if module_.get('moduleName') == name:
module = module_
for clip_ in module_.get('clips', []):
clip_index = clip_.get('clipIndex')
if clip_index is None:
continue
if compat_str(clip_index) == clip_id:
clip = clip_
break
if not clip:
raise ExtractorError('Unable to resolve clip')
QUALITIES = {
'low': {'width': 640, 'height': 480},
'medium': {'width': 848, 'height': 640},
'high': {'width': 1024, 'height': 768},
}
ALLOWED_QUALITIES = (
('webm', ('high',)),
('mp4', ('low', 'medium', 'high',)),
)
formats = []
for ext, qualities in ALLOWED_QUALITIES:
for quality in qualities:
f = QUALITIES[quality].copy()
clip_post = {
'a': author,
'cap': 'false',
'cn': clip_id,
'course': course,
'lc': 'en',
'm': name,
'mt': ext,
'q': '%dx%d' % (f['width'], f['height']),
}
request = compat_urllib_request.Request(
'http://www.pluralsight.com/training/Player/ViewClip',
json.dumps(clip_post).encode('utf-8'))
request.add_header('Content-Type', 'application/json;charset=utf-8')
format_id = '%s-%s' % (ext, quality)
clip_url = self._download_webpage(
request, display_id, 'Downloading %s URL' % format_id, fatal=False)
if not clip_url:
continue
f.update({
'url': clip_url,
'ext': ext,
'format_id': format_id,
})
formats.append(f)
self._sort_formats(formats)
# TODO: captions
# http://www.pluralsight.com/training/Player/ViewClip + cap = true
# or
# http://www.pluralsight.com/training/Player/Captions
# { a = author, cn = clip_id, lc = end, m = name }
return {
'id': clip['clipName'],
'title': '%s - %s' % (module['title'], clip['title']),
'duration': int_or_none(clip.get('duration')) or parse_duration(clip.get('formattedDuration')),
'creator': author,
'formats': formats
}
class PluralsightCourseIE(InfoExtractor):
IE_NAME = 'pluralsight:course'
_VALID_URL = r'https?://(?:www\.)?pluralsight\.com/courses/(?P<id>[^/]+)'
_TEST = {
# Free course from Pluralsight Starter Subscription for Microsoft TechNet
# https://offers.pluralsight.com/technet?loc=zTS3z&prod=zOTprodz&tech=zOttechz&prog=zOTprogz&type=zSOz&media=zOTmediaz&country=zUSz
'url': 'http://www.pluralsight.com/courses/hosting-sql-server-windows-azure-iaas',
'info_dict': {
'id': 'hosting-sql-server-windows-azure-iaas',
'title': 'Hosting SQL Server in Microsoft Azure IaaS Fundamentals',
'description': 'md5:61b37e60f21c4b2f91dc621a977d0986',
},
'playlist_count': 31,
}
def _real_extract(self, url):
course_id = self._match_id(url)
# TODO: PSM cookie
course = self._download_json(
'http://www.pluralsight.com/data/course/%s' % course_id,
course_id, 'Downloading course JSON')
title = course['title']
description = course.get('description') or course.get('shortDescription')
course_data = self._download_json(
'http://www.pluralsight.com/data/course/content/%s' % course_id,
course_id, 'Downloading course data JSON')
entries = []
for module in course_data:
for clip in module.get('clips', []):
player_parameters = clip.get('playerParameters')
if not player_parameters:
continue
entries.append(self.url_result(
'http://www.pluralsight.com/training/player?%s' % player_parameters,
'Pluralsight'))
return self.playlist_result(entries, course_id, title, description)
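Since the extractor refuses to run without credentials, they have to be supplied via the standard `username`/`password` options (or a `.netrc` entry for the `pluralsight` machine); a sketch with placeholder credentials:

```python
import youtube_dl

ydl_opts = {
    'username': 'user@example.com',  # placeholder account
    'password': 'hunter2',           # placeholder password
    'skip_download': True,           # exercise extraction only
}
with youtube_dl.YoutubeDL(ydl_opts) as ydl:
    ydl.download(['http://www.pluralsight.com/training/player'
                  '?author=mike-mckeown&name=hosting-sql-server-windows-azure-iaas-m7-mgmt'
                  '&mode=live&clip=3&course=hosting-sql-server-windows-azure-iaas'])
```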

View File: youtube_dl/extractor/rtl2.py

@@ -1,6 +1,7 @@
 # encoding: utf-8
 from __future__ import unicode_literals
 
+import re
 
 from .common import InfoExtractor
@@ -8,22 +9,28 @@ class RTL2IE(InfoExtractor):
     _VALID_URL = r'http?://(?:www\.)?rtl2\.de/[^?#]*?/(?P<id>[^?#/]*?)(?:$|/(?:$|[?#]))'
     _TESTS = [{
         'url': 'http://www.rtl2.de/sendung/grip-das-motormagazin/folge/folge-203-0',
-        'md5': 'bfcc179030535b08dc2b36b469b5adc7',
         'info_dict': {
             'id': 'folge-203-0',
             'ext': 'f4v',
             'title': 'GRIP sucht den Sommerkönig',
             'description': 'Matthias, Det und Helge treten gegeneinander an.'
         },
+        'params': {
+            # rtmp download
+            'skip_download': True,
+        },
     }, {
         'url': 'http://www.rtl2.de/sendung/koeln-50667/video/5512-anna/21040-anna-erwischt-alex/',
-        'md5': 'ffcd517d2805b57ce11a58a2980c2b02',
         'info_dict': {
             'id': '21040-anna-erwischt-alex',
             'ext': 'mp4',
             'title': 'Anna erwischt Alex!',
             'description': 'Anna ist Alex\' Tochter bei Köln 50667.'
         },
+        'params': {
+            # rtmp download
+            'skip_download': True,
+        },
     }]
 
     def _real_extract(self, url):
@@ -34,12 +41,18 @@ class RTL2IE(InfoExtractor):
         video_id = self._match_id(url)
         webpage = self._download_webpage(url, video_id)
 
-        vico_id = self._html_search_regex(
-            r'vico_id\s*:\s*([0-9]+)', webpage, 'vico_id')
-        vivi_id = self._html_search_regex(
-            r'vivi_id\s*:\s*([0-9]+)', webpage, 'vivi_id')
+        mobj = re.search(
+            r'<div[^>]+data-collection="(?P<vico_id>\d+)"[^>]+data-video="(?P<vivi_id>\d+)"',
+            webpage)
+        if mobj:
+            vico_id = mobj.group('vico_id')
+            vivi_id = mobj.group('vivi_id')
+        else:
+            vico_id = self._html_search_regex(
+                r'vico_id\s*:\s*([0-9]+)', webpage, 'vico_id')
+            vivi_id = self._html_search_regex(
+                r'vivi_id\s*:\s*([0-9]+)', webpage, 'vivi_id')
+
         info_url = 'http://www.rtl2.de/video/php/get_video.php?vico_id=' + vico_id + '&vivi_id=' + vivi_id
-        webpage = self._download_webpage(info_url, '')
 
         info = self._download_json(info_url, video_id)
         video_info = info['video']

View File: youtube_dl/extractor/rtp.py

@@ -18,6 +18,10 @@ class RTPIE(InfoExtractor):
             'description': 'As paixões musicais de António Cartaxo e António Macedo',
             'thumbnail': 're:^https?://.*\.jpg',
         },
+        'params': {
+            # rtmp download
+            'skip_download': True,
+        },
     }, {
         'url': 'http://www.rtp.pt/play/p831/a-quimica-das-coisas',
         'only_matching': True,

View File: youtube_dl/extractor/soundcloud.py

@@ -309,7 +309,7 @@ class SoundcloudUserIE(SoundcloudIE):
             'id': '114582580',
             'title': 'The Akashic Chronicler (All)',
         },
-        'playlist_mincount': 112,
+        'playlist_mincount': 111,
     }, {
         'url': 'https://soundcloud.com/the-akashic-chronicler/tracks',
         'info_dict': {
@@ -330,14 +330,14 @@ class SoundcloudUserIE(SoundcloudIE):
             'id': '114582580',
             'title': 'The Akashic Chronicler (Reposts)',
         },
-        'playlist_mincount': 9,
+        'playlist_mincount': 7,
     }, {
         'url': 'https://soundcloud.com/the-akashic-chronicler/likes',
         'info_dict': {
             'id': '114582580',
             'title': 'The Akashic Chronicler (Likes)',
         },
-        'playlist_mincount': 333,
+        'playlist_mincount': 321,
     }, {
         'url': 'https://soundcloud.com/grynpyret/spotlight',
         'info_dict': {

View File: youtube_dl/extractor/spankwire.py

@@ -16,8 +16,9 @@ from ..aes import aes_decrypt_text
 
 class SpankwireIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?(?P<url>spankwire\.com/[^/]*/video(?P<videoid>[0-9]+)/?)'
-    _TEST = {
+    _VALID_URL = r'https?://(?:www\.)?(?P<url>spankwire\.com/[^/]*/video(?P<id>[0-9]+)/?)'
+    _TESTS = [{
+        # download URL pattern: */<height>P_<tbr>K_<video_id>.mp4
         'url': 'http://www.spankwire.com/Buckcherry-s-X-Rated-Music-Video-Crazy-Bitch/video103545/',
         'md5': '8bbfde12b101204b39e4b9fe7eb67095',
         'info_dict': {
@@ -30,14 +31,27 @@
             'upload_date': '20070507',
             'age_limit': 18,
         }
-    }
+    }, {
+        # download URL pattern: */mp4_<format_id>_<video_id>.mp4
+        'url': 'http://www.spankwire.com/Titcums-Compiloation-I/video1921551/',
+        'md5': '09b3c20833308b736ae8902db2f8d7e6',
+        'info_dict': {
+            'id': '1921551',
+            'ext': 'mp4',
+            'title': 'Titcums Compiloation I',
+            'description': 'cum on tits',
+            'uploader': 'dannyh78999',
+            'uploader_id': '3056053',
+            'upload_date': '20150822',
+            'age_limit': 18,
+        },
+    }]
 
     def _real_extract(self, url):
         mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group('videoid')
-        url = 'http://www.' + mobj.group('url')
+        video_id = mobj.group('id')
 
-        req = compat_urllib_request.Request(url)
+        req = compat_urllib_request.Request('http://www.' + mobj.group('url'))
         req.add_header('Cookie', 'age_verified=1')
         webpage = self._download_webpage(req, video_id)
@@ -54,7 +68,7 @@ class SpankwireIE(InfoExtractor):
             r'by:\s*<a [^>]*>(.+?)</a>',
             webpage, 'uploader', fatal=False)
         uploader_id = self._html_search_regex(
-            r'by:\s*<a href="/Profile\.aspx\?.*?UserId=(\d+).*?"',
+            r'by:\s*<a href="/(?:user/viewProfile|Profile\.aspx)\?.*?UserId=(\d+).*?"',
             webpage, 'uploader id', fatal=False)
         upload_date = unified_strdate(self._html_search_regex(
             r'</a> on (.+?) at \d+:\d+',
@@ -67,9 +81,10 @@ class SpankwireIE(InfoExtractor):
             r'<span\s+id="spCommentCount"[^>]*>([\d,\.]+)</span>',
             webpage, 'comment count', fatal=False))
 
-        video_urls = list(map(
-            compat_urllib_parse_unquote,
-            re.findall(r'playerData\.cdnPath[0-9]{3,}\s*=\s*(?:encodeURIComponent\()?["\']([^"\']+)["\']', webpage)))
+        videos = re.findall(
+            r'playerData\.cdnPath([0-9]{3,})\s*=\s*(?:encodeURIComponent\()?["\']([^"\']+)["\']', webpage)
+        heights = [int(video[0]) for video in videos]
+        video_urls = list(map(compat_urllib_parse_unquote, [video[1] for video in videos]))
         if webpage.find('flashvars\.encrypted = "true"') != -1:
             password = self._search_regex(
                 r'flashvars\.video_title = "([^"]+)',
@@ -79,21 +94,22 @@ class SpankwireIE(InfoExtractor):
                 video_urls))
 
         formats = []
-        for video_url in video_urls:
+        for height, video_url in zip(heights, video_urls):
             path = compat_urllib_parse_urlparse(video_url).path
-            format = path.split('/')[4].split('_')[:2]
-            resolution, bitrate_str = format
-            format = "-".join(format)
-            height = int(resolution.rstrip('Pp'))
-            tbr = int(bitrate_str.rstrip('Kk'))
-            formats.append({
+            _, quality = path.split('/')[4].split('_')[:2]
+            f = {
                 'url': video_url,
-                'resolution': resolution,
-                'format': format,
-                'tbr': tbr,
                 'height': height,
-                'format_id': format,
-            })
+            }
+            tbr = self._search_regex(r'^(\d+)[Kk]$', quality, 'tbr', default=None)
+            if tbr:
+                f.update({
+                    'tbr': int(tbr),
+                    'format_id': '%dp' % height,
+                })
+            else:
+                f['format_id'] = quality
+            formats.append(f)
         self._sort_formats(formats)
 
         age_limit = self._rta_search(webpage)
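The reworked format loop keys everything off the path's second `_`-separated token; here it is as a standalone function (the sample URL and height are illustrative):

```python
import re
from urllib.parse import urlparse

def classify_format(video_url, height):
    # Second '_'-separated token is either '<tbr>K' (old CDN paths)
    # or an opaque quality label (new mp4_* paths); mirror the branch above.
    quality = urlparse(video_url).path.split('/')[4].split('_')[1]
    m = re.match(r'^(\d+)[Kk]$', quality)
    if m:
        return {'url': video_url, 'height': height,
                'tbr': int(m.group(1)), 'format_id': '%dp' % height}
    return {'url': video_url, 'height': height, 'format_id': quality}

print(classify_format('http://cdn.example.com/a/b/c/480P_2000K_103545.mp4', 480))
# {'url': ..., 'height': 480, 'tbr': 2000, 'format_id': '480p'}
```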

View File: youtube_dl/extractor/telecinco.py

@@ -6,7 +6,7 @@ from .mitele import MiTeleIE
 
 class TelecincoIE(MiTeleIE):
     IE_NAME = 'telecinco.es'
-    _VALID_URL = r'https?://www\.telecinco\.es/[^/]+/[^/]+/(?:[^/]+/)?(?P<id>.*?)\.html'
+    _VALID_URL = r'https?://www\.telecinco\.es/(?:[^/]+/)+(?P<id>.+?)\.html'
 
     _TESTS = [{
         'url': 'http://www.telecinco.es/robinfood/temporada-01/t01xp14/Bacalao-cocochas-pil-pil_0_1876350223.html',
@@ -23,4 +23,7 @@ class TelecincoIE(MiTeleIE):
     }, {
         'url': 'http://www.telecinco.es/informativos/nacional/Pablo_Iglesias-Informativos_Telecinco-entrevista-Pedro_Piqueras_2_1945155182.html',
         'only_matching': True,
+    }, {
+        'url': 'http://www.telecinco.es/espanasinirmaslejos/Espana-gran-destino-turistico_2_1240605043.html',
+        'only_matching': True,
     }]

View File: youtube_dl/extractor/theplatform.py

@@ -1,7 +1,7 @@
+# -*- coding: utf-8 -*-
 from __future__ import unicode_literals
 
 import re
-import json
 import time
 import hmac
 import binascii
@@ -10,7 +10,8 @@ import hashlib
 
 from .common import InfoExtractor
 from ..compat import (
     compat_str,
+    compat_parse_qs,
+    compat_urllib_parse_urlparse,
 )
 from ..utils import (
     determine_ext,
@@ -18,12 +19,69 @@ from ..utils import (
     xpath_with_ns,
     unsmuggle_url,
     int_or_none,
+    url_basename,
+    float_or_none,
 )
 
-_x = lambda p: xpath_with_ns(p, {'smil': 'http://www.w3.org/2005/SMIL21/Language'})
+default_ns = 'http://www.w3.org/2005/SMIL21/Language'
+_x = lambda p: xpath_with_ns(p, {'smil': default_ns})
 
-class ThePlatformIE(InfoExtractor):
+class ThePlatformBaseIE(InfoExtractor):
+    def _extract_theplatform_smil(self, smil_url, video_id, note='Downloading SMIL data'):
+        meta = self._download_xml(smil_url, video_id, note=note)
+        try:
+            error_msg = next(
+                n.attrib['abstract']
+                for n in meta.findall(_x('.//smil:ref'))
+                if n.attrib.get('title') == 'Geographic Restriction' or n.attrib.get('title') == 'Expired')
+        except StopIteration:
+            pass
+        else:
+            raise ExtractorError(error_msg, expected=True)
+
+        formats = self._parse_smil_formats(
+            meta, smil_url, video_id, namespace=default_ns,
+            # these parameters are from syfy.com; other sites may use different
+            # ones, but they also work for nbc.com
+            f4m_params={'g': 'UXWGVKRWHFSP', 'hdcore': '3.0.3'},
+            transform_rtmp_url=lambda streamer, src: (streamer, 'mp4:' + src))
+
+        for _format in formats:
+            ext = determine_ext(_format['url'])
+            if ext == 'once':
+                _format['ext'] = 'mp4'
+
+        self._sort_formats(formats)
+
+        subtitles = self._parse_smil_subtitles(meta, default_ns)
+
+        return formats, subtitles
+
+    def get_metadata(self, path, video_id):
+        info_url = 'http://link.theplatform.com/s/%s?format=preview' % path
+        info = self._download_json(info_url, video_id)
+
+        subtitles = {}
+        captions = info.get('captions')
+        if isinstance(captions, list):
+            for caption in captions:
+                lang, src, mime = caption.get('lang', 'en'), caption.get('src'), caption.get('type')
+                subtitles[lang] = [{
+                    'ext': 'srt' if mime == 'text/srt' else 'ttml',
+                    'url': src,
+                }]
+
+        return {
+            'title': info['title'],
+            'subtitles': subtitles,
+            'description': info['description'],
+            'thumbnail': info['defaultThumbnailUrl'],
+            'duration': int_or_none(info.get('duration'), 1000),
+        }
+
+
+class ThePlatformIE(ThePlatformBaseIE):
     _VALID_URL = r'''(?x)
         (?:https?://(?:link|player)\.theplatform\.com/[sp]/(?P<provider_id>[^/]+)/
            (?:(?P<media>(?:[^/]+/)+select/media/)|(?P<config>(?:[^/\?]+/(?:swf|config)|onsite)/select/))?
@@ -67,6 +125,20 @@ class ThePlatformIE(InfoExtractor):
     }, {
         'url': 'http://player.theplatform.com/p/NnzsPC/widget/select/media/4Y0TlYUr_ZT7',
         'only_matching': True,
+    }, {
+        'url': 'http://player.theplatform.com/p/2E2eJC/nbcNewsOffsite?guid=tdy_or_siri_150701',
+        'md5': '734f3790fb5fc4903da391beeebc4836',
+        'info_dict': {
+            'id': 'tdy_or_siri_150701',
+            'ext': 'mp4',
+            'title': 'iPhone Siri’s sassy response to a math question has people talking',
+            'description': 'md5:a565d1deadd5086f3331d57298ec6333',
+            'duration': 83.0,
+            'thumbnail': 're:^https?://.*\.jpg$',
+            'timestamp': 1435752600,
+            'upload_date': '20150701',
+            'categories': ['Today/Shows/Orange Room', 'Today/Sections/Money', 'Today/Topics/Tech', "Today/Topics/Editor's picks"],
+        },
     }]
 
     @staticmethod
@@ -101,6 +173,24 @@ class ThePlatformIE(InfoExtractor):
             path += '/media'
         path += '/' + video_id
 
+        qs_dict = compat_parse_qs(compat_urllib_parse_urlparse(url).query)
+        if 'guid' in qs_dict:
+            webpage = self._download_webpage(url, video_id)
+            scripts = re.findall(r'<script[^>]+src="([^"]+)"', webpage)
+            feed_id = None
+            # The feed id is usually located in the last script tag. There seems
+            # to be no reliable pattern for the relevant script filename, so try
+            # them one by one.
+            for script in reversed(scripts):
+                feed_script = self._download_webpage(script, video_id, 'Downloading feed script')
+                feed_id = self._search_regex(r'defaultFeedId\s*:\s*"([^"]+)"', feed_script, 'default feed id', default=None)
+                if feed_id is not None:
+                    break
+            if feed_id is None:
+                raise ExtractorError('Unable to find feed id')
+            return self.url_result('http://feed.theplatform.com/f/%s/%s?byGuid=%s' % (
+                provider_id, feed_id, qs_dict['guid'][0]))
+
         if smuggled_data.get('force_smil_url', False):
             smil_url = url
         elif mobj.group('config'):
@@ -120,95 +210,85 @@ class ThePlatformIE(InfoExtractor):
         if sig:
             smil_url = self._sign_url(smil_url, sig['key'], sig['secret'])
 
-        meta = self._download_xml(smil_url, video_id)
-        try:
-            error_msg = next(
-                n.attrib['abstract']
-                for n in meta.findall(_x('.//smil:ref'))
-                if n.attrib.get('title') == 'Geographic Restriction' or n.attrib.get('title') == 'Expired')
-        except StopIteration:
-            pass
-        else:
-            raise ExtractorError(error_msg, expected=True)
+        formats, subtitles = self._extract_theplatform_smil(smil_url, video_id)
 
-        info_url = 'http://link.theplatform.com/s/%s?format=preview' % path
-        info_json = self._download_webpage(info_url, video_id)
-        info = json.loads(info_json)
-
-        subtitles = {}
-        captions = info.get('captions')
-        if isinstance(captions, list):
-            for caption in captions:
-                lang, src, mime = caption.get('lang', 'en'), caption.get('src'), caption.get('type')
-                subtitles[lang] = [{
-                    'ext': 'srt' if mime == 'text/srt' else 'ttml',
-                    'url': src,
-                }]
-
-        head = meta.find(_x('smil:head'))
-        body = meta.find(_x('smil:body'))
-
-        f4m_node = body.find(_x('smil:seq//smil:video'))
-        if f4m_node is None:
-            f4m_node = body.find(_x('smil:seq/smil:video'))
-        if f4m_node is not None and '.f4m' in f4m_node.attrib['src']:
-            f4m_url = f4m_node.attrib['src']
-            if 'manifest.f4m?' not in f4m_url:
-                f4m_url += '?'
-            # the parameters are from syfy.com, other sites may use others,
-            # they also work for nbc.com
-            f4m_url += '&g=UXWGVKRWHFSP&hdcore=3.0.3'
-            formats = self._extract_f4m_formats(f4m_url, video_id)
-        else:
-            formats = []
-            switch = body.find(_x('smil:switch'))
-            if switch is None:
-                switch = body.find(_x('smil:par//smil:switch'))
-            if switch is None:
-                switch = body.find(_x('smil:par/smil:switch'))
-            if switch is None:
-                switch = body.find(_x('smil:par'))
-            if switch is not None:
-                base_url = head.find(_x('smil:meta')).attrib['base']
-                for f in switch.findall(_x('smil:video')):
-                    attr = f.attrib
-                    width = int_or_none(attr.get('width'))
-                    height = int_or_none(attr.get('height'))
-                    vbr = int_or_none(attr.get('system-bitrate'), 1000)
-                    format_id = '%dx%d_%dk' % (width, height, vbr)
-                    formats.append({
-                        'format_id': format_id,
-                        'url': base_url,
-                        'play_path': 'mp4:' + attr['src'],
-                        'ext': 'flv',
-                        'width': width,
-                        'height': height,
-                        'vbr': vbr,
-                    })
-            else:
-                switch = body.find(_x('smil:seq//smil:switch'))
-                if switch is None:
-                    switch = body.find(_x('smil:seq/smil:switch'))
-                for f in switch.findall(_x('smil:video')):
-                    attr = f.attrib
-                    vbr = int_or_none(attr.get('system-bitrate'), 1000)
-                    ext = determine_ext(attr['src'])
-                    if ext == 'once':
-                        ext = 'mp4'
-                    formats.append({
-                        'format_id': compat_str(vbr),
-                        'url': attr['src'],
-                        'vbr': vbr,
-                        'ext': ext,
-                    })
-        self._sort_formats(formats)
-
-        return {
+        ret = self.get_metadata(path, video_id)
+        combined_subtitles = self._merge_subtitles(ret.get('subtitles', {}), subtitles)
+        ret.update({
             'id': video_id,
-            'title': info['title'],
-            'subtitles': subtitles,
             'formats': formats,
-            'description': info['description'],
-            'thumbnail': info['defaultThumbnailUrl'],
-            'duration': int_or_none(info.get('duration'), 1000),
-        }
+            'subtitles': combined_subtitles,
+        })
+
+        return ret
+
+
+class ThePlatformFeedIE(ThePlatformBaseIE):
+    _URL_TEMPLATE = '%s//feed.theplatform.com/f/%s/%s?form=json&byGuid=%s'
+    _VALID_URL = r'https?://feed\.theplatform\.com/f/(?P<provider_id>[^/]+)/(?P<feed_id>[^?/]+)\?(?:[^&]+&)*byGuid=(?P<id>[a-zA-Z0-9_]+)'
+    _TEST = {
+        # From http://player.theplatform.com/p/7wvmTC/MSNBCEmbeddedOffSite?guid=n_hardball_5biden_140207
+        'url': 'http://feed.theplatform.com/f/7wvmTC/msnbc_video-p-test?form=json&pretty=true&range=-40&byGuid=n_hardball_5biden_140207',
+        'md5': '22d2b84f058d3586efcd99e57d59d314',
+        'info_dict': {
+            'id': 'n_hardball_5biden_140207',
+            'ext': 'mp4',
+            'title': 'The Biden factor: will Joe run in 2016?',
+            'description': 'Could Vice President Joe Biden be preparing a 2016 campaign? Mark Halperin and Sam Stein weigh in.',
+            'thumbnail': 're:^https?://.*\.jpg$',
+            'upload_date': '20140208',
+            'timestamp': 1391824260,
+            'duration': 467.0,
+            'categories': ['MSNBC/Issues/Democrats', 'MSNBC/Issues/Elections/Election 2016'],
+        },
+    }
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        video_id = mobj.group('id')
+        provider_id = mobj.group('provider_id')
+        feed_id = mobj.group('feed_id')
+
+        real_url = self._URL_TEMPLATE % (self.http_scheme(), provider_id, feed_id, video_id)
+        feed = self._download_json(real_url, video_id)
+        entry = feed['entries'][0]
+
+        formats = []
+        subtitles = {}
+        first_video_id = None
+        duration = None
+        for item in entry['media$content']:
+            smil_url = item['plfile$url'] + '&format=SMIL&Tracking=true&Embedded=true&formats=MPEG4,F4M'
+            cur_video_id = url_basename(smil_url)
+            if first_video_id is None:
+                first_video_id = cur_video_id
+                duration = float_or_none(item.get('plfile$duration'))
+            cur_formats, cur_subtitles = self._extract_theplatform_smil(smil_url, video_id, 'Downloading SMIL data for %s' % cur_video_id)
+            formats.extend(cur_formats)
+            subtitles = self._merge_subtitles(subtitles, cur_subtitles)
+
+        self._sort_formats(formats)
+
+        thumbnails = [{
+            'url': thumbnail['plfile$url'],
+            'width': int_or_none(thumbnail.get('plfile$width')),
+            'height': int_or_none(thumbnail.get('plfile$height')),
+        } for thumbnail in entry.get('media$thumbnails', [])]
+
+        timestamp = int_or_none(entry.get('media$availableDate'), scale=1000)
+        categories = [item['media$name'] for item in entry.get('media$categories', [])]
+
+        ret = self.get_metadata('%s/%s' % (provider_id, first_video_id), video_id)
+        subtitles = self._merge_subtitles(subtitles, ret['subtitles'])
+        ret.update({
+            'id': video_id,
+            'formats': formats,
+            'subtitles': subtitles,
+            'thumbnails': thumbnails,
+            'duration': duration,
+            'timestamp': timestamp,
+            'categories': categories,
+        })
+
+        return ret
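The `guid=` handling and the feed extractor meet in the middle: a player URL carrying a guid is translated into a feed URL that `ThePlatformFeedIE` then matches. The construction is plain string formatting (the IDs below are the ones from the tests above):

```python
provider_id = '7wvmTC'
feed_id = 'msnbc_video-p-test'
guid = 'n_hardball_5biden_140207'

feed_url = 'http://feed.theplatform.com/f/%s/%s?byGuid=%s' % (provider_id, feed_id, guid)
# ThePlatformIE.url_result() hands this URL to ThePlatformFeedIE, whose
# _VALID_URL captures provider_id, feed_id and the guid as the video id.
print(feed_url)
```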

View File: youtube_dl/extractor/twitch.py

@@ -12,9 +12,11 @@ from ..compat import (
     compat_urllib_parse,
     compat_urllib_parse_urlparse,
     compat_urllib_request,
+    compat_urlparse,
 )
 from ..utils import (
     ExtractorError,
+    int_or_none,
     parse_duration,
     parse_iso8601,
 )
@@ -26,7 +28,7 @@ class TwitchBaseIE(InfoExtractor):
     _API_BASE = 'https://api.twitch.tv'
     _USHER_BASE = 'http://usher.twitch.tv'
     _LOGIN_URL = 'https://secure.twitch.tv/login'
-    _LOGIN_POST_URL = 'https://passport.twitch.tv/authorize'
+    _LOGIN_POST_URL = 'https://passport.twitch.tv/authentications/new'
     _NETRC_MACHINE = 'twitch'
 
     def _handle_error(self, response):
@@ -69,8 +71,15 @@ class TwitchBaseIE(InfoExtractor):
             'password': password.encode('utf-8'),
         })
 
+        post_url = self._search_regex(
+            r'<form[^>]+action=(["\'])(?P<url>.+?)\1', login_page,
+            'post url', default=self._LOGIN_POST_URL, group='url')
+
+        if not post_url.startswith('http'):
+            post_url = compat_urlparse.urljoin(self._LOGIN_URL, post_url)
+
         request = compat_urllib_request.Request(
-            self._LOGIN_POST_URL, compat_urllib_parse.urlencode(login_form).encode('utf-8'))
+            post_url, compat_urllib_parse.urlencode(login_form).encode('utf-8'))
         request.add_header('Referer', self._LOGIN_URL)
         response = self._download_webpage(
             request, None, 'Logging in as %s' % username)
@@ -132,14 +141,14 @@ class TwitchItemBaseIE(TwitchBaseIE):
     def _extract_info(self, info):
         return {
             'id': info['_id'],
-            'title': info['title'],
-            'description': info['description'],
-            'duration': info['length'],
-            'thumbnail': info['preview'],
-            'uploader': info['channel']['display_name'],
-            'uploader_id': info['channel']['name'],
-            'timestamp': parse_iso8601(info['recorded_at']),
-            'view_count': info['views'],
+            'title': info.get('title') or 'Untitled Broadcast',
+            'description': info.get('description'),
+            'duration': int_or_none(info.get('length')),
+            'thumbnail': info.get('preview'),
+            'uploader': info.get('channel', {}).get('display_name'),
+            'uploader_id': info.get('channel', {}).get('name'),
+            'timestamp': parse_iso8601(info.get('recorded_at')),
+            'view_count': int_or_none(info.get('views')),
         }
 
     def _real_extract(self, url):
@@ -187,7 +196,7 @@ class TwitchVodIE(TwitchItemBaseIE):
     _ITEM_TYPE = 'vod'
     _ITEM_SHORTCUT = 'v'
 
-    _TEST = {
+    _TESTS = [{
         'url': 'http://www.twitch.tv/riotgames/v/6528877?t=5m10s',
         'info_dict': {
             'id': 'v6528877',
@@ -206,7 +215,26 @@ class TwitchVodIE(TwitchItemBaseIE):
             # m3u8 download
             'skip_download': True,
         },
-    }
+    }, {
+        # Untitled broadcast (title is None)
+        'url': 'http://www.twitch.tv/belkao_o/v/11230755',
+        'info_dict': {
+            'id': 'v11230755',
+            'ext': 'mp4',
+            'title': 'Untitled Broadcast',
+            'thumbnail': 're:^https?://.*\.jpg$',
+            'duration': 1638,
+            'timestamp': 1439746708,
+            'upload_date': '20150816',
+            'uploader': 'BelkAO_o',
+            'uploader_id': 'belkao_o',
+            'view_count': int,
+        },
+        'params': {
+            # m3u8 download
+            'skip_download': True,
+        },
+    }]
 
     def _real_extract(self, url):
        item_id = self._match_id(url)
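The login fix stops hard-coding the POST endpoint and instead reads it from the login form's action attribute, resolving relative actions against the login page. The same pattern in isolation (sample HTML is illustrative; the stdlib `urljoin` stands in for `compat_urlparse.urljoin`):

```python
import re
from urllib.parse import urljoin

LOGIN_URL = 'https://secure.twitch.tv/login'
FALLBACK_POST_URL = 'https://passport.twitch.tv/authentications/new'

def resolve_post_url(login_page):
    # Prefer the form's action attribute; fall back to the known endpoint.
    m = re.search(r'<form[^>]+action=(["\'])(?P<url>.+?)\1', login_page)
    post_url = m.group('url') if m else FALLBACK_POST_URL
    if not post_url.startswith('http'):
        post_url = urljoin(LOGIN_URL, post_url)
    return post_url

print(resolve_post_url('<form method="post" action="/authentications/new">'))
# -> https://secure.twitch.tv/authentications/new
```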

View File: youtube_dl/extractor/videobam.py

@@ -1,81 +0,0 @@
from __future__ import unicode_literals
import re
import json
from .common import InfoExtractor
from ..utils import int_or_none
class VideoBamIE(InfoExtractor):
_VALID_URL = r'http://(?:www\.)?videobam\.com/(?:videos/download/)?(?P<id>[a-zA-Z]+)'
_TESTS = [
{
'url': 'http://videobam.com/OiJQM',
'md5': 'db471f27763a531f10416a0c58b5a1e0',
'info_dict': {
'id': 'OiJQM',
'ext': 'mp4',
'title': 'Is Alcohol Worse Than Ecstasy?',
'description': 'md5:d25b96151515c91debc42bfbb3eb2683',
'uploader': 'frihetsvinge',
},
},
{
'url': 'http://videobam.com/pqLvq',
'md5': 'd9a565b5379a99126ef94e1d7f9a383e',
'note': 'HD video',
'info_dict': {
'id': 'pqLvq',
'ext': 'mp4',
'title': '_',
}
},
]
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id')
page = self._download_webpage('http://videobam.com/%s' % video_id, video_id, 'Downloading page')
formats = []
for preference, format_id in enumerate(['low', 'high']):
mobj = re.search(r"%s: '(?P<url>[^']+)'" % format_id, page)
if not mobj:
continue
formats.append({
'url': mobj.group('url'),
'ext': 'mp4',
'format_id': format_id,
'preference': preference,
})
if not formats:
player_config = json.loads(self._html_search_regex(r'var player_config = ({.+?});', page, 'player config'))
formats = [{
'url': item['url'],
'ext': 'mp4',
} for item in player_config['playlist'] if 'autoPlay' in item]
self._sort_formats(formats)
title = self._og_search_title(page, default='_', fatal=False)
description = self._og_search_description(page, default=None)
thumbnail = self._og_search_thumbnail(page)
uploader = self._html_search_regex(r'Upload by ([^<]+)</a>', page, 'uploader', fatal=False, default=None)
view_count = int_or_none(
self._html_search_regex(r'<strong>Views:</strong> (\d+) ', page, 'view count', fatal=False))
return {
'id': video_id,
'title': title,
'description': description,
'thumbnail': thumbnail,
'uploader': uploader,
'view_count': view_count,
'formats': formats,
'age_limit': 18,
}

View File: youtube_dl/extractor/yandexmusic.py

@@ -1,4 +1,4 @@
-# coding=utf-8
+# coding: utf-8
 from __future__ import unicode_literals
 
 import re
@@ -12,7 +12,23 @@
 )
 
 
-class YandexMusicBaseIE(InfoExtractor):
+class YandexMusicTrackIE(InfoExtractor):
+    IE_NAME = 'yandexmusic:track'
+    IE_DESC = 'Яндекс.Музыка - Трек'
+    _VALID_URL = r'https?://music\.yandex\.(?:ru|kz|ua|by)/album/(?P<album_id>\d+)/track/(?P<id>\d+)'
+
+    _TEST = {
+        'url': 'http://music.yandex.ru/album/540508/track/4878838',
+        'md5': 'f496818aa2f60b6c0062980d2e00dc20',
+        'info_dict': {
+            'id': '4878838',
+            'ext': 'mp3',
+            'title': 'Carlo Ambrosio - Gypsy Eyes 1',
+            'filesize': 4628061,
+            'duration': 193.04,
+        }
+    }
+
     def _get_track_url(self, storage_dir, track_id):
         data = self._download_json(
             'http://music.yandex.ru/api/v1.5/handlers/api-jsonp.jsx?action=getTrackSrc&p=download-info/%s'
@@ -35,24 +51,6 @@
             'duration': float_or_none(track.get('durationMs'), 1000),
         }
 
-
-class YandexMusicTrackIE(YandexMusicBaseIE):
-    IE_NAME = 'yandexmusic:track'
-    IE_DESC = 'Яндекс.Музыка - Трек'
-    _VALID_URL = r'https?://music\.yandex\.(?:ru|kz|ua|by)/album/(?P<album_id>\d+)/track/(?P<id>\d+)'
-
-    _TEST = {
-        'url': 'http://music.yandex.ru/album/540508/track/4878838',
-        'md5': 'f496818aa2f60b6c0062980d2e00dc20',
-        'info_dict': {
-            'id': '4878838',
-            'ext': 'mp3',
-            'title': 'Carlo Ambrosio - Gypsy Eyes 1',
-            'filesize': 4628061,
-            'duration': 193.04,
-        }
-    }
-
     def _real_extract(self, url):
         mobj = re.match(self._VALID_URL, url)
         album_id, track_id = mobj.group('album_id'), mobj.group('id')
@@ -64,7 +62,15 @@ class YandexMusicTrackIE(YandexMusicBaseIE):
         return self._get_track_info(track)
 
 
-class YandexMusicAlbumIE(YandexMusicBaseIE):
+class YandexMusicPlaylistBaseIE(InfoExtractor):
+    def _build_playlist(self, tracks):
+        return [
+            self.url_result(
+                'http://music.yandex.ru/album/%s/track/%s' % (track['albums'][0]['id'], track['id']))
+            for track in tracks]
+
+
+class YandexMusicAlbumIE(YandexMusicPlaylistBaseIE):
     IE_NAME = 'yandexmusic:album'
     IE_DESC = 'Яндекс.Музыка - Альбом'
     _VALID_URL = r'https?://music\.yandex\.(?:ru|kz|ua|by)/album/(?P<id>\d+)/?(\?|$)'
@@ -85,7 +91,7 @@ class YandexMusicAlbumIE(YandexMusicBaseIE):
             'http://music.yandex.ru/handlers/album.jsx?album=%s' % album_id,
             album_id, 'Downloading album JSON')
 
-        entries = [self._get_track_info(track) for track in album['volumes'][0]]
+        entries = self._build_playlist(album['volumes'][0])
 
         title = '%s - %s' % (album['artists'][0]['name'], album['title'])
         year = album.get('year')
@@ -95,7 +101,7 @@ class YandexMusicAlbumIE(YandexMusicBaseIE):
         return self.playlist_result(entries, compat_str(album['id']), title)
 
 
-class YandexMusicPlaylistIE(YandexMusicBaseIE):
+class YandexMusicPlaylistIE(YandexMusicPlaylistBaseIE):
     IE_NAME = 'yandexmusic:playlist'
     IE_DESC = 'Яндекс.Музыка - Плейлист'
     _VALID_URL = r'https?://music\.yandex\.(?:ru|kz|ua|by)/users/[^/]+/playlists/(?P<id>\d+)'
@@ -120,8 +126,7 @@ class YandexMusicPlaylistIE(YandexMusicBaseIE):
                 r'var\s+Mu\s*=\s*({.+?});\s*</script>', webpage, 'player'),
             playlist_id)['pageData']['playlist']
 
-        entries = [self._get_track_info(track) for track in playlist['tracks']]
-
         return self.playlist_result(
-            entries, compat_str(playlist_id),
+            self._build_playlist(playlist['tracks']),
+            compat_str(playlist_id),
             playlist['title'], playlist.get('description'))
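The fix for #6650 works by deferring: albums and playlists now emit cheap `url_result` references instead of fully resolved tracks, so the short-lived download link is fetched only when each track is actually downloaded. The shape of the change (the track payload below is illustrative):

```python
# Before: every download link resolved up front (links can expire in long queues).
# entries = [self._get_track_info(track) for track in playlist['tracks']]

# After: emit track-page URLs; YandexMusicTrackIE resolves each at download time.
tracks = [{'albums': [{'id': 540508}], 'id': 4878838}]  # illustrative payload
entries = [
    'http://music.yandex.ru/album/%s/track/%s' % (t['albums'][0]['id'], t['id'])
    for t in tracks
]
print(entries)  # ['http://music.yandex.ru/album/540508/track/4878838']
```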

View File: youtube_dl/version.py

@@ -1,3 +1,3 @@
 from __future__ import unicode_literals
 
-__version__ = '2015.08.16.1'
+__version__ = '2015.08.23'