release 2015.10.23

[crunchyroll] Improve subtitle regex (Closes #7262)
[bbc.co.uk:article] Add new extractor (#7257)
2015-10-23 09:33:05 +02:00 · 2015-10-22 20:34:11 +06:00 · 2015-10-22 21:13:03 +08:00 · 2015-10-22 21:12:29 +08:00 · 2015-10-22 17:47:11 +08:00 · 2015-10-21 23:57:23 +02:00
35 changed files with 736 additions and 251 deletions
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -114,12 +114,13 @@ If you want to add support for a new site, you can follow this quick list (assum
            webpage = self._download_webpage(url, video_id)

            # TODO more code goes here, for example ...
-            title = self._html_search_regex(r'<h1>(.*?)</h1>', webpage, 'title')
+            title = self._html_search_regex(r'<h1>(.+?)</h1>', webpage, 'title')

            return {
                'id': video_id,
                'title': title,
                'description': self._og_search_description(webpage),
+                'uploader': self._search_regex(r'<div[^>]+id="uploader"[^>]*>([^<]+)<', webpage, 'uploader', fatal=False),
                # TODO more properties (see youtube_dl/extractor/common.py)
            }
    ```
--- a/README.md
+++ b/README.md
@@ -710,12 +710,13 @@ If you want to add support for a new site, you can follow this quick list (assum
            webpage = self._download_webpage(url, video_id)

            # TODO more code goes here, for example ...
-            title = self._html_search_regex(r'<h1>(.*?)</h1>', webpage, 'title')
+            title = self._html_search_regex(r'<h1>(.+?)</h1>', webpage, 'title')

            return {
                'id': video_id,
                'title': title,
                'description': self._og_search_description(webpage),
+                'uploader': self._search_regex(r'<div[^>]+id="uploader"[^>]*>([^<]+)<', webpage, 'uploader', fatal=False),
                # TODO more properties (see youtube_dl/extractor/common.py)
            }
    ```
--- a/docs/supportedsites.md
+++ b/docs/supportedsites.md
@@ -53,6 +53,7 @@
 - **Bandcamp:album**
 - **bbc**: BBC
 - **bbc.co.uk**: BBC iPlayer
+ - **bbc.co.uk:article**: BBC articles
 - **BeatportPro**
 - **Beeg**
 - **BehindKink**
@@ -515,6 +516,7 @@
 - **SSA**
 - **stanfordoc**: Stanford Open ClassRoom
 - **Steam**
+ - **Stitcher**
 - **streamcloud.eu**
 - **StreamCZ**
 - **StreetVoice**
@@ -588,7 +590,8 @@
 - **twitch:stream**
 - **twitch:video**
 - **twitch:vod**
- - **TwitterCard**
+ - **twitter**
+ - **twitter:card**
 - **Ubu**
 - **udemy**
 - **udemy:course**
--- a/test/test_InfoExtractor.py
+++ b/test/test_InfoExtractor.py
@@ -37,12 +37,16 @@ class TestInfoExtractor(unittest.TestCase):
            <meta property='og:image' content='http://domain.com/pic.jpg?key1=val1&amp;key2=val2'/>
            <meta content='application/x-shockwave-flash' property='og:video:type'>
            <meta content='Foo' property=og:foobar>
+            <meta name="og:test1" content='foo > < bar'/>
+            <meta name="og:test2" content="foo >//< bar"/>
            '''
        self.assertEqual(ie._og_search_title(html), 'Foo')
        self.assertEqual(ie._og_search_description(html), 'Some video\'s description ')
        self.assertEqual(ie._og_search_thumbnail(html), 'http://domain.com/pic.jpg?key1=val1&key2=val2')
        self.assertEqual(ie._og_search_video_url(html, default=None), None)
        self.assertEqual(ie._og_search_property('foobar', html), 'Foo')
+        self.assertEqual(ie._og_search_property('test1', html), 'foo > < bar')
+        self.assertEqual(ie._og_search_property('test2', html), 'foo >//< bar')

    def test_html_search_meta(self):
        ie = self.ie
--- a/test/test_utils.py
+++ b/test/test_utils.py
@@ -495,6 +495,9 @@ class TestUtil(unittest.TestCase):
            "playlist":[{"controls":{"all":null}}]
        }''')

+        inp = '''"The CW\\'s \\'Crazy Ex-Girlfriend\\'"'''
+        self.assertEqual(js_to_json(inp), '''"The CW's 'Crazy Ex-Girlfriend'"''')
+
        inp = '"SAND Number: SAND 2013-7800P\\nPresenter: Tom Russo\\nHabanero Software Training - Xyce Software\\nXyce, Sandia\\u0027s"'
        json_code = js_to_json(inp)
        self.assertEqual(json.loads(json_code), json.loads(inp))
--- a/test/test_youtube_lists.py
+++ b/test/test_youtube_lists.py
@@ -57,5 +57,14 @@ class TestYoutubeLists(unittest.TestCase):
        entries = result['entries']
        self.assertEqual(len(entries), 100)

+    def test_youtube_flat_playlist_titles(self):
        # With 'extract_flat' enabled, every entry returned for the playlist
        # is expected to carry a non-empty title.
        dl = FakeYDL()
        dl.params['extract_flat'] = True
        ie = YoutubePlaylistIE(dl)
        result = ie.extract('https://www.youtube.com/playlist?list=PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re')
        self.assertIsPlaylist(result)
        # NOTE(review): an empty 'entries' list passes this loop vacuously;
        # consider also asserting that at least one entry was returned.
        for entry in result['entries']:
            self.assertTrue(entry.get('title'))
+
 if __name__ == '__main__':
    unittest.main()
--- a/youtube_dl/YoutubeDL.py
+++ b/youtube_dl/YoutubeDL.py
@@ -37,6 +37,7 @@ from .compat import (
    compat_tokenize_tokenize,
    compat_urllib_error,
    compat_urllib_request,
+    compat_urllib_request_DataHandler,
 )
 from .utils import (
    ContentTooShortError,
@@ -1967,8 +1968,9 @@ class YoutubeDL(object):
        debuglevel = 1 if self.params.get('debug_printtraffic') else 0
        https_handler = make_HTTPS_handler(self.params, debuglevel=debuglevel)
        ydlh = YoutubeDLHandler(self.params, debuglevel=debuglevel)
+        data_handler = compat_urllib_request_DataHandler()
        opener = compat_urllib_request.build_opener(
-            proxy_handler, https_handler, cookie_processor, ydlh)
+            proxy_handler, https_handler, cookie_processor, ydlh, data_handler)

        # Delete the default user-agent header, which would otherwise apply in
        # cases where our custom HTTP handler doesn't come into play
--- a/youtube_dl/compat.py
+++ b/youtube_dl/compat.py
@@ -1,7 +1,10 @@
 from __future__ import unicode_literals

+import binascii
 import collections
+import email
 import getpass
+import io
 import optparse
 import os
 import re
@@ -38,6 +41,11 @@ try:
 except ImportError:  # Python 2
    import urlparse as compat_urlparse

+try:
+    import urllib.response as compat_urllib_response
+except ImportError:  # Python 2
+    import urllib as compat_urllib_response
+
 try:
    import http.cookiejar as compat_cookiejar
 except ImportError:  # Python 2
@@ -155,6 +163,40 @@ except ImportError:  # Python 2
        string = string.replace('+', ' ')
        return compat_urllib_parse_unquote(string, encoding, errors)

+try:
+    from urllib.request import DataHandler as compat_urllib_request_DataHandler
+except ImportError:  # Python < 3.4
+    # Ported from CPython 98774:1733b3bd46db, Lib/urllib/request.py
    class compat_urllib_request_DataHandler(compat_urllib_request.BaseHandler):
        """Fallback handler for "data:" URLs.

        Defined only when urllib.request.DataHandler cannot be imported.
        """
        def data_open(self, req):
            """Decode the data: URL of req and return it as a response object.

            The part before the first comma is taken as the media type and the
            rest as the payload. The payload is percent-unquoted and, when the
            media type ends with ";base64", base64-decoded. An empty media
            type is replaced by "text/plain;charset=US-ASCII". The result is
            an addinfourl wrapping the decoded bytes, with Content-type and
            Content-length headers.
            """
            # data URLs as specified in RFC 2397.
            #
            # ignores POSTed data
            #
            # syntax:
            # dataurl   := "data:" [ mediatype ] [ ";base64" ] "," data
            # mediatype := [ type "/" subtype ] *( ";" parameter )
            # data      := *urlchar
            # parameter := attribute "=" value
            url = req.get_full_url()

            # scheme is split off only to reach the remainder; it is not used
            # afterwards.
            scheme, data = url.split(":", 1)
            mediatype, data = data.split(",", 1)

            # even base64 encoded data URLs might be quoted so unquote in any case:
            data = compat_urllib_parse_unquote_to_bytes(data)
            if mediatype.endswith(";base64"):
                data = binascii.a2b_base64(data)
                # drop the 7-character ";base64" suffix
                mediatype = mediatype[:-7]

            if not mediatype:
                mediatype = "text/plain;charset=US-ASCII"

            headers = email.message_from_string(
                "Content-type: %s\nContent-length: %d\n" % (mediatype, len(data)))

            return compat_urllib_response.addinfourl(io.BytesIO(data), headers, url)
+
 try:
    compat_basestring = basestring  # Python 2
 except NameError:
@@ -489,6 +531,8 @@ __all__ = [
    'compat_urllib_parse_unquote_to_bytes',
    'compat_urllib_parse_urlparse',
    'compat_urllib_request',
+    'compat_urllib_request_DataHandler',
+    'compat_urllib_response',
    'compat_urlparse',
    'compat_urlretrieve',
    'compat_xml_parse_error',
--- a/youtube_dl/downloader/hls.py
+++ b/youtube_dl/downloader/hls.py
@@ -30,7 +30,7 @@ class HlsFD(FileDownloader):

        args = [ffpp.executable, '-y']

-        if info_dict['http_headers']:
+        if info_dict['http_headers'] and re.match(r'^https?://', url):
            # Trailing \r\n after each HTTP header is important to prevent warning from ffmpeg/avconv:
            # [http @ 00000000003d2fa0] No trailing CRLF found in HTTP header.
            args += [
--- a/youtube_dl/extractor/init.py
+++ b/youtube_dl/extractor/init.py
@@ -45,6 +45,7 @@ from .bambuser import BambuserIE, BambuserChannelIE
 from .bandcamp import BandcampIE, BandcampAlbumIE
 from .bbc import (
    BBCCoUkIE,
+    BBCCoUkArticleIE,
    BBCIE,
 )
 from .beeg import BeegIE
@@ -586,6 +587,7 @@ from .spankwire import SpankwireIE
 from .spiegel import SpiegelIE, SpiegelArticleIE
 from .spiegeltv import SpiegeltvIE
 from .spike import SpikeIE
+from .stitcher import StitcherIE
 from .sport5 import Sport5IE
 from .sportbox import (
    SportBoxIE,
@@ -690,7 +692,7 @@ from .twitch import (
    TwitchBookmarksIE,
    TwitchStreamIE,
 )
-from .twitter import TwitterCardIE
+from .twitter import TwitterCardIE, TwitterIE
 from .ubu import UbuIE
 from .udemy import (
    UdemyIE,
--- a/youtube_dl/extractor/adultswim.py
+++ b/youtube_dl/extractor/adultswim.py
@@ -183,7 +183,7 @@ class AdultSwimIE(InfoExtractor):
                media_url = file_el.text
                if determine_ext(media_url) == 'm3u8':
                    formats.extend(self._extract_m3u8_formats(
-                        media_url, segment_title, 'mp4', 'm3u8_native', preference=0, m3u8_id='hls'))
+                        media_url, segment_title, 'mp4', preference=0, m3u8_id='hls'))
                else:
                    formats.append({
                        'format_id': '%s_%s' % (bitrate, ftype),
--- a/youtube_dl/extractor/bbc.py
+++ b/youtube_dl/extractor/bbc.py
@@ -20,7 +20,7 @@ from ..compat import compat_HTTPError
 class BBCCoUkIE(InfoExtractor):
    IE_NAME = 'bbc.co.uk'
    IE_DESC = 'BBC iPlayer'
-    _VALID_URL = r'https?://(?:www\.)?bbc\.co\.uk/(?:(?:(?:programmes|iplayer(?:/[^/]+)?/(?:episode|playlist))/)|music/clips[/#])(?P<id>[\da-z]{8})'
+    _VALID_URL = r'https?://(?:www\.)?bbc\.co\.uk/(?:(?:programmes/(?!articles/)|iplayer(?:/[^/]+)?/(?:episode/|playlist/))|music/clips[/#])(?P<id>[\da-z]{8})'

    _MEDIASELECTOR_URLS = [
        # Provides HQ HLS streams with even better quality that pc mediaset but fails
@@ -625,6 +625,7 @@ class BBCIE(BBCCoUkIE):
            'id': 'p02xycnp',
            'ext': 'mp4',
            'title': 'Transfers: Cristiano Ronaldo to Man Utd, Arsenal to spend?',
+            'description': 'BBC Sport\'s David Ornstein has the latest transfer gossip, including rumours of a Manchester United return for Cristiano Ronaldo.',
            'duration': 140,
        },
        'params': {
@@ -651,7 +652,7 @@ class BBCIE(BBCCoUkIE):

    @classmethod
    def suitable(cls, url):
-        return False if BBCCoUkIE.suitable(url) else super(BBCIE, cls).suitable(url)
+        return False if BBCCoUkIE.suitable(url) or BBCCoUkArticleIE.suitable(url) else super(BBCIE, cls).suitable(url)

    def _extract_from_media_meta(self, media_meta, video_id):
        # Direct links to media in media metadata (e.g.
@@ -902,3 +903,33 @@ class BBCIE(BBCCoUkIE):
            })

        return self.playlist_result(entries, playlist_id, playlist_title, playlist_description)
+
+
+class BBCCoUkArticleIE(InfoExtractor):
    # Extractor for bbc.co.uk programme article pages. An article is returned
    # as a playlist whose entries are the clip URLs found in the page; each
    # one is delegated through url_result().
    #
    # The pattern is a raw string with escaped dots and accepts the same
    # scheme/host variants as BBCCoUkIE._VALID_URL. BBCCoUkIE excludes
    # /programmes/articles/ URLs for http and https alike, so this extractor
    # has to claim all of them or they fall through to the generic BBCIE.
    _VALID_URL = r'https?://(?:www\.)?bbc\.co\.uk/programmes/articles/(?P<id>[a-zA-Z0-9]+)'
    IE_NAME = 'bbc.co.uk:article'
    IE_DESC = 'BBC articles'
+
    _TEST = {
        'url': 'http://www.bbc.co.uk/programmes/articles/3jNQLTMrPlYGTBn0WV6M2MS/not-your-typical-role-model-ada-lovelace-the-19th-century-programmer',
        'info_dict': {
            'id': '3jNQLTMrPlYGTBn0WV6M2MS',
            'title': 'Calculating Ada: The Countess of Computing - Not your typical role model: Ada Lovelace the 19th century programmer - BBC Four',
            'description': 'Hannah Fry reveals some of her surprising discoveries about Ada Lovelace during filming.',
        },
        'playlist_count': 4,
        'add_ie': ['BBCCoUk'],
    }
+
    def _real_extract(self, url):
        # Returns a playlist result: id taken from the URL, title and
        # description from the page's OpenGraph tags, one entry per clip.
        playlist_id = self._match_id(url)
+
        webpage = self._download_webpage(url, playlist_id)
+
        title = self._og_search_title(webpage)
        # NOTE(review): the description lookup is presumably non-fatal and may
        # yield None when the tag is missing - confirm in common.py. Only
        # strip an actual string so a missing description does not abort the
        # whole playlist with an AttributeError.
        description = self._og_search_description(webpage)
        if description:
            description = description.strip()
+
        entries = [self.url_result(programme_url) for programme_url in re.findall(
            r'<div[^>]+typeof="Clip"[^>]+resource="([^"]+)"', webpage)]
+
        return self.playlist_result(entries, playlist_id, title, description)
--- a/youtube_dl/extractor/canalc2.py
+++ b/youtube_dl/extractor/canalc2.py
@@ -4,38 +4,53 @@ from __future__ import unicode_literals
 import re

 from .common import InfoExtractor
+from ..utils import parse_duration


 class Canalc2IE(InfoExtractor):
    IE_NAME = 'canalc2.tv'
-    _VALID_URL = r'http://.*?\.canalc2\.tv/video\.asp\?.*?idVideo=(?P<id>\d+)'
+    _VALID_URL = r'https?://(?:www\.)?canalc2\.tv/video/(?P<id>\d+)'

    _TEST = {
-        'url': 'http://www.canalc2.tv/video.asp?idVideo=12163&voir=oui',
+        'url': 'http://www.canalc2.tv/video/12163',
        'md5': '060158428b650f896c542dfbb3d6487f',
        'info_dict': {
            'id': '12163',
-            'ext': 'mp4',
-            'title': 'Terrasses du Numérique'
+            'ext': 'flv',
+            'title': 'Terrasses du Numérique',
+            'duration': 122,
+        },
+        'params': {
+            'skip_download': True,  # Requires rtmpdump
        }
    }

    def _real_extract(self, url):
-        video_id = re.match(self._VALID_URL, url).group('id')
-        # We need to set the voir field for getting the file name
-        url = 'http://www.canalc2.tv/video.asp?idVideo=%s&voir=oui' % video_id
+        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)
-        file_name = self._search_regex(
-            r"so\.addVariable\('file','(.*?)'\);",
-            webpage, 'file name')
-        video_url = 'http://vod-flash.u-strasbg.fr:8080/' + file_name
+        video_url = self._search_regex(
+            r'jwplayer\((["\'])Player\1\)\.setup\({[^}]*file\s*:\s*(["\'])(?P<file>.+?)\2',
+            webpage, 'video_url', group='file')
+        formats = [{'url': video_url}]
+        if video_url.startswith('rtmp://'):
+            rtmp = re.search(r'^(?P<url>rtmp://[^/]+/(?P<app>.+/))(?P<play_path>mp4:.+)$', video_url)
+            formats[0].update({
+                'url': rtmp.group('url'),
+                'ext': 'flv',
+                'app': rtmp.group('app'),
+                'play_path': rtmp.group('play_path'),
+                'page_url': url,
+            })

        title = self._html_search_regex(
-            r'class="evenement8">(.*?)</a>', webpage, 'title')
+            r'(?s)class="[^"]*col_description[^"]*">.*?<h3>(.*?)</h3>', webpage, 'title')
+        duration = parse_duration(self._search_regex(
+            r'id=["\']video_duree["\'][^>]*>([^<]+)',
+            webpage, 'duration', fatal=False))

        return {
            'id': video_id,
-            'ext': 'mp4',
-            'url': video_url,
            'title': title,
+            'duration': duration,
+            'formats': formats,
        }
--- a/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@@ -172,6 +172,7 @@ class InfoExtractor(object):
    view_count:     How many users have watched the video on the platform.
    like_count:     Number of positive ratings of the video
    dislike_count:  Number of negative ratings of the video
+    repost_count:   Number of reposts of the video
    average_rating: Average rating give by users, the scale used depends on the webpage
    comment_count:  Number of comments on the video
    comments:       A list of comments, each with one or more of the following
@@ -645,7 +646,7 @@ class InfoExtractor(object):
    # Helper functions for extracting OpenGraph info
    @staticmethod
    def _og_regexes(prop):
-        content_re = r'content=(?:"([^>]+?)"|\'([^>]+?)\'|\s*([^\s"\'=<>`]+?))'
+        content_re = r'content=(?:"([^"]+?)"|\'([^\']+?)\'|\s*([^\s"\'=<>`]+?))'
        property_re = (r'(?:name|property)=(?:\'og:%(prop)s\'|"og:%(prop)s"|\s*og:%(prop)s\b)'
                       % {'prop': re.escape(prop)})
        template = r'<meta[^>]+?%s[^>]+?%s'
--- a/youtube_dl/extractor/crunchyroll.py
+++ b/youtube_dl/extractor/crunchyroll.py
@@ -32,6 +32,26 @@ from ..aes import (


 class CrunchyrollBaseIE(InfoExtractor):
+    _NETRC_MACHINE = 'crunchyroll'
+
+    def _login(self):
        """Submit the configured credentials to Crunchyroll's login form.

        Does nothing when _get_login_info() yields no username. The response
        of the login request is not inspected here.
        """
        (username, password) = self._get_login_info()
        if username is None:
            return
        self.report_login()
        login_url = 'https://www.crunchyroll.com/?a=formhandler'
        data = urlencode_postdata({
            'formname': 'RpcApiUser_Login',
            'name': username,
            'password': password,
        })
        login_request = compat_urllib_request.Request(login_url, data)
        login_request.add_header('Content-Type', 'application/x-www-form-urlencoded')
        # Positional arguments: video_id=None, note=False,
        # errnote='Wrong login info' (per this class's _download_webpage
        # signature).
        self._download_webpage(login_request, None, False, 'Wrong login info')
+
+    def _real_initialize(self):
        # Initialization consists solely of the login attempt; _login()
        # returns early when no username is available.
        self._login()
+
    def _download_webpage(self, url_or_request, video_id, note=None, errnote=None, fatal=True, tries=1, timeout=5, encoding=None):
        request = (url_or_request if isinstance(url_or_request, compat_urllib_request.Request)
                   else compat_urllib_request.Request(url_or_request))
@@ -46,10 +66,22 @@ class CrunchyrollBaseIE(InfoExtractor):
        return super(CrunchyrollBaseIE, self)._download_webpage(
            request, video_id, note, errnote, fatal, tries, timeout, encoding)

+    @staticmethod
+    def _add_skip_wall(url):
        """Return url with its query parameter skip_wall forced to 1.

        Any existing skip_wall value is replaced; all other query parameters,
        including those with an empty value, are carried over.
        """
        parsed_url = compat_urlparse.urlparse(url)
        # keep_blank_values: without it parse_qs silently drops parameters
        # such as "foo=" and they would vanish from the rebuilt URL.
        qs = compat_urlparse.parse_qs(parsed_url.query, keep_blank_values=True)
        # Always force skip_wall to bypass maturity wall, namely 18+ confirmation message:
        # > This content may be inappropriate for some people.
        # > Are you sure you want to continue?
        # since it's not disabled by default in crunchyroll account's settings.
        # See https://github.com/rg3/youtube-dl/issues/7202.
        qs['skip_wall'] = ['1']
        # doseq=True: parse_qs maps every key to a list of values.
        return compat_urlparse.urlunparse(
            parsed_url._replace(query=compat_urllib_parse.urlencode(qs, True)))
+

 class CrunchyrollIE(CrunchyrollBaseIE):
    _VALID_URL = r'https?://(?:(?P<prefix>www|m)\.)?(?P<url>crunchyroll\.(?:com|fr)/(?:media(?:-|/\?id=)|[^/]*/[^/?&]*?)(?P<video_id>[0-9]+))(?:[/?&]|$)'
-    _NETRC_MACHINE = 'crunchyroll'
    _TESTS = [{
        'url': 'http://www.crunchyroll.com/wanna-be-the-strongest-in-the-world/episode-1-an-idol-wrestler-is-born-645513',
        'info_dict': {
@@ -81,10 +113,13 @@ class CrunchyrollIE(CrunchyrollBaseIE):
            # rtmp
            'skip_download': True,
        },
-
    }, {
        'url': 'http://www.crunchyroll.fr/girl-friend-beta/episode-11-goodbye-la-mode-661697',
        'only_matching': True,
+    }, {
+        # geo-restricted (US), 18+ maturity wall, non-premium available
+        'url': 'http://www.crunchyroll.com/cosplay-complex-ova/episode-1-the-birth-of-the-cosplay-club-565617',
+        'only_matching': True,
    }]

    _FORMAT_IDS = {
@@ -94,24 +129,6 @@ class CrunchyrollIE(CrunchyrollBaseIE):
        '1080': ('80', '108'),
    }

-    def _login(self):
-        (username, password) = self._get_login_info()
-        if username is None:
-            return
-        self.report_login()
-        login_url = 'https://www.crunchyroll.com/?a=formhandler'
-        data = urlencode_postdata({
-            'formname': 'RpcApiUser_Login',
-            'name': username,
-            'password': password,
-        })
-        login_request = compat_urllib_request.Request(login_url, data)
-        login_request.add_header('Content-Type', 'application/x-www-form-urlencoded')
-        self._download_webpage(login_request, None, False, 'Wrong login info')
-
-    def _real_initialize(self):
-        self._login()
-
    def _decrypt_subtitles(self, data, iv, id):
        data = bytes_to_intlist(base64.b64decode(data.encode('utf-8')))
        iv = bytes_to_intlist(base64.b64decode(iv.encode('utf-8')))
@@ -228,7 +245,7 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text

    def _get_subtitles(self, video_id, webpage):
        subtitles = {}
-        for sub_id, sub_name in re.findall(r'\?ssid=([0-9]+)" title="([^"]+)', webpage):
+        for sub_id, sub_name in re.findall(r'\bssid=([0-9]+)"[^>]+?\btitle="([^"]+)', webpage):
            sub_page = self._download_webpage(
                'http://www.crunchyroll.com/xml/?req=RpcApiSubtitle_GetXml&subtitle_script_id=' + sub_id,
                video_id, note='Downloading subtitles for ' + sub_name)
@@ -254,7 +271,7 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
        else:
            webpage_url = 'http://www.' + mobj.group('url')

-        webpage = self._download_webpage(webpage_url, video_id, 'Downloading webpage')
+        webpage = self._download_webpage(self._add_skip_wall(webpage_url), video_id, 'Downloading webpage')
        note_m = self._html_search_regex(
            r'<div class="showmedia-trailer-notice">(.+?)</div>',
            webpage, 'trailer-notice', default='')
@@ -352,7 +369,7 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text

 class CrunchyrollShowPlaylistIE(CrunchyrollBaseIE):
    IE_NAME = "crunchyroll:playlist"
-    _VALID_URL = r'https?://(?:(?P<prefix>www|m)\.)?(?P<url>crunchyroll\.com/(?!(?:news|anime-news|library|forum|launchcalendar|lineup|store|comics|freetrial|login))(?P<id>[\w\-]+))/?$'
+    _VALID_URL = r'https?://(?:(?P<prefix>www|m)\.)?(?P<url>crunchyroll\.com/(?!(?:news|anime-news|library|forum|launchcalendar|lineup|store|comics|freetrial|login))(?P<id>[\w\-]+))/?(?:\?|$)'

    _TESTS = [{
        'url': 'http://www.crunchyroll.com/a-bridge-to-the-starry-skies-hoshizora-e-kakaru-hashi',
@@ -361,12 +378,25 @@ class CrunchyrollShowPlaylistIE(CrunchyrollBaseIE):
            'title': 'A Bridge to the Starry Skies - Hoshizora e Kakaru Hashi'
        },
        'playlist_count': 13,
+    }, {
+        # geo-restricted (US), 18+ maturity wall, non-premium available
+        'url': 'http://www.crunchyroll.com/cosplay-complex-ova',
+        'info_dict': {
+            'id': 'cosplay-complex-ova',
+            'title': 'Cosplay Complex OVA'
+        },
+        'playlist_count': 3,
+        'skip': 'Georestricted',
+    }, {
+        # geo-restricted (US), 18+ maturity wall, non-premium will be available since 2015.11.14
+        'url': 'http://www.crunchyroll.com/ladies-versus-butlers?skip_wall=1',
+        'only_matching': True,
    }]

    def _real_extract(self, url):
        show_id = self._match_id(url)

-        webpage = self._download_webpage(url, show_id)
+        webpage = self._download_webpage(self._add_skip_wall(url), show_id)
        title = self._html_search_regex(
            r'(?s)<h1[^>]*>\s*<span itemprop="name">(.*?)</span>',
            webpage, 'title')
--- a/youtube_dl/extractor/dailymotion.py
+++ b/youtube_dl/extractor/dailymotion.py
@@ -96,6 +96,11 @@ class DailymotionIE(DailymotionBaseInfoExtractor):
                'uploader': 'HotWaves1012',
                'age_limit': 18,
            }
+        },
+        # geo-restricted, player v5
+        {
+            'url': 'http://www.dailymotion.com/video/xhza0o',
+            'only_matching': True,
        }
    ]

@@ -124,6 +129,9 @@ class DailymotionIE(DailymotionBaseInfoExtractor):
        if player_v5:
            player = self._parse_json(player_v5, video_id)
            metadata = player['metadata']
+
+            self._check_error(metadata)
+
            formats = []
            for quality, media_list in metadata['qualities'].items():
                for media in media_list:
@@ -201,9 +209,7 @@ class DailymotionIE(DailymotionBaseInfoExtractor):
                'video info', flags=re.MULTILINE),
            video_id)

-        if info.get('error') is not None:
-            msg = 'Couldn\'t get video, Dailymotion says: %s' % info['error']['title']
-            raise ExtractorError(msg, expected=True)
+        self._check_error(info)

        formats = []
        for (key, format_id) in self._FORMATS:
@@ -246,6 +252,11 @@ class DailymotionIE(DailymotionBaseInfoExtractor):
            'duration': info['duration']
        }

+    def _check_error(self, info):
        """Raise an expected ExtractorError when info has a non-None 'error'.

        The message combines IE_NAME with info['error']['title'].
        """
        if info.get('error') is not None:
            raise ExtractorError(
                '%s said: %s' % (self.IE_NAME, info['error']['title']), expected=True)
+
    def _get_subtitles(self, video_id, webpage):
        try:
            sub_list = self._download_webpage(
--- a/youtube_dl/extractor/eagleplatform.py
+++ b/youtube_dl/extractor/eagleplatform.py
@@ -87,7 +87,7 @@ class EaglePlatformIE(InfoExtractor):
        m3u8_url = self._get_video_url(secure_m3u8, video_id, 'Downloading m3u8 JSON')
        formats = self._extract_m3u8_formats(
            m3u8_url, video_id,
-            'mp4', entry_protocol='m3u8_native')
+            'mp4', entry_protocol='m3u8_native', m3u8_id='hls')

        mp4_url = self._get_video_url(
            # Secure mp4 URL is constructed according to Player.prototype.mp4 from
--- a/youtube_dl/extractor/facebook.py
+++ b/youtube_dl/extractor/facebook.py
@@ -14,7 +14,6 @@ from ..compat import (
 )
 from ..utils import (
    ExtractorError,
-    int_or_none,
    limit_length,
    urlencode_postdata,
    get_element_by_id,
@@ -142,16 +141,20 @@ class FacebookIE(InfoExtractor):
        data = dict(json.loads(m.group(1)))
        params_raw = compat_urllib_parse_unquote(data['params'])
        params = json.loads(params_raw)
-        video_data = params['video_data'][0]

        formats = []
-        for quality in ['sd', 'hd']:
-            src = video_data.get('%s_src' % quality)
-            if src is not None:
-                formats.append({
-                    'format_id': quality,
-                    'url': src,
-                })
+        for format_id, f in params['video_data'].items():
+            if not f or not isinstance(f, list):
+                continue
+            for quality in ('sd', 'hd'):
+                for src_type in ('src', 'src_no_ratelimit'):
+                    src = f[0].get('%s_%s' % (quality, src_type))
+                    if src:
+                        formats.append({
+                            'format_id': '%s_%s_%s' % (format_id, quality, src_type),
+                            'url': src,
+                            'preference': -10 if format_id == 'progressive' else 0,
+                        })
        if not formats:
            raise ExtractorError('Cannot find video formats')

@@ -171,7 +174,5 @@ class FacebookIE(InfoExtractor):
            'id': video_id,
            'title': video_title,
            'formats': formats,
-            'duration': int_or_none(video_data.get('video_duration')),
-            'thumbnail': video_data.get('thumbnail_src'),
            'uploader': uploader,
        }
--- a/youtube_dl/extractor/imdb.py
+++ b/youtube_dl/extractor/imdb.py
@@ -4,8 +4,8 @@ import re
 import json

 from .common import InfoExtractor
-from ..compat import (
-    compat_urlparse,
+from ..utils import (
+    qualities,
 )


@@ -30,24 +30,33 @@ class ImdbIE(InfoExtractor):
        descr = self._html_search_regex(
            r'(?s)<span itemprop="description">(.*?)</span>',
            webpage, 'description', fatal=False)
-        available_formats = re.findall(
-            r'case \'(?P<f_id>.*?)\' :$\s+url = \'(?P<path>.*?)\'', webpage,
-            flags=re.MULTILINE)
+        player_url = 'http://www.imdb.com/video/imdb/vi%s/imdb/single' % video_id
+        player_page = self._download_webpage(
+            player_url, video_id, 'Downloading player page')
+        # the player page contains the info for the default format, we have to
+        # fetch other pages for the rest of the formats
+        extra_formats = re.findall(r'href="(?P<url>%s.*?)".*?>(?P<name>.*?)<' % re.escape(player_url), player_page)
+        format_pages = [
+            self._download_webpage(
+                f_url, video_id, 'Downloading info for %s format' % f_name)
+            for f_url, f_name in extra_formats]
+        format_pages.append(player_page)
+
+        quality = qualities(['SD', '480p', '720p'])
        formats = []
-        for f_id, f_path in available_formats:
-            f_path = f_path.strip()
-            format_page = self._download_webpage(
-                compat_urlparse.urljoin(url, f_path),
-                'Downloading info for %s format' % f_id)
+        for format_page in format_pages:
            json_data = self._search_regex(
                r'<script[^>]+class="imdb-player-data"[^>]*?>(.*?)</script>',
                format_page, 'json data', flags=re.DOTALL)
            info = json.loads(json_data)
            format_info = info['videoPlayerObject']['video']
+            f_id = format_info['ffname']
            formats.append({
                'format_id': f_id,
                'url': format_info['videoInfoList'][0]['videoUrl'],
+                'quality': quality(f_id),
            })
+        self._sort_formats(formats)

        return {
            'id': video_id,
--- a/youtube_dl/extractor/iqiyi.py
+++ b/youtube_dl/extractor/iqiyi.py
@@ -205,9 +205,9 @@ class IqiyiIE(InfoExtractor):

    def get_enc_key(self, swf_url, video_id):
        # TODO: automatic key extraction
-        # last update at 2015-10-10 for Zombie::bite
-        # '7239670519b6ac209a0bee4ef0446a6b24894b8ac2751506e42116212a0d0272e505'[2:66][1::2]
-        enc_key = '97596c0abee04ab49ba25564161ad225'
+        # last update at 2015-10-22 for Zombie::bite
+        # '7223c67061dbea1259d0ceb44f44b6d62288f4f80c972170de5201d2321060270e05'[2:66][0::2]
+        enc_key = '2c76de15dcb44bd28ff0927d50d31620'
        return enc_key

    def _real_extract(self, url):
--- a/youtube_dl/extractor/letv.py
+++ b/youtube_dl/extractor/letv.py
@@ -9,13 +9,14 @@ from .common import InfoExtractor
 from ..compat import (
    compat_urllib_parse,
    compat_urllib_request,
-    compat_urlparse,
+    compat_ord,
 )
 from ..utils import (
    determine_ext,
    ExtractorError,
    parse_iso8601,
    int_or_none,
+    encode_data_uri,
 )


@@ -25,15 +26,16 @@ class LetvIE(InfoExtractor):

    _TESTS = [{
        'url': 'http://www.letv.com/ptv/vplay/22005890.html',
-        'md5': 'cab23bd68d5a8db9be31c9a222c1e8df',
+        'md5': 'edadcfe5406976f42f9f266057ee5e40',
        'info_dict': {
            'id': '22005890',
            'ext': 'mp4',
            'title': '第87届奥斯卡颁奖礼完美落幕 《鸟人》成最大赢家',
-            'timestamp': 1424747397,
-            'upload_date': '20150224',
            'description': 'md5:a9cb175fd753e2962176b7beca21a47c',
-        }
+        },
+        'params': {
+            'hls_prefer_native': True,
+        },
    }, {
        'url': 'http://www.letv.com/ptv/vplay/1415246.html',
        'info_dict': {
@@ -42,16 +44,22 @@ class LetvIE(InfoExtractor):
            'title': '美人天下01',
            'description': 'md5:f88573d9d7225ada1359eaf0dbf8bcda',
        },
+        'params': {
+            'hls_prefer_native': True,
+        },
    }, {
        'note': 'This video is available only in Mainland China, thus a proxy is needed',
        'url': 'http://www.letv.com/ptv/vplay/1118082.html',
-        'md5': 'f80936fbe20fb2f58648e81386ff7927',
+        'md5': '2424c74948a62e5f31988438979c5ad1',
        'info_dict': {
            'id': '1118082',
            'ext': 'mp4',
            'title': '与龙共舞 完整版',
            'description': 'md5:7506a5eeb1722bb9d4068f85024e3986',
        },
+        'params': {
+            'hls_prefer_native': True,
+        },
        'skip': 'Only available in China',
    }]

@@ -74,6 +82,27 @@ class LetvIE(InfoExtractor):
        _loc3_ = self.ror(_loc3_, _loc2_ % 17)
        return _loc3_

+    # see M3U8Encryption class in KLetvPlayer.swf
+    @staticmethod
+    def decrypt_m3u8(encrypted_data):
+        """Decode an obfuscated m3u8 playlist.
+
+        encrypted_data is the raw response body (bytes). Data that does not
+        start with the 'vc_01' marker (compared case-insensitively) is
+        returned unchanged. Otherwise the payload following the marker is
+        split into 4-bit nibbles, the last 11 nibbles are moved to the front
+        and the nibbles are packed back into bytes, which are returned.
+        """
+        # Compare as bytes: decoding arbitrary binary data as UTF-8 could
+        # raise UnicodeDecodeError before the marker is even checked
+        if encrypted_data[:5].lower() != b'vc_01':
+            return encrypted_data
+        encrypted_data = encrypted_data[5:]
+
+        # Walk the buffer once; re-slicing it on every step would copy the
+        # whole remainder each time
+        _loc4_ = bytearray()
+        for c in encrypted_data:
+            b = compat_ord(c)
+            _loc4_.extend([b // 16, b & 0x0f])
+        idx = len(_loc4_) - 11
+        _loc4_ = _loc4_[idx:] + _loc4_[:idx]
+        # _loc4_ always holds an even number of nibbles (two per input byte)
+        _loc7_ = bytearray(
+            _loc4_[i] * 16 + _loc4_[i + 1] for i in range(0, len(_loc4_), 2))
+
+        return bytes(_loc7_)
+
    def _real_extract(self, url):
        media_id = self._match_id(url)
        page = self._download_webpage(url, media_id)
@@ -115,23 +144,28 @@ class LetvIE(InfoExtractor):
        for format_id in formats:
            if format_id in dispatch:
                media_url = playurl['domain'][0] + dispatch[format_id][0]
-
-                # Mimic what flvxz.com do
-                url_parts = list(compat_urlparse.urlparse(media_url))
-                qs = dict(compat_urlparse.parse_qs(url_parts[4]))
-                qs.update({
-                    'platid': '14',
-                    'splatid': '1401',
-                    'tss': 'no',
-                    'retry': 1
+                media_url += '&' + compat_urllib_parse.urlencode({
+                    'm3v': 1,
+                    'format': 1,
+                    'expect': 3,
+                    'rateid': format_id,
                })
-                url_parts[4] = compat_urllib_parse.urlencode(qs)
-                media_url = compat_urlparse.urlunparse(url_parts)
+
+                nodes_data = self._download_json(
+                    media_url, media_id,
+                    'Download JSON metadata for format %s' % format_id)
+
+                req = self._request_webpage(
+                    nodes_data['nodelist'][0]['location'], media_id,
+                    note='Downloading m3u8 information for format %s' % format_id)
+
+                m3u8_data = self.decrypt_m3u8(req.read())

                url_info_dict = {
-                    'url': media_url,
+                    'url': encode_data_uri(m3u8_data, 'application/vnd.apple.mpegurl'),
                    'ext': determine_ext(dispatch[format_id][1]),
                    'format_id': format_id,
+                    'protocol': 'm3u8',
                }

                if format_id[-1:] == 'p':
--- a/youtube_dl/extractor/lynda.py
+++ b/youtube_dl/extractor/lynda.py
@@ -140,13 +140,14 @@ class LyndaIE(LyndaBaseIE):

        prioritized_streams = video_json.get('PrioritizedStreams')
        if prioritized_streams:
-            formats.extend([
-                {
-                    'url': video_url,
-                    'width': int_or_none(format_id),
-                    'format_id': format_id,
-                } for format_id, video_url in prioritized_streams['0'].items()
-            ])
+            for prioritized_stream_id, prioritized_stream in prioritized_streams.items():
+                formats.extend([
+                    {
+                        'url': video_url,
+                        'width': int_or_none(format_id),
+                        'format_id': '%s-%s' % (prioritized_stream_id, format_id),
+                    } for format_id, video_url in prioritized_stream.items()
+                ])

        self._check_formats(formats, video_id)
        self._sort_formats(formats)
--- a/youtube_dl/extractor/odnoklassniki.py
+++ b/youtube_dl/extractor/odnoklassniki.py
@@ -13,7 +13,7 @@ from ..utils import (


 class OdnoklassnikiIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?(?:odnoklassniki|ok)\.ru/(?:video|web-api/video/moviePlayer)/(?P<id>[\d-]+)'
+    _VALID_URL = r'https?://(?:www\.)?(?:odnoklassniki|ok)\.ru/(?:video(?:embed)?|web-api/video/moviePlayer)/(?P<id>[\d-]+)'
    _TESTS = [{
        # metadata in JSON
        'url': 'http://ok.ru/video/20079905452',
@@ -66,6 +66,9 @@ class OdnoklassnikiIE(InfoExtractor):
    }, {
        'url': 'http://www.ok.ru/video/20648036891',
        'only_matching': True,
+    }, {
+        'url': 'http://www.ok.ru/videoembed/20648036891',
+        'only_matching': True,
    }]

    def _real_extract(self, url):
--- a/youtube_dl/extractor/rte.py
+++ b/youtube_dl/extractor/rte.py
@@ -9,16 +9,16 @@ from ..utils import (


 class RteIE(InfoExtractor):
-    _VALID_URL = r'http?://(?:www\.)?rte\.ie/player/[^/]{2,3}/show/[^/]+/(?P<id>[0-9]+)'
+    _VALID_URL = r'https?://(?:www\.)?rte\.ie/player/[^/]{2,3}/show/[^/]+/(?P<id>[0-9]+)'
    _TEST = {
-        'url': 'http://www.rte.ie/player/de/show/10363114/',
+        'url': 'http://www.rte.ie/player/ie/show/iwitness-862/10478715/',
        'info_dict': {
-            'id': '10363114',
+            'id': '10478715',
            'ext': 'mp4',
-            'title': 'One News',
+            'title': 'Watch iWitness  online',
            'thumbnail': 're:^https?://.*\.jpg$',
-            'description': 'The One O\'Clock News followed by Weather.',
-            'duration': 436.844,
+            'description': 'iWitness : The spirit of Ireland, one voice and one minute at a time.',
+            'duration': 60.046,
        },
        'params': {
            'skip_download': 'f4m fails with --test atm'
--- a/youtube_dl/extractor/stitcher.py
+++ b/youtube_dl/extractor/stitcher.py
@@ -0,0 +1,81 @@
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+    determine_ext,
+    int_or_none,
+    js_to_json,
+    unescapeHTML,
+)
+
+
+class StitcherIE(InfoExtractor):
+    # Extractor for single podcast episodes on stitcher.com
+
+    # The numeric episode id ends the path; an optional slug in front of it
+    # (separated by a dash) is captured as display_id
+    _VALID_URL = r'https?://(?:www\.)?stitcher\.com/podcast/(?:[^/]+/)+e/(?:(?P<display_id>[^/#?&]+?)-)?(?P<id>\d+)(?:[/#?&]|$)'
+    _TESTS = [{
+        'url': 'http://www.stitcher.com/podcast/the-talking-machines/e/40789481?autoplay=true',
+        'md5': '391dd4e021e6edeb7b8e68fbf2e9e940',
+        'info_dict': {
+            'id': '40789481',
+            'ext': 'mp3',
+            'title': 'Machine Learning Mastery and Cancer Clusters',
+            'description': 'md5:55163197a44e915a14a1ac3a1de0f2d3',
+            'duration': 1604,
+            'thumbnail': 're:^https?://.*\.jpg',
+        },
+    }, {
+        'url': 'http://www.stitcher.com/podcast/panoply/vulture-tv/e/the-rare-hourlong-comedy-plus-40846275?autoplay=true',
+        'info_dict': {
+            'id': '40846275',
+            'display_id': 'the-rare-hourlong-comedy-plus',
+            'ext': 'mp3',
+            'title': "The CW's 'Crazy Ex-Girlfriend'",
+            'description': 'md5:04f1e2f98eb3f5cbb094cea0f9e19b17',
+            'duration': 2235,
+            'thumbnail': 're:^https?://.*\.jpg',
+        },
+        'params': {
+            'skip_download': True,
+        },
+    }, {
+        # escaped title
+        'url': 'http://www.stitcher.com/podcast/marketplace-on-stitcher/e/40910226?autoplay=true',
+        'only_matching': True,
+    }, {
+        'url': 'http://www.stitcher.com/podcast/panoply/getting-in/e/episode-2a-how-many-extracurriculars-should-i-have-40876278?autoplay=true',
+        'only_matching': True,
+    }]
+
+    def _real_extract(self, url):
+        # Returns an info dict with the episode metadata and its audio formats
+        mobj = re.match(self._VALID_URL, url)
+        audio_id = mobj.group('id')
+        # Fall back to the numeric id when the URL carries no slug
+        display_id = mobj.group('display_id') or audio_id
+
+        webpage = self._download_webpage(url, display_id)
+
+        # Episode metadata is read from the 'config' -> 'episode' entry of the
+        # JavaScript object assigned to `var stitcher` in the page
+        episode = self._parse_json(
+            js_to_json(self._search_regex(
+                r'(?s)var\s+stitcher\s*=\s*({.+?});\n', webpage, 'episode config')),
+            display_id)['config']['episode']
+
+        title = unescapeHTML(episode['title'])
+        # One audio-only format per URL key that is present and non-empty;
+        # 'mp3' is used when determine_ext returns nothing for the URL
+        formats = [{
+            'url': episode[episode_key],
+            'ext': determine_ext(episode[episode_key]) or 'mp3',
+            'vcodec': 'none',
+        } for episode_key in ('origEpisodeURL', 'episodeURL') if episode.get(episode_key)]
+        # The description is scraped from the page markup; a missing match is
+        # not fatal
+        description = self._search_regex(
+            r'Episode Info:\s*</span>([^<]+)<', webpage, 'description', fatal=False)
+        duration = int_or_none(episode.get('duration'))
+        thumbnail = episode.get('episodeImage')
+
+        return {
+            'id': audio_id,
+            'display_id': display_id,
+            'title': title,
+            'description': description,
+            'duration': duration,
+            'thumbnail': thumbnail,
+            'formats': formats,
+        }
--- a/youtube_dl/extractor/twitch.py
+++ b/youtube_dl/extractor/twitch.py
@@ -15,6 +15,7 @@ from ..compat import (
    compat_urlparse,
 )
 from ..utils import (
+    encode_dict,
    ExtractorError,
    int_or_none,
    parse_duration,
@@ -27,8 +28,7 @@ class TwitchBaseIE(InfoExtractor):

    _API_BASE = 'https://api.twitch.tv'
    _USHER_BASE = 'http://usher.twitch.tv'
-    _LOGIN_URL = 'https://secure.twitch.tv/login'
-    _LOGIN_POST_URL = 'https://passport.twitch.tv/authentications/new'
+    _LOGIN_URL = 'http://www.twitch.tv/login'
    _NETRC_MACHINE = 'twitch'

    def _handle_error(self, response):
@@ -61,26 +61,28 @@ class TwitchBaseIE(InfoExtractor):
        if username is None:
            return

-        login_page = self._download_webpage(
+        login_page, handle = self._download_webpage_handle(
            self._LOGIN_URL, None, 'Downloading login page')

        login_form = self._hidden_inputs(login_page)

        login_form.update({
-            'login': username.encode('utf-8'),
-            'password': password.encode('utf-8'),
+            'username': username,
+            'password': password,
        })

+        redirect_url = handle.geturl()
+
        post_url = self._search_regex(
            r'<form[^>]+action=(["\'])(?P<url>.+?)\1', login_page,
-            'post url', default=self._LOGIN_POST_URL, group='url')
+            'post url', default=redirect_url, group='url')

        if not post_url.startswith('http'):
-            post_url = compat_urlparse.urljoin(self._LOGIN_URL, post_url)
+            post_url = compat_urlparse.urljoin(redirect_url, post_url)

        request = compat_urllib_request.Request(
-            post_url, compat_urllib_parse.urlencode(login_form).encode('utf-8'))
-        request.add_header('Referer', self._LOGIN_URL)
+            post_url, compat_urllib_parse.urlencode(encode_dict(login_form)).encode('utf-8'))
+        request.add_header('Referer', redirect_url)
        response = self._download_webpage(
            request, None, 'Logging in as %s' % username)

@@ -238,14 +240,24 @@ class TwitchVodIE(TwitchItemBaseIE):

    def _real_extract(self, url):
        item_id = self._match_id(url)
+
        info = self._download_info(self._ITEM_SHORTCUT, item_id)
        access_token = self._download_json(
            '%s/api/vods/%s/access_token' % (self._API_BASE, item_id), item_id,
            'Downloading %s access token' % self._ITEM_TYPE)
+
        formats = self._extract_m3u8_formats(
-            '%s/vod/%s?nauth=%s&nauthsig=%s&allow_source=true'
-            % (self._USHER_BASE, item_id, access_token['token'], access_token['sig']),
+            '%s/vod/%s?%s' % (
+                self._USHER_BASE, item_id,
+                compat_urllib_parse.urlencode({
+                    'allow_source': 'true',
+                    'allow_spectre': 'true',
+                    'player': 'twitchweb',
+                    'nauth': access_token['token'],
+                    'nauthsig': access_token['sig'],
+                })),
            item_id, 'mp4')
+
        self._prefer_source(formats)
        info['formats'] = formats

--- a/youtube_dl/extractor/twitter.py
+++ b/youtube_dl/extractor/twitter.py
@@ -1,3 +1,4 @@
+# coding: utf-8
 from __future__ import unicode_literals

 import re
@@ -6,23 +7,51 @@ from .common import InfoExtractor
 from ..compat import compat_urllib_request
 from ..utils import (
    float_or_none,
-    unescapeHTML,
+    xpath_text,
+    remove_end,
 )


 class TwitterCardIE(InfoExtractor):
+    IE_NAME = 'twitter:card'
    _VALID_URL = r'https?://(?:www\.)?twitter\.com/i/cards/tfw/v1/(?P<id>\d+)'
-    _TEST = {
-        'url': 'https://twitter.com/i/cards/tfw/v1/560070183650213889',
-        'md5': 'a74f50b310c83170319ba16de6955192',
-        'info_dict': {
-            'id': '560070183650213889',
-            'ext': 'mp4',
-            'title': 'TwitterCard',
-            'thumbnail': 're:^https?://.*\.jpg$',
-            'duration': 30.033,
+    _TESTS = [
+        {
+            'url': 'https://twitter.com/i/cards/tfw/v1/560070183650213889',
+            'md5': '7d2f6b4d2eb841a7ccc893d479bfceb4',
+            'info_dict': {
+                'id': '560070183650213889',
+                'ext': 'mp4',
+                'title': 'TwitterCard',
+                'thumbnail': 're:^https?://.*\.jpg$',
+                'duration': 30.033,
+            }
        },
-    }
+        {
+            'url': 'https://twitter.com/i/cards/tfw/v1/623160978427936768',
+            'md5': '7ee2a553b63d1bccba97fbed97d9e1c8',
+            'info_dict': {
+                'id': '623160978427936768',
+                'ext': 'mp4',
+                'title': 'TwitterCard',
+                'thumbnail': 're:^https?://.*\.jpg',
+                'duration': 80.155,
+            },
+        },
+        {
+            'url': 'https://twitter.com/i/cards/tfw/v1/654001591733886977',
+            'md5': 'b6f35e8b08a0bec6c8af77a2f4b3a814',
+            'info_dict': {
+                'id': 'dq4Oj5quskI',
+                'ext': 'mp4',
+                'title': 'Ubuntu 11.10 Overview',
+                'description': 'Take a quick peek at what\'s new and improved in Ubuntu 11.10.\n\nOnce installed take a look at 10 Things to Do After Installing: http://www.omgubuntu.co.uk/2011/10/10-things-to-do-after-installing-ubuntu-11-10/',
+                'upload_date': '20111013',
+                'uploader': 'OMG! Ubuntu!',
+                'uploader_id': 'omgubuntu',
+            },
+        }
+    ]

    def _real_extract(self, url):
        video_id = self._match_id(url)
@@ -40,10 +69,24 @@ class TwitterCardIE(InfoExtractor):
            request.add_header('User-Agent', user_agent)
            webpage = self._download_webpage(request, video_id)

-            config = self._parse_json(
-                unescapeHTML(self._search_regex(
-                    r'data-player-config="([^"]+)"', webpage, 'data player config')),
+            youtube_url = self._html_search_regex(
+                r'<iframe[^>]+src="((?:https?:)?//www.youtube.com/embed/[^"]+)"',
+                webpage, 'youtube iframe', default=None)
+            if youtube_url:
+                return self.url_result(youtube_url, 'Youtube')
+
+            config = self._parse_json(self._html_search_regex(
+                r'data-player-config="([^"]+)"', webpage, 'data player config'),
                video_id)
+            if 'playlist' not in config:
+                if 'vmapUrl' in config:
+                    vmap_data = self._download_xml(config['vmapUrl'], video_id)
+                    video_url = xpath_text(vmap_data, './/MediaFile').strip()
+                    formats.append({
+                        'url': video_url,
+                    })
+                    break   # same video regardless of UA
+                continue

            video_url = config['playlist'][0]['source']

@@ -70,3 +113,54 @@ class TwitterCardIE(InfoExtractor):
            'duration': duration,
            'formats': formats,
        }
+
+
+class TwitterIE(InfoExtractor):
+    # Extractor for individual tweets. It only collects tweet metadata and
+    # hands the media over to TwitterCardIE via a url_transparent result.
+    IE_NAME = 'twitter'
+    _VALID_URL = r'https?://(?:www\.|m\.|mobile\.)?twitter\.com/(?P<user_id>[^/]+)/status/(?P<id>\d+)'
+    _TEMPLATE_URL = 'https://twitter.com/%s/status/%s'
+
+    _TEST = {
+        'url': 'https://twitter.com/freethenipple/status/643211948184596480',
+        'md5': '31cd83a116fc41f99ae3d909d4caf6a0',
+        'info_dict': {
+            'id': '643211948184596480',
+            'ext': 'mp4',
+            'title': 'FREE THE NIPPLE - FTN supporters on Hollywood Blvd today!',
+            'thumbnail': 're:^https?://.*\.jpg',
+            'duration': 12.922,
+            'description': 'FREE THE NIPPLE on Twitter: "FTN supporters on Hollywood Blvd today! http://t.co/c7jHH749xJ"',
+            'uploader': 'FREE THE NIPPLE',
+            'uploader_id': 'freethenipple',
+        },
+    }
+
+    def _real_extract(self, url):
+        # Returns a url_transparent result pointing at the tweet's card URL
+        mobj = re.match(self._VALID_URL, url)
+        user_id = mobj.group('user_id')
+        twid = mobj.group('id')
+
+        # Always fetch the canonical desktop URL, whichever host was given
+        webpage = self._download_webpage(self._TEMPLATE_URL % (user_id, twid), twid)
+
+        username = remove_end(self._og_search_title(webpage), ' on Twitter')
+
+        og_description = self._og_search_description(webpage).replace('\n', ' ')
+
+        # strip 'https -_t.co_BJYgOjSeGA' junk from filenames
+        mobj = re.match(r'“(.*)\s+(https?://[^ ]+)”', og_description)
+        if mobj:
+            title, short_url = mobj.groups()
+            tweet_text = '%s %s' % (title, short_url)
+        else:
+            # No trailing short link in the description: use the whole text
+            # (without the surrounding curly quotes) instead of failing on a
+            # None match object
+            title = tweet_text = og_description.strip('“”')
+
+        card_id = self._search_regex(
+            r'["\']/i/cards/tfw/v1/(\d+)', webpage, 'twitter card url')
+        card_url = 'https://twitter.com/i/cards/tfw/v1/' + card_id
+
+        return {
+            '_type': 'url_transparent',
+            'ie_key': 'TwitterCard',
+            'uploader_id': user_id,
+            'uploader': username,
+            'url': card_url,
+            'webpage_url': url,
+            'description': '%s on Twitter: "%s"' % (username, tweet_text),
+            'title': username + ' - ' + title,
+        }
--- a/youtube_dl/extractor/vidme.py
+++ b/youtube_dl/extractor/vidme.py
@@ -14,7 +14,7 @@ class VidmeIE(InfoExtractor):
    _VALID_URL = r'https?://vid\.me/(?:e/)?(?P<id>[\da-zA-Z]+)'
    _TESTS = [{
        'url': 'https://vid.me/QNB',
-        'md5': 'c62f1156138dc3323902188c5b5a8bd6',
+        'md5': 'f42d05e7149aeaec5c037b17e5d3dc82',
        'info_dict': {
            'id': 'QNB',
            'ext': 'mp4',
@@ -93,6 +93,35 @@ class VidmeIE(InfoExtractor):
        'params': {
            'skip_download': True,
        },
+    }, {
+        # nsfw, user-disabled
+        'url': 'https://vid.me/dzGJ',
+        'only_matching': True,
+    }, {
+        # suspended
+        'url': 'https://vid.me/Ox3G',
+        'only_matching': True,
+    }, {
+        # no formats in the API response
+        'url': 'https://vid.me/e5g',
+        'info_dict': {
+            'id': 'e5g',
+            'ext': 'mp4',
+            'title': 'Video upload (e5g)',
+            'thumbnail': 're:^https?://.*\.jpg',
+            'timestamp': 1401480195,
+            'upload_date': '20140530',
+            'uploader': None,
+            'uploader_id': None,
+            'age_limit': 0,
+            'duration': 483,
+            'view_count': int,
+            'like_count': int,
+            'comment_count': int,
+        },
+        'params': {
+            'skip_download': True,
+        },
    }]

    def _real_extract(self, url):
@@ -114,6 +143,12 @@ class VidmeIE(InfoExtractor):

        video = response['video']

+        if video.get('state') in ('user-disabled', 'suspended'):
+            raise ExtractorError(
+                'Vidme said: This video has been suspended either due to a copyright claim, '
+                'or for violating the terms of use.',
+                expected=True)
+
        formats = [{
            'format_id': f.get('type'),
            'url': f['uri'],
@@ -121,6 +156,14 @@ class VidmeIE(InfoExtractor):
            'height': int_or_none(f.get('height')),
            'preference': 0 if f.get('type', '').endswith('clip') else 1,
        } for f in video.get('formats', []) if f.get('uri')]
+
+        if not formats and video.get('complete_url'):
+            formats.append({
+                'url': video.get('complete_url'),
+                'width': int_or_none(video.get('width')),
+                'height': int_or_none(video.get('height')),
+            })
+
        self._sort_formats(formats)

        title = video['title']
@@ -137,7 +180,7 @@ class VidmeIE(InfoExtractor):

        return {
            'id': video_id,
-            'title': title,
+            'title': title or 'Video upload (%s)' % video_id,
            'description': description,
            'thumbnail': thumbnail,
            'uploader': uploader,
--- a/youtube_dl/extractor/viewster.py
+++ b/youtube_dl/extractor/viewster.py
@@ -131,10 +131,11 @@ class ViewsterIE(InfoExtractor):
                formats.extend(self._extract_f4m_formats(
                    video_url, video_id, f4m_id='hds'))
            elif ext == 'm3u8':
-                formats.extend(self._extract_m3u8_formats(
+                m3u8_formats = self._extract_m3u8_formats(
                    video_url, video_id, 'mp4', m3u8_id='hls',
-                    fatal=False  # m3u8 sometimes fail
-                ))
+                    fatal=False)  # m3u8 sometimes fail
+                if m3u8_formats:
+                    formats.extend(m3u8_formats)
            else:
                format_id = media.get('Bitrate')
                f = {
--- a/youtube_dl/extractor/vimeo.py
+++ b/youtube_dl/extractor/vimeo.py
@@ -133,7 +133,7 @@ class VimeoIE(VimeoBaseInfoExtractor):
                'uploader_id': 'user18948128',
                'uploader': 'Jaime Marquínez Ferrándiz',
                'duration': 10,
-                'description': 'This is "youtube-dl password protected test video" by Jaime Marquínez Ferrándiz on Vimeo, the home for high quality videos and the people who love them.',
+                'description': 'This is "youtube-dl password protected test video" by Jaime Marquínez Ferrándiz on Vimeo, the home for high quality videos and the people\u2026',
            },
            'params': {
                'videopassword': 'youtube-dl',
@@ -181,6 +181,11 @@ class VimeoIE(VimeoBaseInfoExtractor):
                'uploader_id': 'user28849593',
            },
        },
+        {
+            'url': 'https://vimeo.com/109815029',
+            'note': 'Video not completely processed, "failed" seed status',
+            'only_matching': True,
+        },
    ]

    @staticmethod
@@ -273,20 +278,30 @@ class VimeoIE(VimeoBaseInfoExtractor):
        self.report_extraction(video_id)

        vimeo_config = self._search_regex(
-            r'vimeo\.config\s*=\s*({.+?});', webpage,
+            r'vimeo\.config\s*=\s*(?:({.+?})|_extend\([^,]+,\s+({.+?})\));', webpage,
            'vimeo config', default=None)
        if vimeo_config:
            seed_status = self._parse_json(vimeo_config, video_id).get('seed_status', {})
            if seed_status.get('state') == 'failed':
                raise ExtractorError(
-                    '%s returned error: %s' % (self.IE_NAME, seed_status['title']),
+                    '%s said: %s' % (self.IE_NAME, seed_status['title']),
                    expected=True)

        # Extract the config JSON
        try:
            try:
                config_url = self._html_search_regex(
-                    r' data-config-url="(.+?)"', webpage, 'config URL')
+                    r' data-config-url="(.+?)"', webpage,
+                    'config URL', default=None)
+                if not config_url:
+                    # Sometimes a new react-based page is served instead of the old one; it requires
+                    # a different config URL extraction approach (see
+                    # https://github.com/rg3/youtube-dl/pull/7209)
+                    vimeo_clip_page_config = self._search_regex(
+                        r'vimeo\.clip_page_config\s*=\s*({.+?});', webpage,
+                        'vimeo clip page config')
+                    config_url = self._parse_json(
+                        vimeo_clip_page_config, video_id)['player']['config_url']
                config_json = self._download_webpage(config_url, video_id)
                config = json.loads(config_json)
            except RegexNotFoundError:
--- a/youtube_dl/extractor/vine.py
+++ b/youtube_dl/extractor/vine.py
@@ -1,10 +1,14 @@
+# coding: utf-8
 from __future__ import unicode_literals

 import re
 import itertools

 from .common import InfoExtractor
-from ..utils import unified_strdate
+from ..utils import (
+    int_or_none,
+    unified_strdate,
+)


 class VineIE(InfoExtractor):
@@ -17,10 +21,12 @@ class VineIE(InfoExtractor):
            'ext': 'mp4',
            'title': 'Chicken.',
            'alt_title': 'Vine by Jack Dorsey',
-            'description': 'Chicken.',
            'upload_date': '20130519',
            'uploader': 'Jack Dorsey',
            'uploader_id': '76',
+            'like_count': int,
+            'comment_count': int,
+            'repost_count': int,
        },
    }, {
        'url': 'https://vine.co/v/MYxVapFvz2z',
@@ -29,11 +35,13 @@ class VineIE(InfoExtractor):
            'id': 'MYxVapFvz2z',
            'ext': 'mp4',
            'title': 'Fuck Da Police #Mikebrown #justice #ferguson #prayforferguson #protesting #NMOS14',
-            'alt_title': 'Vine by Luna',
-            'description': 'Fuck Da Police #Mikebrown #justice #ferguson #prayforferguson #protesting #NMOS14',
+            'alt_title': 'Vine by Mars Ruiz',
            'upload_date': '20140815',
-            'uploader': 'Luna',
+            'uploader': 'Mars Ruiz',
            'uploader_id': '1102363502380728320',
+            'like_count': int,
+            'comment_count': int,
+            'repost_count': int,
        },
    }, {
        'url': 'https://vine.co/v/bxVjBbZlPUH',
@@ -43,14 +51,33 @@ class VineIE(InfoExtractor):
            'ext': 'mp4',
            'title': '#mw3 #ac130 #killcam #angelofdeath',
            'alt_title': 'Vine by Z3k3',
-            'description': '#mw3 #ac130 #killcam #angelofdeath',
            'upload_date': '20130430',
            'uploader': 'Z3k3',
            'uploader_id': '936470460173008896',
+            'like_count': int,
+            'comment_count': int,
+            'repost_count': int,
        },
    }, {
        'url': 'https://vine.co/oembed/MYxVapFvz2z.json',
        'only_matching': True,
+    }, {
+        'url': 'https://vine.co/v/e192BnZnZ9V',
+        'info_dict': {
+            'id': 'e192BnZnZ9V',
+            'ext': 'mp4',
+            'title': 'ยิ้ม~ เขิน~ อาย~ น่าร้ากอ้ะ >//< @n_whitewo @orlameena #lovesicktheseries  #lovesickseason2',
+            'alt_title': 'Vine by Pimry_zaa',
+            'upload_date': '20150705',
+            'uploader': 'Pimry_zaa',
+            'uploader_id': '1135760698325307392',
+            'like_count': int,
+            'comment_count': int,
+            'repost_count': int,
+        },
+        'params': {
+            'skip_download': True,
+        },
    }]

    def _real_extract(self, url):
@@ -58,32 +85,33 @@ class VineIE(InfoExtractor):
        webpage = self._download_webpage('https://vine.co/v/' + video_id, video_id)

        data = self._parse_json(
-            self._html_search_regex(
-                r'window\.POST_DATA = { %s: ({.+?}) };\s*</script>' % video_id,
+            self._search_regex(
+                r'window\.POST_DATA\s*=\s*{\s*%s\s*:\s*({.+?})\s*};\s*</script>' % video_id,
                webpage, 'vine data'),
            video_id)

        formats = [{
            'format_id': '%(format)s-%(rate)s' % f,
-            'vcodec': f['format'],
-            'quality': f['rate'],
+            'vcodec': f.get('format'),
+            'quality': f.get('rate'),
            'url': f['videoUrl'],
-        } for f in data['videoUrls']]
+        } for f in data['videoUrls'] if f.get('videoUrl')]

        self._sort_formats(formats)

+        username = data.get('username')
+
        return {
            'id': video_id,
-            'title': self._og_search_title(webpage),
-            'alt_title': self._og_search_description(webpage, default=None),
-            'description': data['description'],
-            'thumbnail': data['thumbnailUrl'],
-            'upload_date': unified_strdate(data['created']),
-            'uploader': data['username'],
-            'uploader_id': data['userIdStr'],
-            'like_count': data['likes']['count'],
-            'comment_count': data['comments']['count'],
-            'repost_count': data['reposts']['count'],
+            'title': data.get('description') or self._og_search_title(webpage),
+            'alt_title': 'Vine by %s' % username if username else self._og_search_description(webpage, default=None),
+            'thumbnail': data.get('thumbnailUrl'),
+            'upload_date': unified_strdate(data.get('created')),
+            'uploader': username,
+            'uploader_id': data.get('userIdStr'),
+            'like_count': int_or_none(data.get('likes', {}).get('count')),
+            'comment_count': int_or_none(data.get('comments', {}).get('count')),
+            'repost_count': int_or_none(data.get('reposts', {}).get('count')),
            'formats': formats,
        }

--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@@ -178,6 +178,52 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
            return


+class YoutubePlaylistBaseInfoExtractor(InfoExtractor):
+    # Shared paging logic for playlist-like pages. The code below reads
+    # self._VIDEO_RE and expects it to have named groups 'id' and 'title';
+    # a named group 'index' is optional.
+
+    # Extract the video ids from the playlist pages
+    def _entries(self, page, playlist_id):
+        # Generator: yields one url_result per video found on `page`, then
+        # keeps following the "load more" link until there is none or the
+        # returned content is empty
+        more_widget_html = content_html = page
+        for page_num in itertools.count(1):
+            for video_id, video_title in self.extract_videos_from_page(content_html):
+                yield self.url_result(
+                    video_id, 'Youtube', video_id=video_id,
+                    video_title=video_title)
+
+            mobj = re.search(r'data-uix-load-more-href="/?(?P<more>[^"]+)"', more_widget_html)
+            if not mobj:
+                break
+
+            more = self._download_json(
+                'https://youtube.com/%s' % mobj.group('more'), playlist_id,
+                'Downloading page #%s' % page_num,
+                transform_source=uppercase_escape)
+            content_html = more['content_html']
+            if not content_html.strip():
+                # Some webpages show a "Load more" button but they don't
+                # have more videos
+                break
+            more_widget_html = more['load_more_widget_html']
+
+    def extract_videos_from_page(self, page):
+        # Returns (video_id, video_title) pairs in order of first appearance.
+        # Duplicate ids are merged; a title found on a later duplicate fills
+        # in a title that was missing on the first occurrence.
+        ids_in_page = []
+        titles_in_page = []
+        for mobj in re.finditer(self._VIDEO_RE, page):
+            # The link with index 0 is not the first video of the playlist (not sure if still actual)
+            # Compare the 'index' group here: an 11-character 'id' can never equal '0'
+            if 'index' in mobj.groupdict() and mobj.group('index') == '0':
+                continue
+            video_id = mobj.group('id')
+            video_title = unescapeHTML(mobj.group('title'))
+            if video_title:
+                video_title = video_title.strip()
+            try:
+                idx = ids_in_page.index(video_id)
+                if video_title and not titles_in_page[idx]:
+                    titles_in_page[idx] = video_title
+            except ValueError:
+                ids_in_page.append(video_id)
+                titles_in_page.append(video_title)
+        return zip(ids_in_page, titles_in_page)
+
+
 class YoutubeIE(YoutubeBaseInfoExtractor):
    IE_DESC = 'YouTube.com'
    _VALID_URL = r"""(?x)^
@@ -1419,7 +1465,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
        }


-class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
+class YoutubePlaylistIE(YoutubeBaseInfoExtractor, YoutubePlaylistBaseInfoExtractor):
    IE_DESC = 'YouTube.com playlists'
    _VALID_URL = r"""(?x)(?:
                        (?:https?://)?
@@ -1440,7 +1486,7 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
                        ((?:PL|LL|EC|UU|FL|RD|UL)[0-9A-Za-z-_]{10,})
                     )"""
    _TEMPLATE_URL = 'https://www.youtube.com/playlist?list=%s'
-    _VIDEO_RE = r'href="\s*/watch\?v=(?P<id>[0-9A-Za-z_-]{11})&amp;[^"]*?index=(?P<index>\d+)'
+    _VIDEO_RE = r'href="\s*/watch\?v=(?P<id>[0-9A-Za-z_-]{11})&amp;[^"]*?index=(?P<index>\d+)(?:[^>]+>(?P<title>[^<]+))?'
    IE_NAME = 'youtube:playlist'
    _TESTS = [{
        'url': 'https://www.youtube.com/playlist?list=PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re',
@@ -1557,37 +1603,11 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
            else:
                self.report_warning('Youtube gives an alert message: ' + match)

-        # Extract the video ids from the playlist pages
-        def _entries():
-            more_widget_html = content_html = page
-            for page_num in itertools.count(1):
-                matches = re.finditer(self._VIDEO_RE, content_html)
-                # We remove the duplicates and the link with index 0
-                # (it's not the first video of the playlist)
-                new_ids = orderedSet(m.group('id') for m in matches if m.group('index') != '0')
-                for vid_id in new_ids:
-                    yield self.url_result(vid_id, 'Youtube', video_id=vid_id)
-
-                mobj = re.search(r'data-uix-load-more-href="/?(?P<more>[^"]+)"', more_widget_html)
-                if not mobj:
-                    break
-
-                more = self._download_json(
-                    'https://youtube.com/%s' % mobj.group('more'), playlist_id,
-                    'Downloading page #%s' % page_num,
-                    transform_source=uppercase_escape)
-                content_html = more['content_html']
-                if not content_html.strip():
-                    # Some webpages show a "Load more" button but they don't
-                    # have more videos
-                    break
-                more_widget_html = more['load_more_widget_html']
-
        playlist_title = self._html_search_regex(
            r'(?s)<h1 class="pl-header-title[^"]*">\s*(.*?)\s*</h1>',
            page, 'title')

-        return self.playlist_result(_entries(), playlist_id, playlist_title)
+        return self.playlist_result(self._entries(page, playlist_id), playlist_id, playlist_title)

    def _real_extract(self, url):
        # Extract playlist id
@@ -1613,10 +1633,11 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
        return self._extract_playlist(playlist_id)


-class YoutubeChannelIE(InfoExtractor):
+class YoutubeChannelIE(YoutubePlaylistBaseInfoExtractor):
    IE_DESC = 'YouTube.com channels'
    _VALID_URL = r'https?://(?:youtu\.be|(?:\w+\.)?youtube(?:-nocookie)?\.com)/channel/(?P<id>[0-9A-Za-z_-]+)'
    _TEMPLATE_URL = 'https://www.youtube.com/channel/%s/videos'
+    _VIDEO_RE = r'(?:title="(?P<title>[^"]+)"[^>]+)?href="/watch\?v=(?P<id>[0-9A-Za-z_-]+)&?'
    IE_NAME = 'youtube:channel'
    _TESTS = [{
        'note': 'paginated channel',
@@ -1627,22 +1648,6 @@ class YoutubeChannelIE(InfoExtractor):
        }
    }]

-    @staticmethod
-    def extract_videos_from_page(page):
-        ids_in_page = []
-        titles_in_page = []
-        for mobj in re.finditer(r'(?:title="(?P<title>[^"]+)"[^>]+)?href="/watch\?v=(?P<id>[0-9A-Za-z_-]+)&?', page):
-            video_id = mobj.group('id')
-            video_title = unescapeHTML(mobj.group('title'))
-            try:
-                idx = ids_in_page.index(video_id)
-                if video_title and not titles_in_page[idx]:
-                    titles_in_page[idx] = video_title
-            except ValueError:
-                ids_in_page.append(video_id)
-                titles_in_page.append(video_title)
-        return zip(ids_in_page, titles_in_page)
-
    def _real_extract(self, url):
        channel_id = self._match_id(url)

@@ -1685,29 +1690,7 @@ class YoutubeChannelIE(InfoExtractor):
                for video_id, video_title in self.extract_videos_from_page(channel_page)]
            return self.playlist_result(entries, channel_id)

-        def _entries():
-            more_widget_html = content_html = channel_page
-            for pagenum in itertools.count(1):
-
-                for video_id, video_title in self.extract_videos_from_page(content_html):
-                    yield self.url_result(
-                        video_id, 'Youtube', video_id=video_id,
-                        video_title=video_title)
-
-                mobj = re.search(
-                    r'data-uix-load-more-href="/?(?P<more>[^"]+)"',
-                    more_widget_html)
-                if not mobj:
-                    break
-
-                more = self._download_json(
-                    'https://youtube.com/%s' % mobj.group('more'), channel_id,
-                    'Downloading page #%s' % (pagenum + 1),
-                    transform_source=uppercase_escape)
-                content_html = more['content_html']
-                more_widget_html = more['load_more_widget_html']
-
-        return self.playlist_result(_entries(), channel_id)
+        return self.playlist_result(self._entries(channel_page, channel_id), channel_id)


 class YoutubeUserIE(YoutubeChannelIE):
--- a/youtube_dl/extractor/zdf.py
+++ b/youtube_dl/extractor/zdf.py
@@ -9,6 +9,7 @@ from ..utils import (
    int_or_none,
    unified_strdate,
    OnDemandPagedList,
+    xpath_text,
 )


@@ -19,13 +20,11 @@ def extract_from_xml_url(ie, video_id, xml_url):
        errnote='Failed to download video info')

    title = doc.find('.//information/title').text
-    description = doc.find('.//information/detail').text
-    duration = int(doc.find('.//details/lengthSec').text)
-    uploader_node = doc.find('.//details/originChannelTitle')
-    uploader = None if uploader_node is None else uploader_node.text
-    uploader_id_node = doc.find('.//details/originChannelId')
-    uploader_id = None if uploader_id_node is None else uploader_id_node.text
-    upload_date = unified_strdate(doc.find('.//details/airtime').text)
+    description = xpath_text(doc, './/information/detail', 'description')
+    duration = int_or_none(xpath_text(doc, './/details/lengthSec', 'duration'))
+    uploader = xpath_text(doc, './/details/originChannelTitle', 'uploader')
+    uploader_id = xpath_text(doc, './/details/originChannelId', 'uploader id')
+    upload_date = unified_strdate(xpath_text(doc, './/details/airtime', 'upload date'))

    def xml_to_format(fnode):
        video_url = fnode.find('url').text
@@ -40,15 +39,14 @@ def extract_from_xml_url(ie, video_id, xml_url):
        ext = format_m.group('container')
        proto = format_m.group('proto').lower()

-        quality = fnode.find('./quality').text
-        abr = int(fnode.find('./audioBitrate').text) // 1000
-        vbr_node = fnode.find('./videoBitrate')
-        vbr = None if vbr_node is None else int(vbr_node.text) // 1000
+        quality = xpath_text(fnode, './quality', 'quality')
+        abr = int_or_none(xpath_text(fnode, './audioBitrate', 'abr'), 1000)
+        vbr = int_or_none(xpath_text(fnode, './videoBitrate', 'vbr'), 1000)

-        width_node = fnode.find('./width')
-        width = None if width_node is None else int_or_none(width_node.text)
-        height_node = fnode.find('./height')
-        height = None if height_node is None else int_or_none(height_node.text)
+        width = int_or_none(xpath_text(fnode, './width', 'width'))
+        height = int_or_none(xpath_text(fnode, './height', 'height'))
+
+        filesize = int_or_none(xpath_text(fnode, './filesize', 'filesize'))

        format_note = ''
        if not format_note:
@@ -64,12 +62,31 @@ def extract_from_xml_url(ie, video_id, xml_url):
            'vbr': vbr,
            'width': width,
            'height': height,
-            'filesize': int_or_none(fnode.find('./filesize').text),
+            'filesize': filesize,
            'format_note': format_note,
            'protocol': proto,
            '_available': is_available,
        }

+    def xml_to_thumbnails(fnode):
+        thumbnails = []
+        for node in fnode:
+            thumbnail_url = node.text
+            if not thumbnail_url:
+                continue
+            thumbnail = {
+                'url': thumbnail_url,
+            }
+            if 'key' in node.attrib:
+                m = re.match('^([0-9]+)x([0-9]+)$', node.attrib['key'])
+                if m:
+                    thumbnail['width'] = int(m.group(1))
+                    thumbnail['height'] = int(m.group(2))
+            thumbnails.append(thumbnail)
+        return thumbnails
+
+    thumbnails = xml_to_thumbnails(doc.findall('.//teaserimages/teaserimage'))
+
    format_nodes = doc.findall('.//formitaeten/formitaet')
    formats = list(filter(
        lambda f: f['_available'],
@@ -81,6 +98,7 @@ def extract_from_xml_url(ie, video_id, xml_url):
        'title': title,
        'description': description,
        'duration': duration,
+        'thumbnails': thumbnails,
        'uploader': uploader,
        'uploader_id': uploader_id,
        'upload_date': upload_date,
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@@ -3,6 +3,7 @@

 from __future__ import unicode_literals

+import base64
 import calendar
 import codecs
 import contextlib
@@ -1700,8 +1701,8 @@ def js_to_json(code):
        if v in ('true', 'false', 'null'):
            return v
        if v.startswith('"'):
-            return v
-        if v.startswith("'"):
+            v = re.sub(r"\\'", "'", v[1:-1])
+        elif v.startswith("'"):
            v = v[1:-1]
            v = re.sub(r"\\\\|\\'|\"", lambda m: {
                '\\\\': '\\\\',
@@ -1795,6 +1796,10 @@ def urlhandle_detect_ext(url_handle):
    return mimetype2ext(getheader('Content-Type'))


+def encode_data_uri(data, mime_type):
+    return 'data:%s;base64,%s' % (mime_type, base64.b64encode(data).decode('ascii'))
+
+
 def age_restricted(content_limit, age_limit):
    """ Returns True iff the content should be blocked """

--- a/youtube_dl/version.py
+++ b/youtube_dl/version.py
@@ -1,3 +1,3 @@
 from __future__ import unicode_literals

-__version__ = '2015.10.16'
+__version__ = '2015.10.23'
Author	SHA1	Message	Date
Philipp Hagemeister	ab03c0b47c	release 2015.10.23	2015-10-23 09:33:05 +02:00
Sergey M․	7690787553	[crunchyroll] Improve subtitle regex (Closes #7262)	2015-10-22 20:34:11 +06:00
Yen Chi Hsuan	a65402ef42	[bbc.co.uk:article] Add new extractor (#7257)	2015-10-22 21:13:03 +08:00
Yen Chi Hsuan	7033bc1a51	[bbc] Fix test_BBC_9	2015-10-22 21:12:29 +08:00
Yen Chi Hsuan	89d5fbf354	[iqiyi] Update key	2015-10-22 17:47:11 +08:00
Jaime Marquínez Ferrándiz	8c3533ba97	[adultswim] Don't default to the native m3u8 downloader (closes #7243) Some of the streams are encrypted, which is not supported.	2015-10-21 23:57:23 +02:00
Sergey M․	44d6dd08b2	[facebook] Fix extraction (Closes #7252)	2015-10-21 21:35:57 +06:00
Sergey M․	cc449417c4	[vine] Use _search_regex for JSON data (Closes #7254, closes #7255)	2015-10-21 20:35:22 +06:00
Sergey M․	7308b8cb3d	[stitcher] Improve (Closes #7162, closes #7228)	2015-10-20 23:12:13 +06:00
mjdubell	4211c83aa4	[stitcher] Add extractor Stitcher review updates Removed re import Stitcher review updates	2015-10-20 23:11:26 +06:00
Sergey M․	d01949dc89	[utils:js_to_json] Fix bad escape in double quoted strings	2015-10-20 23:09:51 +06:00
Sergey M.	63a6494834	Merge pull request #7242 from lalinsky/vimeo-seed-status [vimeo] Fix error parsing	2015-10-20 20:57:12 +06:00
Lukáš Lalinský	8bea039b83	[vimeo] New test, fixed one older test	2015-10-20 16:38:44 +02:00
Sergey M․	d65889bbc0	[vidme] Update test	2015-10-20 20:18:23 +06:00
Sergey M․	4a8963770e	[vidme] Use original vid.me title template for untitled videos	2015-10-20 20:17:54 +06:00
Sergey M.	5b0aa2c7b1	Merge pull request #7237 from lalinsky/vidme2 [vidme] Stream URL fallback, better error message for suspended videos	2015-10-20 20:14:12 +06:00
Lukáš Lalinský	b6aa99aff8	[vimeo] Fix error parsing	2015-10-20 10:31:32 +02:00
Lukáš Lalinský	0be30bafa4	[vidme] Stream URL fallback, better error message for suspended videos	2015-10-19 20:53:27 +02:00
Sergey M․	7b091c370c	[zdf] Modernize and PEP 8	2015-10-19 01:48:05 +06:00
Sergey M.	334b5c3b72	Merge pull request #7225 from kennell/master [zdf] Extract thumbnails	2015-10-19 01:30:14 +06:00
kennell	b7cedb1604	simplify thumbnail dict building	2015-10-18 21:25:26 +02:00
Sergey M.	2038ad6ee7	[README.md] Add uploader extraction sample in example extractor	2015-10-19 01:12:41 +06:00
kennell	b243340f0c	check if key attrib matches resolution pattern	2015-10-18 21:07:52 +02:00
kennell	8cc83d301d	use int_or_none, check if attrib exists, remove thumbnail	2015-10-18 20:47:42 +02:00
Sergey M․	d762f86e94	[ok] Extend _VALID_URL	2015-10-19 00:11:16 +06:00
kennell	264b23e1a4	adds thumbnail support for ZDF Mediathek extractor	2015-10-18 19:56:22 +02:00
Philipp Hagemeister	a6e0afa2bb	release 2015.10.18	2015-10-18 19:23:40 +02:00
Yen Chi Hsuan	4285a47f40	Merge pull request #7208 from yan12125/letv-fix [Letv] Fix extraction	2015-10-18 22:32:10 +08:00
Sergey M․	e36963e0eb	[eagleplatform] Identify hls formats	2015-10-18 20:24:33 +06:00
Sergey M․	dedd35c6bc	[viewster] Fix failing m3u8	2015-10-18 19:59:18 +06:00
Sergey M․	608945d44a	[canalc2] Fix test	2015-10-18 19:27:22 +06:00
Sergey M․	b1bf063503	[canalc2] Extract duration	2015-10-18 19:27:05 +06:00
Sergey M․	14bddf35fb	[canalc2] Add ext	2015-10-18 19:23:52 +06:00
Sergey M․	ef6c868f23	[canalc2] Improve some regexes	2015-10-18 19:23:31 +06:00
Sergey M․	6682049dee	[canalc2] Improve rtmp extraction	2015-10-18 19:19:43 +06:00
remitamine	b0f001a6cb	[canalc2] fix info extraction	2015-10-18 19:06:53 +06:00
Jaime Marquínez Ferrándiz	dd67702a3e	[imdb] Fix extraction (fixes #7220)	2015-10-18 14:13:06 +02:00
Yen Chi Hsuan	05a3879f1c	[letv] Update M3U8's MIME type The new MIME type appears in the following places: https://www.iana.org/assignments/media-types/media-types.xhtml#application https://hg.python.org/cpython/file/tip/Lib/mimetypes.py	2015-10-18 19:19:46 +08:00
Yen Chi Hsuan	4a7b790384	[twitter:card] Support YouTube embeds	2015-10-18 19:07:37 +08:00
Yen Chi Hsuan	09ff81316e	Merge branch 'atomicdryad-pr-twitter'	2015-10-18 18:44:21 +08:00
Yen Chi Hsuan	c88aec845a	[twitter] Fix short URL extraction	2015-10-18 18:23:56 +08:00
Yen Chi Hsuan	77a54b6a65	[twitter:card] Use _html_search_regex	2015-10-18 18:08:24 +08:00
Yen Chi Hsuan	575036b405	[twitter] Simplify and improve	2015-10-18 18:04:13 +08:00
Yen Chi Hsuan	f6dfd6603a	[twitter] Use _html_search_regex	2015-10-18 17:18:01 +08:00
Yen Chi Hsuan	e04edad621	[twitter] Inherit from InfoExtractor directly	2015-10-18 17:16:57 +08:00
Yen Chi Hsuan	f322bfb063	[twitter:card] Remove unneeded 'ext'	2015-10-18 17:15:47 +08:00
Yen Chi Hsuan	014e880372	[twitter] Add IE_NAMEs	2015-10-18 17:13:58 +08:00
Yen Chi Hsuan	01d22d4703	[twitter] Use _download_xml	2015-10-18 17:11:55 +08:00
Yen Chi Hsuan	48aae2d2cf	[twitter] Update tests	2015-10-18 17:07:48 +08:00
Yen Chi Hsuan	c571dea953	Merge branch 'pr-twitter' of https://github.com/atomicdryad/youtube-dl into atomicdryad-pr-twitter	2015-10-18 16:49:56 +08:00
Yen Chi Hsuan	8b172c2e10	[YoutubeDL] Use DataHandler	2015-10-18 13:44:22 +08:00
Yen Chi Hsuan	0a67a3632b	[compat] Add compat_urllib_request_DataHandler	2015-10-18 13:44:21 +08:00
Yen Chi Hsuan	985e4fdc07	[downloader/hls] Add headers only for http(s) URLs ffmpeg 2.8.1 raises an error with -headers and non-http input files.	2015-10-18 13:44:21 +08:00
Yen Chi Hsuan	1e399778ee	[letv] Fix extraction Using data URIs for passing the decrypted M3U8 manifest, which is supported by ffmpeg only.	2015-10-18 13:42:57 +08:00
Sergey M․	2e022397c4	[vine] Add counters to tests	2015-10-18 09:36:19 +06:00
Sergey M․	02835c6bf4	[extractor/common] Document repost_count	2015-10-18 09:34:54 +06:00
Sergey M․	91816e8f16	[vine] Remove duplicate metadata, make more robust and modernize (Closes #7215)	2015-10-18 09:32:08 +06:00
Lukáš Lalinský	10c38c7ca2	[vine] Fix download tests	2015-10-18 09:20:54 +06:00
Lukáš Lalinský	94a773feb9	[vine] Use JS data to get title/alt_title	2015-10-18 09:20:46 +06:00
Sergey M․	448ef1f31c	[extractor/common] Allow angle brackets in attributes in _og_regexes (#7215)	2015-10-18 09:11:02 +06:00
Sergey M․	49941c4e4f	[crunchyroll] Add maturity wall reference tests (#7202)	2015-10-18 07:06:47 +06:00
Sergey M․	80f48920c8	[crunchyroll] Bypass maturity wall (Closes #7202)	2015-10-18 06:57:57 +06:00
Sergey M․	5a11b793fe	[lynda] Extract all prioritized streams	2015-10-18 01:36:03 +06:00
Sergey M․	7593fbaa12	[dailymotion] Error spelling	2015-10-18 01:00:37 +06:00
Sergey M.	2eb0f72a0e	Merge pull request #7212 from lalinsky/dailymotion-error [dailymotion] Report errors from player v5	2015-10-18 00:54:27 +06:00
Sergey M․	8e5b121948	[test_youtube_lists] Add test flat playlist entries' titles	2015-10-18 00:27:06 +06:00
Sergey M․	648e6a1ffe	[youtube] Generalize playlist entries extraction (Closes #6699, closes #6992)	2015-10-18 00:11:34 +06:00
Lukáš Lalinský	583882fdce	[dailymotion] Report errors from player v5	2015-10-17 19:26:30 +02:00
Sergey M․	9eb31b265f	[vidme] Add user-disabled test	2015-10-17 23:01:24 +06:00
Sergey M.	ddeb1b3de2	Merge pull request #7211 from lalinsky/vidme-suspended [vidme] Better error message for suspended vidme videos	2015-10-17 22:56:51 +06:00
Lukáš Lalinský	59fe4824f8	[vidme] Better error message for suspended vidme videos	2015-10-17 18:52:25 +02:00
Sergey M․	dd8417526b	[vimeo] Clarify new react+flux website fallback	2015-10-17 22:48:14 +06:00
Sergey M.	09670d5ba4	Merge pull request #7209 from lalinsky/vimeo-new-page Extract config URL from (new?) React-based Vimeo's page	2015-10-17 22:39:17 +06:00
Lukáš Lalinský	41a7b00f18	[vimeo] Extract config URL from (new?) React-based Vimeo's page	2015-10-17 18:30:56 +02:00
Sergey M․	350c948133	[twitch:vod] Formatting	2015-10-17 18:43:12 +06:00
Sergey M․	e5e9966199	[twitch:vod] Improve extraction	2015-10-17 18:29:54 +06:00
Sergey M․	fbd9f6ea80	[twitch] Improve authentication	2015-10-17 18:28:21 +06:00
Jaime Marquínez Ferrándiz	6df7179e6c	[rte] Actually recognize https urls There was a missing 's' before the '?'.	2015-10-17 11:53:59 +02:00
Jaime Marquínez Ferrándiz	36eb802baf	[rte] Replace expired test According to their webpage it should be available until October 2035.	2015-10-17 11:49:51 +02:00
fnord	9e7e0dffd5	Actually add the extractor	2015-07-21 16:56:35 -05:00
fnord	c3dea3f878	Twittercard: support vmapurl method	2015-07-21 16:45:36 -05:00
fnord	f57f84f606	Twitter: get and describe video from status urls	2015-07-21 16:38:40 -05:00