release 2014.12.12.2

[cnet] Update to new theplatform infrastructure (Fixes #2736)
[goldenmoustache] Remove view count
2014-12-12 15:56:45 +01:00 · 2014-12-12 15:55:55 +01:00 · 2014-12-12 13:09:55 +01:00 · 2014-12-12 13:07:43 +01:00 · 2014-12-12 13:03:16 +01:00 · 2014-12-12 12:55:13 +01:00
27 changed files with 473 additions and 329 deletions
--- a/2
+++ b/2
@@ -1,7 +1,7 @@
 all: youtube-dl README.md README.txt youtube-dl.1 youtube-dl.bash-completion youtube-dl.zsh youtube-dl.fish

 clean:
-	rm -rf youtube-dl.1.temp.md youtube-dl.1 youtube-dl.bash-completion README.txt MANIFEST build/ dist/ .coverage cover/ youtube-dl.tar.gz youtube-dl.zsh youtube-dl.fish *.dump *.part
+	rm -rf youtube-dl.1.temp.md youtube-dl.1 youtube-dl.bash-completion README.txt MANIFEST build/ dist/ .coverage cover/ youtube-dl.tar.gz youtube-dl.zsh youtube-dl.fish *.dump *.part *.info.json

 cleanall: clean
 	rm -f youtube-dl youtube-dl.exe
--- a/test/test_utils.py
+++ b/test/test_utils.py
@@ -144,6 +144,9 @@ class TestUtil(unittest.TestCase):
        self.assertEqual(unified_strdate('2012/10/11 01:56:38 +0000'), '20121011')
        self.assertEqual(unified_strdate('1968-12-10'), '19681210')
        self.assertEqual(unified_strdate('28/01/2014 21:00:00 +0100'), '20140128')
+        self.assertEqual(
+            unified_strdate('11/26/2014 11:30:00 AM PST', day_first=False),
+            '20141126')

    def test_find_xpath_attr(self):
        testxml = '''<root>
--- a/youtube_dl/YoutubeDL.py
+++ b/youtube_dl/YoutubeDL.py
@@ -622,23 +622,15 @@ class YoutubeDL(object):
                ie_result['url'], ie_key=ie_result.get('ie_key'),
                extra_info=extra_info, download=False, process=False)

-            def make_result(embedded_info):
-                new_result = ie_result.copy()
-                for f in ('_type', 'url', 'ext', 'player_url', 'formats',
-                          'entries', 'ie_key', 'duration',
-                          'subtitles', 'annotations', 'format',
-                          'thumbnail', 'thumbnails'):
-                    if f in new_result:
-                        del new_result[f]
-                    if f in embedded_info:
-                        new_result[f] = embedded_info[f]
-                return new_result
-            new_result = make_result(info)
+            force_properties = dict(
+                (k, v) for k, v in ie_result.items() if v is not None)
+            for f in ('_type', 'url'):
+                if f in force_properties:
+                    del force_properties[f]
+            new_result = info.copy()
+            new_result.update(force_properties)

            assert new_result.get('_type') != 'url_transparent'
-            if new_result.get('_type') == 'compat_list':
-                new_result['entries'] = [
-                    make_result(e) for e in new_result['entries']]

            return self.process_ie_result(
                new_result, download=download, extra_info=extra_info)
--- a/youtube_dl/compat.py
+++ b/youtube_dl/compat.py
@@ -297,7 +297,9 @@ else:

 # Old 2.6 and 2.7 releases require kwargs to be bytes
 try:
-    (lambda x: x)(**{'x': 0})
+    def _testfunc(x):
+        pass
+    _testfunc(**{'x': 0})
 except TypeError:
    def compat_kwargs(kwargs):
        return dict((bytes(k), v) for k, v in kwargs.items())
--- a/youtube_dl/extractor/init.py
+++ b/youtube_dl/extractor/init.py
@@ -51,7 +51,7 @@ from .cbsnews import CBSNewsIE
 from .ceskatelevize import CeskaTelevizeIE
 from .channel9 import Channel9IE
 from .chilloutzone import ChilloutzoneIE
-from .cinemassacre import CinemassacreIE
+from .cinchcast import CinchcastIE
 from .clipfish import ClipfishIE
 from .cliphunter import CliphunterIE
 from .clipsyndicate import ClipsyndicateIE
@@ -336,6 +336,7 @@ from .savefrom import SaveFromIE
 from .sbs import SBSIE
 from .scivee import SciVeeIE
 from .screencast import ScreencastIE
+from .screenwavemedia import CinemassacreIE, ScreenwaveMediaIE, TeamFourIE
 from .servingsys import ServingSysIE
 from .sexu import SexuIE
 from .sexykarma import SexyKarmaIE
@@ -526,7 +527,7 @@ from .youtube import (
    YoutubeUserIE,
    YoutubeWatchLaterIE,
 )
-from .zdf import ZDFIE
+from .zdf import ZDFIE, ZDFChannelIE
 from .zingmp3 import (
    ZingMp3SongIE,
    ZingMp3AlbumIE,
--- a/youtube_dl/extractor/behindkink.py
+++ b/youtube_dl/extractor/behindkink.py
@@ -10,15 +10,15 @@ from ..utils import url_basename
 class BehindKinkIE(InfoExtractor):
    _VALID_URL = r'http://(?:www\.)?behindkink\.com/(?P<year>[0-9]{4})/(?P<month>[0-9]{2})/(?P<day>[0-9]{2})/(?P<id>[^/#?_]+)'
    _TEST = {
-        'url': 'http://www.behindkink.com/2014/08/14/ab1576-performers-voice-finally-heard-the-bill-is-killed/',
-        'md5': '41ad01222b8442089a55528fec43ec01',
+        'url': 'http://www.behindkink.com/2014/12/05/what-are-you-passionate-about-marley-blaze/',
+        'md5': '507b57d8fdcd75a41a9a7bdb7989c762',
        'info_dict': {
-            'id': '36370',
+            'id': '37127',
            'ext': 'mp4',
-            'title': 'AB1576 - PERFORMERS VOICE FINALLY HEARD - THE BILL IS KILLED!',
-            'description': 'The adult industry voice was finally heard as Assembly Bill 1576 remained\xa0 in suspense today at the Senate Appropriations Hearing. AB1576 was, among other industry damaging issues, a condom mandate...',
-            'upload_date': '20140814',
-            'thumbnail': 'http://www.behindkink.com/wp-content/uploads/2014/08/36370_AB1576_Win.jpg',
+            'title': 'What are you passionate about – Marley Blaze',
+            'description': 'md5:aee8e9611b4ff70186f752975d9b94b4',
+            'upload_date': '20141205',
+            'thumbnail': 'http://www.behindkink.com/wp-content/uploads/2014/12/blaze-1.jpg',
            'age_limit': 18,
        }
    }
@@ -26,26 +26,19 @@ class BehindKinkIE(InfoExtractor):
    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        display_id = mobj.group('id')
-        year = mobj.group('year')
-        month = mobj.group('month')
-        day = mobj.group('day')
-        upload_date = year + month + day

        webpage = self._download_webpage(url, display_id)

        video_url = self._search_regex(
-            r"'file':\s*'([^']+)'",
-            webpage, 'URL base')
-
-        video_id = url_basename(video_url)
-        video_id = video_id.split('_')[0]
+            r'<source src="([^"]+)"', webpage, 'video URL')
+        video_id = url_basename(video_url).split('_')[0]
+        upload_date = mobj.group('year') + mobj.group('month') + mobj.group('day')

        return {
            'id': video_id,
-            'url': video_url,
-            'ext': 'mp4',
-            'title': self._og_search_title(webpage),
            'display_id': display_id,
+            'url': video_url,
+            'title': self._og_search_title(webpage),
            'thumbnail': self._og_search_thumbnail(webpage),
            'description': self._og_search_description(webpage),
            'upload_date': upload_date,
--- a/youtube_dl/extractor/channel9.py
+++ b/youtube_dl/extractor/channel9.py
@@ -236,16 +236,17 @@ class Channel9IE(InfoExtractor):
        if contents is None:
            return contents

-        session_meta = {'session_code': self._extract_session_code(html),
-                        'session_day': self._extract_session_day(html),
-                        'session_room': self._extract_session_room(html),
-                        'session_speakers': self._extract_session_speakers(html),
-                        }
+        session_meta = {
+            'session_code': self._extract_session_code(html),
+            'session_day': self._extract_session_day(html),
+            'session_room': self._extract_session_room(html),
+            'session_speakers': self._extract_session_speakers(html),
+        }

        for content in contents:
            content.update(session_meta)

-        return contents
+        return self.playlist_result(contents)

    def _extract_list(self, content_path):
        rss = self._download_xml(self._RSS_URL % content_path, content_path, 'Downloading RSS')
--- a/youtube_dl/extractor/cinchcast.py
+++ b/youtube_dl/extractor/cinchcast.py
@@ -0,0 +1,52 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import (
+    unified_strdate,
+    xpath_text,
+)
+
+
+class CinchcastIE(InfoExtractor):
+    _VALID_URL = r'https?://player\.cinchcast\.com/.*?assetId=(?P<id>[0-9]+)'
+    _TEST = {
+        # Actual test is run in generic, look for undergroundwellness
+        'url': 'http://player.cinchcast.com/?platformId=1&#038;assetType=single&#038;assetId=7141703',
+        'only_matching': True,
+    }
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+        doc = self._download_xml(
+            'http://www.blogtalkradio.com/playerasset/mrss?assetType=single&assetId=%s' % video_id,
+            video_id)
+
+        item = doc.find('.//item')
+        title = xpath_text(item, './title', fatal=True)
+        date_str = xpath_text(
+            item, './{http://developer.longtailvideo.com/trac/}date')
+        upload_date = unified_strdate(date_str, day_first=False)
+        # duration is present but wrong
+        formats = []
+        formats.append({
+            'format_id': 'main',
+            'url': item.find(
+                './{http://search.yahoo.com/mrss/}content').attrib['url'],
+        })
+        backup_url = xpath_text(
+            item, './{http://developer.longtailvideo.com/trac/}backupContent')
+        if backup_url:
+            formats.append({
+                'preference': 2,  # seems to be more reliable
+                'format_id': 'backup',
+                'url': backup_url,
+            })
+        self._sort_formats(formats)
+
+        return {
+            'id': video_id,
+            'title': title,
+            'upload_date': upload_date,
+            'formats': formats,
+        }
--- a/youtube_dl/extractor/cnet.py
+++ b/youtube_dl/extractor/cnet.py
@@ -15,23 +15,24 @@ class CNETIE(InfoExtractor):
    _VALID_URL = r'https?://(?:www\.)?cnet\.com/videos/(?P<id>[^/]+)/'
    _TEST = {
        'url': 'http://www.cnet.com/videos/hands-on-with-microsofts-windows-8-1-update/',
-        'md5': '041233212a0d06b179c87cbcca1577b8',
        'info_dict': {
            'id': '56f4ea68-bd21-4852-b08c-4de5b8354c60',
-            'ext': 'mp4',
+            'ext': 'flv',
            'title': 'Hands-on with Microsoft Windows 8.1 Update',
            'description': 'The new update to the Windows 8 OS brings improved performance for mouse and keyboard users.',
            'thumbnail': 're:^http://.*/flmswindows8.jpg$',
-            'uploader_id': 'sarah.mitroff@cbsinteractive.com',
+            'uploader_id': '6085384d-619e-11e3-b231-14feb5ca9861',
            'uploader': 'Sarah Mitroff',
+        },
+        'params': {
+            'skip_download': 'requires rtmpdump',
        }
    }

    def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        display_id = mobj.group('id')
-
+        display_id = self._match_id(url)
        webpage = self._download_webpage(url, display_id)
+
        data_json = self._html_search_regex(
            r"<div class=\"cnetVideoPlayer\"\s+.*?data-cnet-video-options='([^']+)'",
            webpage, 'data json')
@@ -42,37 +43,31 @@ class CNETIE(InfoExtractor):
        if not vdata:
            raise ExtractorError('Cannot find video data')

+        mpx_account = data['config']['players']['default']['mpx_account']
+        vid = vdata['files']['rtmp']
+        tp_link = 'http://link.theplatform.com/s/%s/%s' % (mpx_account, vid)
+
        video_id = vdata['id']
        title = vdata.get('headline')
        if title is None:
            title = vdata.get('title')
        if title is None:
            raise ExtractorError('Cannot find title!')
-        description = vdata.get('dek')
        thumbnail = vdata.get('image', {}).get('path')
        author = vdata.get('author')
        if author:
            uploader = '%s %s' % (author['firstName'], author['lastName'])
-            uploader_id = author.get('email')
+            uploader_id = author.get('id')
        else:
            uploader = None
            uploader_id = None

-        formats = [{
-            'format_id': '%s-%s-%s' % (
-                f['type'], f['format'],
-                int_or_none(f.get('bitrate'), 1000, default='')),
-            'url': f['uri'],
-            'tbr': int_or_none(f.get('bitrate'), 1000),
-        } for f in vdata['files']['data']]
-        self._sort_formats(formats)
-
        return {
+            '_type': 'url_transparent',
+            'url': tp_link,
            'id': video_id,
            'display_id': display_id,
            'title': title,
-            'formats': formats,
-            'description': description,
            'uploader': uploader,
            'uploader_id': uploader_id,
            'thumbnail': thumbnail,
--- a/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@@ -118,6 +118,7 @@ class InfoExtractor(object):

    The following fields are optional:

+    alt_title:      A secondary title of the video.
    display_id      An alternative identifier for the video, not necessarily
                    unique, but available before title. Typically, id is
                    something like "4234987", title "Dancing naked mole rats",
@@ -129,7 +130,7 @@ class InfoExtractor(object):
                        * "resolution" (optional, string "{width}x{height"},
                                        deprecated)
    thumbnail:      Full URL to a video thumbnail image.
-    description:    One-line video description.
+    description:    Full video description.
    uploader:       Full name of the video uploader.
    timestamp:      UNIX timestamp of the moment the video became available.
    upload_date:    Video upload date (YYYYMMDD).
@@ -391,6 +392,10 @@ class InfoExtractor(object):
            url_or_request, video_id, note, errnote, fatal=fatal)
        if (not fatal) and json_string is False:
            return None
+        return self._parse_json(
+            json_string, video_id, transform_source=transform_source, fatal=fatal)
+
+    def _parse_json(self, json_string, video_id, transform_source=None, fatal=True):
        if transform_source:
            json_string = transform_source(json_string)
        try:
--- a/youtube_dl/extractor/facebook.py
+++ b/youtube_dl/extractor/facebook.py
@@ -13,9 +13,10 @@ from ..compat import (
    compat_urllib_request,
 )
 from ..utils import (
-    urlencode_postdata,
    ExtractorError,
+    int_or_none,
    limit_length,
+    urlencode_postdata,
 )


@@ -36,7 +37,6 @@ class FacebookIE(InfoExtractor):
        'info_dict': {
            'id': '637842556329505',
            'ext': 'mp4',
-            'duration': 38,
            'title': 're:Did you know Kei Nishikori is the first Asian man to ever reach a Grand Slam',
        }
    }, {
@@ -107,9 +107,7 @@ class FacebookIE(InfoExtractor):
        self._login()

    def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group('id')
-
+        video_id = self._match_id(url)
        url = 'https://www.facebook.com/video/video.php?v=%s' % video_id
        webpage = self._download_webpage(url, video_id)

@@ -149,6 +147,6 @@ class FacebookIE(InfoExtractor):
            'id': video_id,
            'title': video_title,
            'url': video_url,
-            'duration': int(video_data['video_duration']),
-            'thumbnail': video_data['thumbnail_src'],
+            'duration': int_or_none(video_data.get('video_duration')),
+            'thumbnail': video_data.get('thumbnail_src'),
        }
--- a/youtube_dl/extractor/generic.py
+++ b/youtube_dl/extractor/generic.py
@@ -467,8 +467,17 @@ class GenericIE(InfoExtractor):
            'expected_warnings': [
                'URL could be a direct video link, returning it as such.'
            ]
-        }
-
+        },
+        # Cinchcast embed
+        {
+            'url': 'http://undergroundwellness.com/podcasts/306-5-steps-to-permanent-gut-healing/',
+            'info_dict': {
+                'id': '7141703',
+                'ext': 'mp3',
+                'upload_date': '20141126',
+                'title': 'Jack Tips: 5 Steps to Permanent Gut Healing',
+            }
+        },
    ]

    def report_following_redirect(self, new_url):
@@ -962,6 +971,13 @@ class GenericIE(InfoExtractor):
        if mobj is not None:
            return self.url_result(mobj.group('url'), 'SBS')

+        # Look for embedded Cinchcast player
+        mobj = re.search(
+            r'<iframe[^>]+?src=(["\'])(?P<url>https?://player\.cinchcast\.com/.+?)\1',
+            webpage)
+        if mobj is not None:
+            return self.url_result(mobj.group('url'), 'Cinchcast')
+
        mobj = re.search(
            r'<iframe[^>]+?src=(["\'])(?P<url>https?://m(?:lb)?\.mlb\.com/shared/video/embed/embed\.html\?.+?)\1',
            webpage)
--- a/youtube_dl/extractor/goldenmoustache.py
+++ b/youtube_dl/extractor/goldenmoustache.py
@@ -17,7 +17,6 @@ class GoldenMoustacheIE(InfoExtractor):
            'title': 'Suricate - Le Poker',
            'description': 'md5:3d1f242f44f8c8cb0a106f1fd08e5dc9',
            'thumbnail': 're:^https?://.*\.jpg$',
-            'view_count': int,
        }
    }, {
        'url': 'http://www.goldenmoustache.com/le-lab-tout-effacer-mc-fly-et-carlito-55249/',
@@ -28,7 +27,6 @@ class GoldenMoustacheIE(InfoExtractor):
            'title': 'Le LAB - Tout Effacer (Mc Fly et Carlito)',
            'description': 'md5:9b7fbf11023fb2250bd4b185e3de3b2a',
            'thumbnail': 're:^https?://.*\.(?:png|jpg)$',
-            'view_count': int,
        }
    }]

@@ -42,9 +40,6 @@ class GoldenMoustacheIE(InfoExtractor):
            r'<title>(.*?)(?: - Golden Moustache)?</title>', webpage, 'title')
        thumbnail = self._og_search_thumbnail(webpage)
        description = self._og_search_description(webpage)
-        view_count = int_or_none(self._html_search_regex(
-            r'<strong>([0-9]+)</strong>\s*VUES</span>',
-            webpage, 'view count', fatal=False))

        return {
            'id': video_id,
@@ -53,5 +48,4 @@ class GoldenMoustacheIE(InfoExtractor):
            'title': title,
            'description': description,
            'thumbnail': thumbnail,
-            'view_count': view_count,
        }
--- a/youtube_dl/extractor/goshgay.py
+++ b/youtube_dl/extractor/goshgay.py
@@ -2,57 +2,52 @@
 from __future__ import unicode_literals

 from .common import InfoExtractor
+from ..compat import (
+    compat_parse_qs,
+)
 from ..utils import (
-    compat_urlparse,
-    ExtractorError,
+    parse_duration,
 )


 class GoshgayIE(InfoExtractor):
-    _VALID_URL = r'^(?:https?://)www.goshgay.com/video(?P<id>\d+?)($|/)'
+    _VALID_URL = r'https?://www\.goshgay\.com/video(?P<id>\d+?)($|/)'
    _TEST = {
-        'url': 'http://www.goshgay.com/video4116282',
-        'md5': '268b9f3c3229105c57859e166dd72b03',
+        'url': 'http://www.goshgay.com/video299069/diesel_sfw_xxx_video',
+        'md5': '027fcc54459dff0feb0bc06a7aeda680',
        'info_dict': {
-            'id': '4116282',
+            'id': '299069',
            'ext': 'flv',
-            'title': 'md5:089833a4790b5e103285a07337f245bf',
-            'thumbnail': 're:http://.*\.jpg',
+            'title': 'DIESEL SFW XXX Video',
+            'thumbnail': 're:^http://.*\.jpg$',
+            'duration': 79,
            'age_limit': 18,
        }
    }

    def _real_extract(self, url):
        video_id = self._match_id(url)
-
        webpage = self._download_webpage(url, video_id)
-        title = self._og_search_title(webpage)
-        thumbnail = self._og_search_thumbnail(webpage)
+
+        title = self._html_search_regex(
+            r'<h2>(.*?)<', webpage, 'title')
+        duration = parse_duration(self._html_search_regex(
+            r'<span class="duration">\s*-?\s*(.*?)</span>',
+            webpage, 'duration', fatal=False))
        family_friendly = self._html_search_meta(
            'isFamilyFriendly', webpage, default='false')
-        config_url = self._search_regex(
-            r"'config'\s*:\s*'([^']+)'", webpage, 'config URL')

-        config = self._download_xml(
-            config_url, video_id, 'Downloading player config XML')
-
-        if config is None:
-            raise ExtractorError('Missing config XML')
-        if config.tag != 'config':
-            raise ExtractorError('Missing config attribute')
-        fns = config.findall('file')
-        if len(fns) < 1:
-            raise ExtractorError('Missing media URI')
-        video_url = fns[0].text
-
-        url_comp = compat_urlparse.urlparse(url)
-        ref = "%s://%s%s" % (url_comp[0], url_comp[1], url_comp[2])
+        flashvars = compat_parse_qs(self._html_search_regex(
+            r'<embed.+?id="flash-player-embed".+?flashvars="([^"]+)"',
+            webpage, 'flashvars'))
+        thumbnail = flashvars.get('url_bigthumb', [None])[0]
+        video_url = flashvars['flv_url'][0]

        return {
            'id': video_id,
            'url': video_url,
            'title': title,
            'thumbnail': thumbnail,
-            'http_referer': ref,
+            'duration': duration,
            'age_limit': 0 if family_friendly == 'true' else 18,
        }
--- a/youtube_dl/extractor/helsinki.py
+++ b/youtube_dl/extractor/helsinki.py
@@ -2,9 +2,8 @@

 from __future__ import unicode_literals

-import re
-
 from .common import InfoExtractor
+from ..utils import js_to_json


 class HelsinkiIE(InfoExtractor):
@@ -24,39 +23,21 @@ class HelsinkiIE(InfoExtractor):
    }

    def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group('id')
+        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)
-        formats = []
-
-        mobj = re.search(r'file=((\w+):[^&]+)', webpage)
-        if mobj:
-            formats.append({
-                'ext': mobj.group(2),
-                'play_path': mobj.group(1),
-                'url': 'rtmp://flashvideo.it.helsinki.fi/vod/',
-                'player_url': 'http://video.helsinki.fi/player.swf',
-                'format_note': 'sd',
-                'quality': 0,
-            })
-
-        mobj = re.search(r'hd\.file=((\w+):[^&]+)', webpage)
-        if mobj:
-            formats.append({
-                'ext': mobj.group(2),
-                'play_path': mobj.group(1),
-                'url': 'rtmp://flashvideo.it.helsinki.fi/vod/',
-                'player_url': 'http://video.helsinki.fi/player.swf',
-                'format_note': 'hd',
-                'quality': 1,
-            })

+        params = self._parse_json(self._html_search_regex(
+            r'(?s)jwplayer\("player"\).setup\((\{.*?\})\);',
+            webpage, 'player code'), video_id, transform_source=js_to_json)
+        formats = [{
+            'url': s['file'],
+            'ext': 'mp4',
+        } for s in params['sources']]
        self._sort_formats(formats)

        return {
            'id': video_id,
            'title': self._og_search_title(webpage).replace('Video: ', ''),
            'description': self._og_search_description(webpage),
-            'thumbnail': self._og_search_thumbnail(webpage),
            'formats': formats,
        }
--- a/youtube_dl/extractor/howstuffworks.py
+++ b/youtube_dl/extractor/howstuffworks.py
@@ -1,12 +1,12 @@
 from __future__ import unicode_literals

-import re
-import json
-import random
-import string
-
 from .common import InfoExtractor
-from ..utils import find_xpath_attr
+from ..utils import (
+    find_xpath_attr,
+    int_or_none,
+    js_to_json,
+    unescapeHTML,
+)


 class HowStuffWorksIE(InfoExtractor):
@@ -16,98 +16,74 @@ class HowStuffWorksIE(InfoExtractor):
            'url': 'http://adventure.howstuffworks.com/5266-cool-jobs-iditarod-musher-video.htm',
            'info_dict': {
                'id': '450221',
-                'display_id': 'cool-jobs-iditarod-musher',
                'ext': 'flv',
                'title': 'Cool Jobs - Iditarod Musher',
-                'description': 'md5:82bb58438a88027b8186a1fccb365f90',
+                'description': 'Cold sleds, freezing temps and warm dog breath... an Iditarod musher\'s dream. Kasey-Dee Gardner jumps on a sled to find out what the big deal is.',
+                'display_id': 'cool-jobs-iditarod-musher',
                'thumbnail': 're:^https?://.*\.jpg$',
+                'duration': 161,
            },
-            'params': {
-                # md5 is not consistent
-                'skip_download': True
-            }
        },
        {
            'url': 'http://adventure.howstuffworks.com/7199-survival-zone-food-and-water-in-the-savanna-video.htm',
            'info_dict': {
                'id': '453464',
-                'display_id': 'survival-zone-food-and-water-in-the-savanna',
                'ext': 'mp4',
                'title': 'Survival Zone: Food and Water In the Savanna',
-                'description': 'md5:7e1c89f6411434970c15fa094170c371',
+                'description': 'Learn how to find both food and water while trekking in the African savannah. In this video from the Discovery Channel.',
+                'display_id': 'survival-zone-food-and-water-in-the-savanna',
                'thumbnail': 're:^https?://.*\.jpg$',
            },
-            'params': {
-                # md5 is not consistent
-                'skip_download': True
-            }
        },
        {
            'url': 'http://entertainment.howstuffworks.com/arts/2706-sword-swallowing-1-by-dan-meyer-video.htm',
            'info_dict': {
                'id': '440011',
-                'display_id': 'sword-swallowing-1-by-dan-meyer',
                'ext': 'flv',
                'title': 'Sword Swallowing #1 by Dan Meyer',
-                'description': 'md5:b2409e88172913e2e7d3d1159b0ef735',
+                'description': 'Video footage (1 of 3) used by permission of the owner Dan Meyer through Sword Swallowers Association International <www.swordswallow.org>',
+                'display_id': 'sword-swallowing-1-by-dan-meyer',
                'thumbnail': 're:^https?://.*\.jpg$',
            },
-            'params': {
-                # md5 is not consistent
-                'skip_download': True
-            }
        },
    ]

    def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        display_id = mobj.group('id')
+        display_id = self._match_id(url)
        webpage = self._download_webpage(url, display_id)
+        clip_js = self._search_regex(
+            r'(?s)var clip = ({.*?});', webpage, 'clip info')
+        clip_info = self._parse_json(
+            clip_js, display_id, transform_source=js_to_json)

-        content_id = self._search_regex(r'var siteSectionId="(\d+)";', webpage, 'content id')
-
-        mp4 = self._search_regex(
-            r'''(?xs)var\s+clip\s*=\s*{\s*
-                .+?\s*
-                content_id\s*:\s*%s\s*,\s*
-                .+?\s*
-                mp4\s*:\s*\[(.*?),?\]\s*
-                };\s*
-                videoData\.push\(clip\);''' % content_id,
-            webpage, 'mp4', fatal=False, default=None)
-
-        smil = self._download_xml(
-            'http://services.media.howstuffworks.com/videos/%s/smil-service.smil' % content_id,
-            content_id, 'Downloading video SMIL')
-
-        http_base = find_xpath_attr(
-            smil,
-            './{0}head/{0}meta'.format('{http://www.w3.org/2001/SMIL20/Language}'),
-            'name',
-            'httpBase').get('content')
-
-        def random_string(str_len=0):
-            return ''.join([random.choice(string.ascii_uppercase) for _ in range(str_len)])
-
-        URL_SUFFIX = '?v=2.11.3&fp=LNX 11,2,202,356&r=%s&g=%s' % (random_string(5), random_string(12))
-
+        video_id = clip_info['content_id']
        formats = []
+        m3u8_url = clip_info.get('m3u8')
+        if m3u8_url:
+            formats += self._extract_m3u8_formats(m3u8_url, video_id, 'mp4')
+        for video in clip_info.get('mp4', []):
+            formats.append({
+                'url': video['src'],
+                'format_id': video['bitrate'],
+                'vbr': int(video['bitrate'].rstrip('k')),
+            })
+
+        if not formats:
+            smil = self._download_xml(
+                'http://services.media.howstuffworks.com/videos/%s/smil-service.smil' % video_id,
+                video_id, 'Downloading video SMIL')
+
+            http_base = find_xpath_attr(
+                smil,
+                './{0}head/{0}meta'.format('{http://www.w3.org/2001/SMIL20/Language}'),
+                'name',
+                'httpBase').get('content')
+
+            URL_SUFFIX = '?v=2.11.3&fp=LNX 11,2,202,356&r=A&g=A'

-        if mp4:
-            for video in json.loads('[%s]' % mp4):
-                bitrate = video['bitrate']
-                fmt = {
-                    'url': video['src'].replace('http://pmd.video.howstuffworks.com', http_base) + URL_SUFFIX,
-                    'format_id': bitrate,
-                }
-                m = re.search(r'(?P<vbr>\d+)[Kk]', bitrate)
-                if m:
-                    fmt['vbr'] = int(m.group('vbr'))
-                formats.append(fmt)
-        else:
            for video in smil.findall(
-                    './/{0}body/{0}switch/{0}video'.format('{http://www.w3.org/2001/SMIL20/Language}')):
-                vbr = int(video.attrib['system-bitrate']) / 1000
+                    './{0}body/{0}switch/{0}video'.format('{http://www.w3.org/2001/SMIL20/Language}')):
+                vbr = int_or_none(video.attrib['system-bitrate'], scale=1000)
                formats.append({
                    'url': '%s/%s%s' % (http_base, video.attrib['src'], URL_SUFFIX),
                    'format_id': '%dk' % vbr,
@@ -116,19 +92,12 @@ class HowStuffWorksIE(InfoExtractor):

        self._sort_formats(formats)

-        title = self._og_search_title(webpage)
-        TITLE_SUFFIX = ' : HowStuffWorks'
-        if title.endswith(TITLE_SUFFIX):
-            title = title[:-len(TITLE_SUFFIX)]
-
-        description = self._og_search_description(webpage)
-        thumbnail = self._og_search_thumbnail(webpage)
-
        return {
-            'id': content_id,
+            'id': '%s' % video_id,
            'display_id': display_id,
-            'title': title,
-            'description': description,
-            'thumbnail': thumbnail,
+            'title': unescapeHTML(clip_info['clip_title']),
+            'description': unescapeHTML(clip_info.get('caption')),
+            'thumbnail': clip_info.get('video_still_url'),
+            'duration': clip_info.get('duration'),
            'formats': formats,
        }
--- a/youtube_dl/extractor/mixcloud.py
+++ b/youtube_dl/extractor/mixcloud.py
@@ -70,7 +70,7 @@ class MixcloudIE(InfoExtractor):
            raise ExtractorError('Unable to extract track url')

        PREFIX = (
-            r'<div class="cloudcast-play-button-container[^"]*?"'
+            r'<span class="play-button[^"]*?"'
            r'(?:\s+[a-zA-Z0-9-]+(?:="[^"]+")?)*?\s+')
        title = self._html_search_regex(
            PREFIX + r'm-title="([^"]+)"', webpage, 'title')
--- a/youtube_dl/extractor/ntv.py
+++ b/youtube_dl/extractor/ntv.py
@@ -130,7 +130,7 @@ class NTVIE(InfoExtractor):
                'rtmp_conn': 'B:1',
                'player_url': 'http://www.ntv.ru/swf/vps1.swf?update=20131128',
                'page_url': 'http://www.ntv.ru',
-                'flash_ver': 'LNX 11,2,202,341',
+                'flash_version': 'LNX 11,2,202,341',
                'rtmp_live': True,
                'ext': 'flv',
                'filesize': int(size.text),
--- a/youtube_dl/extractor/pornhd.py
+++ b/youtube_dl/extractor/pornhd.py
@@ -8,7 +8,6 @@ from ..utils import (
    int_or_none,
    js_to_json,
    qualities,
-    determine_ext,
 )


@@ -45,13 +44,18 @@ class PornHdIE(InfoExtractor):
        thumbnail = self._search_regex(
            r"'poster'\s*:\s*'([^']+)'", webpage, 'thumbnail', fatal=False)

-        quality = qualities(['SD', 'HD'])
-        formats = [{
-            'url': source['file'],
-            'format_id': '%s-%s' % (source['label'], determine_ext(source['file'])),
-            'quality': quality(source['label']),
-        } for source in json.loads(js_to_json(self._search_regex(
-            r"(?s)'sources'\s*:\s*(\[.+?\])", webpage, 'sources')))]
+        quality = qualities(['sd', 'hd'])
+        sources = json.loads(js_to_json(self._search_regex(
+            r"(?s)'sources'\s*:\s*(\{.+?\})\s*\}\);", webpage, 'sources')))
+        formats = []
+        for container, s in sources.items():
+            for qname, video_url in s.items():
+                formats.append({
+                    'url': video_url,
+                    'container': container,
+                    'format_id': '%s-%s' % (container, qname),
+                    'quality': quality(qname),
+                })
        self._sort_formats(formats)

        return {
--- a/youtube_dl/extractor/screenwavemedia.py
+++ b/youtube_dl/extractor/screenwavemedia.py
@@ -5,61 +5,27 @@ import re

 from .common import InfoExtractor
 from ..utils import (
-    ExtractorError,
    int_or_none,
+    unified_strdate,
 )


-class CinemassacreIE(InfoExtractor):
-    _VALID_URL = r'http://(?:www\.)?cinemassacre\.com/(?P<date_Y>[0-9]{4})/(?P<date_m>[0-9]{2})/(?P<date_d>[0-9]{2})/(?P<display_id>[^?#/]+)'
-    _TESTS = [
-        {
-            'url': 'http://cinemassacre.com/2012/11/10/avgn-the-movie-trailer/',
-            'md5': 'fde81fbafaee331785f58cd6c0d46190',
-            'info_dict': {
-                'id': '19911',
-                'ext': 'mp4',
-                'upload_date': '20121110',
-                'title': '“Angry Video Game Nerd: The Movie” – Trailer',
-                'description': 'md5:fb87405fcb42a331742a0dce2708560b',
-            },
-        },
-        {
-            'url': 'http://cinemassacre.com/2013/10/02/the-mummys-hand-1940',
-            'md5': 'd72f10cd39eac4215048f62ab477a511',
-            'info_dict': {
-                'id': '521be8ef82b16',
-                'ext': 'mp4',
-                'upload_date': '20131002',
-                'title': 'The Mummy’s Hand (1940)',
-            },
-        }
-    ]
+class ScreenwaveMediaIE(InfoExtractor):
+    _VALID_URL = r'http://player\.screenwavemedia\.com/play/[a-zA-Z]+\.php\?[^"]*\bid=(?P<id>.+)'
+
+    _TESTS = [{
+        'url': 'http://player.screenwavemedia.com/play/play.php?playerdiv=videoarea&companiondiv=squareAd&id=Cinemassacre-19911',
+        'only_matching': True,
+    }]

    def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        display_id = mobj.group('display_id')
-
-        webpage = self._download_webpage(url, display_id)
-        video_date = mobj.group('date_Y') + mobj.group('date_m') + mobj.group('date_d')
-        mobj = re.search(r'src="(?P<embed_url>http://player\.screenwavemedia\.com/play/[a-zA-Z]+\.php\?[^"]*\bid=(?P<full_video_id>(?:Cinemassacre-)?(?P<video_id>.+?)))"', webpage)
-        if not mobj:
-            raise ExtractorError('Can\'t extract embed url and video id')
-        playerdata_url = mobj.group('embed_url')
-        video_id = mobj.group('video_id')
-        full_video_id = mobj.group('full_video_id')
-
-        video_title = self._html_search_regex(
-            r'<title>(?P<title>.+?)\|', webpage, 'title')
-        video_description = self._html_search_regex(
-            r'<div class="entry-content">(?P<description>.+?)</div>',
-            webpage, 'description', flags=re.DOTALL, fatal=False)
-        video_thumbnail = self._og_search_thumbnail(webpage)
-
-        playerdata = self._download_webpage(playerdata_url, video_id, 'Downloading player webpage')
+        video_id = self._match_id(url)
+        playerdata = self._download_webpage(url, video_id, 'Downloading player webpage')

+        vidtitle = self._search_regex(
+            r'\'vidtitle\'\s*:\s*"([^"]+)"', playerdata, 'vidtitle').replace('\\/', '/')
        vidurl = self._search_regex(
-            r'\'vidurl\'\s*:\s*"([^\']+)"', playerdata, 'vidurl').replace('\\/', '/')
+            r'\'vidurl\'\s*:\s*"([^"]+)"', playerdata, 'vidurl').replace('\\/', '/')

        videolist_url = None

@@ -67,7 +33,7 @@ class CinemassacreIE(InfoExtractor):
        if mobj:
            videoserver = mobj.group('videoserver')
            mobj = re.search(r'\'vidid\'\s*:\s*"(?P<vidid>[^\']+)"', playerdata)
-            vidid = mobj.group('vidid') if mobj else full_video_id
+            vidid = mobj.group('vidid') if mobj else video_id
            videolist_url = 'http://%s/vod/smil:%s.smil/jwplayer.smil' % (videoserver, vidid)
        else:
            mobj = re.search(r"file\s*:\s*'(?P<smil>http.+?/jwplayer\.smil)'", playerdata)
@@ -85,34 +51,128 @@ class CinemassacreIE(InfoExtractor):
                file_ = src.partition(':')[-1]
                width = int_or_none(video.get('width'))
                height = int_or_none(video.get('height'))
-                bitrate = int_or_none(video.get('system-bitrate'))
+                bitrate = int_or_none(video.get('system-bitrate'), scale=1000)
                format = {
                    'url': baseurl + file_,
                    'format_id': src.rpartition('.')[0].rpartition('_')[-1],
                }
                if width or height:
                    format.update({
-                        'tbr': bitrate // 1000 if bitrate else None,
+                        'tbr': bitrate,
                        'width': width,
                        'height': height,
                    })
                else:
                    format.update({
-                        'abr': bitrate // 1000 if bitrate else None,
+                        'abr': bitrate,
                        'vcodec': 'none',
                    })
                formats.append(format)
-            self._sort_formats(formats)
        else:
            formats = [{
                'url': vidurl,
            }]
+        self._sort_formats(formats)

        return {
            'id': video_id,
-            'title': video_title,
+            'title': vidtitle,
            'formats': formats,
+        }
+
+
+class CinemassacreIE(InfoExtractor):
+    _VALID_URL = 'https?://(?:www\.)?cinemassacre\.com/(?P<date_y>[0-9]{4})/(?P<date_m>[0-9]{2})/(?P<date_d>[0-9]{2})/(?P<display_id>[^?#/]+)'
+    _TESTS = [
+        {
+            'url': 'http://cinemassacre.com/2012/11/10/avgn-the-movie-trailer/',
+            'md5': 'fde81fbafaee331785f58cd6c0d46190',
+            'info_dict': {
+                'id': 'Cinemassacre-19911',
+                'ext': 'mp4',
+                'upload_date': '20121110',
+                'title': '“Angry Video Game Nerd: The Movie” – Trailer',
+                'description': 'md5:fb87405fcb42a331742a0dce2708560b',
+            },
+        },
+        {
+            'url': 'http://cinemassacre.com/2013/10/02/the-mummys-hand-1940',
+            'md5': 'd72f10cd39eac4215048f62ab477a511',
+            'info_dict': {
+                'id': 'Cinemassacre-521be8ef82b16',
+                'ext': 'mp4',
+                'upload_date': '20131002',
+                'title': 'The Mummy’s Hand (1940)',
+            },
+        }
+    ]
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        display_id = mobj.group('display_id')
+        video_date = mobj.group('date_y') + mobj.group('date_m') + mobj.group('date_d')
+
+        webpage = self._download_webpage(url, display_id)
+
+        playerdata_url = self._search_regex(
+            r'src="(http://player\.screenwavemedia\.com/play/[a-zA-Z]+\.php\?[^"]*\bid=.+?)"',
+            webpage, 'player data URL')
+        video_title = self._html_search_regex(
+            r'<title>(?P<title>.+?)\|', webpage, 'title')
+        video_description = self._html_search_regex(
+            r'<div class="entry-content">(?P<description>.+?)</div>',
+            webpage, 'description', flags=re.DOTALL, fatal=False)
+        video_thumbnail = self._og_search_thumbnail(webpage)
+
+        return {
+            '_type': 'url_transparent',
+            'display_id': display_id,
+            'title': video_title,
            'description': video_description,
            'upload_date': video_date,
            'thumbnail': video_thumbnail,
+            'url': playerdata_url,
+        }
+
+
+class TeamFourIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?teamfourstar\.com/video/(?P<id>[a-z0-9\-]+)/?'
+    _TEST = {
+        'url': 'http://teamfourstar.com/video/a-moment-with-tfs-episode-4/',
+        'info_dict': {
+            'id': 'TeamFourStar-5292a02f20bfa',
+            'ext': 'mp4',
+            'upload_date': '20130401',
+            'description': 'Check out this and more on our website: http://teamfourstar.com\nTFS Store: http://sharkrobot.com/team-four-star\nFollow on Twitter: http://twitter.com/teamfourstar\nLike on FB: http://facebook.com/teamfourstar',
+            'title': 'A Moment With TFS Episode 4',
+        }
+    }
+
+    def _real_extract(self, url):
+        display_id = self._match_id(url)
+        webpage = self._download_webpage(url, display_id)
+
+        playerdata_url = self._search_regex(
+            r'src="(http://player\.screenwavemedia\.com/play/[a-zA-Z]+\.php\?[^"]*\bid=.+?)"',
+            webpage, 'player data URL')
+
+        video_title = self._html_search_regex(
+            r'<div class="heroheadingtitle">(?P<title>.+?)</div>',
+            webpage, 'title')
+        video_date = unified_strdate(self._html_search_regex(
+            r'<div class="heroheadingdate">(?P<date>.+?)</div>',
+            webpage, 'date', fatal=False))
+        video_description = self._html_search_regex(
+            r'(?s)<div class="postcontent">(?P<description>.+?)</div>',
+            webpage, 'description', fatal=False)
+        video_thumbnail = self._og_search_thumbnail(webpage)
+
+        return {
+            '_type': 'url_transparent',
+            'display_id': display_id,
+            'title': video_title,
+            'description': video_description,
+            'upload_date': video_date,
+            'thumbnail': video_thumbnail,
+            'url': playerdata_url,
        }
--- a/youtube_dl/extractor/smotri.py
+++ b/youtube_dl/extractor/smotri.py
@@ -274,15 +274,18 @@ class SmotriBroadcastIE(InfoExtractor):
        broadcast_page = self._download_webpage(broadcast_url, broadcast_id, 'Downloading broadcast page')

        if re.search('>Режиссер с логином <br/>"%s"<br/> <span>не существует<' % broadcast_id, broadcast_page) is not None:
-            raise ExtractorError('Broadcast %s does not exist' % broadcast_id, expected=True)
+            raise ExtractorError(
+                'Broadcast %s does not exist' % broadcast_id, expected=True)

        # Adult content
        if re.search('EroConfirmText">', broadcast_page) is not None:

            (username, password) = self._get_login_info()
            if username is None:
-                raise ExtractorError('Erotic broadcasts allowed only for registered users, '
-                                     'use --username and --password options to provide account credentials.', expected=True)
+                raise ExtractorError(
+                    'Erotic broadcasts allowed only for registered users, '
+                    'use --username and --password options to provide account credentials.',
+                    expected=True)

            login_form = {
                'login-hint53': '1',
@@ -291,9 +294,11 @@ class SmotriBroadcastIE(InfoExtractor):
                'password': password,
            }

-            request = compat_urllib_request.Request(broadcast_url + '/?no_redirect=1', compat_urllib_parse.urlencode(login_form))
+            request = compat_urllib_request.Request(
+                broadcast_url + '/?no_redirect=1', compat_urllib_parse.urlencode(login_form))
            request.add_header('Content-Type', 'application/x-www-form-urlencoded')
-            broadcast_page = self._download_webpage(request, broadcast_id, 'Logging in and confirming age')
+            broadcast_page = self._download_webpage(
+                request, broadcast_id, 'Logging in and confirming age')

            if re.search('>Неверный логин или пароль<', broadcast_page) is not None:
                raise ExtractorError('Unable to log in: bad username or password', expected=True)
@@ -303,7 +308,7 @@ class SmotriBroadcastIE(InfoExtractor):
            adult_content = False

        ticket = self._html_search_regex(
-            'window\.broadcast_control\.addFlashVar\\(\'file\', \'([^\']+)\'\\);',
+            r"window\.broadcast_control\.addFlashVar\('file'\s*,\s*'([^']+)'\)",
            broadcast_page, 'broadcast ticket')

        url = 'http://smotri.com/broadcast/view/url/?ticket=%s' % ticket
@@ -312,26 +317,31 @@ class SmotriBroadcastIE(InfoExtractor):
        if broadcast_password:
            url += '&pass=%s' % hashlib.md5(broadcast_password.encode('utf-8')).hexdigest()

-        broadcast_json_page = self._download_webpage(url, broadcast_id, 'Downloading broadcast JSON')
+        broadcast_json_page = self._download_webpage(
+            url, broadcast_id, 'Downloading broadcast JSON')

        try:
            broadcast_json = json.loads(broadcast_json_page)

            protected_broadcast = broadcast_json['_pass_protected'] == 1
            if protected_broadcast and not broadcast_password:
-                raise ExtractorError('This broadcast is protected by a password, use the --video-password option', expected=True)
+                raise ExtractorError(
+                    'This broadcast is protected by a password, use the --video-password option',
+                    expected=True)

            broadcast_offline = broadcast_json['is_play'] == 0
            if broadcast_offline:
                raise ExtractorError('Broadcast %s is offline' % broadcast_id, expected=True)

            rtmp_url = broadcast_json['_server']
-            if not rtmp_url.startswith('rtmp://'):
+            mobj = re.search(r'^rtmp://[^/]+/(?P<app>.+)/?$', rtmp_url)
+            if not mobj:
                raise ExtractorError('Unexpected broadcast rtmp URL')

            broadcast_playpath = broadcast_json['_streamName']
+            broadcast_app = '%s/%s' % (mobj.group('app'), broadcast_json['_vidURL'])
            broadcast_thumbnail = broadcast_json['_imgURL']
-            broadcast_title = broadcast_json['title']
+            broadcast_title = self._live_title(broadcast_json['title'])
            broadcast_description = broadcast_json['description']
            broadcaster_nick = broadcast_json['nick']
            broadcaster_login = broadcast_json['login']
@@ -352,6 +362,9 @@ class SmotriBroadcastIE(InfoExtractor):
            'age_limit': 18 if adult_content else 0,
            'ext': 'flv',
            'play_path': broadcast_playpath,
+            'player_url': 'http://pics.smotri.com/broadcast_play.swf',
+            'app': broadcast_app,
            'rtmp_live': True,
-            'rtmp_conn': rtmp_conn
+            'rtmp_conn': rtmp_conn,
+            'is_live': True,
        }
--- a/youtube_dl/extractor/tvplay.py
+++ b/youtube_dl/extractor/tvplay.py
@@ -6,7 +6,6 @@ import re
 from .common import InfoExtractor
 from ..compat import compat_str
 from ..utils import (
-    ExtractorError,
    parse_iso8601,
    qualities,
 )
@@ -182,8 +181,8 @@ class TVPlayIE(InfoExtractor):
            'http://playapi.mtgx.tv/v1/videos/%s' % video_id, video_id, 'Downloading video JSON')

        if video['is_geo_blocked']:
-            raise ExtractorError(
-                'This content is not available in your country due to copyright reasons', expected=True)
+            self.report_warning(
+                'This content might not be available in your country due to copyright reasons')

        streams = self._download_json(
            'http://playapi.mtgx.tv/v1/videos/stream/%s' % video_id, video_id, 'Downloading streams JSON')
--- a/youtube_dl/extractor/vine.py
+++ b/youtube_dl/extractor/vine.py
@@ -17,6 +17,7 @@ class VineIE(InfoExtractor):
            'id': 'b9KOOWX7HUx',
            'ext': 'mp4',
            'title': 'Chicken.',
+            'alt_title': 'Vine by Jack Dorsey',
            'description': 'Chicken.',
            'upload_date': '20130519',
            'uploader': 'Jack Dorsey',
@@ -25,30 +26,26 @@ class VineIE(InfoExtractor):
    }

    def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group('id')
-
+        video_id = self._match_id(url)
        webpage = self._download_webpage('https://vine.co/v/' + video_id, video_id)

        data = json.loads(self._html_search_regex(
            r'window\.POST_DATA = { %s: ({.+?}) }' % video_id, webpage, 'vine data'))

-        formats = [
-            {
-                'url': data['videoLowURL'],
-                'ext': 'mp4',
-                'format_id': 'low',
-            },
-            {
-                'url': data['videoUrl'],
-                'ext': 'mp4',
-                'format_id': 'standard',
-            }
-        ]
+        formats = [{
+            'url': data['videoLowURL'],
+            'ext': 'mp4',
+            'format_id': 'low',
+        }, {
+            'url': data['videoUrl'],
+            'ext': 'mp4',
+            'format_id': 'standard',
+        }]

        return {
            'id': video_id,
            'title': self._og_search_title(webpage),
+            'alt_title': self._og_search_description(webpage),
            'description': data['description'],
            'thumbnail': data['thumbnailUrl'],
            'upload_date': unified_strdate(data['created']),
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@@ -14,23 +14,24 @@ from .common import InfoExtractor, SearchInfoExtractor
 from .subtitles import SubtitlesInfoExtractor
 from ..jsinterp import JSInterpreter
 from ..swfinterp import SWFInterpreter
-from ..utils import (
+from ..compat import (
    compat_chr,
    compat_parse_qs,
    compat_urllib_parse,
    compat_urllib_request,
    compat_urlparse,
    compat_str,
-
+)
+from ..utils import (
    clean_html,
-    get_element_by_id,
-    get_element_by_attribute,
    ExtractorError,
+    get_element_by_attribute,
+    get_element_by_id,
    int_or_none,
    OnDemandPagedList,
+    orderedSet,
    unescapeHTML,
    unified_strdate,
-    orderedSet,
    uppercase_escape,
 )

@@ -432,7 +433,23 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
            'expected_warnings': [
                'DASH manifest missing',
            ]
-        }
+        },
+        # Olympics (https://github.com/rg3/youtube-dl/issues/4431)
+        {
+            'url': 'lqQg6PlCWgI',
+            'info_dict': {
+                'id': 'lqQg6PlCWgI',
+                'ext': 'mp4',
+                'upload_date': '20120731',
+                'uploader_id': 'olympic',
+                'description': 'HO09  - Women -  GER-AUS - Hockey - 31 July 2012 - London 2012 Olympic Games',
+                'uploader': 'Olympics',
+                'title': 'Hockey - Women -  GER-AUS - London 2012 Olympic Games',
+            },
+            'params': {
+                'skip_download': 'requires avconv',
+            }
+        },
    ]

    def __init__(self, *args, **kwargs):
@@ -856,7 +873,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):

        m_cat_container = self._search_regex(
            r'(?s)<h4[^>]*>\s*Category\s*</h4>\s*<ul[^>]*>(.*?)</ul>',
-            video_webpage, 'categories', fatal=False)
+            video_webpage, 'categories', default=None)
        if m_cat_container:
            category = self._html_search_regex(
                r'(?s)<a[^<]+>(.*?)</a>', m_cat_container, 'category',
@@ -934,7 +951,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
                'url': video_info['conn'][0],
                'player_url': player_url,
            }]
-        elif len(video_info.get('url_encoded_fmt_stream_map', [])) >= 1 or len(video_info.get('adaptive_fmts', [])) >= 1:
+        elif len(video_info.get('url_encoded_fmt_stream_map', [''])[0]) >= 1 or len(video_info.get('adaptive_fmts', [''])[0]) >= 1:
            encoded_url_map = video_info.get('url_encoded_fmt_stream_map', [''])[0] + ',' + video_info.get('adaptive_fmts', [''])[0]
            if 'rtmpe%3Dyes' in encoded_url_map:
                raise ExtractorError('rtmpe downloads are not supported, see https://github.com/rg3/youtube-dl/issues/343 for more information.', expected=True)
@@ -1000,9 +1017,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
        # Look for the DASH manifest
        if self._downloader.params.get('youtube_include_dash_manifest', True):
            dash_mpd = video_info.get('dashmpd')
-            if not dash_mpd:
-                self.report_warning('%s: DASH manifest missing' % video_id)
-            else:
+            if dash_mpd:
                dash_manifest_url = dash_mpd[0]
                try:
                    dash_formats = self._parse_dash_manifest(
--- a/youtube_dl/extractor/zdf.py
+++ b/youtube_dl/extractor/zdf.py
@@ -1,12 +1,14 @@
 # coding: utf-8
 from __future__ import unicode_literals

+import functools
 import re

 from .common import InfoExtractor
 from ..utils import (
    int_or_none,
    unified_strdate,
+    OnDemandPagedList,
 )


@@ -87,7 +89,7 @@ def extract_from_xml_url(ie, video_id, xml_url):


 class ZDFIE(InfoExtractor):
-    _VALID_URL = r'^https?://www\.zdf\.de/ZDFmediathek(?P<hash>#)?/(.*beitrag/(?:video/)?)(?P<id>[0-9]+)(?:/[^/?]+)?(?:\?.*)?'
+    _VALID_URL = r'(?:zdf:|zdf:video:|https?://www\.zdf\.de/ZDFmediathek(?:#)?/(.*beitrag/(?:video/)?))(?P<id>[0-9]+)(?:/[^/?]+)?(?:\?.*)?'

    _TEST = {
        'url': 'http://www.zdf.de/ZDFmediathek/beitrag/video/2037704/ZDFspezial---Ende-des-Machtpokers--?bc=sts;stt',
@@ -106,6 +108,52 @@ class ZDFIE(InfoExtractor):

    def _real_extract(self, url):
        video_id = self._match_id(url)
-
        xml_url = 'http://www.zdf.de/ZDFmediathek/xmlservice/web/beitragsDetails?ak=web&id=%s' % video_id
        return extract_from_xml_url(self, video_id, xml_url)
+
+
+class ZDFChannelIE(InfoExtractor):
+    _VALID_URL = r'(?:zdf:topic:|https?://www\.zdf\.de/ZDFmediathek(?:#)?/.*kanaluebersicht/)(?P<id>[0-9]+)'
+    _TEST = {
+        'url': 'http://www.zdf.de/ZDFmediathek#/kanaluebersicht/1586442/sendung/Titanic',
+        'info_dict': {
+            'id': '1586442',
+        },
+        'playlist_count': 4,
+    }
+    _PAGE_SIZE = 50
+
+    def _fetch_page(self, channel_id, page):
+        offset = page * self._PAGE_SIZE
+        xml_url = (
+            'http://www.zdf.de/ZDFmediathek/xmlservice/web/aktuellste?ak=web&offset=%d&maxLength=%d&id=%s'
+            % (offset, self._PAGE_SIZE, channel_id))
+        doc = self._download_xml(
+            xml_url, channel_id,
+            note='Downloading channel info',
+            errnote='Failed to download channel info')
+
+        title = doc.find('.//information/title').text
+        description = doc.find('.//information/detail').text
+        for asset in doc.findall('.//teasers/teaser'):
+            a_type = asset.find('./type').text
+            a_id = asset.find('./details/assetId').text
+            if a_type not in ('video', 'topic'):
+                continue
+            yield {
+                '_type': 'url',
+                'playlist_title': title,
+                'playlist_description': description,
+                'url': 'zdf:%s:%s' % (a_type, a_id),
+            }
+
+    def _real_extract(self, url):
+        channel_id = self._match_id(url)
+        entries = OnDemandPagedList(
+            functools.partial(self._fetch_page, channel_id), self._PAGE_SIZE)
+
+        return {
+            '_type': 'playlist',
+            'id': channel_id,
+            'entries': entries,
+        }
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@@ -166,7 +166,7 @@ def xpath_text(node, xpath, name=None, fatal=False):
        xpath = xpath.encode('ascii')

    n = node.find(xpath)
-    if n is None:
+    if n is None or n.text is None:
        if fatal:
            name = xpath if name is None else name
            raise ExtractorError('Could not find XML element %s' % name)
@@ -644,17 +644,19 @@ def parse_iso8601(date_str, delimiter='T'):
    return calendar.timegm(dt.timetuple())


-def unified_strdate(date_str):
+def unified_strdate(date_str, day_first=True):
    """Return a string with the date in the format YYYYMMDD"""

    if date_str is None:
        return None
-
    upload_date = None
    # Replace commas
    date_str = date_str.replace(',', ' ')
    # %z (UTC offset) is only supported in python>=3.2
    date_str = re.sub(r' ?(\+|-)[0-9]{2}:?[0-9]{2}$', '', date_str)
+    # Remove AM/PM + timezone
+    date_str = re.sub(r'(?i)\s*(?:AM|PM)\s+[A-Z]+', '', date_str)
+
    format_expressions = [
        '%d %B %Y',
        '%d %b %Y',
@@ -669,7 +671,6 @@ def unified_strdate(date_str):
        '%d/%m/%Y',
        '%d/%m/%y',
        '%Y/%m/%d %H:%M:%S',
-        '%d/%m/%Y %H:%M:%S',
        '%Y-%m-%d %H:%M:%S',
        '%Y-%m-%d %H:%M:%S.%f',
        '%d.%m.%Y %H:%M',
@@ -681,6 +682,14 @@ def unified_strdate(date_str):
        '%Y-%m-%dT%H:%M:%S.%f',
        '%Y-%m-%dT%H:%M',
    ]
+    if day_first:
+        format_expressions.extend([
+            '%d/%m/%Y %H:%M:%S',
+        ])
+    else:
+        format_expressions.extend([
+            '%m/%d/%Y %H:%M:%S',
+        ])
    for expression in format_expressions:
        try:
            upload_date = datetime.datetime.strptime(date_str, expression).strftime('%Y%m%d')
@@ -712,8 +721,10 @@ def date_from_str(date_str):
    Return a datetime object from a string in the format YYYYMMDD or
    (now|today)[+-][0-9](day|week|month|year)(s)?"""
    today = datetime.date.today()
-    if date_str == 'now'or date_str == 'today':
+    if date_str in ('now', 'today'):
        return today
+    if date_str == 'yesterday':
+        return today - datetime.timedelta(days=1)
    match = re.match('(now|today)(?P<sign>[+-])(?P<time>\d+)(?P<unit>day|week|month|year)(s)?', date_str)
    if match is not None:
        sign = match.group('sign')
@@ -808,22 +819,22 @@ def _windows_write_string(s, out):

    GetStdHandle = ctypes.WINFUNCTYPE(
        ctypes.wintypes.HANDLE, ctypes.wintypes.DWORD)(
-        ("GetStdHandle", ctypes.windll.kernel32))
+        (b"GetStdHandle", ctypes.windll.kernel32))
    h = GetStdHandle(WIN_OUTPUT_IDS[fileno])

    WriteConsoleW = ctypes.WINFUNCTYPE(
        ctypes.wintypes.BOOL, ctypes.wintypes.HANDLE, ctypes.wintypes.LPWSTR,
        ctypes.wintypes.DWORD, ctypes.POINTER(ctypes.wintypes.DWORD),
-        ctypes.wintypes.LPVOID)(("WriteConsoleW", ctypes.windll.kernel32))
+        ctypes.wintypes.LPVOID)((b"WriteConsoleW", ctypes.windll.kernel32))
    written = ctypes.wintypes.DWORD(0)

-    GetFileType = ctypes.WINFUNCTYPE(ctypes.wintypes.DWORD, ctypes.wintypes.DWORD)(("GetFileType", ctypes.windll.kernel32))
+    GetFileType = ctypes.WINFUNCTYPE(ctypes.wintypes.DWORD, ctypes.wintypes.DWORD)((b"GetFileType", ctypes.windll.kernel32))
    FILE_TYPE_CHAR = 0x0002
    FILE_TYPE_REMOTE = 0x8000
    GetConsoleMode = ctypes.WINFUNCTYPE(
        ctypes.wintypes.BOOL, ctypes.wintypes.HANDLE,
        ctypes.POINTER(ctypes.wintypes.DWORD))(
-        ("GetConsoleMode", ctypes.windll.kernel32))
+        (b"GetConsoleMode", ctypes.windll.kernel32))
    INVALID_HANDLE_VALUE = ctypes.wintypes.DWORD(-1).value

    def not_a_console(handle):
--- a/youtube_dl/version.py
+++ b/youtube_dl/version.py
@@ -1,3 +1,3 @@
 from __future__ import unicode_literals

-__version__ = '2014.12.10.2'
+__version__ = '2014.12.12.2'
Author	SHA1	Message	Date
Philipp Hagemeister	3b0bec8d11	release 2014.12.12.2	2014-12-12 15:56:45 +01:00
Philipp Hagemeister	412c617d0f	[cnet] Update to new theplatform infrastructure (Fixes #2736 )	2014-12-12 15:55:55 +01:00
Philipp Hagemeister	751536f5c8	[goldenmoustache] Remove view count. The view count is not present anymore, so we can't extract it.	2014-12-12 13:09:55 +01:00
Philipp Hagemeister	025f30ba38	[channel9] Do not return compat_list results anymore	2014-12-12 13:07:43 +01:00
Philipp Hagemeister	0d2fb1d193	[helsinki] Fix extraction	2014-12-12 13:03:16 +01:00
Philipp Hagemeister	82b34105d3	[goshgay] Fix extraction	2014-12-12 12:55:13 +01:00
Philipp Hagemeister	73aeb2dc56	[goshgay] Modernize	2014-12-12 12:44:50 +01:00
Philipp Hagemeister	c6973bd412	[compat] Simplify kwarg detection code This enables nuitka to compile youtube-dl.	2014-12-12 12:42:35 +01:00
Philipp Hagemeister	f8780e6d11	Merge remote-tracking branch 'grompe/patch-1'	2014-12-12 11:35:04 +01:00
Philipp Hagemeister	e2f89ec7aa	Revert "[utils] Work around PyPy stupidity with Windows DLLs (Fixes #4392)". This reverts commit `16040f46d6`.	2014-12-12 11:33:55 +01:00
Philipp Hagemeister	62651c556a	[howstuffworks] Parse only once, but right (#4383 )	2014-12-12 04:23:34 +01:00
Philipp Hagemeister	bf94e38d3d	Merge remote-tracking branch 'Tithen-Firion/hsw-update'	2014-12-12 04:10:55 +01:00
Philipp Hagemeister	4f97852316	Remove unused imports	2014-12-12 04:09:32 +01:00
Philipp Hagemeister	16040f46d6	[utils] Work around PyPy stupidity with Windows DLLs (Fixes #4392 )	2014-12-12 04:01:08 +01:00
Philipp Hagemeister	d068ba24f3	release 2014.12.12.1	2014-12-12 03:34:33 +01:00
Philipp Hagemeister	f5e43bc695	[vine] Provide alt_title (Fixes #4448 )	2014-12-12 03:34:28 +01:00
Philipp Hagemeister	6a5308ab49	release 2014.12.12	2014-12-12 03:02:56 +01:00
Philipp Hagemeister	63e0f29564	[vine] Modernize	2014-12-12 02:59:52 +01:00
Philipp Hagemeister	42bdd9d051	[cinchcast] Add new extractor (Fixes #4428 )	2014-12-12 02:57:36 +01:00
Philipp Hagemeister	4e40de6e2a	Merge branch 'master' of github.com:rg3/youtube-dl	2014-12-12 02:14:31 +01:00
Philipp Hagemeister	0fa2b899d1	[Makefile] remove *.info.json in clean target	2014-12-12 02:14:04 +01:00
Philipp Hagemeister	f17e4c9c28	[screenwavemedia] Simplify (#3766 )	2014-12-12 02:11:58 +01:00
Philipp Hagemeister	807962f4a1	[pornhd] Adapt to new sources scheme (Fixes #4446 )	2014-12-11 23:50:25 +01:00
Jaime Marquínez Ferrándiz	9c1aa1d668	[mixcloud] Fix metadata extraction (fixes #4443 )	2014-12-11 23:16:40 +01:00
Philipp Hagemeister	69f491f14e	Merge remote-tracking branch 'fstirlitz/master'	2014-12-11 17:11:25 +01:00
Philipp Hagemeister	cb007f47c1	release 2014.12.11	2014-12-11 17:08:31 +01:00
Philipp Hagemeister	9abd500a74	[zdf:channel] Simplify (#4427 )	2014-12-11 17:07:59 +01:00
Philipp Hagemeister	cf68bcaeff	Merge remote-tracking branch 'akretz/master'	2014-12-11 16:35:45 +01:00
Philipp Hagemeister	cbe2bd914d	[youtube] Amend test	2014-12-11 16:34:37 +01:00
Philipp Hagemeister	75111274ed	[youtube] Do not warn if DASH manifest is missing (#4442 )	2014-12-11 16:33:28 +01:00
Philipp Hagemeister	624dcebff6	[youtube] Make category optional (#4442 )	2014-12-11 16:32:48 +01:00
Philipp Hagemeister	9684f17cde	Merge remote-tracking branch 'akretz/youtube_fix'	2014-12-11 16:28:10 +01:00
Philipp Hagemeister	e52a40abf7	[youtube] Add test case for #4431	2014-12-11 16:28:07 +01:00
Philipp Hagemeister	0daa05961b	Merge branch 'master' of github.com:rg3/youtube-dl	2014-12-11 16:23:01 +01:00
Naglis Jonaitis	158731f83e	[tvplay] Don't raise an exception if `is_geo_blocked` is True. Videos which return `is_geo_blocked` as True can actually be downloaded from the country to which the video is restricted.	2014-12-11 17:07:50 +02:00
Adrian Kretz	24270b0301	[youtube] Handle the case where 'url_encoded_fmt_stream_map' or 'adaptive_fmts' is the empty string (fixes #4431)	2014-12-11 16:00:46 +01:00
Naglis Jonaitis	3c1b81b957	[ntv] Rename `flash_ver` to `flash_version` in the format dict RTMP downloader uses `flash_version`	2014-12-11 16:58:45 +02:00
Philipp Hagemeister	45c24df512	Merge branch 'master' of github.com:rg3/youtube-dl	2014-12-11 15:27:54 +01:00
Sergey M․	bf671b605e	[behindkink] Remove superfluous whitespace	2014-12-11 20:09:52 +06:00
Sergey M․	09c82fbc9a	[behindkink] Simplify	2014-12-11 20:06:19 +06:00
Sergey M.	3bca0409fe	Merge pull request #4440 from 5moufl/behindkink-fix [BehindKink] update	2014-12-11 19:58:31 +06:00
5moufl	d6f78a354d	[BehindKink] Replace test Old one is not accessible anymore	2014-12-11 14:26:59 +01:00
5moufl	e0b9d47387	[BehindKink] Update URL extraction	2014-12-11 14:25:26 +01:00
Philipp Hagemeister	f8795e102b	[utils] Add "yesterday" as a date keyword	2014-12-11 10:29:30 +01:00
Philipp Hagemeister	4bb4a18876	[youtube] Fix imports	2014-12-11 10:08:17 +01:00
Adrian Kretz	8560c61842	[zdf] Add support for channels	2014-12-10 17:29:03 +01:00
Sergey M․	a81bbebf44	[smotri:broadcast] Fix extraction	2014-12-10 20:22:49 +06:00
Philipp Hagemeister	72e3ffeb74	release 2014.12.10.3	2014-12-10 15:19:08 +01:00
Philipp Hagemeister	2fc9f2b41d	[facebook] Make thumbnail and duration optional. Fixes #4425. Looks like both properties aren't given to us anymore. For now, just fall back to not returning them.	2014-12-10 15:18:36 +01:00
felix	ce36339575	add teamfourstar.com support	2014-12-08 17:01:22 +01:00
felix	684712076f	add direct screenwavemedia.com URL support	2014-12-08 17:01:22 +01:00
Grom PE	6ac4e8065a	Fix utils.py for PyPy on Windows. The line `from __future__ import unicode_literals` introduced in commit `ecc0c5ee01` broke youtube-dl for PyPy on Windows, making it unable to locate WinAPI functions. Error: "TypeError: function name must be a string or integer". Adding a "b" prefix to the strings with WinAPI function names fixes it.	2014-12-06 20:15:41 +07:00
Tithen-Firion	e638e83662	[howstuffworks] Update extractor	2014-12-05 19:46:49 +01:00
Tithen-Firion	d958fa9ff9	[howstuffworks] Rewrite extractor	2014-12-05 12:21:21 +01:00
Tithen-Firion	ebb6419960	[common] Split _download_json Add ability for extractor to use _parse_json	2014-12-05 12:21:21 +01:00