release 2014.08.24.2

[ministrygrid] Add extractor (Fixes #2900)
[wayofthemaster] Remove unused import
2014-08-24 04:47:38 +02:00 · 2014-08-24 04:47:28 +02:00 · 2014-08-24 04:18:09 +02:00 · 2014-08-24 04:14:02 +02:00 · 2014-08-24 03:37:19 +02:00 · 2014-08-24 03:31:38 +02:00
28 changed files with 919 additions and 110 deletions
--- a/README.md
+++ b/README.md
@@ -429,6 +429,7 @@ If you want to add support for a new site, you can follow this quick list (assum
                'id': '42',
                'ext': 'mp4',
                'title': 'Video title goes here',
+                'thumbnail': 're:^https?://.*\.jpg$',
                # TODO more properties, either as:
                # * A value
                # * MD5 checksum; start the string with md5:
--- a/test/test_YoutubeDL.py
+++ b/test/test_YoutubeDL.py
@@ -221,7 +221,7 @@ class TestFormatSelection(unittest.TestCase):
            '138', '137', '248', '136', '247', '135', '246',
            '245', '244', '134', '243', '133', '242', '160',
            # Dash audio
-            '141', '172', '140', '139', '171',
+            '141', '172', '140', '171', '139',
        ]

        for f1id, f2id in zip(order, order[1:]):
--- a/youtube_dl/YoutubeDL.py
+++ b/youtube_dl/YoutubeDL.py
@@ -480,7 +480,10 @@ class YoutubeDL(object):
                return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
        age_limit = self.params.get('age_limit')
        if age_limit is not None:
-            if age_limit < info_dict.get('age_limit', 0):
+            actual_age_limit = info_dict.get('age_limit')
+            if actual_age_limit is None:
+                actual_age_limit = 0
+            if age_limit < actual_age_limit:
                return 'Skipping "' + title + '" because it is age restricted'
        if self.in_download_archive(info_dict):
            return '%s has already been recorded in archive' % video_title
--- a/youtube_dl/__init__.py
+++ b/youtube_dl/__init__.py
@@ -71,6 +71,7 @@ __authors__  = (
    'Sebastian Haas',
    'Alexander Kirk',
    'Erik Johnson',
+    'Keith Beckman',
 )

 __license__ = 'Public Domain'
--- a/youtube_dl/downloader/http.py
+++ b/youtube_dl/downloader/http.py
@@ -27,8 +27,16 @@ class HttpFD(FileDownloader):
            headers['Youtubedl-user-agent'] = info_dict['user_agent']
        if 'http_referer' in info_dict:
            headers['Referer'] = info_dict['http_referer']
-        basic_request = compat_urllib_request.Request(url, None, headers)
-        request = compat_urllib_request.Request(url, None, headers)
+        add_headers = info_dict.get('http_headers')
+        if add_headers:
+            headers.update(add_headers)
+        data = info_dict.get('http_post_data')
+        http_method = info_dict.get('http_method')
+        basic_request = compat_urllib_request.Request(url, data, headers)
+        request = compat_urllib_request.Request(url, data, headers)
+        if http_method is not None:
+            basic_request.get_method = lambda: http_method
+            request.get_method = lambda: http_method

        is_test = self.params.get('test', False)

--- a/youtube_dl/extractor/__init__.py
+++ b/youtube_dl/extractor/__init__.py
@@ -126,6 +126,7 @@ from .googleplus import GooglePlusIE
 from .googlesearch import GoogleSearchIE
 from .gorillavid import GorillaVidIE
 from .goshgay import GoshgayIE
+from .grooveshark import GroovesharkIE
 from .hark import HarkIE
 from .helsinki import HelsinkiIE
 from .hentaistigma import HentaiStigmaIE
@@ -183,6 +184,7 @@ from .malemotion import MalemotionIE
 from .mdr import MDRIE
 from .metacafe import MetacafeIE
 from .metacritic import MetacriticIE
+from .ministrygrid import MinistryGridIE
 from .mit import TechTVMITIE, MITIE, OCWMITIE
 from .mitele import MiTeleIE
 from .mixcloud import MixcloudIE
@@ -194,6 +196,7 @@ from .mooshare import MooshareIE
 from .morningstar import MorningstarIE
 from .motherless import MotherlessIE
 from .motorsport import MotorsportIE
+from .movieclips import MovieClipsIE
 from .moviezine import MoviezineIE
 from .movshare import MovShareIE
 from .mtv import (
@@ -243,6 +246,7 @@ from .parliamentliveuk import ParliamentLiveUKIE
 from .patreon import PatreonIE
 from .pbs import PBSIE
 from .photobucket import PhotobucketIE
+from .playfm import PlayFMIE
 from .playvid import PlayvidIE
 from .podomatic import PodomaticIE
 from .pornhd import PornHdIE
@@ -263,7 +267,7 @@ from .rtbf import RTBFIE
 from .rtlnl import RtlXlIE
 from .rtlnow import RTLnowIE
 from .rts import RTSIE
-from .rtve import RTVEALaCartaIE
+from .rtve import RTVEALaCartaIE, RTVELiveIE
 from .ruhd import RUHDIE
 from .rutube import (
    RutubeIE,
@@ -274,6 +278,7 @@ from .rutube import (
 from .rutv import RUTVIE
 from .sapo import SapoIE
 from .savefrom import SaveFromIE
+from .sbs import SBSIE
 from .scivee import SciVeeIE
 from .screencast import ScreencastIE
 from .servingsys import ServingSysIE
@@ -386,6 +391,7 @@ from .vuclip import VuClipIE
 from .vulture import VultureIE
 from .washingtonpost import WashingtonPostIE
 from .wat import WatIE
+from .wayofthemaster import WayOfTheMasterIE
 from .wdr import (
    WDRIE,
    WDRMobileIE,
--- a/youtube_dl/extractor/arte.py
+++ b/youtube_dl/extractor/arte.py
@@ -177,16 +177,26 @@ class ArteTVPlus7IE(InfoExtractor):
 # It also uses the arte_vp_url url from the webpage to extract the information
 class ArteTVCreativeIE(ArteTVPlus7IE):
    IE_NAME = 'arte.tv:creative'
-    _VALID_URL = r'https?://creative\.arte\.tv/(?P<lang>fr|de)/magazine?/(?P<id>.+)'
+    _VALID_URL = r'https?://creative\.arte\.tv/(?P<lang>fr|de)/(?:magazine?/)?(?P<id>[^?#]+)'

-    _TEST = {
+    _TESTS = [{
        'url': 'http://creative.arte.tv/de/magazin/agentur-amateur-corporate-design',
        'info_dict': {
-            'id': '050489-002',
+            'id': '72176',
            'ext': 'mp4',
-            'title': 'Agentur Amateur / Agence Amateur #2 : Corporate Design',
+            'title': 'Folge 2 - Corporate Design',
+            'upload_date': '20131004',
        },
-    }
+    }, {
+        'url': 'http://creative.arte.tv/fr/Monty-Python-Reunion',
+        'info_dict': {
+            'id': '160676',
+            'ext': 'mp4',
+            'title': 'Monty Python live (mostly)',
+            'description': 'Événement ! Quarante-cinq ans après leurs premiers succès, les légendaires Monty Python remontent sur scène.\n',
+            'upload_date': '20140805',
+        }
+    }]


 class ArteTVFutureIE(ArteTVPlus7IE):
--- a/youtube_dl/extractor/bliptv.py
+++ b/youtube_dl/extractor/bliptv.py
@@ -15,7 +15,7 @@ from ..utils import (


 class BlipTVIE(SubtitlesInfoExtractor):
-    _VALID_URL = r'https?://(?:\w+\.)?blip\.tv/(?:(?:.+-|rss/flash/)(?P<id>\d+)|((?:play/|api\.swf#)(?P<lookup_id>[\da-zA-Z+]+)))'
+    _VALID_URL = r'https?://(?:\w+\.)?blip\.tv/(?:(?:.+-|rss/flash/)(?P<id>\d+)|((?:play/|api\.swf#)(?P<lookup_id>[\da-zA-Z+_TESTS]+)))'

    _TESTS = [
        {
@@ -49,6 +49,21 @@ class BlipTVIE(SubtitlesInfoExtractor):
                'uploader_id': '792887',
                'duration': 279,
            }
+        },
+        {
+            # https://bugzilla.redhat.com/show_bug.cgi?id=967465
+            'url': 'http://a.blip.tv/api.swf#h6Uag5KbVwI',
+            'md5': '314e87b1ebe7a48fcbfdd51b791ce5a6',
+            'info_dict': {
+                'id': '6573122',
+                'ext': 'mov',
+                'upload_date': '20130520',
+                'description': 'Two hapless space marines argue over what to do when they realize they have an astronomically huge problem on their hands.',
+                'title': 'Red vs. Blue Season 11 Trailer',
+                'timestamp': 1369029609,
+                'uploader': 'redvsblue',
+                'uploader_id': '792887',
+            }
        }
    ]

@@ -150,7 +165,7 @@ class BlipTVIE(SubtitlesInfoExtractor):


 class BlipTVUserIE(InfoExtractor):
-    _VALID_URL = r'(?:(?:(?:https?://)?(?:\w+\.)?blip\.tv/)|bliptvuser:)([^/]+)/*$'
+    _VALID_URL = r'(?:(?:(?:https?://)?(?:\w+\.)?blip\.tv/)|bliptvuser:)(?!api\.swf)([^/]+)/*$'
    _PAGE_SIZE = 12
    IE_NAME = 'blip.tv:user'

--- a/youtube_dl/extractor/brightcove.py
+++ b/youtube_dl/extractor/brightcove.py
@@ -154,12 +154,14 @@ class BrightcoveIE(InfoExtractor):
    def _extract_brightcove_urls(cls, webpage):
        """Return a list of all Brightcove URLs from the webpage """

-        url_m = re.search(r'<meta\s+property="og:video"\s+content="(http://c.brightcove.com/[^"]+)"', webpage)
+        url_m = re.search(
+            r'<meta\s+property="og:video"\s+content="(https?://(?:secure|c)\.brightcove.com/[^"]+)"',
+            webpage)
        if url_m:
            url = unescapeHTML(url_m.group(1))
            # Some sites don't add it, we can't download with this url, for example:
            # http://www.ktvu.com/videos/news/raw-video-caltrain-releases-video-of-man-almost/vCTZdY/
-            if 'playerKey' in url:
+            if 'playerKey' in url or 'videoId' in url:
                return [url]

        matches = re.findall(
@@ -188,9 +190,13 @@ class BrightcoveIE(InfoExtractor):
            referer = smuggled_data.get('Referer', url)
            return self._get_video_info(
                videoPlayer[0], query_str, query, referer=referer)
-        else:
+        elif 'playerKey' in query:
            player_key = query['playerKey']
            return self._get_playlist_info(player_key[0])
+        else:
+            raise ExtractorError(
+                'Cannot find playerKey= variable. Did you forget quotes in a shell invocation?',
+                expected=True)

    def _get_video_info(self, video_id, query_str, query, referer=None):
        request_url = self._FEDERATED_URL_TEMPLATE % query_str
@@ -202,6 +208,13 @@ class BrightcoveIE(InfoExtractor):
            req.add_header('Referer', referer)
        webpage = self._download_webpage(req, video_id)

+        error_msg = self._html_search_regex(
+            r"<h1>We're sorry.</h1>\s*<p>(.*?)</p>", webpage,
+            'error message', default=None)
+        if error_msg is not None:
+            raise ExtractorError(
+                'brightcove said: %s' % error_msg, expected=True)
+
        self.report_extraction(video_id)
        info = self._search_regex(r'var experienceJSON = ({.*});', webpage, 'json')
        info = json.loads(info)['data']
--- a/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@@ -84,6 +84,12 @@ class InfoExtractor(object):
                                 format, irrespective of the file format.
                                 -1 for default (order by other properties),
                                 -2 or smaller for less than default.
+                    * http_referer  HTTP Referer header value to set.
+                    * http_method  HTTP method to use for the download.
+                    * http_headers  A dictionary of additional HTTP headers
+                                 to add to the request.
+                    * http_post_data  Additional data to send with a POST
+                                 request.
    url:            Final video URL.
    ext:            Video filename extension.
    format:         The video format, defaults to ext (used for --get-format)
--- a/youtube_dl/extractor/ebaumsworld.py
+++ b/youtube_dl/extractor/ebaumsworld.py
@@ -1,19 +1,21 @@
+from __future__ import unicode_literals
+
 import re

 from .common import InfoExtractor
-from ..utils import determine_ext


 class EbaumsWorldIE(InfoExtractor):
    _VALID_URL = r'https?://www\.ebaumsworld\.com/video/watch/(?P<id>\d+)'

    _TEST = {
-        u'url': u'http://www.ebaumsworld.com/video/watch/83367677/',
-        u'file': u'83367677.mp4',
-        u'info_dict': {
-            u'title': u'A Giant Python Opens The Door',
-            u'description': u'This is how nightmares start...',
-            u'uploader': u'jihadpizza',
+        'url': 'http://www.ebaumsworld.com/video/watch/83367677/',
+        'info_dict': {
+            'id': '83367677',
+            'ext': 'mp4',
+            'title': 'A Giant Python Opens The Door',
+            'description': 'This is how nightmares start...',
+            'uploader': 'jihadpizza',
        },
    }

@@ -28,7 +30,6 @@ class EbaumsWorldIE(InfoExtractor):
            'id': video_id,
            'title': config.find('title').text,
            'url': video_url,
-            'ext': determine_ext(video_url),
            'description': config.find('description').text,
            'thumbnail': config.find('image').text,
            'uploader': config.find('username').text,
--- a/youtube_dl/extractor/generic.py
+++ b/youtube_dl/extractor/generic.py
@@ -15,11 +15,14 @@ from ..utils import (
    compat_xml_parse_error,

    ExtractorError,
+    float_or_none,
    HEADRequest,
+    orderedSet,
    parse_xml,
    smuggle_url,
    unescapeHTML,
    unified_strdate,
+    unsmuggle_url,
    url_basename,
 )
 from .brightcove import BrightcoveIE
@@ -289,6 +292,46 @@ class GenericIE(InfoExtractor):
                'description': 'Mario\'s life in the fast lane has never looked so good.',
            },
        },
+        # YouTube embed via <data-embed-url="">
+        {
+            'url': 'https://play.google.com/store/apps/details?id=com.gameloft.android.ANMP.GloftA8HM',
+            'info_dict': {
+                'id': 'jpSGZsgga_I',
+                'ext': 'mp4',
+                'title': 'Asphalt 8: Airborne - Launch Trailer',
+                'uploader': 'Gameloft',
+                'uploader_id': 'gameloft',
+                'upload_date': '20130821',
+                'description': 'md5:87bd95f13d8be3e7da87a5f2c443106a',
+            },
+            'params': {
+                'skip_download': True,
+            }
+        },
+        # Camtasia studio
+        {
+            'url': 'http://www.ll.mit.edu/workshops/education/videocourses/antennas/lecture1/video/',
+            'playlist': [{
+                'md5': '0c5e352edabf715d762b0ad4e6d9ee67',
+                'info_dict': {
+                    'id': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final',
+                    'title': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final - video1',
+                    'ext': 'flv',
+                    'duration': 2235.90,
+                }
+            }, {
+                'md5': '10e4bb3aaca9fd630e273ff92d9f3c63',
+                'info_dict': {
+                    'id': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final_PIP',
+                    'title': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final - pip',
+                    'ext': 'flv',
+                    'duration': 2235.93,
+                }
+            }],
+            'info_dict': {
+                'title': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final',
+            }
+        }
    ]

    def report_download_webpage(self, video_id):
@@ -372,6 +415,43 @@ class GenericIE(InfoExtractor):
            'entries': entries,
        }

+    def _extract_camtasia(self, url, video_id, webpage):
+        """ Returns None if no camtasia video can be found. """
+
+        camtasia_cfg = self._search_regex(
+            r'fo\.addVariable\(\s*"csConfigFile",\s*"([^"]+)"\s*\);',
+            webpage, 'camtasia configuration file', default=None)
+        if camtasia_cfg is None:
+            return None
+
+        title = self._html_search_meta('DC.title', webpage, fatal=True)
+
+        camtasia_url = compat_urlparse.urljoin(url, camtasia_cfg)
+        camtasia_cfg = self._download_xml(
+            camtasia_url, video_id,
+            note='Downloading camtasia configuration',
+            errnote='Failed to download camtasia configuration')
+        fileset_node = camtasia_cfg.find('./playlist/array/fileset')
+
+        entries = []
+        for n in fileset_node.getchildren():
+            url_n = n.find('./uri')
+            if url_n is None:
+                continue
+
+            entries.append({
+                'id': os.path.splitext(url_n.text.rpartition('/')[2])[0],
+                'title': '%s - %s' % (title, n.tag),
+                'url': compat_urlparse.urljoin(url, url_n.text),
+                'duration': float_or_none(n.find('./duration').text),
+            })
+
+        return {
+            '_type': 'playlist',
+            'entries': entries,
+            'title': title,
+        }
+
    def _real_extract(self, url):
        if url.startswith('//'):
            return {
@@ -408,7 +488,14 @@ class GenericIE(InfoExtractor):
            else:
                assert ':' in default_search
                return self.url_result(default_search + url)
-        video_id = os.path.splitext(url.rstrip('/').split('/')[-1])[0]
+
+        url, smuggled_data = unsmuggle_url(url)
+        force_videoid = None
+        if smuggled_data and 'force_videoid' in smuggled_data:
+            force_videoid = smuggled_data['force_videoid']
+            video_id = force_videoid
+        else:
+            video_id = os.path.splitext(url.rstrip('/').split('/')[-1])[0]

        self.to_screen('%s: Requesting header' % video_id)

@@ -419,6 +506,9 @@ class GenericIE(InfoExtractor):
            new_url = response.geturl()
            if url != new_url:
                self.report_following_redirect(new_url)
+                if force_videoid:
+                    new_url = smuggle_url(
+                        new_url, {'force_videoid': force_videoid})
                return self.url_result(new_url)

            # Check for direct link to a video
@@ -460,6 +550,11 @@ class GenericIE(InfoExtractor):
        except compat_xml_parse_error:
            pass

+        # Is it a Camtasia project?
+        camtasia_res = self._extract_camtasia(url, video_id, webpage)
+        if camtasia_res is not None:
+            return camtasia_res
+
        # Sometimes embedded video player is hidden behind percent encoding
        # (e.g. https://github.com/rg3/youtube-dl/issues/2448)
        # Unescaping the whole page allows to handle those cases in a generic way
@@ -479,6 +574,12 @@ class GenericIE(InfoExtractor):
        video_uploader = self._search_regex(
            r'^(?:https?://)?([^/]*)/.*', url, 'video uploader')

+        # Helper method
+        def _playlist_from_matches(matches, getter, ie=None):
+            urlrs = orderedSet(self.url_result(getter(m), ie) for m in matches)
+            return self.playlist_result(
+                urlrs, playlist_id=video_id, playlist_title=video_title)
+
        # Look for BrightCove:
        bc_urls = BrightcoveIE._extract_brightcove_urls(webpage)
        if bc_urls:
@@ -514,6 +615,7 @@ class GenericIE(InfoExtractor):
        matches = re.findall(r'''(?x)
            (?:
                <iframe[^>]+?src=|
+                data-video-url=|
                <embed[^>]+?src=|
                embedSWF\(?:\s*
            )
@@ -522,19 +624,15 @@ class GenericIE(InfoExtractor):
                (?:embed|v)/.+?)
            \1''', webpage)
        if matches:
-            urlrs = [self.url_result(unescapeHTML(tuppl[1]), 'Youtube')
-                     for tuppl in matches]
-            return self.playlist_result(
-                urlrs, playlist_id=video_id, playlist_title=video_title)
+            return _playlist_from_matches(
+                matches, lambda m: unescapeHTML(m[1]), ie='Youtube')

        # Look for embedded Dailymotion player
        matches = re.findall(
            r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?dailymotion\.com/embed/video/.+?)\1', webpage)
        if matches:
-            urlrs = [self.url_result(unescapeHTML(tuppl[1]))
-                     for tuppl in matches]
-            return self.playlist_result(
-                urlrs, playlist_id=video_id, playlist_title=video_title)
+            return _playlist_from_matches(
+                matches, lambda m: unescapeHTML(m[1]))

        # Look for embedded Wistia player
        match = re.search(
@@ -553,7 +651,7 @@ class GenericIE(InfoExtractor):
        mobj = re.search(r'<meta\s[^>]*https?://api\.blip\.tv/\w+/redirect/\w+/(\d+)', webpage)
        if mobj:
            return self.url_result('http://blip.tv/a/a-'+mobj.group(1), 'BlipTV')
-        mobj = re.search(r'<(?:iframe|embed|object)\s[^>]*(https?://(?:\w+\.)?blip\.tv/(?:play/|api\.swf#)[a-zA-Z0-9]+)', webpage)
+        mobj = re.search(r'<(?:iframe|embed|object)\s[^>]*(https?://(?:\w+\.)?blip\.tv/(?:play/|api\.swf#)[a-zA-Z0-9_]+)', webpage)
        if mobj:
            return self.url_result(mobj.group(1), 'BlipTV')

@@ -648,10 +746,8 @@ class GenericIE(InfoExtractor):
        # Look for funnyordie embed
        matches = re.findall(r'<iframe[^>]+?src="(https?://(?:www\.)?funnyordie\.com/embed/[^"]+)"', webpage)
        if matches:
-            urlrs = [self.url_result(unescapeHTML(eurl), 'FunnyOrDie')
-                     for eurl in matches]
-            return self.playlist_result(
-                urlrs, playlist_id=video_id, playlist_title=video_title)
+            return _playlist_from_matches(
+                matches, getter=unescapeHTML, ie='FunnyOrDie')

        # Look for embedded RUTV player
        rutv_url = RUTVIE._extract_url(webpage)
@@ -713,6 +809,13 @@ class GenericIE(InfoExtractor):
        if mobj is not None:
            return self.url_result(mobj.group('url'), 'Yahoo')

+        # Look for embedded sbs.com.au player
+        mobj = re.search(
+            r'<iframe[^>]+?src=(["\'])(?P<url>https?://(?:www\.)sbs\.com\.au/ondemand/video/single/.+?)\1',
+            webpage)
+        if mobj is not None:
+            return self.url_result(mobj.group('url'), 'SBS')
+
        # Start with something easy: JW Player in SWFObject
        found = re.findall(r'flashvars: [\'"](?:.*&)?file=(http[^\'"&]*)', webpage)
        if not found:
@@ -739,7 +842,12 @@ class GenericIE(InfoExtractor):
            m_video_type = re.findall(r'<meta.*?property="og:video:type".*?content="video/(.*?)"', webpage)
            # We only look in og:video if the MIME type is a video, don't try if it's a Flash player:
            if m_video_type is not None:
-                found = re.findall(r'<meta.*?property="og:video".*?content="(.*?)"', webpage)
+                def check_video(vurl):
+                    vpath = compat_urlparse.urlparse(vurl).path
+                    return '.' in vpath and not vpath.endswith('.swf')
+                found = list(filter(
+                    check_video,
+                    re.findall(r'<meta.*?property="og:video".*?content="(.*?)"', webpage)))
        if not found:
            # HTML5 video
            found = re.findall(r'(?s)<video[^<]*(?:>.*?<source.*?)? src="([^"]+)"', webpage)
--- a/youtube_dl/extractor/grooveshark.py
+++ b/youtube_dl/extractor/grooveshark.py
@@ -0,0 +1,190 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import time
+import math
+import os.path
+import re
+
+
+from .common import InfoExtractor
+from ..utils import ExtractorError, compat_urllib_request, compat_html_parser
+
+from ..utils import (
+    compat_urllib_parse,
+    compat_urlparse,
+)
+
+
+class GroovesharkHtmlParser(compat_html_parser.HTMLParser):
+    def __init__(self):
+        self._current_object = None
+        self.objects = []
+        compat_html_parser.HTMLParser.__init__(self)
+
+    def handle_starttag(self, tag, attrs):
+        attrs = dict((k, v) for k, v in attrs)
+        if tag == 'object':
+            self._current_object = {'attrs': attrs, 'params': []}
+        elif tag == 'param':
+            self._current_object['params'].append(attrs)
+
+    def handle_endtag(self, tag):
+        if tag == 'object':
+            self.objects.append(self._current_object)
+            self._current_object = None
+
+    @classmethod
+    def extract_object_tags(cls, html):
+        p = cls()
+        p.feed(html)
+        p.close()
+        return p.objects
+
+
+class GroovesharkIE(InfoExtractor):
+    _VALID_URL = r'https?://(www\.)?grooveshark\.com/#!/s/([^/]+)/([^/]+)'
+    _TEST = {
+        'url': 'http://grooveshark.com/#!/s/Jolene+Tenth+Key+Remix+Ft+Will+Sessions/6SS1DW?src=5',
+        'md5': '7ecf8aefa59d6b2098517e1baa530023',
+        'info_dict': {
+            'id': '6SS1DW',
+            'title': 'Jolene (Tenth Key Remix ft. Will Sessions)',
+            'ext': 'mp3',
+            'duration': 227,
+        }
+    }
+
+    do_playerpage_request = True
+    do_bootstrap_request = True
+
+    def _parse_target(self, target):
+        uri = compat_urlparse.urlparse(target)
+        hash = uri.fragment[1:].split('?')[0]
+        token = os.path.basename(hash.rstrip('/'))
+        return (uri, hash, token)
+
+    def _build_bootstrap_url(self, target):
+        (uri, hash, token) = self._parse_target(target)
+        query = 'getCommunicationToken=1&hash=%s&%d' % (compat_urllib_parse.quote(hash, safe=''), self.ts)
+        return (compat_urlparse.urlunparse((uri.scheme, uri.netloc, '/preload.php', None, query, None)), token)
+
+    def _build_meta_url(self, target):
+        (uri, hash, token) = self._parse_target(target)
+        query = 'hash=%s&%d' % (compat_urllib_parse.quote(hash, safe=''), self.ts)
+        return (compat_urlparse.urlunparse((uri.scheme, uri.netloc, '/preload.php', None, query, None)), token)
+
+    def _build_stream_url(self, meta):
+        return compat_urlparse.urlunparse(('http', meta['streamKey']['ip'], '/stream.php', None, None, None))
+
+    def _build_swf_referer(self, target, obj):
+        (uri, _, _) = self._parse_target(target)
+        return compat_urlparse.urlunparse((uri.scheme, uri.netloc, obj['attrs']['data'], None, None, None))
+
+    def _transform_bootstrap(self, js):
+        return re.split('(?m)^\s*try\s*{', js)[0] \
+                 .split(' = ', 1)[1].strip().rstrip(';')
+
+    def _transform_meta(self, js):
+        return js.split('\n')[0].split('=')[1].rstrip(';')
+
+    def _get_meta(self, target):
+        (meta_url, token) = self._build_meta_url(target)
+        self.to_screen('Metadata URL: %s' % meta_url)
+
+        headers = {'Referer': compat_urlparse.urldefrag(target)[0]}
+        req = compat_urllib_request.Request(meta_url, headers=headers)
+        res = self._download_json(req, token,
+                                  transform_source=self._transform_meta)
+
+        if 'getStreamKeyWithSong' not in res:
+            raise ExtractorError(
+                'Metadata not found. URL may be malformed, or Grooveshark API may have changed.')
+
+        if res['getStreamKeyWithSong'] is None:
+            raise ExtractorError(
+                'Metadata download failed, probably due to Grooveshark anti-abuse throttling. Wait at least an hour before retrying from this IP.',
+                expected=True)
+
+        return res['getStreamKeyWithSong']
+
+    def _get_bootstrap(self, target):
+        (bootstrap_url, token) = self._build_bootstrap_url(target)
+
+        headers = {'Referer': compat_urlparse.urldefrag(target)[0]}
+        req = compat_urllib_request.Request(bootstrap_url, headers=headers)
+        res = self._download_json(req, token, fatal=False,
+                                  note='Downloading player bootstrap data',
+                                  errnote='Unable to download player bootstrap data',
+                                  transform_source=self._transform_bootstrap)
+        return res
+
+    def _get_playerpage(self, target):
+        (_, _, token) = self._parse_target(target)
+
+        webpage = self._download_webpage(
+            target, token,
+            note='Downloading player page',
+            errnote='Unable to download player page',
+            fatal=False)
+
+        if webpage is not None:
+            # Search (for example German) error message
+            error_msg = self._html_search_regex(
+                r'<div id="content">\s*<h2>(.*?)</h2>', webpage,
+                'error message', default=None)
+            if error_msg is not None:
+                error_msg = error_msg.replace('\n', ' ')
+                raise ExtractorError('Grooveshark said: %s' % error_msg)
+
+        if webpage is not None:
+            o = GroovesharkHtmlParser.extract_object_tags(webpage)
+            return (webpage, [x for x in o if x['attrs']['id'] == 'jsPlayerEmbed'])
+
+        return (webpage, None)
+
+    def _real_initialize(self):
+        self.ts = int(time.time() * 1000)  # timestamp in millis
+
+    def _real_extract(self, url):
+        (target_uri, _, token) = self._parse_target(url)
+
+        # 1. Fill cookiejar by making a request to the player page
+        swf_referer = None
+        if self.do_playerpage_request:
+            (_, player_objs) = self._get_playerpage(url)
+            if player_objs is not None:
+                swf_referer = self._build_swf_referer(url, player_objs[0])
+                self.to_screen('SWF Referer: %s' % swf_referer)
+
+        # 2. Ask preload.php for swf bootstrap data to better mimic webapp
+        if self.do_bootstrap_request:
+            bootstrap = self._get_bootstrap(url)
+            self.to_screen('CommunicationToken: %s' % bootstrap['getCommunicationToken'])
+
+        # 3. Ask preload.php for track metadata.
+        meta = self._get_meta(url)
+
+        # 4. Construct stream request for track.
+        stream_url = self._build_stream_url(meta)
+        duration = int(math.ceil(float(meta['streamKey']['uSecs']) / 1000000))
+        post_dict = {'streamKey': meta['streamKey']['streamKey']}
+        post_data = compat_urllib_parse.urlencode(post_dict).encode('utf-8')
+        headers = {
+            'Content-Length': len(post_data),
+            'Content-Type': 'application/x-www-form-urlencoded'
+        }
+        if swf_referer is not None:
+            headers['Referer'] = swf_referer
+
+        return {
+            'id': token,
+            'title': meta['song']['Name'],
+            'http_method': 'POST',
+            'url': stream_url,
+            'ext': 'mp3',
+            'format': 'mp3 audio',
+            'duration': duration,
+            'http_post_data': post_data,
+            'http_headers': headers,
+        }
--- a/youtube_dl/extractor/ministrygrid.py
+++ b/youtube_dl/extractor/ministrygrid.py
@@ -0,0 +1,57 @@
+from __future__ import unicode_literals
+
+import json
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+    ExtractorError,
+    smuggle_url,
+)
+
+
+class MinistryGridIE(InfoExtractor):
+    _VALID_URL = r'https?://www\.ministrygrid.com/([^/?#]*/)*(?P<id>[^/#?]+)/?(?:$|[?#])'
+
+    _TEST = {
+        'url': 'http://www.ministrygrid.com/training-viewer/-/training/t4g-2014-conference/the-gospel-by-numbers-4/the-gospel-by-numbers',
+        'md5': '844be0d2a1340422759c2a9101bab017',
+        'info_dict': {
+            'id': '3453494717001',
+            'ext': 'mp4',
+            'title': 'The Gospel by Numbers',
+            'description': 'Coming soon from T4G 2014!',
+            'uploader': 'LifeWay Christian Resources (MG)',
+        },
+    }
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        video_id = mobj.group('id')
+
+        webpage = self._download_webpage(url, video_id)
+        portlets_json = self._search_regex(
+            r'Liferay\.Portlet\.list=(\[.+?\])', webpage, 'portlet list')
+        portlets = json.loads(portlets_json)
+        pl_id = self._search_regex(
+            r'<!--\s*p_l_id - ([0-9]+)<br>', webpage, 'p_l_id')
+
+        for i, portlet in enumerate(portlets):
+            portlet_url = 'http://www.ministrygrid.com/c/portal/render_portlet?p_l_id=%s&p_p_id=%s' % (pl_id, portlet)
+            portlet_code = self._download_webpage(
+                portlet_url, video_id,
+                note='Looking in portlet %s (%d/%d)' % (portlet, i + 1, len(portlets)),
+                fatal=False)
+            video_iframe_url = self._search_regex(
+                r'<iframe.*?src="([^"]+)"', portlet_code, 'video iframe',
+                default=None)
+            if video_iframe_url:
+                surl = smuggle_url(
+                    video_iframe_url, {'force_videoid': video_id})
+                return {
+                    '_type': 'url',
+                    'id': video_id,
+                    'url': surl,
+                }
+
+        raise ExtractorError('Could not find video iframe in any portlets')
--- a/youtube_dl/extractor/movieclips.py
+++ b/youtube_dl/extractor/movieclips.py
@@ -0,0 +1,78 @@
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+    ExtractorError,
+    compat_str,
+    clean_html,
+)
+
+
+class MovieClipsIE(InfoExtractor):
+    """Extractor for clip pages on movieclips.com."""
+
+    _VALID_URL = r'https?://movieclips\.com/(?P<id>[\da-zA-Z]+)(?:-(?P<display_id>[\da-z-]+))?'
+    _TEST = {
+        'url': 'http://movieclips.com/Wy7ZU-my-week-with-marilyn-movie-do-you-love-me/',
+        'info_dict': {
+            'id': 'Wy7ZU',
+            'display_id': 'my-week-with-marilyn-movie-do-you-love-me',
+            'ext': 'mp4',
+            'title': 'My Week with Marilyn - Do You Love Me?',
+            'description': 'md5:e86795bd332fe3cff461e7c8dc542acb',
+            'thumbnail': 're:^https?://.*\.jpg$',
+        },
+        'params': {
+            # rtmp download
+            'skip_download': True,
+        }
+    }
+
+    def _real_extract(self, url):
+        """Build the info dict for a clip from its player config and SMIL.
+
+        Downloads the XML player config for the clip id, then the SMIL
+        file it references, and emits one format per ``<video>`` entry in
+        the SMIL ``<switch>``.  Raises ExtractorError (expected) when the
+        config's ``country-region`` element reads ``'false'``.
+        """
+        mobj = re.match(self._VALID_URL, url)
+        video_id = mobj.group('id')
+        display_id = mobj.group('display_id')
+        # Prefer the readable slug in progress messages when the URL has one.
+        show_id = display_id or video_id
+
+        config = self._download_xml(
+            'http://config.movieclips.com/player/config/%s' % video_id,
+            show_id, 'Downloading player config')
+
+        if config.find('./country-region').text == 'false':
+            raise ExtractorError(
+                '%s said: %s' % (self.IE_NAME, config.find('./region_alert').text), expected=True)
+
+        properties = config.find('./video/properties')
+        smil_file = properties.attrib['smil_file']
+
+        smil = self._download_xml(smil_file, show_id, 'Downloading SMIL')
+        base_url = smil.find('./head/meta').attrib['base']
+
+        formats = []
+        for video in smil.findall('./body/switch/video'):
+            # Floor division keeps vbr an integer on both Python 2 and 3;
+            # this module does not import division from __future__.
+            vbr = int(video.attrib['system-bitrate']) // 1000
+            src = video.attrib['src']
+            formats.append({
+                'url': base_url,
+                'play_path': src,
+                # The part of src before the first ':' is used as extension.
+                'ext': src.split(':')[0],
+                'vbr': vbr,
+                'format_id': '%dk' % vbr,
+            })
+
+        self._sort_formats(formats)
+
+        title = '%s - %s' % (properties.attrib['clip_movie_title'], properties.attrib['clip_title'])
+        description = clean_html(compat_str(properties.attrib['clip_description']))
+        thumbnail = properties.attrib['image']
+        categories = properties.attrib['clip_categories'].split(',')
+
+        return {
+            'id': video_id,
+            'display_id': display_id,
+            'title': title,
+            'description': description,
+            'thumbnail': thumbnail,
+            'categories': categories,
+            'formats': formats,
+        }
--- a/youtube_dl/extractor/nuvid.py
+++ b/youtube_dl/extractor/nuvid.py
@@ -38,7 +38,7 @@ class NuvidIE(InfoExtractor):
            webpage = self._download_webpage(
                request, video_id, 'Downloading %s page' % format_id)
            video_url = self._html_search_regex(
-                r'<a href="([^"]+)"\s*>Continue to watch video', webpage, '%s video URL' % format_id, fatal=False)
+                r'<a\s+href="([^"]+)"\s+class="b_link">', webpage, '%s video URL' % format_id, fatal=False)
            if not video_url:
                continue
            formats.append({
@@ -49,19 +49,24 @@ class NuvidIE(InfoExtractor):
        webpage = self._download_webpage(
            'http://m.nuvid.com/video/%s' % video_id, video_id, 'Downloading video page')
        title = self._html_search_regex(
-            r'<div class="title">\s+<h2[^>]*>([^<]+)</h2>', webpage, 'title').strip()
-        thumbnail = self._html_search_regex(
-            r'href="(/thumbs/[^"]+)"[^>]*data-link_type="thumbs"',
-            webpage, 'thumbnail URL', fatal=False)
+            [r'<span title="([^"]+)">',
+             r'<div class="thumb-holder video">\s*<h5[^>]*>([^<]+)</h5>'], webpage, 'title').strip()
+        thumbnails = [
+            {
+                'url': thumb_url,
+            } for thumb_url in re.findall(r'<img src="([^"]+)" alt="" />', webpage)
+        ]
+        thumbnail = thumbnails[0]['url'] if thumbnails else None
        duration = parse_duration(self._html_search_regex(
-            r'Length:\s*<span>(\d{2}:\d{2})</span>',webpage, 'duration', fatal=False))
+            r'<i class="fa fa-clock-o"></i>\s*(\d{2}:\d{2})', webpage, 'duration', fatal=False))
        upload_date = unified_strdate(self._html_search_regex(
-            r'Added:\s*<span>(\d{4}-\d{2}-\d{2})</span>', webpage, 'upload date', fatal=False))
+            r'<i class="fa fa-user"></i>\s*(\d{4}-\d{2}-\d{2})', webpage, 'upload date', fatal=False))

        return {
            'id': video_id,
            'title': title,
-            'thumbnail': 'http://m.nuvid.com%s' % thumbnail,
+            'thumbnails': thumbnails,
+            'thumbnail': thumbnail,
            'duration': duration,
            'upload_date': upload_date,
            'age_limit': 18,
--- a/youtube_dl/extractor/patreon.py
+++ b/youtube_dl/extractor/patreon.py
@@ -6,7 +6,6 @@ import re

 from .common import InfoExtractor
 from ..utils import (
-    compat_urlparse,
    js_to_json,
 )

--- a/youtube_dl/extractor/pbs.py
+++ b/youtube_dl/extractor/pbs.py
@@ -54,6 +54,18 @@ class PBSIE(InfoExtractor):
                'duration': 801,
            },
        },
+        {
+            'url': 'http://www.pbs.org/wnet/gperf/dudamel-conducts-verdi-requiem-hollywood-bowl-full-episode/3374/',
+            'md5': 'c62859342be2a0358d6c9eb306595978',
+            'info_dict': {
+                'id': '2365297708',
+                'ext': 'mp4',
+                'description': 'md5:68d87ef760660eb564455eb30ca464fe',
+                'title': 'Dudamel Conducts Verdi Requiem at the Hollywood Bowl - Full',
+                'duration': 6559,
+                'thumbnail': 're:^https?://.*\.jpg$',
+            }
+        }
    ]

    def _extract_ids(self, url):
@@ -75,7 +87,7 @@ class PBSIE(InfoExtractor):
                return media_id, presumptive_id

            url = self._search_regex(
-                r'<iframe\s+id=["\']partnerPlayer["\'].*?\s+src=["\'](.*?)["\']>',
+                r'<iframe\s+(?:class|id)=["\']partnerPlayer["\'].*?\s+src=["\'](.*?)["\']>',
                webpage, 'player URL')
            mobj = re.match(self._VALID_URL, url)

--- a/youtube_dl/extractor/playfm.py
+++ b/youtube_dl/extractor/playfm.py
@@ -0,0 +1,82 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+    compat_urllib_parse,
+    compat_urllib_request,
+    ExtractorError,
+    float_or_none,
+    int_or_none,
+)
+
+
+class PlayFMIE(InfoExtractor):
+    """Extractor for play.fm recordings."""
+
+    IE_NAME = 'play.fm'
+    # The URL ends in an 8-digit upload date directly followed by a 6-digit
+    # recording id.
+    _VALID_URL = r'https?://(?:www\.)?play\.fm/[^?#]*(?P<upload_date>[0-9]{8})(?P<id>[0-9]{6})(?:$|[?#])'
+
+    _TEST = {
+        'url': 'http://www.play.fm/recording/leipzigelectronicmusicbatofarparis_fr20140712137220',
+        'md5': 'c505f8307825a245d0c7ad1850001f22',
+        'info_dict': {
+            'id': '137220',
+            'ext': 'mp3',
+            'title': 'LEIPZIG ELECTRONIC MUSIC @ Batofar (Paris,FR) - 2014-07-12',
+            'uploader': 'Sven Tasnadi',
+            'uploader_id': 'sventasnadi',
+            'duration': 5627.428,
+            'upload_date': '20140712',
+            'view_count': int,
+            'thumbnail': 're:^https?://.*\.jpg$',
+        },
+    }
+
+    def _real_extract(self, url):
+        """Query the flexRead endpoint and assemble the recording's info.
+
+        POSTs the recording id to ``/flexRead/recording`` and reads title,
+        statistics, artist and stream location from the XML reply.
+        Raises ExtractorError when the reply contains an ``<error>`` node.
+        """
+        mobj = re.match(self._VALID_URL, url)
+        video_id = mobj.group('id')
+        upload_date = mobj.group('upload_date')
+
+        rec_data = compat_urllib_parse.urlencode({'rec_id': video_id})
+        req = compat_urllib_request.Request(
+            'http://www.play.fm/flexRead/recording', data=rec_data)
+        req.add_header('Content-Type', 'application/x-www-form-urlencoded')
+        rec_doc = self._download_xml(req, video_id)
+
+        error_node = rec_doc.find('./error')
+        if error_node is not None:
+            raise ExtractorError('An error occurred: %s (code %s)' % (
+                error_node.text, rec_doc.find('./status').text))
+
+        recording = rec_doc.find('./recording')
+        title = recording.find('./title').text
+        view_count = int_or_none(recording.find('./stats/playcount').text)
+        # NOTE(review): scale=1000 presumably converts milliseconds to
+        # seconds (the test expects 5627.428) - confirm against the API.
+        duration = float_or_none(recording.find('./duration').text, scale=1000)
+        thumbnail = recording.find('./image').text
+
+        # Only the first listed artist is reported as the uploader.
+        artist = recording.find('./artists/artist')
+        uploader = artist.find('./name').text
+        uploader_id = artist.find('./slug').text
+
+        # The stream URL is pieced together from several fields of the
+        # reply plus a fixed, already URL-encoded player location.
+        video_url = '%s//%s/%s/%s/offset/0/sh/%s/rec/%s/jingle/%s/loc/%s' % (
+            'http:', recording.find('./url').text,
+            recording.find('./_class').text, recording.find('./file_id').text,
+            rec_doc.find('./uuid').text, video_id,
+            rec_doc.find('./jingle/file_id').text,
+            'http%3A%2F%2Fwww.play.fm%2Fplayer',
+        )
+
+        return {
+            'id': video_id,
+            'url': video_url,
+            'ext': 'mp3',
+            'filesize': int_or_none(recording.find('./size').text),
+            'title': title,
+            'upload_date': upload_date,
+            'view_count': view_count,
+            'duration': duration,
+            'thumbnail': thumbnail,
+            'uploader': uploader,
+            'uploader_id': uploader_id,
+        }
--- a/youtube_dl/extractor/rtlnl.py
+++ b/youtube_dl/extractor/rtlnl.py
@@ -35,7 +35,6 @@ class RtlXlIE(InfoExtractor):
        info = self._download_json(
            'http://www.rtl.nl/system/s4m/vfd/version=2/uuid=%s/fmt=flash/' % uuid,
            uuid)
-        meta = info['meta']
        material = info['material'][0]
        episode_info = info['episodes'][0]

--- a/youtube_dl/extractor/rtve.py
+++ b/youtube_dl/extractor/rtve.py
@@ -1,21 +1,66 @@
 # encoding: utf-8
 from __future__ import unicode_literals

-import re
 import base64
+import re
+import time

 from .common import InfoExtractor
 from ..utils import (
    struct_unpack,
+    remove_end,
 )


+def _decrypt_url(png):
+    """Decode the video URL hidden in a base64-encoded image payload.
+
+    The payload is base64-decoded and the data following the ``tEXt``
+    marker is split at the first ``#`` into an "alphabet" part and a
+    "url" part.  Both are thinned out with cyclic skip patterns; the url
+    part yields two-digit indices into the alphabet, and the selected
+    characters concatenated form the returned URL.
+    """
+    encrypted_data = base64.b64decode(png)
+    text_index = encrypted_data.find(b'tEXt')
+    # Start 4 bytes before the marker so the slice begins with the length
+    # field (presumably the standard PNG chunk layout - confirm).
+    # NOTE(review): a missing marker (find() == -1) is not handled here.
+    text_chunk = encrypted_data[text_index - 4:]
+    length = struct_unpack('!I', text_chunk[:4])[0]
+    # Use bytearray to get integers when iterating in both python 2.x and 3.x
+    # Bytes 0-7 are the length field and the 'tEXt' marker; payload follows.
+    data = bytearray(text_chunk[8:8 + length])
+    # Drop NUL bytes and turn the remaining bytes into characters.
+    data = [chr(b) for b in data if b != 0]
+    hash_index = data.index('#')
+    alphabet_data = data[:hash_index]
+    url_data = data[hash_index + 1:]
+
+    # Build the alphabet: keep one character, then skip e characters, where
+    # e cycles through 1, 2, 3, 0 after each kept character.
+    alphabet = []
+    e = 0
+    d = 0
+    for l in alphabet_data:
+        if d == 0:
+            alphabet.append(l)
+            d = e = (e + 1) % 4
+        else:
+            d -= 1
+    # Decode the URL: each output character is alphabet[10 * tens + units].
+    # f flags whether the tens digit has been read; e counts characters to
+    # skip between the tens and the units digit (3 at first, then
+    # (b + 3) % 4 with b incremented after every decoded character).
+    url = ''
+    f = 0
+    e = 3
+    b = 1
+    for letter in url_data:
+        if f == 0:
+            l = int(letter) * 10
+            f = 1
+        else:
+            if e == 0:
+                l += int(letter)
+                url += alphabet[l]
+                e = (b + 3) % 4
+                f = 0
+                b += 1
+            else:
+                e -= 1
+
+    return url
+
+
+
 class RTVEALaCartaIE(InfoExtractor):
    IE_NAME = 'rtve.es:alacarta'
    IE_DESC = 'RTVE a la carta'
    _VALID_URL = r'http://www\.rtve\.es/alacarta/videos/[^/]+/[^/]+/(?P<id>\d+)'

-    _TEST = {
+    _TESTS = [{
        'url': 'http://www.rtve.es/alacarta/videos/balonmano/o-swiss-cup-masculina-final-espana-suecia/2491869/',
        'md5': '1d49b7e1ca7a7502c56a4bf1b60f1b43',
        'info_dict': {
@@ -23,48 +68,15 @@ class RTVEALaCartaIE(InfoExtractor):
            'ext': 'mp4',
            'title': 'Balonmano - Swiss Cup masculina. Final: España-Suecia',
        },
-    }
-
-    def _decrypt_url(self, png):
-        encrypted_data = base64.b64decode(png)
-        text_index = encrypted_data.find(b'tEXt')
-        text_chunk = encrypted_data[text_index-4:]
-        length = struct_unpack('!I', text_chunk[:4])[0]
-        # Use bytearray to get integers when iterating in both python 2.x and 3.x
-        data = bytearray(text_chunk[8:8+length])
-        data = [chr(b) for b in data if b != 0]
-        hash_index = data.index('#')
-        alphabet_data = data[:hash_index]
-        url_data = data[hash_index+1:]
-
-        alphabet = []
-        e = 0
-        d = 0
-        for l in alphabet_data:
-            if d == 0:
-                alphabet.append(l)
-                d = e = (e + 1) % 4
-            else:
-                d -= 1
-        url = ''
-        f = 0
-        e = 3
-        b = 1
-        for letter in url_data:
-            if f == 0:
-                l = int(letter)*10
-                f = 1
-            else:
-                if e == 0:
-                    l += int(letter)
-                    url += alphabet[l]
-                    e = (b + 3) % 4
-                    f = 0
-                    b += 1
-                else:
-                    e -= 1
-
-        return url
+    }, {
+        'note': 'Live stream',
+        'url': 'http://www.rtve.es/alacarta/videos/television/24h-live/1694255/',
+        'info_dict': {
+            'id': '1694255',
+            'ext': 'flv',
+            'title': 'TODO',
+        }
+    }]

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
@@ -74,11 +86,57 @@ class RTVEALaCartaIE(InfoExtractor):
            video_id)['page']['items'][0]
        png_url = 'http://www.rtve.es/ztnr/movil/thumbnail/default/videos/%s.png' % video_id
        png = self._download_webpage(png_url, video_id, 'Downloading url information')
-        video_url = self._decrypt_url(png)
+        video_url = _decrypt_url(png)

        return {
            'id': video_id,
            'title': info['title'],
            'url': video_url,
-            'thumbnail': info['image'],
+            'thumbnail': info.get('image'),
+            'page_url': url,
+        }
+
+
+class RTVELiveIE(InfoExtractor):
+    """Extractor for the live stream pages of rtve.es."""
+
+    IE_NAME = 'rtve.es:live'
+    IE_DESC = 'RTVE.es live streams'
+    _VALID_URL = r'http://www\.rtve\.es/(?:deportes/directo|noticias|television)/(?P<id>[a-zA-Z0-9-]+)'
+
+    _TESTS = [{
+        'url': 'http://www.rtve.es/noticias/directo-la-1/',
+        'info_dict': {
+            'id': 'directo-la-1',
+            'ext': 'flv',
+            'title': 're:^La 1 de TVE [0-9]{4}-[0-9]{2}-[0-9]{2}Z[0-9]{6}$',
+        },
+        'params': {
+            'skip_download': 'live stream',
+        }
+    }]
+
+    def _real_extract(self, url):
+        """Return the info dict for a live stream page.
+
+        The title is the page's og:title without a trailing ' en directo',
+        followed by the UTC time at which extraction started.  The stream
+        URL is decoded from the thumbnail PNG of the page's internal
+        player id.
+        """
+        url_match = re.match(self._VALID_URL, url)
+        # Taken before any download so the stamp marks the extraction start.
+        started_at = time.gmtime()
+        video_id = url_match.group('id')
+
+        page = self._download_webpage(url, video_id)
+        player_url = self._search_regex(
+            r'<param name="movie" value="([^"]+)"/>', page, 'player URL')
+        stamp = time.strftime('%Y-%m-%dZ%H%M%S', started_at)
+        title = remove_end(self._og_search_title(page), ' en directo') + ' ' + stamp
+
+        internal_id = self._search_regex(
+            r' id="vidplayer([0-9]+)"', page, 'internal video ID')
+        encoded_png = self._download_webpage(
+            'http://www.rtve.es/ztnr/movil/thumbnail/default/videos/%s.png' % internal_id,
+            video_id, 'Downloading url information')
+
+        return {
+            'id': video_id,
+            'title': title,
+            'ext': 'flv',
+            'url': _decrypt_url(encoded_png),
+            'player_url': player_url,
+            'app': 'rtve-live-live?ovpfv=2.1.2',
+            'rtmp_live': True,
         }
--- a/youtube_dl/extractor/sbs.py
+++ b/youtube_dl/extractor/sbs.py
@@ -0,0 +1,56 @@
+# -*- coding: utf-8 -*-
+from __future__ import unicode_literals
+
+import json
+import re
+from .common import InfoExtractor
+from ..utils import (
+    js_to_json,
+    remove_end,
+)
+
+
+class SBSIE(InfoExtractor):
+    """Extractor for single on-demand video pages of sbs.com.au."""
+
+    IE_DESC = 'sbs.com.au'
+    _VALID_URL = r'https?://(?:www\.)?sbs\.com\.au/ondemand/video/single/(?P<id>[0-9]+)/'
+
+    _TESTS = [{
+        # Original URL is handled by the generic IE which finds the iframe:
+        # http://www.sbs.com.au/thefeed/blog/2014/08/21/dingo-conservation
+        'url': 'http://www.sbs.com.au/ondemand/video/single/320403011771/?source=drupal&vertical=thefeed',
+        'md5': '3150cf278965eeabb5b4cea1c963fe0a',
+        'info_dict': {
+            'id': '320403011771',
+            'ext': 'flv',
+            'title': 'Dingo Conservation',
+            'description': 'Dingoes are on the brink of extinction; most of the animals we think are dingoes are in fact crossbred with wild dogs. This family run a dingo conservation park to prevent their extinction',
+            'thumbnail': 're:http://.*\.jpg',
+        },
+        'add_ies': ['generic'],
+    }]
+
+    def _real_extract(self, url):
+        """Return a url_transparent result pointing at the release URL.
+
+        The page assigns a JavaScript object to
+        ``playerParams.releaseUrls``; its 'progressive' entry is preferred
+        and 'standard' is the fallback.  Title, description and thumbnail
+        are taken from the page's meta tags.
+        """
+        mobj = re.match(self._VALID_URL, url)
+        video_id = mobj.group('id')
+        webpage = self._download_webpage(url, video_id)
+
+        # Name the field so a failed extraction reports what was missing.
+        release_urls_json = js_to_json(self._search_regex(
+            r'(?s)playerParams\.releaseUrls\s*=\s*(\{.*?\n\});\n',
+            webpage, 'release URLs'))
+        release_urls = json.loads(release_urls_json)
+        theplatform_url = (
+            release_urls.get('progressive') or release_urls.get('standard'))
+
+        title = remove_end(self._og_search_title(webpage), ' (The Feed)')
+        description = self._html_search_meta('description', webpage)
+        thumbnail = self._og_search_thumbnail(webpage)
+
+        return {
+            '_type': 'url_transparent',
+            'id': video_id,
+            'url': theplatform_url,
+
+            'title': title,
+            'description': description,
+            'thumbnail': thumbnail,
+        }
--- a/youtube_dl/extractor/vodlocker.py
+++ b/youtube_dl/extractor/vodlocker.py
@@ -44,7 +44,7 @@ class VodlockerIE(InfoExtractor):
                req, video_id, 'Downloading video page')

        title = self._search_regex(
-            r'id="file_title".*?>\s*(.*?)\s*<span', webpage, 'title')
+            r'id="file_title".*?>\s*(.*?)\s*<(?:br|span)', webpage, 'title')
        thumbnail = self._search_regex(
            r'image:\s*"(http[^\"]+)",', webpage, 'thumbnail')
        url = self._search_regex(
--- a/youtube_dl/extractor/wat.py
+++ b/youtube_dl/extractor/wat.py
@@ -2,27 +2,30 @@
 from __future__ import unicode_literals

 import re
+import time
+import hashlib

 from .common import InfoExtractor
 from ..utils import (
+    ExtractorError,
    unified_strdate,
 )


 class WatIE(InfoExtractor):
-    _VALID_URL = r'http://www\.wat\.tv/.*-(?P<shortID>.*?)_.*?\.html'
+    _VALID_URL = r'http://www\.wat\.tv/video/(?P<display_id>.*)-(?P<short_id>.*?)_.*?\.html'
    IE_NAME = 'wat.tv'
    _TEST = {
-        'url': 'http://www.wat.tv/video/world-war-philadelphia-vost-6bv55_2fjr7_.html',
+        'url': 'http://www.wat.tv/video/soupe-figues-l-orange-aux-epices-6z1uz_2hvf7_.html',
+        'md5': 'ce70e9223945ed26a8056d413ca55dc9',
        'info_dict': {
-            'id': '10631273',
+            'id': '11713067',
+            'display_id': 'soupe-figues-l-orange-aux-epices',
            'ext': 'mp4',
-            'title': 'World War Z - Philadelphia VOST',
-            'description': 'La menace est partout. Que se passe-t-il à Philadelphia ?\r\nWORLD WAR Z, avec Brad Pitt, au cinéma le 3 juillet.\r\nhttp://www.worldwarz.fr',
-        },
-        'params': {
-            # Sometimes wat serves the whole file with the --test option
-            'skip_download': True,
+            'title': 'Soupe de figues à l\'orange et aux épices',
+            'description': 'Retrouvez l\'émission "Petits plats en équilibre", diffusée le 18 août 2014.',
+            'upload_date': '20140819',
+            'duration': 120,
        },
    }

@@ -36,13 +39,20 @@ class WatIE(InfoExtractor):
        def real_id_for_chapter(chapter):
            return chapter['tc_start'].split('-')[0]
        mobj = re.match(self._VALID_URL, url)
-        short_id = mobj.group('shortID')
-        webpage = self._download_webpage(url, short_id)
+        short_id = mobj.group('short_id')
+        display_id = mobj.group('display_id')
+        webpage = self._download_webpage(url, display_id or short_id)
        real_id = self._search_regex(r'xtpage = ".*-(.*?)";', webpage, 'real id')

        video_info = self.download_video_info(real_id)
+
+        if video_info.get('geolock'):
+            raise ExtractorError('This content is not available in your area', expected=True)
+
        chapters = video_info['chapters']
        first_chapter = chapters[0]
+        files = video_info['files']
+        first_file = files[0]

        if real_id_for_chapter(first_chapter) != real_id:
            self.to_screen('Multipart video detected')
@@ -61,12 +71,45 @@ class WatIE(InfoExtractor):
            upload_date = unified_strdate(first_chapter['date_diffusion'])
        # Otherwise we can continue and extract just one part, we have to use
        # the short id for getting the video url
+
+        formats = [{
+            'url': 'http://wat.tv/get/android5/%s.mp4' % real_id,
+            'format_id': 'Mobile',
+        }]
+
+        fmts = [('SD', 'web')]
+        if first_file.get('hasHD'):
+            fmts.append(('HD', 'webhd'))
+
+        def compute_token(param):
+            timestamp = '%08x' % int(time.time())
+            magic = '9b673b13fa4682ed14c3cfa5af5310274b514c4133e9b3a81e6e3aba009l2564'
+            return '%s/%s' % (hashlib.md5((magic + param + timestamp).encode('ascii')).hexdigest(), timestamp)
+
+        for fmt in fmts:
+            webid = '/%s/%s' % (fmt[1], real_id)
+            video_url = self._download_webpage(
+                'http://www.wat.tv/get%s?token=%s&getURL=1' % (webid, compute_token(webid)),
+                real_id,
+                'Downloding %s video URL' % fmt[0],
+                'Failed to download %s video URL' % fmt[0],
+                False)
+            if not video_url:
+                continue
+            formats.append({
+                'url': video_url,
+                'ext': 'mp4',
+                'format_id': fmt[0],
+            })
+
        return {
            'id': real_id,
-            'url': 'http://wat.tv/get/android5/%s.mp4' % real_id,
+            'display_id': display_id,
            'title': first_chapter['title'],
            'thumbnail': first_chapter['preview'],
            'description': first_chapter['description'],
            'view_count': video_info['views'],
            'upload_date': upload_date,
+            'duration': first_file['duration'],
+            'formats': formats,
        }
--- a/youtube_dl/extractor/wayofthemaster.py
+++ b/youtube_dl/extractor/wayofthemaster.py
@@ -0,0 +1,52 @@
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+
+
+class WayOfTheMasterIE(InfoExtractor):
+    """Extractor for video pages on wayofthemaster.com."""
+
+    _VALID_URL = r'https?://www\.wayofthemaster\.com/([^/?#]*/)*(?P<id>[^/?#]+)\.s?html(?:$|[?#])'
+
+    _TEST = {
+        'url': 'http://www.wayofthemaster.com/hbks.shtml',
+        'md5': '5316b57487ada8480606a93cb3d18d24',
+        'info_dict': {
+            'id': 'hbks',
+            'ext': 'mp4',
+            'title': 'Intelligent Design vs. Evolution',
+        },
+    }
+
+    def _real_extract(self, url):
+        """Return the info dict with a low and a high quality format.
+
+        The title comes from the alt text of the title image, falling back
+        to the page's <title>.  Both format URLs are derived from the
+        ``vid`` parameter of the embedded flash player.
+        """
+        video_id = re.match(self._VALID_URL, url).group('id')
+        page = self._download_webpage(url, video_id)
+
+        image_title = self._search_regex(
+            r'<img src="images/title_[^"]+".*?alt="([^"]+)"',
+            page, 'title', default=None)
+        if image_title is not None:
+            title = image_title
+        else:
+            title = self._html_search_regex(
+                r'<title>(.*?)</title>', page, 'page title')
+
+        vid_base = self._search_regex(
+            r'<param\s+name="?movie"?\s+value=".*?/wotm_videoplayer_highlow[0-9]*\.swf\?vid=([^"]+)"',
+            page, 'URL base')
+        # Quality rank grows with the position in this tuple (low=1, high=2).
+        formats = [
+            {
+                'format_id': level,
+                'quality': rank,
+                'url': vid_base + '_' + level + '.mp4',
+            }
+            for rank, level in enumerate(('low', 'high'), 1)
+        ]
+        self._sort_formats(formats)
+
+        return {
+            'id': video_id,
+            'title': title,
+            'formats': formats,
+        }
--- a/youtube_dl/extractor/xhamster.py
+++ b/youtube_dl/extractor/xhamster.py
@@ -14,7 +14,7 @@ from ..utils import (

 class XHamsterIE(InfoExtractor):
    """Information Extractor for xHamster"""
-    _VALID_URL = r'http://(?:www\.)?xhamster\.com/movies/(?P<id>[0-9]+)/(?P<seo>.+?)\.html(?:\?.*)?'
+    _VALID_URL = r'http://(?:.+?\.)?xhamster\.com/movies/(?P<id>[0-9]+)/(?P<seo>.+?)\.html(?:\?.*)?'
    _TESTS = [
        {
            'url': 'http://xhamster.com/movies/1509445/femaleagent_shy_beauty_takes_the_bait.html',
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@@ -1285,6 +1285,12 @@ def remove_start(s, start):
    return s


+def remove_end(s, end):
+    """Return s without the suffix end; s is returned as-is otherwise."""
+    # An empty suffix must be a no-op: s.endswith('') is always true and
+    # s[:-0] would evaluate to the empty string.
+    if end and s.endswith(end):
+        return s[:-len(end)]
+    return s
+
+
 def url_basename(url):
    path = compat_urlparse.urlparse(url).path
    return path.strip(u'/').split(u'/')[-1]
--- a/youtube_dl/version.py
+++ b/youtube_dl/version.py
@@ -1,2 +1,2 @@

-__version__ = '2014.08.22.1'
+__version__ = '2014.08.24.2'
Author	SHA1	Message	Date
Philipp Hagemeister	24e5e24166	release 2014.08.24.2	2014-08-24 04:47:38 +02:00
Philipp Hagemeister	4d54ef20a2	[ministrygrid] Add extractor (Fixes #2900 )	2014-08-24 04:47:28 +02:00
Philipp Hagemeister	54036b3991	[wayofthemaster] Remove unused import	2014-08-24 04:18:09 +02:00
Philipp Hagemeister	e5402ac120	[wayofthemaster] Add extractor (Fixes #3575 )	2014-08-24 04:14:02 +02:00
Philipp Hagemeister	f56f8399c7	[ebaumsworld] Remove spurious determine_ext	2014-08-24 03:37:19 +02:00
Philipp Hagemeister	cf0c5fa3a1	[ebaumsworld] Modernize	2014-08-24 03:31:38 +02:00
Philipp Hagemeister	8c2ccefae6	release 2014.08.24.1	2014-08-24 03:20:40 +02:00
Philipp Hagemeister	1f8b6af773	[bip.tv] Allow underscore in lookup ids (Fixes #3573 )	2014-08-24 03:20:31 +02:00
Philipp Hagemeister	8f9b683eeb	[blip.tv] Add legacy test case This was broken in the mean time, so add a test case to make sure it doesn't break silently again.	2014-08-24 03:13:58 +02:00
Philipp Hagemeister	b5f4775b38	[arte.tv:creative] Fix test case	2014-08-24 03:11:00 +02:00
Philipp Hagemeister	01d906ffe9	[arte:creative] Support more URLs (fixes #3572 )	2014-08-24 02:57:32 +02:00
Philipp Hagemeister	614582bcc4	release 2014.08.24	2014-08-24 02:44:36 +02:00
Philipp Hagemeister	e1ab5000b2	[brightcove] Add support for videoId= in og:video meta (Fixes #3571 )	2014-08-24 02:41:21 +02:00
Philipp Hagemeister	a5ed3e571e	[brightcove] Detect geoblocking	2014-08-24 02:40:26 +02:00
Philipp Hagemeister	10eaeb20c5	[generic] Require og:video URLs to contain a dot	2014-08-24 02:29:56 +02:00
Philipp Hagemeister	fa8deaf38b	[generic] Prevent from downloading a .swf as a video We're seeing quite a number of people who do not put a video file in the og:video field, but the player URL. Try to detect some of these and filter them out.	2014-08-24 02:24:49 +02:00
Philipp Hagemeister	6857590059	[brightcove] Add a truncated URL warning message (#3571 )	2014-08-24 02:11:26 +02:00
Philipp Hagemeister	a3db22ebdf	[grooveshark] Use proper imports	2014-08-24 02:06:59 +02:00
Philipp Hagemeister	c8e9a235d9	[generic] Add support for camtasia videos (Fixes #3574 )	2014-08-24 02:02:17 +02:00
Philipp Hagemeister	30b871b0ca	Merge remote-tracking branch 'origin/master'	2014-08-24 01:34:28 +02:00
Philipp Hagemeister	eb9da9b732	[grooveshark] Fix test md5sum	2014-08-24 01:33:55 +02:00
Philipp Hagemeister	d769be6c96	[grooveshark,http] Make HTTP POST downloads work	2014-08-24 01:31:35 +02:00
Sergey M․	a54bda3ae2	[wat] Add support for SD and HD videos (Closes #3558 )	2014-08-24 02:22:10 +07:00
Philipp Hagemeister	00558d9414	Merge remote-tracking branch 'sehrgut/Grooveshark' Conflicts: youtube_dl/__init__.py youtube_dl/extractor/__init__.py	2014-08-23 16:41:14 +02:00
Philipp Hagemeister	49f3c16543	release 2014.08.23	2014-08-23 15:24:31 +02:00
Philipp Hagemeister	2ef6fcb5d8	[sbs] Add new extractor (Fixes #3566 )	2014-08-23 15:20:56 +02:00
Philipp Hagemeister	38fc045253	[rtlnl] Remove unused code	2014-08-23 15:05:21 +02:00
Philipp Hagemeister	af1fd929c6	[patreon] Remove unused import	2014-08-23 15:04:11 +02:00
Philipp Hagemeister	b7b04c9234	[vodlocker] Allow title to end with a <br>	2014-08-23 14:39:47 +02:00
Sergey M․	bc0bb6fd30	[movieclips] Add extractor (Closes #3554 )	2014-08-23 17:44:56 +07:00
Philipp Hagemeister	430826c9d4	Merge pull request #3568 from MikeCol/xhamster_load changed _VALID_URL to allow for country specific subdomains	2014-08-22 22:46:42 +02:00
MikeCol	68909f0c4e	changed _VALID_URL to allow for country specific prefixes	2014-08-22 22:17:07 +02:00
Philipp Hagemeister	9d048a17d8	[rtve.es:live] Start supporting the 24h channel	2014-08-22 18:47:49 +02:00
Philipp Hagemeister	492641d10a	release 2014.08.22.3	2014-08-22 18:41:43 +02:00
Philipp Hagemeister	2b9faf5542	[rtve] Add support for live stream At the moment, only RTVE-1 seems to work flawlessly. -2 seems geoblocked right now. -TDP doesn't seem to be available outside of Spain.	2014-08-22 18:40:28 +02:00
Philipp Hagemeister	ed2d6a1960	[generic] Simplify playlist support (#2948 )	2014-08-22 18:19:56 +02:00
Philipp Hagemeister	be843678b1	[YouTubeDL] Correct handling of age_limit = None in result	2014-08-22 17:46:57 +02:00
Philipp Hagemeister	c71dfccc98	Merge remote-tracking branch 'anovicecodemonkey/generic-data-video-url' Conflicts: youtube_dl/extractor/generic.py	2014-08-22 17:40:36 +02:00
Philipp Hagemeister	1a9ccac7c1	Merge remote-tracking branch 'origin/master'	2014-08-22 17:38:11 +02:00
Philipp Hagemeister	e330d59abb	[playfm] Add extractor (Fixes #3538 )	2014-08-22 17:38:06 +02:00
Sergey M․	394df6d7d0	[nuvid] Adapt to latest layout changes	2014-08-22 21:41:51 +07:00
Philipp Hagemeister	218f754940	[README] Add thumbnail to _TEST example While it's not mandatory, extractors are highly encouraged to provide a thumbnail field.	2014-08-22 11:30:49 +02:00
Philipp Hagemeister	a053c3493a	[test_YoutubeDL] Reorder formats (#3542 )	2014-08-22 03:44:30 +02:00
Philipp Hagemeister	50b294aab8	release 2014.08.22.2	2014-08-22 03:16:16 +02:00
Philipp Hagemeister	756b046f3e	[pbs] recognize class=partnerPlayer as well (Fixes #3564 )	2014-08-22 03:16:08 +02:00
anovicecodemonkey	37e3cbe22e	Move duplicate check to generic.py	2014-06-01 01:16:35 +09:30
anovicecodemonkey	610134730a	Add a _TEST_	2014-05-21 19:25:37 +09:30
anovicecodemonkey	212a5e28ba	Add a duplicate check to /extractor/common.py playlist_result function	2014-05-21 19:04:55 +09:30
Keith Beckman	ee1a7032d5	Fixed errors found by travisci: py26: re.split can't take flags. use inline flags or re.compile py27: info_dict must be serializable. remove request object py335, py34: no urlparse module. use utils.compat_urlparse	2014-05-20 22:28:32 -04:00
Keith Beckman	7ed806d241	Fixed pyflakes and pep8 warnings	2014-05-20 02:55:21 -04:00
Keith Beckman	dd06c95e43	Added new IE for Grooveshark	2014-05-20 02:47:34 -04:00
anovicecodemonkey	3442b30ab2	[generic] Support data-video-url for YouTube embeds (Fixes #2862 )	2014-05-18 23:15:09 +09:30