release 2013.12.20

[aparat] Add support (Fixes #2012 )
[blinkxx] Add support for youtube videos
2013-12-20 17:08:16 +01:00 · 2013-12-20 17:05:39 +01:00 · 2013-12-19 21:02:25 +01:00 · 2013-12-19 20:44:30 +01:00 · 2013-12-19 20:32:12 +01:00 · 2013-12-19 11:28:34 -08:00
25 changed files with 349 additions and 78 deletions
--- a/test/test_all_urls.py
+++ b/test/test_all_urls.py
@@ -10,6 +10,7 @@ sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
 from test.helper import get_testcases

 from youtube_dl.extractor import (
+    FacebookIE,
    gen_extractors,
    JustinTVIE,
    YoutubeIE,
@@ -87,12 +88,15 @@ class TestAllURLsMatching(unittest.TestCase):
        assertExtractId('http://www.youtube.com/watch?v=BaW_jenozKcsharePLED17F32AD9753930', 'BaW_jenozKc')
        assertExtractId('BaW_jenozKc', 'BaW_jenozKc')

+    def test_facebook_matching(self):
+        self.assertTrue(FacebookIE.suitable(u'https://www.facebook.com/Shiniknoh#!/photo.php?v=10153317450565268'))
+
    def test_no_duplicates(self):
        ies = gen_extractors()
        for tc in get_testcases():
            url = tc['url']
            for ie in ies:
-                if type(ie).__name__ in ['GenericIE', tc['name'] + 'IE']:
+                if type(ie).__name__ in ('GenericIE', tc['name'] + 'IE'):
                    self.assertTrue(ie.suitable(url), '%s should match URL %r' % (type(ie).__name__, url))
                else:
                    self.assertFalse(ie.suitable(url), '%s should not match URL %r' % (type(ie).__name__, url))
--- a/test/test_playlists.py
+++ b/test/test_playlists.py
@@ -12,6 +12,7 @@ from test.helper import FakeYDL


 from youtube_dl.extractor import (
+    AcademicEarthCourseIE,
    DailymotionPlaylistIE,
    DailymotionUserIE,
    VimeoChannelIE,
@@ -158,5 +159,16 @@ class TestPlaylists(unittest.TestCase):
        self.assertEqual(result['title'], u'Inspector')
        self.assertTrue(len(result['entries']) >= 9)

+    def test_AcademicEarthCourse(self):
+        dl = FakeYDL()
+        ie = AcademicEarthCourseIE(dl)
+        result = ie.extract(u'http://academicearth.org/courses/building-dynamic-websites/')
+        self.assertIsPlaylist(result)
+        self.assertEqual(result['id'], u'building-dynamic-websites')
+        self.assertEqual(result['title'], u'Building Dynamic Websites')
+        self.assertEqual(result['description'], u"Today's websites are increasingly dynamic. Pages are no longer static HTML files but instead generated by scripts and database calls. User interfaces are more seamless, with technologies like Ajax replacing traditional page reloads. This course teaches students how to build dynamic websites with Ajax and with Linux, Apache, MySQL, and PHP (LAMP), one of today's most popular frameworks. Students learn how to set up domain names with DNS, how to structure pages with XHTML and CSS, how to program in JavaScript and PHP, how to configure Apache and MySQL, how to design and query databases with SQL, how to use Ajax with both XML and JSON, and how to build mashups. The course explores issues of security, scalability, and cross-browser support and also discusses enterprise-level deployments of websites, including third-party hosting, virtualization, colocation in data centers, firewalling, and load-balancing.")
+        self.assertEqual(len(result['entries']), 10)
+
+
 if __name__ == '__main__':
    unittest.main()
--- a/test/test_utils.py
+++ b/test/test_utils.py
@@ -13,20 +13,21 @@ import xml.etree.ElementTree

 #from youtube_dl.utils import htmlentity_transform
 from youtube_dl.utils import (
-    timeconvert,
-    sanitize_filename,
-    unescapeHTML,
-    orderedSet,
    DateRange,
-    unified_strdate,
+    encodeFilename,
    find_xpath_attr,
    get_meta_content,
-    xpath_with_ns,
-    smuggle_url,
-    unsmuggle_url,
+    orderedSet,
+    sanitize_filename,
    shell_quote,
-    encodeFilename,
+    smuggle_url,
    str_to_int,
+    timeconvert,
+    unescapeHTML,
+    unified_strdate,
+    unsmuggle_url,
+    url_basename,
+    xpath_with_ns,
 )

 if sys.version_info < (3, 0):
@@ -181,6 +182,15 @@ class TestUtil(unittest.TestCase):
        self.assertEqual(str_to_int('123,456'), 123456)
        self.assertEqual(str_to_int('123.456'), 123456)

+    def test_url_basename(self):
+        self.assertEqual(url_basename(u'http://foo.de/'), u'')
+        self.assertEqual(url_basename(u'http://foo.de/bar/baz'), u'baz')
+        self.assertEqual(url_basename(u'http://foo.de/bar/baz?x=y'), u'baz')
+        self.assertEqual(url_basename(u'http://foo.de/bar/baz#x=y'), u'baz')
+        self.assertEqual(url_basename(u'http://foo.de/bar/baz/'), u'baz')
+        self.assertEqual(
+            url_basename(u'http://media.w3.org/2010/05/sintel/trailer.mp4'),
+            u'trailer.mp4')

 if __name__ == '__main__':
    unittest.main()
--- a/youtube_dl/YoutubeDL.py
+++ b/youtube_dl/YoutubeDL.py
@@ -47,6 +47,7 @@ from .utils import (
    subtitles_filename,
    takewhile_inclusive,
    UnavailableVideoError,
+    url_basename,
    write_json_file,
    write_string,
    YoutubeDLHandler,
@@ -484,6 +485,7 @@ class YoutubeDL(object):
                    {
                        'extractor': ie.IE_NAME,
                        'webpage_url': url,
+                        'webpage_url_basename': url_basename(url),
                        'extractor_key': ie.ie_key(),
                    })
                if process:
@@ -576,6 +578,7 @@ class YoutubeDL(object):
                    'playlist_index': i + playliststart,
                    'extractor': ie_result['extractor'],
                    'webpage_url': ie_result['webpage_url'],
+                    'webpage_url_basename': url_basename(ie_result['webpage_url']),
                    'extractor_key': ie_result['extractor_key'],
                }

@@ -596,6 +599,7 @@ class YoutubeDL(object):
                    {
                        'extractor': ie_result['extractor'],
                        'webpage_url': ie_result['webpage_url'],
+                        'webpage_url_basename': url_basename(ie_result['webpage_url']),
                        'extractor_key': ie_result['extractor_key'],
                    })
                return r
@@ -845,7 +849,7 @@ class YoutubeDL(object):
            if info_dict.get('thumbnail') is not None:
                thumb_format = determine_ext(info_dict['thumbnail'], u'jpg')
                thumb_filename = os.path.splitext(filename)[0] + u'.' + thumb_format
-                if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(infofn)):
+                if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(thumb_filename)):
                    self.to_screen(u'[%s] %s: Thumbnail is already present' %
                                   (info_dict['extractor'], info_dict['id']))
                else:
--- a/youtube_dl/extractor/init.py
+++ b/youtube_dl/extractor/init.py
@@ -1,6 +1,8 @@
-from .appletrailers import AppleTrailersIE
+from .academicearth import AcademicEarthCourseIE
 from .addanime import AddAnimeIE
 from .anitube import AnitubeIE
+from .aparat import AparatIE
+from .appletrailers import AppleTrailersIE
 from .archiveorg import ArchiveOrgIE
 from .ard import ARDIE
 from .arte import (
@@ -119,6 +121,7 @@ from .pornhd import PornHdIE
 from .pornhub import PornHubIE
 from .pornotube import PornotubeIE
 from .pyvideo import PyvideoIE
+from .radiofrance import RadioFranceIE
 from .rbmaradio import RBMARadioIE
 from .redtube import RedTubeIE
 from .ringtv import RingTVIE
--- a/youtube_dl/extractor/academicearth.py
+++ b/youtube_dl/extractor/academicearth.py
@@ -0,0 +1,31 @@
+import re
+
+from .common import InfoExtractor
+
+
+class AcademicEarthCourseIE(InfoExtractor):
+    _VALID_URL = r'^https?://(?:www\.)?academicearth\.org/(?:courses|playlists)/(?P<id>[^?#/]+)'
+    IE_NAME = u'AcademicEarth:Course'
+
+    def _real_extract(self, url):
+        m = re.match(self._VALID_URL, url)
+        playlist_id = m.group('id')
+
+        webpage = self._download_webpage(url, playlist_id)
+        title = self._html_search_regex(
+            r'<h1 class="playlist-name">(.*?)</h1>', webpage, u'title')
+        description = self._html_search_regex(
+            r'<p class="excerpt">(.*?)</p>',
+            webpage, u'description', fatal=False)
+        urls = re.findall(
+            r'<h3 class="lecture-title"><a target="_blank" href="([^"]+)">',
+            webpage)
+        entries = [self.url_result(u) for u in urls]
+
+        return {
+            '_type': 'playlist',
+            'id': playlist_id,
+            'title': title,
+            'description': description,
+            'entries': entries,
+        }
--- a/youtube_dl/extractor/aparat.py
+++ b/youtube_dl/extractor/aparat.py
@@ -0,0 +1,56 @@
+#coding: utf-8
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+    ExtractorError,
+    HEADRequest,
+)
+
+
+class AparatIE(InfoExtractor):
+    _VALID_URL = r'^https?://(?:www\.)?aparat\.com/(?:v/|video/video/embed/videohash/)(?P<id>[a-zA-Z0-9]+)'
+
+    _TEST = {
+        u'url': u'http://www.aparat.com/v/wP8On',
+        u'file': u'wP8On.mp4',
+        u'md5': u'6714e0af7e0d875c5a39c4dc4ab46ad1',
+        u'info_dict': {
+            u"title": u"تیم گلکسی 11 - زومیت",
+        },
+        #u'skip': u'Extremely unreliable',
+    }
+
+    def _real_extract(self, url):
+        m = re.match(self._VALID_URL, url)
+        video_id = m.group('id')
+
+        # Note: There is an easier-to-parse configuration at
+        # http://www.aparat.com/video/video/config/videohash/%video_id
+        # but the URL in there does not work
+        embed_url = (u'http://www.aparat.com/video/video/embed/videohash/' +
+                     video_id + u'/vt/frame')
+        webpage = self._download_webpage(embed_url, video_id)
+
+        video_urls = re.findall(r'fileList\[[0-9]+\]\s*=\s*"([^"]+)"', webpage)
+        for i, video_url in enumerate(video_urls):
+            req = HEADRequest(video_url)
+            res = self._request_webpage(
+                req, video_id, note=u'Testing video URL %d' % i, errnote=False)
+            if res:
+                break
+        else:
+            raise ExtractorError(u'No working video URLs found')
+
+        title = self._search_regex(r'\s+title:\s*"([^"]+)"', webpage, u'title')
+        thumbnail = self._search_regex(
+            r'\s+image:\s*"([^"]+)"', webpage, u'thumbnail', fatal=False)
+
+        return {
+            'id': video_id,
+            'title': title,
+            'url': video_url,
+            'ext': 'mp4',
+            'thumbnail': thumbnail,
+        }
--- a/youtube_dl/extractor/arte.py
+++ b/youtube_dl/extractor/arte.py
@@ -266,20 +266,6 @@ class ArteTVDDCIE(ArteTVPlus7IE):
    IE_NAME = u'arte.tv:ddc'
    _VALID_URL = r'http?://ddc\.arte\.tv/(?P<lang>emission|folge)/(?P<id>.+)'

-    _TEST = {
-        u'url': u'http://ddc.arte.tv/folge/neues-aus-mauretanien',
-        u'file': u'049881-009_PLUS7-D.flv',
-        u'info_dict': {
-            u'title': u'Mit offenen Karten',
-            u'description': u'md5:57929b0eaeddeb8a0c983f58e9ebd3b6',
-            u'upload_date': u'20131207',
-        },
-        u'params': {
-            # rtmp download
-            u'skip_download': True,
-        },
-    }
-
    def _real_extract(self, url):
        video_id, lang = self._extract_url_info(url)
        if lang == 'folge':
--- a/youtube_dl/extractor/blinkx.py
+++ b/youtube_dl/extractor/blinkx.py
@@ -54,6 +54,10 @@ class BlinkxIE(InfoExtractor):
                })
            elif m['type'] == 'original':
                duration = m['d']
+            elif m['type'] == 'youtube':
+                yt_id = m['link']
+                self.to_screen(u'Youtube video detected: %s' % yt_id)
+                return self.url_result(yt_id, 'Youtube', video_id=yt_id)
            elif m['type'] in ('flv', 'mp4'):
                vcodec = remove_start(m['vcodec'], 'ff')
                acodec = remove_start(m['acodec'], 'ff')
--- a/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@@ -18,6 +18,7 @@ from ..utils import (
    sanitize_filename,
    unescapeHTML,
 )
+_NO_DEFAULT = object()


 class InfoExtractor(object):
@@ -169,6 +170,8 @@ class InfoExtractor(object):
        try:
            return self._downloader.urlopen(url_or_request)
        except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
+            if errnote is False:
+                return False
            if errnote is None:
                errnote = u'Unable to download webpage'
            errmsg = u'%s: %s' % (errnote, compat_str(err))
@@ -262,7 +265,8 @@ class InfoExtractor(object):
        self.to_screen(u'Logging in')

    #Methods for following #608
-    def url_result(self, url, ie=None, video_id=None):
+    @staticmethod
+    def url_result(url, ie=None, video_id=None):
        """Returns a url that points to a page that should be processed"""
        #TODO: ie should be the class used for getting the info
        video_info = {'_type': 'url',
@@ -271,7 +275,8 @@ class InfoExtractor(object):
        if video_id is not None:
            video_info['id'] = video_id
        return video_info
-    def playlist_result(self, entries, playlist_id=None, playlist_title=None):
+    @staticmethod
+    def playlist_result(entries, playlist_id=None, playlist_title=None):
        """Returns a playlist"""
        video_info = {'_type': 'playlist',
                      'entries': entries}
@@ -281,7 +286,7 @@ class InfoExtractor(object):
            video_info['title'] = playlist_title
        return video_info

-    def _search_regex(self, pattern, string, name, default=None, fatal=True, flags=0):
+    def _search_regex(self, pattern, string, name, default=_NO_DEFAULT, fatal=True, flags=0):
        """
        Perform a regex search on the given string, using a single or a list of
        patterns returning the first matching group.
@@ -295,7 +300,7 @@ class InfoExtractor(object):
                mobj = re.search(p, string, flags)
                if mobj: break

-        if sys.stderr.isatty() and os.name != 'nt':
+        if os.name != 'nt' and sys.stderr.isatty():
            _name = u'\033[0;34m%s\033[0m' % name
        else:
            _name = name
@@ -303,7 +308,7 @@ class InfoExtractor(object):
        if mobj:
            # return the first matching group
            return next(g for g in mobj.groups() if g is not None)
-        elif default is not None:
+        elif default is not _NO_DEFAULT:
            return default
        elif fatal:
            raise RegexNotFoundError(u'Unable to extract %s' % _name)
@@ -312,7 +317,7 @@ class InfoExtractor(object):
                u'please report this issue on http://yt-dl.org/bug' % _name)
            return None

-    def _html_search_regex(self, pattern, string, name, default=None, fatal=True, flags=0):
+    def _html_search_regex(self, pattern, string, name, default=_NO_DEFAULT, fatal=True, flags=0):
        """
        Like _search_regex, but strips HTML tags and unescapes entities.
        """
--- a/youtube_dl/extractor/facebook.py
+++ b/youtube_dl/extractor/facebook.py
@@ -17,7 +17,7 @@ from ..utils import (
 class FacebookIE(InfoExtractor):
    """Information Extractor for Facebook"""

-    _VALID_URL = r'^(?:https?://)?(?:\w+\.)?facebook\.com/(?:video/video|photo)\.php\?(?:.*?)v=(?P<ID>\d+)(?:.*)'
+    _VALID_URL = r'^(?:https?://)?(?:\w+\.)?facebook\.com/(?:[^#?]*#!/)?(?:video/video|photo)\.php\?(?:.*?)v=(?P<ID>\d+)(?:.*)'
    _LOGIN_URL = 'https://www.facebook.com/login.php?next=http%3A%2F%2Ffacebook.com%2Fhome.php&login_attempt=1'
    _CHECKPOINT_URL = 'https://www.facebook.com/checkpoint/?next=http%3A%2F%2Ffacebook.com%2Fhome.php&_fb_noscript=1'
    _NETRC_MACHINE = 'facebook'
@@ -27,7 +27,7 @@ class FacebookIE(InfoExtractor):
        u'file': u'120708114770723.mp4',
        u'md5': u'48975a41ccc4b7a581abd68651c1a5a8',
        u'info_dict': {
-            u"duration": 279, 
+            u"duration": 279,
            u"title": u"PEOPLE ARE AWESOME 2013"
        }
    }
--- a/youtube_dl/extractor/generic.py
+++ b/youtube_dl/extractor/generic.py
@@ -11,10 +11,14 @@ from ..utils import (
    compat_urlparse,

    ExtractorError,
+    HEADRequest,
    smuggle_url,
    unescapeHTML,
+    unified_strdate,
+    url_basename,
 )
 from .brightcove import BrightcoveIE
+from .ooyala import OoyalaIE


 class GenericIE(InfoExtractor):
@@ -71,6 +75,27 @@ class GenericIE(InfoExtractor):
                u'skip_download': True,
            },
        },
+        # Direct link to a video
+        {
+            u'url': u'http://media.w3.org/2010/05/sintel/trailer.mp4',
+            u'file': u'trailer.mp4',
+            u'md5': u'67d406c2bcb6af27fa886f31aa934bbe',
+            u'info_dict': {
+                u'id': u'trailer',
+                u'title': u'trailer',
+                u'upload_date': u'20100513',
+            }
+        },
+        # ooyala video
+        {
+            u'url': u'http://www.rollingstone.com/music/videos/norwegian-dj-cashmere-cat-goes-spartan-on-with-me-premiere-20131219',
+            u'md5': u'5644c6ca5d5782c1d0d350dad9bd840c',
+            u'info_dict': {
+                u'id': u'BwY2RxaTrTkslxOfcan0UCf0YqyvWysJ',
+                u'ext': u'mp4',
+                u'title': u'2cc213299525360.mov', #that's what we get
+            },
+        },
    ]

    def report_download_webpage(self, video_id):
@@ -83,23 +108,20 @@ class GenericIE(InfoExtractor):
        """Report information extraction."""
        self._downloader.to_screen(u'[redirect] Following redirect to %s' % new_url)

-    def _test_redirect(self, url):
+    def _send_head(self, url):
        """Check if it is a redirect, like url shorteners, in case return the new url."""
-        class HeadRequest(compat_urllib_request.Request):
-            def get_method(self):
-                return "HEAD"

        class HEADRedirectHandler(compat_urllib_request.HTTPRedirectHandler):
            """
            Subclass the HTTPRedirectHandler to make it use our
-            HeadRequest also on the redirected URL
+            HEADRequest also on the redirected URL
            """
            def redirect_request(self, req, fp, code, msg, headers, newurl):
                if code in (301, 302, 303, 307):
                    newurl = newurl.replace(' ', '%20')
                    newheaders = dict((k,v) for k,v in req.headers.items()
                                      if k.lower() not in ("content-length", "content-type"))
-                    return HeadRequest(newurl,
+                    return HEADRequest(newurl,
                                       headers=newheaders,
                                       origin_req_host=req.get_origin_req_host(),
                                       unverifiable=True)
@@ -128,32 +150,49 @@ class GenericIE(InfoExtractor):
                        compat_urllib_request.HTTPErrorProcessor, compat_urllib_request.HTTPSHandler]:
            opener.add_handler(handler())

-        response = opener.open(HeadRequest(url))
+        response = opener.open(HEADRequest(url))
        if response is None:
            raise ExtractorError(u'Invalid URL protocol')
-        new_url = response.geturl()
-
-        if url == new_url:
-            return False
-
-        self.report_following_redirect(new_url)
-        return new_url
+        return response

    def _real_extract(self, url):
        parsed_url = compat_urlparse.urlparse(url)
        if not parsed_url.scheme:
            self._downloader.report_warning('The url doesn\'t specify the protocol, trying with http')
            return self.url_result('http://' + url)
+        video_id = os.path.splitext(url.split('/')[-1])[0]

        try:
-            new_url = self._test_redirect(url)
-            if new_url:
-                return [self.url_result(new_url)]
+            response = self._send_head(url)
+
+            # Check for redirect
+            new_url = response.geturl()
+            if url != new_url:
+                self.report_following_redirect(new_url)
+                return self.url_result(new_url)
+
+            # Check for direct link to a video
+            content_type = response.headers.get('Content-Type', '')
+            m = re.match(r'^(?P<type>audio|video|application(?=/ogg$))/(?P<format_id>.+)$', content_type)
+            if m:
+                upload_date = response.headers.get('Last-Modified')
+                if upload_date:
+                    upload_date = unified_strdate(upload_date)
+                return {
+                    'id': video_id,
+                    'title': os.path.splitext(url_basename(url))[0],
+                    'formats': [{
+                        'format_id': m.group('format_id'),
+                        'url': url,
+                        'vcodec': u'none' if m.group('type') == 'audio' else None
+                    }],
+                    'upload_date': upload_date,
+                }
+
        except compat_urllib_error.HTTPError:
            # This may be a stupid server that doesn't like HEAD, our UA, or so
            pass

-        video_id = url.split('/')[-1]
        try:
            webpage = self._download_webpage(url, video_id)
        except ValueError:
@@ -192,8 +231,11 @@ class GenericIE(InfoExtractor):
            return self.url_result(surl, 'Vimeo')

        # Look for embedded YouTube player
-        matches = re.findall(
-            r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?youtube\.com/embed/.+?)\1', webpage)
+        matches = re.findall(r'''(?x)
+            (?:<iframe[^>]+?src=|embedSWF\(\s*)
+            (["\'])(?P<url>(?:https?:)?//(?:www\.)?youtube\.com/
+                (?:embed|v)/.+?)
+            \1''', webpage)
        if matches:
            urlrs = [self.url_result(unescapeHTML(tuppl[1]), 'Youtube')
                     for tuppl in matches]
@@ -222,6 +264,18 @@ class GenericIE(InfoExtractor):
                'id': video_id,
            }

+        # Look for embedded blip.tv player
+        mobj = re.search(r'<meta\s[^>]*https?://api.blip.tv/\w+/redirect/\w+/(\d+)', webpage)
+        if mobj:
+            return self.url_result('http://blip.tv/seo/-'+mobj.group(1), 'BlipTV')
+        mobj = re.search(r'<(?:iframe|embed|object)\s[^>]*https?://(?:\w+\.)?blip.tv/(?:play/|api\.swf#)([a-zA-Z0-9]+)', webpage)
+        if mobj:
+            player_url = 'http://blip.tv/play/%s.x?p=1' % mobj.group(1)
+            player_page = self._download_webpage(player_url, mobj.group(1))
+            blip_video_id = self._search_regex(r'data-episode-id="(\d+)', player_page, u'blip_video_id', fatal=False)
+            if blip_video_id:
+                return self.url_result('http://blip.tv/seo/-'+blip_video_id, 'BlipTV')
+
        # Look for Bandcamp pages with custom domain
        mobj = re.search(r'<meta property="og:url"[^>]*?content="(.*?bandcamp\.com.*?)"', webpage)
        if mobj is not None:
@@ -229,6 +283,22 @@ class GenericIE(InfoExtractor):
            # Don't set the extractor because it can be a track url or an album
            return self.url_result(burl)

+        # Look for embedded Vevo player
+        mobj = re.search(
+            r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:cache\.)?vevo\.com/.+?)\1', webpage)
+        if mobj is not None:
+            return self.url_result(mobj.group('url'))
+
+        # Look for Ooyala videos
+        mobj = re.search(r'player.ooyala.com/[^"?]+\?[^"]*?(?:embedCode|ec)=([^"&]+)', webpage)
+        if mobj is not None:
+            return OoyalaIE._build_url_result(mobj.group(1))
+
+        # Look for Aparat videos
+        mobj = re.search(r'<iframe src="(http://www.aparat.com/video/[^"]+)"', webpage)
+        if mobj is not None:
+            return self.url_result(mobj.group(1), 'Aparat')
+
        # Start with something easy: JW Player in SWFObject
        mobj = re.search(r'flashvars: [\'"](?:.*&)?file=(http[^\'"&]*)', webpage)
        if mobj is None:
--- a/youtube_dl/extractor/imdb.py
+++ b/youtube_dl/extractor/imdb.py
@@ -11,7 +11,7 @@ from ..utils import (
 class ImdbIE(InfoExtractor):
    IE_NAME = u'imdb'
    IE_DESC = u'Internet Movie Database trailers'
-    _VALID_URL = r'http://www\.imdb\.com/video/imdb/vi(?P<id>\d+)'
+    _VALID_URL = r'http://(?:www|m)\.imdb\.com/video/imdb/vi(?P<id>\d+)'

    _TEST = {
        u'url': u'http://www.imdb.com/video/imdb/vi2524815897',
@@ -27,7 +27,7 @@ class ImdbIE(InfoExtractor):
    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')
-        webpage = self._download_webpage(url,video_id)
+        webpage = self._download_webpage('http://www.imdb.com/video/imdb/vi%s' % video_id, video_id)
        descr = get_element_by_attribute('itemprop', 'description', webpage)
        available_formats = re.findall(
            r'case \'(?P<f_id>.*?)\' :$\s+url = \'(?P<path>.*?)\'', webpage,
--- a/youtube_dl/extractor/mtv.py
+++ b/youtube_dl/extractor/mtv.py
@@ -93,7 +93,9 @@ class MTVServicesInfoExtractor(InfoExtractor):


 class MTVIE(MTVServicesInfoExtractor):
-    _VALID_URL = r'^https?://(?:www\.)?mtv\.com/videos/.+?/(?P<videoid>[0-9]+)/[^/]+$'
+    _VALID_URL = r'''(?x)^https?://
+        (?:(?:www\.)?mtv\.com/videos/.+?/(?P<videoid>[0-9]+)/[^/]+$|
+           m\.mtv\.com/videos/video\.rbml\?.*?id=(?P<mgid>[^&]+))'''

    _FEED_URL = 'http://www.mtv.com/player/embed/AS3/rss/'

@@ -127,16 +129,17 @@ class MTVIE(MTVServicesInfoExtractor):
    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('videoid')
-
-        webpage = self._download_webpage(url, video_id)
-
-        # Some videos come from Vevo.com
-        m_vevo = re.search(r'isVevoVideo = true;.*?vevoVideoId = "(.*?)";',
-                           webpage, re.DOTALL)
-        if m_vevo:
-            vevo_id = m_vevo.group(1);
-            self.to_screen(u'Vevo video detected: %s' % vevo_id)
-            return self.url_result('vevo:%s' % vevo_id, ie='Vevo')
-
-        uri = self._html_search_regex(r'/uri/(.*?)\?', webpage, u'uri')
+        uri = mobj.group('mgid')
+        if uri is None:
+            webpage = self._download_webpage(url, video_id)
+    
+            # Some videos come from Vevo.com
+            m_vevo = re.search(r'isVevoVideo = true;.*?vevoVideoId = "(.*?)";',
+                               webpage, re.DOTALL)
+            if m_vevo:
+                vevo_id = m_vevo.group(1);
+                self.to_screen(u'Vevo video detected: %s' % vevo_id)
+                return self.url_result('vevo:%s' % vevo_id, ie='Vevo')
+    
+            uri = self._html_search_regex(r'/uri/(.*?)\?', webpage, u'uri')
        return self._get_videos_info(uri)
--- a/youtube_dl/extractor/ooyala.py
+++ b/youtube_dl/extractor/ooyala.py
@@ -22,6 +22,11 @@ class OoyalaIE(InfoExtractor):
    def _url_for_embed_code(embed_code):
        return 'http://player.ooyala.com/player.js?embedCode=%s' % embed_code

+    @classmethod
+    def _build_url_result(cls, embed_code):
+        return cls.url_result(cls._url_for_embed_code(embed_code),
+            ie=cls.ie_key())
+
    def _extract_result(self, info, more_info):
        return {'id': info['embedCode'],
                'ext': 'mp4',
--- a/youtube_dl/extractor/radiofrance.py
+++ b/youtube_dl/extractor/radiofrance.py
@@ -0,0 +1,55 @@
+# coding: utf-8
+import re
+
+from .common import InfoExtractor
+
+
+class RadioFranceIE(InfoExtractor):
+    _VALID_URL = r'^https?://maison\.radiofrance\.fr/radiovisions/(?P<id>[^?#]+)'
+    IE_NAME = u'radiofrance'
+
+    _TEST = {
+        u'url': u'http://maison.radiofrance.fr/radiovisions/one-one',
+        u'file': u'one-one.ogg',
+        u'md5': u'bdbb28ace95ed0e04faab32ba3160daf',
+        u'info_dict': {
+            u"title": u"One to one",
+            u"description": u"Plutôt que d'imaginer la radio de demain comme technologie ou comme création de contenu, je veux montrer que quelles que soient ses évolutions, j'ai l'intime conviction que la radio continuera d'être un grand média de proximité pour les auditeurs.",
+            u"uploader": u"Thomas Hercouët",
+        },
+    }
+
+    def _real_extract(self, url):
+        m = re.match(self._VALID_URL, url)
+        video_id = m.group('id')
+
+        webpage = self._download_webpage(url, video_id)
+        title = self._html_search_regex(r'<h1>(.*?)</h1>', webpage, u'title')
+        description = self._html_search_regex(
+            r'<div class="bloc_page_wrapper"><div class="text">(.*?)</div>',
+            webpage, u'description', fatal=False)
+        uploader = self._html_search_regex(
+            r'<div class="credit">&nbsp;&nbsp;&copy;&nbsp;(.*?)</div>',
+            webpage, u'uploader', fatal=False)
+
+        formats_str = self._html_search_regex(
+            r'class="jp-jplayer[^"]*" data-source="([^"]+)">',
+            webpage, u'audio URLs')
+        formats = [
+            {
+                'format_id': fm[0],
+                'url': fm[1],
+                'vcodec': 'none',
+            }
+            for fm in
+            re.findall(r"([a-z0-9]+)\s*:\s*'([^']+)'", formats_str)
+        ]
+        # No sorting, we don't know any more about these formats
+
+        return {
+            'id': video_id,
+            'title': title,
+            'formats': formats,
+            'description': description,
+            'uploader': uploader,
+        }
--- a/youtube_dl/extractor/rtlnow.py
+++ b/youtube_dl/extractor/rtlnow.py
@@ -10,7 +10,7 @@ from ..utils import (

 class RTLnowIE(InfoExtractor):
    """Information Extractor for RTL NOW, RTL2 NOW, RTL NITRO, SUPER RTL NOW, VOX NOW and n-tv NOW"""
-    _VALID_URL = r'(?:http://)?(?P<url>(?P<base_url>rtl-now\.rtl\.de|rtl2now\.rtl2\.de|(?:www\.)?voxnow\.de|(?:www\.)?rtlnitronow\.de|(?:www\.)?superrtlnow\.de|(?:www\.)?n-tvnow\.de)/+[a-zA-Z0-9-]+/[a-zA-Z0-9-]+\.php\?(?:container_id|film_id)=(?P<video_id>[0-9]+)&player=1(?:&season=[0-9]+)?(?:&.*)?)'
+    _VALID_URL = r'(?:http://)?(?P<url>(?P<domain>rtl-now\.rtl\.de|rtl2now\.rtl2\.de|(?:www\.)?voxnow\.de|(?:www\.)?rtlnitronow\.de|(?:www\.)?superrtlnow\.de|(?:www\.)?n-tvnow\.de)/+[a-zA-Z0-9-]+/[a-zA-Z0-9-]+\.php\?(?:container_id|film_id)=(?P<video_id>[0-9]+)&player=1(?:&season=[0-9]+)?(?:&.*)?)'
    _TESTS = [{
        u'url': u'http://rtl-now.rtl.de/ahornallee/folge-1.php?film_id=90419&player=1&season=1',
        u'file': u'90419.flv',
@@ -82,7 +82,7 @@ class RTLnowIE(InfoExtractor):
        mobj = re.match(self._VALID_URL, url)

        webpage_url = u'http://' + mobj.group('url')
-        video_page_url = u'http://' + mobj.group('base_url')
+        video_page_url = u'http://' + mobj.group('domain') + u'/'
        video_id = mobj.group(u'video_id')

        webpage = self._download_webpage(webpage_url, video_id)
--- a/youtube_dl/extractor/smotri.py
+++ b/youtube_dl/extractor/smotri.py
@@ -202,7 +202,7 @@ class SmotriIE(InfoExtractor):
            'uploader': video_uploader,
            'upload_date': video_upload_date,
            'uploader_id': video_uploader_id,
-            'video_duration': video_duration,
+            'duration': video_duration,
            'view_count': video_view_count,
            'age_limit': 18 if adult_content else 0,
            'video_page_url': video_page_url
--- a/youtube_dl/extractor/soundcloud.py
+++ b/youtube_dl/extractor/soundcloud.py
@@ -24,7 +24,7 @@ class SoundcloudIE(InfoExtractor):
     """

    _VALID_URL = r'''^(?:https?://)?
-                    (?:(?:(?:www\.)?soundcloud\.com/
+                    (?:(?:(?:www\.|m\.)?soundcloud\.com/
                            (?P<uploader>[\w\d-]+)/
                            (?!sets/)(?P<title>[\w\d-]+)/?
                            (?P<token>[^?]+?)?(?:[?].*)?$)
--- a/youtube_dl/extractor/vevo.py
+++ b/youtube_dl/extractor/vevo.py
@@ -15,7 +15,12 @@ class VevoIE(InfoExtractor):
    Accepts urls from vevo.com or in the format 'vevo:{id}'
    (currently used by MTVIE)
    """
-    _VALID_URL = r'((http://www\.vevo\.com/watch/(?:[^/]+/[^/]+/)?)|(vevo:))(?P<id>.*?)(\?|$)'
+    _VALID_URL = r'''(?x)
+        (?:https?://www\.vevo\.com/watch/(?:[^/]+/[^/]+/)?|
+           https?://cache\.vevo\.com/m/html/embed\.html\?video=|
+           https?://videoplayer\.vevo\.com/embed/embedded\?videoId=|
+           vevo:)
+        (?P<id>[^&?#]+)'''
    _TESTS = [{
        u'url': u'http://www.vevo.com/watch/hurts/somebody-to-die-for/GB1101300280',
        u'file': u'GB1101300280.mp4',
--- a/youtube_dl/extractor/videopremium.py
+++ b/youtube_dl/extractor/videopremium.py
@@ -15,6 +15,7 @@ class VideoPremiumIE(InfoExtractor):
        u'params': {
            u'skip_download': True,
        },
+        u'skip': u'Test file has been deleted.',
    }

    def _real_extract(self, url):
--- a/youtube_dl/extractor/xtube.py
+++ b/youtube_dl/extractor/xtube.py
@@ -32,7 +32,7 @@ class XTubeIE(InfoExtractor):

        video_title = self._html_search_regex(r'<div class="p_5px[^>]*>([^<]+)', webpage, u'title')
        video_uploader = self._html_search_regex(r'so_s\.addVariable\("owner_u", "([^"]+)', webpage, u'uploader', fatal=False)
-        video_description = self._html_search_regex(r'<p class="video_description">([^<]+)', webpage, u'description', default=None)
+        video_description = self._html_search_regex(r'<p class="video_description">([^<]+)', webpage, u'description', fatal=False)
        video_url= self._html_search_regex(r'var videoMp4 = "([^"]+)', webpage, u'video_url').replace('\\/', '/')
        path = compat_urllib_parse_urlparse(video_url).path
        extension = os.path.splitext(path)[1][1:]
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@@ -1361,7 +1361,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
                video_description = u''

        def _extract_count(klass):
-            count = self._search_regex(r'class="%s">([\d,]+)</span>' % re.escape(klass), video_webpage, klass, fatal=False)
+            count = self._search_regex(
+                r'class="%s">([\d,]+)</span>' % re.escape(klass),
+                video_webpage, klass, default=None)
            if count is not None:
                return int(count.replace(',', ''))
            return None
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@@ -767,6 +767,10 @@ def unified_strdate(date_str):
            upload_date = datetime.datetime.strptime(date_str, expression).strftime('%Y%m%d')
        except:
            pass
+    if upload_date is None:
+        timetuple = email.utils.parsedate_tz(date_str)
+        if timetuple:
+            upload_date = datetime.datetime(*timetuple[:6]).strftime('%Y%m%d')
    return upload_date

 def determine_ext(url, default_ext=u'unknown_video'):
@@ -1066,13 +1070,14 @@ def fix_xml_all_ampersand(xml_str):


 def setproctitle(title):
+    assert isinstance(title, type(u''))
    try:
        libc = ctypes.cdll.LoadLibrary("libc.so.6")
    except OSError:
        return
    title = title
    buf = ctypes.create_string_buffer(len(title) + 1)
-    buf.value = title
+    buf.value = title.encode('utf-8')
    try:
        libc.prctl(15, ctypes.byref(buf), 0, 0, 0)
    except AttributeError:
@@ -1083,3 +1088,13 @@ def remove_start(s, start):
    if s.startswith(start):
        return s[len(start):]
    return s
+
+
+def url_basename(url):
+    path = compat_urlparse.urlparse(url).path
+    return path.strip(u'/').split(u'/')[-1]
+
+
+class HEADRequest(compat_urllib_request.Request):
+    def get_method(self):
+        return "HEAD"
--- a/youtube_dl/version.py
+++ b/youtube_dl/version.py
@@ -1,2 +1,2 @@

-__version__ = '2013.12.16.2'
+__version__ = '2013.12.20'
Author	SHA1	Message	Date
Philipp Hagemeister	f65c1d2be0	release 2013.12.20	2013-12-20 17:08:16 +01:00
Philipp Hagemeister	aa94a6d315	[aparat] Add support (Fixes #2012 )	2013-12-20 17:05:39 +01:00
Jaime Marquínez Ferrándiz	768df74538	[blinkxx] Add support for youtube videos	2013-12-19 21:02:25 +01:00
Philipp Hagemeister	1f9da9049b	[generic] Support YouTube swf embed (Fixes #2010 )	2013-12-19 20:44:30 +01:00
Jaime Marquínez Ferrándiz	c0d0b01f0e	[generic] Detect ooyala videos (fixes #2013 )	2013-12-19 20:32:12 +01:00
Philipp Hagemeister	7c86a5b864	Merge pull request #2011 from dstftw/master [imdb] Add support for mobile site URLs	2013-12-19 11:28:34 -08:00
dst	97e302a419	[imdb] Add support for mobile site URLs	2013-12-20 00:21:04 +07:00
Philipp Hagemeister	71507a11c8	[soundcloud] Support mobile URLs (Fixes #2009 )	2013-12-19 16:39:01 +01:00
Jaime Marquínez Ferrándiz	1fb8f09273	Merge pull request #2006 from dstftw/master [smotri] Fix duration field name	2013-12-18 15:40:40 -08:00
dst	0cc83dc54b	[smotri] Fix duration field name	2013-12-19 05:56:48 +07:00
Philipp Hagemeister	3e78514568	[generic] Support application/ogg for direct links Also remove some debugging code.	2013-12-17 16:26:34 +01:00
Philipp Hagemeister	e029b8bd43	[utils] Remove duplicated line This line was added by accident in `42393ce234`	2013-12-17 16:12:20 +01:00
Philipp Hagemeister	f5567e401c	Merge pull request #1997 from rg3/simplify-url_basename Simplify url_basename	2013-12-17 07:08:48 -08:00
Jaime Marquínez Ferrándiz	9b8aaeed85	Simplify url_basename Use urlparse from the standard library.	2013-12-17 14:56:29 +01:00
Philipp Hagemeister	6086d121cb	release 2013.12.17.2	2013-12-17 12:35:57 +01:00
Philipp Hagemeister	7de6e075b4	[radiofrance] remove unused imports	2013-12-17 12:35:16 +01:00
Philipp Hagemeister	946135aa2a	[academicearth] remove unused imports	2013-12-17 12:34:30 +01:00
Philipp Hagemeister	42393ce234	Add support for direct links to a video (#1973 )	2013-12-17 12:33:55 +01:00
Philipp Hagemeister	d6c7a367e8	[utils] Fix url_basename	2013-12-17 12:32:58 +01:00
Philipp Hagemeister	cecaaf3f58	[generic] Do not use compatibility result fallback	2013-12-17 12:04:33 +01:00
Philipp Hagemeister	f09828b4e1	release 2013.12.17.1	2013-12-17 04:13:41 +01:00
Philipp Hagemeister	29eb517403	Add webpage_url_basename info_dict field (Fixes #1938 )	2013-12-17 04:13:36 +01:00
Philipp Hagemeister	44c471c3b8	release 2013.12.17	2013-12-17 02:51:22 +01:00
Philipp Hagemeister	46374a56b2	[youtube] Do not warn for videos with allow_rating=0 This fixes #1982 Test video: http://www.youtube.com/watch?v=gi2uH3YxohU	2013-12-17 02:49:56 +01:00
Philipp Hagemeister	ec98946ef9	[academicearth] Support playlists (Closes #1976 )	2013-12-17 02:41:34 +01:00
Philipp Hagemeister	fa77b742ac	[radiofrance] Fill in test details	2013-12-16 23:07:57 +01:00
Philipp Hagemeister	8b4e274610	[rtlnow] Fix URL calculation (Closes #1989 )	2013-12-16 22:28:52 +01:00
Philipp Hagemeister	d6756d3758	[playlist-test] require a string	2013-12-16 22:25:02 +01:00
Philipp Hagemeister	11b68f6e1b	release 2013.12.16.7	2013-12-16 22:18:58 +01:00
Philipp Hagemeister	88bb52ee18	Merge branch 'master' of github.com:rg3/youtube-dl	2013-12-16 22:18:37 +01:00
Philipp Hagemeister	d90df974c3	[academicearth] Add support for courses (#1976 )	2013-12-16 22:18:27 +01:00
Jaime Marquínez Ferrándiz	5c541b2cb7	[mtv] Add support for urls from the mobile site (fixes #1959 )	2013-12-16 22:05:28 +01:00
Itay Brandes	87a28127d2	_search_regex's "isatty" call fails with Py2exe's _search_regex calls the sys.stderr.isatty() function for unix systems. Py2exe uses a custom Stderr() stream which doesn't have an `isatty()` function, leading to it's crash. Fixes easily with checking that it's a unix system first.	2013-12-16 21:50:26 +01:00
Philipp Hagemeister	ebce53b3d8	[vevo] Add suppor for videoplayer. URLs (#1957 )	2013-12-16 21:48:38 +01:00
Philipp Hagemeister	83c632dc43	release 2013.12.16.6	2013-12-16 21:46:16 +01:00
Philipp Hagemeister	ff07a05575	Merge branch 'master' of github.com:rg3/youtube-dl	2013-12-16 21:46:11 +01:00
Philipp Hagemeister	f25571ffbf	Add support for embedded vevo player (Fixes #1957 )	2013-12-16 21:45:21 +01:00
Jaime Marquínez Ferrándiz	f7a6892572	[arte:ddc] Remove test video seems to expire in 7 days, as arte+7	2013-12-16 21:42:41 +01:00
Philipp Hagemeister	8fe56478f8	release 2013.12.16.5	2013-12-16 21:34:47 +01:00
Philipp Hagemeister	0e2a436dce	[radiofrance] Add support (Fixes #1942 )	2013-12-16 21:34:41 +01:00
Philipp Hagemeister	24050dd11c	release 2013.12.16.4	2013-12-16 21:10:18 +01:00
Philipp Hagemeister	8c8e3eec79	[facebook] Recognize #! URLs (Fixes #1988 )	2013-12-16 21:10:06 +01:00
Philipp Hagemeister	7ebc9dee69	Merge pull request #1987 from rzhxeo/blip [GenericIE] Add support for embedded blip.tv	2013-12-16 11:28:34 -08:00
rzhxeo	ee3e63e477	[GenericIE] Add support for embedded blip.tv	2013-12-16 20:08:23 +01:00
Philipp Hagemeister	e9c424c144	Merge pull request #1984 from alimirjamali/patch-1 Incorrect variable is used to check whether thumbnail exists	2013-12-16 09:04:36 -08:00
alimirjamali	0a9ce268ba	Incorrect variable is used to check whether thumbnail exists Dear @phihag I believe in line 848, the correct variable to check is 'thumb_filename' rather than 'infofn' Kindly advise Mit freundlichen Gruessen Ali	2013-12-16 20:14:28 +03:30
Philipp Hagemeister	4b2da48ea7	release 2013.12.16.3	2013-12-16 14:44:29 +01:00
Philipp Hagemeister	e64eaaa97d	Fix execution under Python 3	2013-12-16 14:44:17 +01:00
Philipp Hagemeister	780603027f	[videopremium] Skip test	2013-12-16 14:42:07 +01:00