release 2013.07.24

YoutubeIE: new algo for length 87 (fixes #1105 )
Squashed commit from the pull requests #1107, #1109 and #1110.
2013-07-24 10:29:34 +02:00 · 2013-07-24 10:20:52 +02:00 · 2013-07-23 18:37:52 +02:00 · 2013-07-23 18:37:09 +02:00 · 2013-07-23 18:35:52 +02:00 · 2013-07-23 14:58:01 +02:00
21 changed files with 300 additions and 69 deletions
--- a/.travis.yml
+++ b/.travis.yml
@@ -9,6 +9,7 @@ notifications:
    - filippo.valsorda@gmail.com
    - phihag@phihag.de
    - jaime.marquinez.ferrandiz+travis@gmail.com
+    - yasoob.khld@gmail.com
 #  irc:
 #    channels:
 #      - "irc.freenode.org#youtube-dl"
--- a/README.md
+++ b/README.md
@@ -16,7 +16,9 @@ which means you can modify it, redistribute it or use it however you like.
 # OPTIONS
    -h, --help                 print this help text and exit
    --version                  print program version and exit
-    -U, --update               update this program to latest version
+    -U, --update               update this program to latest version. Make sure
+                               that you have sufficient permissions (run with
+                               sudo if needed)
    -i, --ignore-errors        continue on download errors
    --dump-user-agent          display the current browser identification
    --user-agent UA            specify a custom user agent
--- a/devscripts/youtube_genalgo.py
+++ b/devscripts/youtube_genalgo.py
@@ -16,13 +16,13 @@ tests = [
     "J:|}][{=+-_)(*&;%$#@>MNBVCXZASDFGH^KLPOIUYTREWQ0987654321mnbvcxzasdfghrklpoiuytej"),
    # 87
    ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$^&*()_-+={[]}|:;?/>.<",
-     "!?;:|}][{=+-_)(*&^$#@/MNBVCXZASqFGHJKLPOIUYTREWQ0987654321mnbvcxzasdfghjklpoiuytr"),
+     "tyuioplkjhgfdsazxcv<nm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$^&*()_-+={[]}|:;?/>"),
    # 86 - vfl_ymO4Z 2013/06/27
    ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[|};?/>.<",
     "ertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!/#$%^&*()_-+={[|};?@"),
-    # 85
+    # 85 - vflSAFCP9 2013/07/19
    ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[};?/>.<",
-     "{>/?;}[.=+-_)(*&^%$#@!MqBVCXZASDFwHJKLPOIUYTREWQ0987654321mnbvcxzasdfghjklpoiuytr"),
+     "ertyuiqplkjhgfdsazx$vbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#<%^&*()_-+={[};?/c"),
    # 84
    ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[};?>.<",
     "<.>?;}[{=+-_)(*&^%$#@!MNBVCXZASDFGHJKLPOIUYTREWe098765432rmnbvcxzasdfghjklpoiuyt1"),
--- a/test/test_youtube_sig.py
+++ b/test/test_youtube_sig.py
@@ -30,7 +30,7 @@ class TestYoutubeSig(unittest.TestCase):

    def test_87(self):
        wrong = "qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$^&*()_-+={[]}|:;?/>.<"
-        right = "!?;:|}][{=+-_)(*&^$#@/MNBVCXZASqFGHJKLPOIUYTREWQ0987654321mnbvcxzasdfghjklpoiuytr"
+        right = "tyuioplkjhgfdsazxcv<nm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$^&*()_-+={[]}|:;?/>"
        self.assertEqual(sig(wrong), right)

    def test_86(self):
@@ -40,7 +40,7 @@ class TestYoutubeSig(unittest.TestCase):

    def test_85(self):
        wrong = "qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[};?/>.<"
-        right = "{>/?;}[.=+-_)(*&^%$#@!MqBVCXZASDFwHJKLPOIUYTREWQ0987654321mnbvcxzasdfghjklpoiuytr"
+        right = "ertyuiqplkjhgfdsazx$vbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#<%^&*()_-+={[};?/c"
        self.assertEqual(sig(wrong), right)

    def test_84(self):
--- a/youtube_dl/init.py
+++ b/youtube_dl/init.py
@@ -129,7 +129,7 @@ def parseOpts(overrideArguments=None):
    general.add_option('-v', '--version',
            action='version', help='print program version and exit')
    general.add_option('-U', '--update',
-            action='store_true', dest='update_self', help='update this program to latest version')
+            action='store_true', dest='update_self', help='update this program to latest version. Make sure that you have sufficient permissions (run with sudo if needed)')
    general.add_option('-i', '--ignore-errors',
            action='store_true', dest='ignoreerrors', help='continue on download errors', default=False)
    general.add_option('--dump-user-agent',
--- a/youtube_dl/extractor/init.py
+++ b/youtube_dl/extractor/init.py
@@ -19,6 +19,7 @@ from .dreisat import DreiSatIE
 from .ehow import EHowIE
 from .eighttracks import EightTracksIE
 from .escapist import EscapistIE
+from .exfm import ExfmIE
 from .facebook import FacebookIE
 from .flickr import FlickrIE
 from .freesound import FreesoundIE
@@ -51,6 +52,7 @@ from .pornotube import PornotubeIE
 from .rbmaradio import RBMARadioIE
 from .redtube import RedTubeIE
 from .ringtv import RingTVIE
+from .sina import SinaIE
 from .soundcloud import SoundcloudIE, SoundcloudSetIE
 from .spiegel import SpiegelIE
 from .stanfordoc import StanfordOpenClassroomIE
@@ -70,7 +72,9 @@ from .veoh import VeohIE
 from .vevo import VevoIE
 from .vimeo import VimeoIE
 from .vine import VineIE
+from .c56 import C56IE
 from .wat import WatIE
+from .weibo import WeiboIE
 from .wimp import WimpIE
 from .worldstarhiphop import WorldStarHipHopIE
 from .xhamster import XHamsterIE
@@ -88,6 +92,7 @@ from .youtube import (
    YoutubeChannelIE,
    YoutubeShowIE,
    YoutubeSubscriptionsIE,
+    YoutubeRecommendedIE,
 )
 from .zdf import ZDFIE

--- a/youtube_dl/extractor/breakcom.py
+++ b/youtube_dl/extractor/breakcom.py
@@ -1,6 +1,8 @@
 import re
+import json

 from .common import InfoExtractor
+from ..utils import determine_ext


 class BreakIE(InfoExtractor):
@@ -17,17 +19,20 @@ class BreakIE(InfoExtractor):
    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group(1).split("-")[-1]
-        webpage = self._download_webpage(url, video_id)
-        video_url = re.search(r"videoPath: '(.+?)',",webpage).group(1)
-        key = re.search(r"icon: '(.+?)',",webpage).group(1)
-        final_url = str(video_url)+"?"+str(key)
-        thumbnail_url = re.search(r"thumbnailURL: '(.+?)'",webpage).group(1)
-        title = re.search(r"sVidTitle: '(.+)',",webpage).group(1)
-        ext = video_url.split('.')[-1]
+        embed_url = 'http://www.break.com/embed/%s' % video_id
+        webpage = self._download_webpage(embed_url, video_id)
+        info_json = self._search_regex(r'var embedVars = ({.*?});', webpage,
+                                       u'info json', flags=re.DOTALL)
+        info = json.loads(info_json)
+        video_url = info['videoUri']
+        m_youtube = re.search(r'(https?://www\.youtube\.com/watch\?v=.*)', video_url)
+        if m_youtube is not None:
+            return self.url_result(m_youtube.group(1), 'Youtube')
+        final_url = video_url + '?' + info['AuthToken']
        return [{
            'id':        video_id,
            'url':       final_url,
-            'ext':       ext,
-            'title':     title,
-            'thumbnail': thumbnail_url,
+            'ext':       determine_ext(final_url),
+            'title':     info['contentName'],
+            'thumbnail': info['thumbUri'],
        }]
--- a/youtube_dl/extractor/c56.py
+++ b/youtube_dl/extractor/c56.py
@@ -0,0 +1,36 @@
+# coding: utf-8
+
+import re
+import json
+
+from .common import InfoExtractor
+from ..utils import determine_ext
+
+class C56IE(InfoExtractor):
+    _VALID_URL = r'https?://((www|player)\.)?56\.com/(.+?/)?(v_|(play_album.+-))(?P<textid>.+?)\.(html|swf)'
+    IE_NAME = u'56.com'
+
+    _TEST ={
+        u'url': u'http://www.56.com/u39/v_OTM0NDA3MTY.html',
+        u'file': u'93440716.mp4',
+        u'md5': u'9dc07b5c8e978112a6441f9e75d2b59e',
+        u'info_dict': {
+            u'title': u'网事知多少 第32期：车怒',
+        },
+    }
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url, flags=re.VERBOSE)
+        text_id = mobj.group('textid')
+        info_page = self._download_webpage('http://vxml.56.com/json/%s/' % text_id,
+                                           text_id, u'Downloading video info')
+        info = json.loads(info_page)['info']
+        best_format = sorted(info['rfiles'], key=lambda f: int(f['filesize']))[-1]
+        video_url = best_format['url']
+
+        return {'id': info['vid'],
+                'title': info['Subject'],
+                'url': video_url,
+                'ext': determine_ext(video_url),
+                'thumbnail': info.get('bimg') or info.get('img'),
+                }
--- a/youtube_dl/extractor/collegehumor.py
+++ b/youtube_dl/extractor/collegehumor.py
@@ -1,26 +1,26 @@
 import re
-import socket
 import xml.etree.ElementTree

 from .common import InfoExtractor
 from ..utils import (
-    compat_http_client,
-    compat_str,
-    compat_urllib_error,
    compat_urllib_parse_urlparse,
-    compat_urllib_request,

    ExtractorError,
 )


 class CollegeHumorIE(InfoExtractor):
-    _WORKING = False
-    _VALID_URL = r'^(?:https?://)?(?:www\.)?collegehumor\.com/video/(?P<videoid>[0-9]+)/(?P<shorttitle>.*)$'
+    _VALID_URL = r'^(?:https?://)?(?:www\.)?collegehumor\.com/(video|embed)/(?P<videoid>[0-9]+)/(?P<shorttitle>.*)$'

-    def report_manifest(self, video_id):
-        """Report information extraction."""
-        self.to_screen(u'%s: Downloading XML manifest' % video_id)
+    _TEST = {
+        u'url': u'http://www.collegehumor.com/video/6902724/comic-con-cosplay-catastrophe',
+        u'file': u'6902724.mp4',
+        u'md5': u'1264c12ad95dca142a9f0bf7968105a0',
+        u'info_dict': {
+            u'title': u'Comic-Con Cosplay Catastrophe',
+            u'description': u'Fans get creative this year at San Diego.  Too creative.  And yes, that\'s really Joss Whedon.',
+        },
+    }

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
@@ -36,14 +36,16 @@ class CollegeHumorIE(InfoExtractor):

        self.report_extraction(video_id)
        xmlUrl = 'http://www.collegehumor.com/moogaloop/video/' + video_id
-        try:
-            metaXml = compat_urllib_request.urlopen(xmlUrl).read()
-        except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
-            raise ExtractorError(u'Unable to download video info XML: %s' % compat_str(err))
+        metaXml = self._download_webpage(xmlUrl, video_id,
+                                         u'Downloading info XML',
+                                         u'Unable to download video info XML')

        mdoc = xml.etree.ElementTree.fromstring(metaXml)
        try:
            videoNode = mdoc.findall('./video')[0]
+            youtubeIdNode = videoNode.find('./youtubeID')
+            if youtubeIdNode is not None:
+                return self.url_result(youtubeIdNode.text, 'Youtube')
            info['description'] = videoNode.findall('./description')[0].text
            info['title'] = videoNode.findall('./caption')[0].text
            info['thumbnail'] = videoNode.findall('./thumbnail')[0].text
@@ -52,11 +54,9 @@ class CollegeHumorIE(InfoExtractor):
            raise ExtractorError(u'Invalid metadata XML file')

        manifest_url += '?hdcore=2.10.3'
-        self.report_manifest(video_id)
-        try:
-            manifestXml = compat_urllib_request.urlopen(manifest_url).read()
-        except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
-            raise ExtractorError(u'Unable to download video info XML: %s' % compat_str(err))
+        manifestXml = self._download_webpage(manifest_url, video_id,
+                                             u'Downloading XML manifest',
+                                             u'Unable to download video info XML')

        adoc = xml.etree.ElementTree.fromstring(manifestXml)
        try:
@@ -66,9 +66,8 @@ class CollegeHumorIE(InfoExtractor):
        except IndexError as err:
            raise ExtractorError(u'Invalid manifest file')

-        url_pr = compat_urllib_parse_urlparse(manifest_url)
-        url = url_pr.scheme + '://' + url_pr.netloc + '/z' + video_id[:-2] + '/' + node_id + 'Seg1-Frag1'
+        url_pr = compat_urllib_parse_urlparse(info['thumbnail'])

-        info['url'] = url
-        info['ext'] = 'f4f'
+        info['url'] = url_pr.scheme + '://' + url_pr.netloc + video_id[:-2].replace('.csmil','').replace(',','')
+        info['ext'] = 'mp4'
        return [info]
--- a/youtube_dl/extractor/comedycentral.py
+++ b/youtube_dl/extractor/comedycentral.py
@@ -24,7 +24,9 @@ class ComedyCentralIE(InfoExtractor):
                         (full-episodes/(?P<episode>.*)|
                          (?P<clip>
                              (the-colbert-report-(videos|collections)/(?P<clipID>[0-9]+)/[^/]*/(?P<cntitle>.*?))
-                              |(watch/(?P<date>[^/]*)/(?P<tdstitle>.*)))))
+                              |(watch/(?P<date>[^/]*)/(?P<tdstitle>.*)))|
+                          (?P<interview>
+                              extended-interviews/(?P<interID>[0-9]+)/playlist_tds_extended_(?P<interview_title>.*?)/.*?)))
                     $"""
    _TEST = {
        u'url': u'http://www.thedailyshow.com/watch/thu-december-13-2012/kristen-stewart',
@@ -87,6 +89,9 @@ class ComedyCentralIE(InfoExtractor):
            else:
                epTitle = mobj.group('cntitle')
            dlNewest = False
+        elif mobj.group('interview'):
+            epTitle = mobj.group('interview_title')
+            dlNewest = False
        else:
            dlNewest = not mobj.group('episode')
            if dlNewest:
--- a/youtube_dl/extractor/exfm.py
+++ b/youtube_dl/extractor/exfm.py
@@ -0,0 +1,42 @@
+import re
+import json
+
+from .common import InfoExtractor
+
+
+class ExfmIE(InfoExtractor):
+    IE_NAME = u'exfm'
+    IE_DESC = u'ex.fm'
+    _VALID_URL = r'(?:http://)?(?:www\.)?ex\.fm/song/([^/]+)'
+    _SOUNDCLOUD_URL_ = r'(?:http://)?(?:www\.)?api\.soundcloud.com/tracks/([^/]+)/stream'
+    _TEST = {
+        u'url': u'http://ex.fm/song/1bgtzg',
+        u'file': u'1bgtzg.mp3',
+        u'md5': u'8a7967a3fef10e59a1d6f86240fd41cf',
+        u'info_dict': {
+            u"title": u"We Can't Stop",
+            u"uploader": u"Miley Cyrus",
+            u'thumbnail': u'http://i1.sndcdn.com/artworks-000049666230-w9i7ef-t500x500.jpg?9d68d37'
+        }
+    }
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        song_id = mobj.group(1)
+        info_url = "http://ex.fm/api/v3/song/%s" %(song_id)
+        webpage = self._download_webpage(info_url, song_id)
+        info = json.loads(webpage)
+        song_url = re.match(self._SOUNDCLOUD_URL_,info['song']['url'])
+        if song_url is not None:
+        	song_url = song_url.group() + "?client_id=b45b1aa10f1ac2941910a7f0d10f8e28"
+        else:
+        	song_url = info['song']['url']
+        return [{
+            'id':          song_id,
+            'url':         song_url,
+            'ext':         'mp3',
+            'title':       info['song']['title'],
+            'thumbnail':   info['song']['image']['large'],
+            'uploader':    info['song']['artist'],
+            'view_count':  info['song']['loved_count'],
+        }]
--- a/youtube_dl/extractor/instagram.py
+++ b/youtube_dl/extractor/instagram.py
@@ -10,7 +10,8 @@ class InstagramIE(InfoExtractor):
        u'md5': u'0d2da106a9d2631273e192b372806516',
        u'info_dict': {
            u"uploader_id": u"naomipq", 
-            u"title": u"Video by naomipq"
+            u"title": u"Video by naomipq",
+            u'description': u'md5:1f17f0ab29bd6fe2bfad705f58de3cb8',
        }
    }

@@ -18,20 +19,17 @@ class InstagramIE(InfoExtractor):
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group(1)
        webpage = self._download_webpage(url, video_id)
-        html_title = self._html_search_regex(
-            r'<title>(.+?)</title>',
-            webpage, u'title', flags=re.DOTALL)
-        title = re.sub(u'(?: *\(Videos?\))? \u2022 Instagram$', '', html_title).strip()
-        uploader_id = self._html_search_regex(
-            r'<div class="media-user" id="media_user">.*?<h2><a href="[^"]*">([^<]*)</a></h2>',
-            webpage, u'uploader id', fatal=False, flags=re.DOTALL)
-        ext = 'mp4'
+        uploader_id = self._search_regex(r'"owner":{"username":"(.+?)"',
+            webpage, u'uploader id', fatal=False)
+        desc = self._search_regex(r'"caption":"(.*?)"', webpage, u'description',
+            fatal=False)

        return [{
            'id':        video_id,
            'url':       self._og_search_video_url(webpage),
-            'ext':       ext,
-            'title':     title,
+            'ext':       'mp4',
+            'title':     u'Video by %s' % uploader_id,
            'thumbnail': self._og_search_thumbnail(webpage),
-            'uploader_id' : uploader_id
+            'uploader_id' : uploader_id,
+            'description': desc,
        }]
--- a/youtube_dl/extractor/sina.py
+++ b/youtube_dl/extractor/sina.py
@@ -0,0 +1,67 @@
+# coding: utf-8
+
+import re
+import xml.etree.ElementTree
+
+from .common import InfoExtractor
+from ..utils import (
+    compat_urllib_request,
+    compat_urllib_parse,
+)
+
+
+class SinaIE(InfoExtractor):
+    _VALID_URL = r'''https?://(.*?\.)?video\.sina\.com\.cn/
+                        (
+                            (.+?/(((?P<pseudo_id>\d+).html)|(.*?(\#|(vid=))(?P<id>\d+?)($|&))))
+                            |
+                            # This is used by external sites like Weibo
+                            (api/sinawebApi/outplay.php/(?P<token>.+?)\.swf)
+                        )
+                  '''
+
+    _TEST = {
+        u'url': u'http://video.sina.com.cn/news/vlist/zt/chczlj2013/?opsubject_id=top12#110028898',
+        u'file': u'110028898.flv',
+        u'md5': u'd65dd22ddcf44e38ce2bf58a10c3e71f',
+        u'info_dict': {
+            u'title': u'《中国新闻》 朝鲜要求巴拿马立即释放被扣船员',
+        }
+    }
+
+    @classmethod
+    def suitable(cls, url):
+        return re.match(cls._VALID_URL, url, flags=re.VERBOSE) is not None
+
+    def _extract_video(self, video_id):
+        data = compat_urllib_parse.urlencode({'vid': video_id})
+        url_page = self._download_webpage('http://v.iask.com/v_play.php?%s' % data,
+            video_id, u'Downloading video url')
+        image_page = self._download_webpage(
+            'http://interface.video.sina.com.cn/interface/common/getVideoImage.php?%s' % data,
+            video_id, u'Downloading thumbnail info')
+        url_doc = xml.etree.ElementTree.fromstring(url_page.encode('utf-8'))
+
+        return {'id': video_id,
+                'url': url_doc.find('./durl/url').text,
+                'ext': 'flv',
+                'title': url_doc.find('./vname').text,
+                'thumbnail': image_page.split('=')[1],
+                }
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url, flags=re.VERBOSE)
+        video_id = mobj.group('id')
+        if mobj.group('token') is not None:
+            # The video id is in the redirected url
+            self.to_screen(u'Getting video id')
+            request = compat_urllib_request.Request(url)
+            request.get_method = lambda: 'HEAD'
+            (_, urlh) = self._download_webpage_handle(request, 'NA', False)
+            return self._real_extract(urlh.geturl())
+        elif video_id is None:
+            pseudo_id = mobj.group('pseudo_id')
+            webpage = self._download_webpage(url, pseudo_id)
+            video_id = self._search_regex(r'vid:\'(\d+?)\'', webpage, u'video id')
+
+        return self._extract_video(video_id)
--- a/youtube_dl/extractor/soundcloud.py
+++ b/youtube_dl/extractor/soundcloud.py
@@ -19,7 +19,7 @@ class SoundcloudIE(InfoExtractor):
       of the stream token and uid
     """

-    _VALID_URL = r'^(?:https?://)?(?:www\.)?soundcloud\.com/([\w\d-]+)/([\w\d-]+)(?:[?].*)?$'
+    _VALID_URL = r'^(?:https?://)?(?:www\.)?soundcloud\.com/([\w\d-]+)/([\w\d-]+)/?(?:[?].*)?$'
    IE_NAME = u'soundcloud'
    _TEST = {
        u'url': u'http://soundcloud.com/ethmusic/lostin-powers-she-so-heavy',
--- a/youtube_dl/extractor/ted.py
+++ b/youtube_dl/extractor/ted.py
@@ -67,7 +67,7 @@ class TEDIE(InfoExtractor):
        webpage = self._download_webpage(url, video_id, 'Downloading \"%s\" page' % video_name)
        self.report_extraction(video_name)
        # If the url includes the language we get the title translated
-        title = self._html_search_regex(r'<span id="altHeadline" >(?P<title>.*)</span>',
+        title = self._html_search_regex(r'<span .*?id="altHeadline".+?>(?P<title>.*)</span>',
                                        webpage, 'title')
        json_data = self._search_regex(r'<script.*?>var talkDetails = ({.*?})</script>',
                                    webpage, 'json data')
--- a/youtube_dl/extractor/tf1.py
+++ b/youtube_dl/extractor/tf1.py
@@ -10,6 +10,7 @@ class TF1IE(InfoExtractor):
    TF1 uses the wat.tv player, currently it can only download videos with the
    html5 player enabled, it cannot download HD videos.
    """
+    _WORKING = False
    _VALID_URL = r'http://videos.tf1.fr/.*-(.*?).html'
    _TEST = {
        u'url': u'http://videos.tf1.fr/auto-moto/citroen-grand-c4-picasso-2013-presentation-officielle-8062060.html',
--- a/youtube_dl/extractor/wat.py
+++ b/youtube_dl/extractor/wat.py
@@ -12,6 +12,7 @@ from ..utils import (


 class WatIE(InfoExtractor):
+    _WORKING = False
    _VALID_URL=r'http://www.wat.tv/.*-(?P<shortID>.*?)_.*?.html'
    IE_NAME = 'wat.tv'
    _TEST = {
--- a/youtube_dl/extractor/weibo.py
+++ b/youtube_dl/extractor/weibo.py
@@ -0,0 +1,48 @@
+# coding: utf-8
+
+import re
+import json
+
+from .common import InfoExtractor
+
+class WeiboIE(InfoExtractor):
+    """
+    The videos in Weibo come from different sites, this IE just finds the link
+    to the external video and returns it.
+    """
+    _VALID_URL = r'https?://video\.weibo\.com/v/weishipin/t_(?P<id>.+?)\.htm'
+
+    _TEST = {
+        u'url': u'http://video.weibo.com/v/weishipin/t_zjUw2kZ.htm',
+        u'file': u'98322879.flv',
+        u'info_dict': {
+            u'title': u'魔声耳机最新广告“All Eyes On Us”',
+        },
+        u'note': u'Sina video',
+        u'params': {
+            u'skip_download': True,
+        },
+    }
+
+    # Additional example videos from different sites
+    # Youku: http://video.weibo.com/v/weishipin/t_zQGDWQ8.htm
+    # 56.com: http://video.weibo.com/v/weishipin/t_zQ44HxN.htm
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url, flags=re.VERBOSE)
+        video_id = mobj.group('id')
+        info_url = 'http://video.weibo.com/?s=v&a=play_list&format=json&mix_video_id=t_%s' % video_id
+        info_page = self._download_webpage(info_url, video_id)
+        info = json.loads(info_page)
+
+        videos_urls = map(lambda v: v['play_page_url'], info['result']['data'])
+        #Prefer sina video since they have thumbnails
+        videos_urls = sorted(videos_urls, key=lambda u: u'video.sina.com' in u)
+        player_url = videos_urls[-1]
+        m_sina = re.match(r'https?://video.sina.com.cn/v/b/(\d+)-\d+.html', player_url)
+        if m_sina is not None:
+            self.to_screen('Sina video detected')
+            sina_id = m_sina.group(1)
+            player_url = 'http://you.video.sina.com.cn/swf/quotePlayer.swf?vid=%s' % sina_id
+        return self.url_result(player_url)
+
--- a/youtube_dl/extractor/youku.py
+++ b/youtube_dl/extractor/youku.py
@@ -13,7 +13,7 @@ from ..utils import (


 class YoukuIE(InfoExtractor):
-    _VALID_URL =  r'(?:http://)?v\.youku\.com/v_show/id_(?P<ID>[A-Za-z0-9]+)\.html'
+    _VALID_URL =  r'(?:http://)?(v|player)\.youku\.com/(v_show/id_|player\.php/sid/)(?P<ID>[A-Za-z0-9]+)(\.html|/v.swf)'
    _TEST =   {
        u"url": u"http://v.youku.com/v_show/id_XNDgyMDQ2NTQw.html",
        u"file": u"XNDgyMDQ2NTQw_part00.flv",
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@@ -186,11 +186,11 @@ class YoutubeIE(InfoExtractor):
        elif len(s) == 88:
            return s[48] + s[81:67:-1] + s[82] + s[66:62:-1] + s[85] + s[61:48:-1] + s[67] + s[47:12:-1] + s[3] + s[11:3:-1] + s[2] + s[12]
        elif len(s) == 87:
-            return s[62] + s[82:62:-1] + s[83] + s[61:52:-1] + s[0] + s[51:2:-1]
+            return s[4:23] + s[86] + s[24:85]
        elif len(s) == 86:
            return s[2:63] + s[82] + s[64:82] + s[63]
        elif len(s) == 85:
-            return s[76] + s[82:76:-1] + s[83] + s[75:60:-1] + s[0] + s[59:50:-1] + s[1] + s[49:2:-1]
+            return s[2:8] + s[0] + s[9:21] + s[65] + s[22:65] + s[84] + s[66:82] + s[21]
        elif len(s) == 84:
            return s[83:36:-1] + s[2] + s[35:26:-1] + s[3] + s[25:3:-1] + s[26]
        elif len(s) == 83:
@@ -731,7 +731,7 @@ class YoutubeChannelIE(InfoExtractor):
    _VALID_URL = r"^(?:https?://)?(?:youtu\.be|(?:\w+\.)?youtube(?:-nocookie)?\.com)/channel/([0-9A-Za-z_-]+)"
    _TEMPLATE_URL = 'http://www.youtube.com/channel/%s/videos?sort=da&flow=list&view=0&page=%s&gl=US&hl=en'
    _MORE_PAGES_INDICATOR = 'yt-uix-load-more'
-    _MORE_PAGES_URL = 'http://www.youtube.com/channel_ajax?action_load_more_videos=1&flow=list&paging=%s&view=0&sort=da&channel_id=%s'
+    _MORE_PAGES_URL = 'http://www.youtube.com/c4_browse_ajax?action_load_more_videos=1&flow=list&paging=%s&view=0&sort=da&channel_id=%s'
    IE_NAME = u'youtube:channel'

    def extract_videos_from_page(self, page):
@@ -898,12 +898,12 @@ class YoutubeShowIE(InfoExtractor):
        return [self.url_result('https://www.youtube.com' + season.group(1), 'YoutubePlaylist') for season in m_seasons]


-class YoutubeSubscriptionsIE(YoutubeIE):
-    """It's a subclass of YoutubeIE because we need to login"""
-    IE_DESC = u'YouTube.com subscriptions feed, "ytsubs" keyword(requires authentication)'
-    _VALID_URL = r'https?://www\.youtube\.com/feed/subscriptions|:ytsubs(?:criptions)?'
-    IE_NAME = u'youtube:subscriptions'
-    _FEED_TEMPLATE = 'http://www.youtube.com/feed_ajax?action_load_system_feed=1&feed_name=subscriptions&paging=%s'
+class YoutubeFeedsInfoExtractor(YoutubeIE):
+    """
+    Base class for extractors that fetch info from
+    http://www.youtube.com/feed_ajax
+    Subclasses must define the _FEED_NAME and _PLAYLIST_TITLE properties.
+    """
    _PAGING_STEP = 30

    # Overwrite YoutubeIE properties we don't want
@@ -912,18 +912,27 @@ class YoutubeSubscriptionsIE(YoutubeIE):
    def suitable(cls, url):
        return re.match(cls._VALID_URL, url) is not None

+    @property
+    def _FEED_TEMPLATE(self):
+        return 'http://www.youtube.com/feed_ajax?action_load_system_feed=1&feed_name=%s&paging=%%s' % self._FEED_NAME
+
+    @property
+    def IE_NAME(self):
+        return u'youtube:%s' % self._FEED_NAME
+
    def _real_initialize(self):
        (username, password) = self._get_login_info()
        if username is None:
            raise ExtractorError(u'No login info available, needed for downloading the Youtube subscriptions.', expected=True)
-        super(YoutubeSubscriptionsIE, self)._real_initialize()
+        super(YoutubeFeedsInfoExtractor, self)._real_initialize()

    def _real_extract(self, url):
        feed_entries = []
        # The step argument is available only in 2.7 or higher
        for i in itertools.count(0):
            paging = i*self._PAGING_STEP
-            info = self._download_webpage(self._FEED_TEMPLATE % paging, 'feed',
+            info = self._download_webpage(self._FEED_TEMPLATE % paging,
+                                          u'%s feed' % self._FEED_NAME,
                                          u'Downloading page %s' % i)
            info = json.loads(info)
            feed_html = info['feed_html']
@@ -932,4 +941,16 @@ class YoutubeSubscriptionsIE(YoutubeIE):
            feed_entries.extend(self.url_result(id, 'Youtube') for id in ids)
            if info['paging'] is None:
                break
-        return self.playlist_result(feed_entries, playlist_title='Youtube Subscriptions')
+        return self.playlist_result(feed_entries, playlist_title=self._PLAYLIST_TITLE)
+
+class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
+    IE_DESC = u'YouTube.com subscriptions feed, "ytsubs" keyword(requires authentication)'
+    _VALID_URL = r'https?://www\.youtube\.com/feed/subscriptions|:ytsubs(?:criptions)?'
+    _FEED_NAME = 'subscriptions'
+    _PLAYLIST_TITLE = u'Youtube Subscriptions'
+
+class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
+    IE_DESC = u'YouTube.com recommended videos, "ytrec" keyword (requires authentication)'
+    _VALID_URL = r'https?://www\.youtube\.com/feed/recommended|:ytrec(?:ommended)?'
+    _FEED_NAME = 'recommended'
+    _PLAYLIST_TITLE = u'Youtube Recommended videos'
--- a/youtube_dl/version.py
+++ b/youtube_dl/version.py
@@ -1,2 +1,2 @@

-__version__ = '2013.07.18'
+__version__ = '2013.07.24'
Author	SHA1	Message	Date
Philipp Hagemeister	870a7e6156	release 2013.07.24	2013-07-24 10:29:34 +02:00
patrickslin	239e3e0cca	YoutubeIE: new algo for length 87 (fixes #1105 ) Squashed commit from the pull requests #1107, #1109 and #1110.	2013-07-24 10:20:52 +02:00
Philipp Hagemeister	fc492de31d	release 2013.07.23.1	2013-07-23 18:37:52 +02:00
Philipp Hagemeister	a9c0f9bc63	Merge branch 'master' of github.com:rg3/youtube-dl	2013-07-23 18:37:09 +02:00
Philipp Hagemeister	b7cc9f5026	[soundcloud] Support URLs with a slash at the end (Fixes #1104 )	2013-07-23 18:35:52 +02:00
Jaime Marquínez Ferrándiz	252580c561	YoutubeChannelIE: switch ajax query from channel_ajax to c4_browse_ajax. It wasn't detecting when there were no more videos	2013-07-23 14:58:01 +02:00
Jaime Marquínez Ferrándiz	acc47c1a3f	Mark WatIE and TF1IE as broken (related #1103 )	2013-07-23 14:29:30 +02:00
Jaime Marquínez Ferrándiz	70fa830e4d	CollegeHumorIE: support Youtube videos and embed urls (fixes #1094 )	2013-07-23 14:29:29 +02:00
Philipp Hagemeister	a7af0ebaf5	release 2013.07.23	2013-07-23 14:20:52 +02:00
Jaime Marquínez Ferrándiz	67ae7b4760	Fix BreakIE Also detect videos that come from Youtube	2013-07-23 11:41:05 +02:00
Jaime Marquínez Ferrándiz	de48addae2	Fix CollegeHumorIE. Now it downloads the video over http in one file; it doesn't download in fragments. Added a test and use the methods in InfoExtractor for downloading webpages	2013-07-23 11:14:11 +02:00
Jaime Marquínez Ferrándiz	ddbfd0f0c5	ComedyCentralIE: support the extended interviews urls (fixes #1079 )	2013-07-21 11:04:56 +02:00
Jaime Marquínez Ferrándiz	d7ae0639b4	[youtube] Add an extractor for Youtube recommended videos (":ytrec" keyword) (closes #476 ). The new extractor and YoutubeSubscriptionsIE are subclasses of YoutubeFeedsInfoExtractor, which allows fetching videos from http://www.youtube.com/feed_ajax	2013-07-20 19:33:40 +02:00
Philipp Hagemeister	0382435990	[exfm] Add IE_* descriptions	2013-07-20 11:26:36 +02:00
Philipp Hagemeister	b390d85d95	Merge remote-tracking branch 'yasoob/master'	2013-07-20 11:23:56 +02:00
Philipp Hagemeister	be925dc64c	release 2013.07.19	2013-07-19 23:42:29 +02:00
Jaime Marquínez Ferrándiz	de7a91bfe3	WeiboIE: extract the player urls from a json webpage. Also extract a Sina url that doesn't require following a redirection.	2013-07-19 20:43:44 +02:00
Jaime Marquínez Ferrándiz	a4358cbabd	YoutubeIE: new algo for length 85 (closes #1080 ), thanks to @patrickslin	2013-07-19 17:12:40 +02:00
Jaime Marquínez Ferrándiz	177ed935a9	TEDIE: fix the title extraction	2013-07-19 16:13:31 +02:00
Jaime Marquínez Ferrándiz	c364f15ff1	Add WeiboIE (closes #1039 ). It just embeds videos from other sites. Modified the _VALID_URL of Youku to catch embed urls.	2013-07-19 16:09:14 +02:00
Jaime Marquínez Ferrándiz	e1f6e61e6a	Add an extractor for 56.com (related #1039 )	2013-07-19 15:17:34 +02:00
Jaime Marquínez Ferrándiz	0932300e3a	Add SinaIE (related #1039 ): extractor for video.sina.com.cn	2013-07-18 15:31:50 +02:00
Jaime Marquínez Ferrándiz	3f40217704	InstagramIE: fix the extraction of the uploader_id and the title The page title is now 'Instagram', so we build it. Also extract the description	2013-07-18 13:12:27 +02:00
Philipp Hagemeister	f631c3311a	Hint that --update may need sudo	2013-07-18 12:53:24 +02:00
M.Yasoob Khalid	8e5e059d7d	forgot to import json	2013-07-18 12:40:56 +05:00
M.Yasoob Khalid	2b1b511f6b	removed some unnecessary imports	2013-07-18 12:37:47 +05:00
M.Yasoob Khalid	233ad24ecf	corrected a typo and added myself to travis notifications.	2013-07-18 12:37:02 +05:00
M.Yasoob Khalid	c4949c50f9	added test for ex.fm	2013-07-18 12:33:31 +05:00
M.Yasoob Khalid	b6ef402905	added an IE for ex.fm	2013-07-18 12:30:21 +05:00