release 2013.07.23.1

Merge branch 'master' of github.com:rg3/youtube-dl
[soundcloud] Support URLs with a slash at the end (Fixes #1104)
2013-07-23 18:37:52 +02:00 · 2013-07-23 18:37:09 +02:00 · 2013-07-23 18:35:52 +02:00 · 2013-07-23 14:58:01 +02:00 · 2013-07-23 14:29:30 +02:00 · 2013-07-23 14:29:29 +02:00
22 changed files with 426 additions and 79 deletions
--- a/.travis.yml
+++ b/.travis.yml
@@ -9,6 +9,7 @@ notifications:
    - filippo.valsorda@gmail.com
    - phihag@phihag.de
    - jaime.marquinez.ferrandiz+travis@gmail.com
+    - yasoob.khld@gmail.com
 #  irc:
 #    channels:
 #      - "irc.freenode.org#youtube-dl"
--- a/README.md
+++ b/README.md
@@ -16,7 +16,9 @@ which means you can modify it, redistribute it or use it however you like.
 # OPTIONS
    -h, --help                 print this help text and exit
    --version                  print program version and exit
-    -U, --update               update this program to latest version
+    -U, --update               update this program to latest version. Make sure
+                               that you have sufficient permissions (run with
+                               sudo if needed)
    -i, --ignore-errors        continue on download errors
    --dump-user-agent          display the current browser identification
    --user-agent UA            specify a custom user agent
--- a/devscripts/youtube_genalgo.py
+++ b/devscripts/youtube_genalgo.py
@@ -5,6 +5,12 @@
 import sys

 tests = [
+    # 92 - vflQw-fB4 2013/07/17
+    ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[]}|:;?/>.<'`~\"",
+     "mrtyuioplkjhgfdsazxcvbnq1234567890QWERTY}IOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[]\"|:;"),
+    # 90
+    ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[]}|:;?/>.<'`",
+     "mrtyuioplkjhgfdsazxcvbne1234567890QWER[YUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={`]}|"),
    # 88
    ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[]}|:;?/>.<",
     "J:|}][{=+-_)(*&;%$#@>MNBVCXZASDFGH^KLPOIUYTREWQ0987654321mnbvcxzasdfghrklpoiuytej"),
@@ -14,9 +20,9 @@ tests = [
    # 86 - vfl_ymO4Z 2013/06/27
    ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[|};?/>.<",
     "ertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!/#$%^&*()_-+={[|};?@"),
-    # 85
+    # 85 - vflSAFCP9 2013/07/19
    ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[};?/>.<",
-     "{>/?;}[.=+-_)(*&^%$#@!MqBVCXZASDFwHJKLPOIUYTREWQ0987654321mnbvcxzasdfghjklpoiuytr"),
+     "ertyuiqplkjhgfdsazx$vbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#<%^&*()_-+={[};?/c"),
    # 84
    ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[};?>.<",
     "<.>?;}[{=+-_)(*&^%$#@!MNBVCXZASDFGHJKLPOIUYTREWe098765432rmnbvcxzasdfghjklpoiuyt1"),
--- a/test/test_youtube_sig.py
+++ b/test/test_youtube_sig.py
@@ -13,9 +13,14 @@ from helper import FakeYDL
 sig = YoutubeIE(FakeYDL())._decrypt_signature

 class TestYoutubeSig(unittest.TestCase):
-    def test_43_43(self):
-        wrong = '5AEEAE0EC39677BC65FD9021CCD115F1F2DBD5A59E4.C0B243A3E2DED6769199AF3461781E75122AE135135'
-        right = '931EA22157E1871643FA9519676DED253A342B0C.4E95A5DBD2F1F511DCC1209DF56CB77693CE0EAE'
+    def test_92(self):
+        wrong = "F9F9B6E6FD47029957AB911A964CC20D95A181A5D37A2DBEFD67D403DB0E8BE4F4910053E4E8A79.0B70B.0B80B8"
+        right = "69B6E6FD47029957AB911A9F4CC20D95A181A5D3.A2DBEFD67D403DB0E8BE4F4910053E4E8A7980B7"
+        self.assertEqual(sig(wrong), right)
+
+    def test_90(self):
+        wrong = "qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[]}|:;?/>.<'`"
+        right = "mrtyuioplkjhgfdsazxcvbne1234567890QWER[YUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={`]}|"
        self.assertEqual(sig(wrong), right)

    def test_88(self):
@@ -35,7 +40,7 @@ class TestYoutubeSig(unittest.TestCase):

    def test_85(self):
        wrong = "qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[};?/>.<"
-        right = "{>/?;}[.=+-_)(*&^%$#@!MqBVCXZASDFwHJKLPOIUYTREWQ0987654321mnbvcxzasdfghjklpoiuytr"
+        right = "ertyuiqplkjhgfdsazx$vbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#<%^&*()_-+={[};?/c"
        self.assertEqual(sig(wrong), right)

    def test_84(self):
@@ -58,10 +63,5 @@ class TestYoutubeSig(unittest.TestCase):
        right = "urty8ioplkjhgfdsazxcvbqm1234567e90QWERTYUIOPLKHGFDSnZXCVBNM!@#$%^&*(-+={[};?/>."
        self.assertEqual(sig(wrong), right)

-    def test_92(self):
-        wrong = "F9F9B6E6FD47029957AB911A964CC20D95A181A5D37A2DBEFD67D403DB0E8BE4F4910053E4E8A79.0B70B.0B80B8"
-        right = "69B6E6FD47029957AB911A9F4CC20D95A181A5D3.A2DBEFD67D403DB0E8BE4F4910053E4E8A7980B7"
-        self.assertEqual(sig(wrong), right)
-
 if __name__ == '__main__':
    unittest.main()
--- a/youtube_dl/init.py
+++ b/youtube_dl/init.py
@@ -129,7 +129,7 @@ def parseOpts(overrideArguments=None):
    general.add_option('-v', '--version',
            action='version', help='print program version and exit')
    general.add_option('-U', '--update',
-            action='store_true', dest='update_self', help='update this program to latest version')
+            action='store_true', dest='update_self', help='update this program to latest version. Make sure that you have sufficient permissions (run with sudo if needed)')
    general.add_option('-i', '--ignore-errors',
            action='store_true', dest='ignoreerrors', help='continue on download errors', default=False)
    general.add_option('--dump-user-agent',
--- a/youtube_dl/extractor/init.py
+++ b/youtube_dl/extractor/init.py
@@ -9,6 +9,7 @@ from .brightcove import BrightcoveIE
 from .canalplus import CanalplusIE
 from .collegehumor import CollegeHumorIE
 from .comedycentral import ComedyCentralIE
+from .condenast import CondeNastIE
 from .criterion import CriterionIE
 from .cspan import CSpanIE
 from .dailymotion import DailymotionIE
@@ -18,6 +19,7 @@ from .dreisat import DreiSatIE
 from .ehow import EHowIE
 from .eighttracks import EightTracksIE
 from .escapist import EscapistIE
+from .exfm import ExfmIE
 from .facebook import FacebookIE
 from .flickr import FlickrIE
 from .freesound import FreesoundIE
@@ -50,6 +52,7 @@ from .pornotube import PornotubeIE
 from .rbmaradio import RBMARadioIE
 from .redtube import RedTubeIE
 from .ringtv import RingTVIE
+from .sina import SinaIE
 from .soundcloud import SoundcloudIE, SoundcloudSetIE
 from .spiegel import SpiegelIE
 from .stanfordoc import StanfordOpenClassroomIE
@@ -69,7 +72,9 @@ from .veoh import VeohIE
 from .vevo import VevoIE
 from .vimeo import VimeoIE
 from .vine import VineIE
+from .c56 import C56IE
 from .wat import WatIE
+from .weibo import WeiboIE
 from .wimp import WimpIE
 from .worldstarhiphop import WorldStarHipHopIE
 from .xhamster import XHamsterIE
@@ -87,6 +92,7 @@ from .youtube import (
    YoutubeChannelIE,
    YoutubeShowIE,
    YoutubeSubscriptionsIE,
+    YoutubeRecommendedIE,
 )
 from .zdf import ZDFIE

--- a/youtube_dl/extractor/breakcom.py
+++ b/youtube_dl/extractor/breakcom.py
@@ -1,6 +1,8 @@
 import re
+import json

 from .common import InfoExtractor
+from ..utils import determine_ext


 class BreakIE(InfoExtractor):
@@ -17,17 +19,20 @@ class BreakIE(InfoExtractor):
    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group(1).split("-")[-1]
-        webpage = self._download_webpage(url, video_id)
-        video_url = re.search(r"videoPath: '(.+?)',",webpage).group(1)
-        key = re.search(r"icon: '(.+?)',",webpage).group(1)
-        final_url = str(video_url)+"?"+str(key)
-        thumbnail_url = re.search(r"thumbnailURL: '(.+?)'",webpage).group(1)
-        title = re.search(r"sVidTitle: '(.+)',",webpage).group(1)
-        ext = video_url.split('.')[-1]
+        embed_url = 'http://www.break.com/embed/%s' % video_id
+        webpage = self._download_webpage(embed_url, video_id)
+        info_json = self._search_regex(r'var embedVars = ({.*?});', webpage,
+                                       u'info json', flags=re.DOTALL)
+        info = json.loads(info_json)
+        video_url = info['videoUri']
+        m_youtube = re.search(r'(https?://www\.youtube\.com/watch\?v=.*)', video_url)
+        if m_youtube is not None:
+            return self.url_result(m_youtube.group(1), 'Youtube')
+        final_url = video_url + '?' + info['AuthToken']
        return [{
            'id':        video_id,
            'url':       final_url,
-            'ext':       ext,
-            'title':     title,
-            'thumbnail': thumbnail_url,
+            'ext':       determine_ext(final_url),
+            'title':     info['contentName'],
+            'thumbnail': info['thumbUri'],
        }]
--- a/youtube_dl/extractor/c56.py
+++ b/youtube_dl/extractor/c56.py
@@ -0,0 +1,36 @@
+# coding: utf-8
+
+import re
+import json
+
+from .common import InfoExtractor
+from ..utils import determine_ext
+
+class C56IE(InfoExtractor):
+    _VALID_URL = r'https?://((www|player)\.)?56\.com/(.+?/)?(v_|(play_album.+-))(?P<textid>.+?)\.(html|swf)'
+    IE_NAME = u'56.com'
+
+    _TEST ={
+        u'url': u'http://www.56.com/u39/v_OTM0NDA3MTY.html',
+        u'file': u'93440716.mp4',
+        u'md5': u'9dc07b5c8e978112a6441f9e75d2b59e',
+        u'info_dict': {
+            u'title': u'网事知多少 第32期：车怒',
+        },
+    }
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url, flags=re.VERBOSE)
+        text_id = mobj.group('textid')
+        info_page = self._download_webpage('http://vxml.56.com/json/%s/' % text_id,
+                                           text_id, u'Downloading video info')
+        info = json.loads(info_page)['info']
+        best_format = sorted(info['rfiles'], key=lambda f: int(f['filesize']))[-1]
+        video_url = best_format['url']
+
+        return {'id': info['vid'],
+                'title': info['Subject'],
+                'url': video_url,
+                'ext': determine_ext(video_url),
+                'thumbnail': info.get('bimg') or info.get('img'),
+                }
--- a/youtube_dl/extractor/collegehumor.py
+++ b/youtube_dl/extractor/collegehumor.py
@@ -1,26 +1,26 @@
 import re
-import socket
 import xml.etree.ElementTree

 from .common import InfoExtractor
 from ..utils import (
-    compat_http_client,
-    compat_str,
-    compat_urllib_error,
    compat_urllib_parse_urlparse,
-    compat_urllib_request,

    ExtractorError,
 )


 class CollegeHumorIE(InfoExtractor):
-    _WORKING = False
-    _VALID_URL = r'^(?:https?://)?(?:www\.)?collegehumor\.com/video/(?P<videoid>[0-9]+)/(?P<shorttitle>.*)$'
+    _VALID_URL = r'^(?:https?://)?(?:www\.)?collegehumor\.com/(video|embed)/(?P<videoid>[0-9]+)/(?P<shorttitle>.*)$'

-    def report_manifest(self, video_id):
-        """Report information extraction."""
-        self.to_screen(u'%s: Downloading XML manifest' % video_id)
+    _TEST = {
+        u'url': u'http://www.collegehumor.com/video/6902724/comic-con-cosplay-catastrophe',
+        u'file': u'6902724.mp4',
+        u'md5': u'1264c12ad95dca142a9f0bf7968105a0',
+        u'info_dict': {
+            u'title': u'Comic-Con Cosplay Catastrophe',
+            u'description': u'Fans get creative this year at San Diego.  Too creative.  And yes, that\'s really Joss Whedon.',
+        },
+    }

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
@@ -36,14 +36,16 @@ class CollegeHumorIE(InfoExtractor):

        self.report_extraction(video_id)
        xmlUrl = 'http://www.collegehumor.com/moogaloop/video/' + video_id
-        try:
-            metaXml = compat_urllib_request.urlopen(xmlUrl).read()
-        except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
-            raise ExtractorError(u'Unable to download video info XML: %s' % compat_str(err))
+        metaXml = self._download_webpage(xmlUrl, video_id,
+                                         u'Downloading info XML',
+                                         u'Unable to download video info XML')

        mdoc = xml.etree.ElementTree.fromstring(metaXml)
        try:
            videoNode = mdoc.findall('./video')[0]
+            youtubeIdNode = videoNode.find('./youtubeID')
+            if youtubeIdNode is not None:
+                return self.url_result(youtubeIdNode.text, 'Youtube')
            info['description'] = videoNode.findall('./description')[0].text
            info['title'] = videoNode.findall('./caption')[0].text
            info['thumbnail'] = videoNode.findall('./thumbnail')[0].text
@@ -52,11 +54,9 @@ class CollegeHumorIE(InfoExtractor):
            raise ExtractorError(u'Invalid metadata XML file')

        manifest_url += '?hdcore=2.10.3'
-        self.report_manifest(video_id)
-        try:
-            manifestXml = compat_urllib_request.urlopen(manifest_url).read()
-        except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
-            raise ExtractorError(u'Unable to download video info XML: %s' % compat_str(err))
+        manifestXml = self._download_webpage(manifest_url, video_id,
+                                             u'Downloading XML manifest',
+                                             u'Unable to download video info XML')

        adoc = xml.etree.ElementTree.fromstring(manifestXml)
        try:
@@ -66,9 +66,8 @@ class CollegeHumorIE(InfoExtractor):
        except IndexError as err:
            raise ExtractorError(u'Invalid manifest file')

-        url_pr = compat_urllib_parse_urlparse(manifest_url)
-        url = url_pr.scheme + '://' + url_pr.netloc + '/z' + video_id[:-2] + '/' + node_id + 'Seg1-Frag1'
+        url_pr = compat_urllib_parse_urlparse(info['thumbnail'])

-        info['url'] = url
-        info['ext'] = 'f4f'
+        info['url'] = url_pr.scheme + '://' + url_pr.netloc + video_id[:-2].replace('.csmil','').replace(',','')
+        info['ext'] = 'mp4'
        return [info]
--- a/youtube_dl/extractor/comedycentral.py
+++ b/youtube_dl/extractor/comedycentral.py
@@ -24,7 +24,9 @@ class ComedyCentralIE(InfoExtractor):
                         (full-episodes/(?P<episode>.*)|
                          (?P<clip>
                              (the-colbert-report-(videos|collections)/(?P<clipID>[0-9]+)/[^/]*/(?P<cntitle>.*?))
-                              |(watch/(?P<date>[^/]*)/(?P<tdstitle>.*)))))
+                              |(watch/(?P<date>[^/]*)/(?P<tdstitle>.*)))|
+                          (?P<interview>
+                              extended-interviews/(?P<interID>[0-9]+)/playlist_tds_extended_(?P<interview_title>.*?)/.*?)))
                     $"""
    _TEST = {
        u'url': u'http://www.thedailyshow.com/watch/thu-december-13-2012/kristen-stewart',
@@ -87,6 +89,9 @@ class ComedyCentralIE(InfoExtractor):
            else:
                epTitle = mobj.group('cntitle')
            dlNewest = False
+        elif mobj.group('interview'):
+            epTitle = mobj.group('interview_title')
+            dlNewest = False
        else:
            dlNewest = not mobj.group('episode')
            if dlNewest:
--- a/youtube_dl/extractor/condenast.py
+++ b/youtube_dl/extractor/condenast.py
@@ -0,0 +1,106 @@
+# coding: utf-8
+
+import re
+import json
+
+from .common import InfoExtractor
+from ..utils import (
+    compat_urllib_parse,
+    orderedSet,
+    compat_urllib_parse_urlparse,
+    compat_urlparse,
+)
+
+
+class CondeNastIE(InfoExtractor):
+    """
+    Condé Nast is a media group, some of its sites use a custom HTML5 player
+    that works the same in all of them.
+    """
+
+    # The keys are the supported sites and the values are the name to be shown
+    # to the user and in the extractor description.
+    _SITES = {'wired': u'WIRED',
+              'gq': u'GQ',
+              'vogue': u'Vogue',
+              'glamour': u'Glamour',
+              'wmagazine': u'W Magazine',
+              'vanityfair': u'Vanity Fair',
+              }
+
+    _VALID_URL = r'http://(video|www).(?P<site>%s).com/(?P<type>watch|series|video)/(?P<id>.+)' % '|'.join(_SITES.keys())
+    IE_DESC = u'Condé Nast media group: %s' % ', '.join(sorted(_SITES.values()))
+
+    _TEST = {
+        u'url': u'http://video.wired.com/watch/3d-printed-speakers-lit-with-led',
+        u'file': u'5171b343c2b4c00dd0c1ccb3.mp4',
+        u'md5': u'1921f713ed48aabd715691f774c451f7',
+        u'info_dict': {
+            u'title': u'3D Printed Speakers Lit With LED',
+            u'description': u'Check out these beautiful 3D printed LED speakers.  You can\'t actually buy them, but LumiGeek is working on a board that will let you make you\'re own.',
+        }
+    }
+
+    def _extract_series(self, url, webpage):
+        title = self._html_search_regex(r'<div class="cne-series-info">.*?<h1>(.+?)</h1>',
+                                        webpage, u'series title', flags=re.DOTALL)
+        url_object = compat_urllib_parse_urlparse(url)
+        base_url = '%s://%s' % (url_object.scheme, url_object.netloc)
+        m_paths = re.finditer(r'<p class="cne-thumb-title">.*?<a href="(/watch/.+?)["\?]',
+                              webpage, flags=re.DOTALL)
+        paths = orderedSet(m.group(1) for m in m_paths)
+        build_url = lambda path: compat_urlparse.urljoin(base_url, path)
+        entries = [self.url_result(build_url(path), 'CondeNast') for path in paths]
+        return self.playlist_result(entries, playlist_title=title)
+
+    def _extract_video(self, webpage):
+        description = self._html_search_regex([r'<div class="cne-video-description">(.+?)</div>',
+                                               r'<div class="video-post-content">(.+?)</div>',
+                                               ],
+                                              webpage, u'description',
+                                              fatal=False, flags=re.DOTALL)
+        params = self._search_regex(r'var params = {(.+?)}[;,]', webpage,
+                                    u'player params', flags=re.DOTALL)
+        video_id = self._search_regex(r'videoId: [\'"](.+?)[\'"]', params, u'video id')
+        player_id = self._search_regex(r'playerId: [\'"](.+?)[\'"]', params, u'player id')
+        target = self._search_regex(r'target: [\'"](.+?)[\'"]', params, u'target')
+        data = compat_urllib_parse.urlencode({'videoId': video_id,
+                                              'playerId': player_id,
+                                              'target': target,
+                                              })
+        base_info_url = self._search_regex(r'url = [\'"](.+?)[\'"][,;]',
+                                           webpage, u'base info url',
+                                           default='http://player.cnevids.com/player/loader.js?')
+        info_url = base_info_url + data
+        info_page = self._download_webpage(info_url, video_id,
+                                           u'Downloading video info')
+        video_info = self._search_regex(r'var video = ({.+?});', info_page, u'video info')
+        video_info = json.loads(video_info)
+
+        def _formats_sort_key(f):
+            type_ord = 1 if f['type'] == 'video/mp4' else 0
+            quality_ord = 1 if f['quality'] == 'high' else 0
+            return (quality_ord, type_ord)
+        best_format = sorted(video_info['sources'][0], key=_formats_sort_key)[-1]
+
+        return {'id': video_id,
+                'url': best_format['src'],
+                'ext': best_format['type'].split('/')[-1],
+                'title': video_info['title'],
+                'thumbnail': video_info['poster_frame'],
+                'description': description,
+                }
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        site = mobj.group('site')
+        url_type = mobj.group('type')
+        id = mobj.group('id')
+
+        self.to_screen(u'Extracting from %s with the Condé Nast extractor' % self._SITES[site])
+        webpage = self._download_webpage(url, id)
+
+        if url_type == 'series':
+            return self._extract_series(url, webpage)
+        else:
+            return self._extract_video(webpage)
--- a/youtube_dl/extractor/exfm.py
+++ b/youtube_dl/extractor/exfm.py
@@ -0,0 +1,42 @@
+import re
+import json
+
+from .common import InfoExtractor
+
+
+class ExfmIE(InfoExtractor):
+    IE_NAME = u'exfm'
+    IE_DESC = u'ex.fm'
+    _VALID_URL = r'(?:http://)?(?:www\.)?ex\.fm/song/([^/]+)'
+    _SOUNDCLOUD_URL_ = r'(?:http://)?(?:www\.)?api\.soundcloud.com/tracks/([^/]+)/stream'
+    _TEST = {
+        u'url': u'http://ex.fm/song/1bgtzg',
+        u'file': u'1bgtzg.mp3',
+        u'md5': u'8a7967a3fef10e59a1d6f86240fd41cf',
+        u'info_dict': {
+            u"title": u"We Can't Stop",
+            u"uploader": u"Miley Cyrus",
+            u'thumbnail': u'http://i1.sndcdn.com/artworks-000049666230-w9i7ef-t500x500.jpg?9d68d37'
+        }
+    }
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        song_id = mobj.group(1)
+        info_url = "http://ex.fm/api/v3/song/%s" %(song_id)
+        webpage = self._download_webpage(info_url, song_id)
+        info = json.loads(webpage)
+        song_url = re.match(self._SOUNDCLOUD_URL_,info['song']['url'])
+        if song_url is not None:
+        	song_url = song_url.group() + "?client_id=b45b1aa10f1ac2941910a7f0d10f8e28"
+        else:
+        	song_url = info['song']['url']
+        return [{
+            'id':          song_id,
+            'url':         song_url,
+            'ext':         'mp3',
+            'title':       info['song']['title'],
+            'thumbnail':   info['song']['image']['large'],
+            'uploader':    info['song']['artist'],
+            'view_count':  info['song']['loved_count'],
+        }]
--- a/youtube_dl/extractor/instagram.py
+++ b/youtube_dl/extractor/instagram.py
@@ -10,7 +10,8 @@ class InstagramIE(InfoExtractor):
        u'md5': u'0d2da106a9d2631273e192b372806516',
        u'info_dict': {
            u"uploader_id": u"naomipq", 
-            u"title": u"Video by naomipq"
+            u"title": u"Video by naomipq",
+            u'description': u'md5:1f17f0ab29bd6fe2bfad705f58de3cb8',
        }
    }

@@ -18,20 +19,17 @@ class InstagramIE(InfoExtractor):
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group(1)
        webpage = self._download_webpage(url, video_id)
-        html_title = self._html_search_regex(
-            r'<title>(.+?)</title>',
-            webpage, u'title', flags=re.DOTALL)
-        title = re.sub(u'(?: *\(Videos?\))? \u2022 Instagram$', '', html_title).strip()
-        uploader_id = self._html_search_regex(
-            r'<div class="media-user" id="media_user">.*?<h2><a href="[^"]*">([^<]*)</a></h2>',
-            webpage, u'uploader id', fatal=False, flags=re.DOTALL)
-        ext = 'mp4'
+        uploader_id = self._search_regex(r'"owner":{"username":"(.+?)"',
+            webpage, u'uploader id', fatal=False)
+        desc = self._search_regex(r'"caption":"(.*?)"', webpage, u'description',
+            fatal=False)

        return [{
            'id':        video_id,
            'url':       self._og_search_video_url(webpage),
-            'ext':       ext,
-            'title':     title,
+            'ext':       'mp4',
+            'title':     u'Video by %s' % uploader_id,
            'thumbnail': self._og_search_thumbnail(webpage),
-            'uploader_id' : uploader_id
+            'uploader_id' : uploader_id,
+            'description': desc,
        }]
--- a/youtube_dl/extractor/sina.py
+++ b/youtube_dl/extractor/sina.py
@@ -0,0 +1,67 @@
+# coding: utf-8
+
+import re
+import xml.etree.ElementTree
+
+from .common import InfoExtractor
+from ..utils import (
+    compat_urllib_request,
+    compat_urllib_parse,
+)
+
+
+class SinaIE(InfoExtractor):
+    _VALID_URL = r'''https?://(.*?\.)?video\.sina\.com\.cn/
+                        (
+                            (.+?/(((?P<pseudo_id>\d+).html)|(.*?(\#|(vid=))(?P<id>\d+?)($|&))))
+                            |
+                            # This is used by external sites like Weibo
+                            (api/sinawebApi/outplay.php/(?P<token>.+?)\.swf)
+                        )
+                  '''
+
+    _TEST = {
+        u'url': u'http://video.sina.com.cn/news/vlist/zt/chczlj2013/?opsubject_id=top12#110028898',
+        u'file': u'110028898.flv',
+        u'md5': u'd65dd22ddcf44e38ce2bf58a10c3e71f',
+        u'info_dict': {
+            u'title': u'《中国新闻》 朝鲜要求巴拿马立即释放被扣船员',
+        }
+    }
+
+    @classmethod
+    def suitable(cls, url):
+        return re.match(cls._VALID_URL, url, flags=re.VERBOSE) is not None
+
+    def _extract_video(self, video_id):
+        data = compat_urllib_parse.urlencode({'vid': video_id})
+        url_page = self._download_webpage('http://v.iask.com/v_play.php?%s' % data,
+            video_id, u'Downloading video url')
+        image_page = self._download_webpage(
+            'http://interface.video.sina.com.cn/interface/common/getVideoImage.php?%s' % data,
+            video_id, u'Downloading thumbnail info')
+        url_doc = xml.etree.ElementTree.fromstring(url_page.encode('utf-8'))
+
+        return {'id': video_id,
+                'url': url_doc.find('./durl/url').text,
+                'ext': 'flv',
+                'title': url_doc.find('./vname').text,
+                'thumbnail': image_page.split('=')[1],
+                }
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url, flags=re.VERBOSE)
+        video_id = mobj.group('id')
+        if mobj.group('token') is not None:
+            # The video id is in the redirected url
+            self.to_screen(u'Getting video id')
+            request = compat_urllib_request.Request(url)
+            request.get_method = lambda: 'HEAD'
+            (_, urlh) = self._download_webpage_handle(request, 'NA', False)
+            return self._real_extract(urlh.geturl())
+        elif video_id is None:
+            pseudo_id = mobj.group('pseudo_id')
+            webpage = self._download_webpage(url, pseudo_id)
+            video_id = self._search_regex(r'vid:\'(\d+?)\'', webpage, u'video id')
+
+        return self._extract_video(video_id)
--- a/youtube_dl/extractor/soundcloud.py
+++ b/youtube_dl/extractor/soundcloud.py
@@ -19,7 +19,7 @@ class SoundcloudIE(InfoExtractor):
       of the stream token and uid
     """

-    _VALID_URL = r'^(?:https?://)?(?:www\.)?soundcloud\.com/([\w\d-]+)/([\w\d-]+)(?:[?].*)?$'
+    _VALID_URL = r'^(?:https?://)?(?:www\.)?soundcloud\.com/([\w\d-]+)/([\w\d-]+)/?(?:[?].*)?$'
    IE_NAME = u'soundcloud'
    _TEST = {
        u'url': u'http://soundcloud.com/ethmusic/lostin-powers-she-so-heavy',
--- a/youtube_dl/extractor/ted.py
+++ b/youtube_dl/extractor/ted.py
@@ -67,7 +67,7 @@ class TEDIE(InfoExtractor):
        webpage = self._download_webpage(url, video_id, 'Downloading \"%s\" page' % video_name)
        self.report_extraction(video_name)
        # If the url includes the language we get the title translated
-        title = self._html_search_regex(r'<span id="altHeadline" >(?P<title>.*)</span>',
+        title = self._html_search_regex(r'<span .*?id="altHeadline".+?>(?P<title>.*)</span>',
                                        webpage, 'title')
        json_data = self._search_regex(r'<script.*?>var talkDetails = ({.*?})</script>',
                                    webpage, 'json data')
--- a/youtube_dl/extractor/tf1.py
+++ b/youtube_dl/extractor/tf1.py
@@ -10,6 +10,7 @@ class TF1IE(InfoExtractor):
    TF1 uses the wat.tv player, currently it can only download videos with the
    html5 player enabled, it cannot download HD videos.
    """
+    _WORKING = False
    _VALID_URL = r'http://videos.tf1.fr/.*-(.*?).html'
    _TEST = {
        u'url': u'http://videos.tf1.fr/auto-moto/citroen-grand-c4-picasso-2013-presentation-officielle-8062060.html',
--- a/youtube_dl/extractor/wat.py
+++ b/youtube_dl/extractor/wat.py
@@ -12,6 +12,7 @@ from ..utils import (


 class WatIE(InfoExtractor):
+    _WORKING = False
    _VALID_URL=r'http://www.wat.tv/.*-(?P<shortID>.*?)_.*?.html'
    IE_NAME = 'wat.tv'
    _TEST = {
--- a/youtube_dl/extractor/weibo.py
+++ b/youtube_dl/extractor/weibo.py
@@ -0,0 +1,48 @@
+# coding: utf-8
+
+import re
+import json
+
+from .common import InfoExtractor
+
+class WeiboIE(InfoExtractor):
+    """
+    The videos in Weibo come from different sites, this IE just finds the link
+    to the external video and returns it.
+    """
+    _VALID_URL = r'https?://video\.weibo\.com/v/weishipin/t_(?P<id>.+?)\.htm'
+
+    _TEST = {
+        u'url': u'http://video.weibo.com/v/weishipin/t_zjUw2kZ.htm',
+        u'file': u'98322879.flv',
+        u'info_dict': {
+            u'title': u'魔声耳机最新广告“All Eyes On Us”',
+        },
+        u'note': u'Sina video',
+        u'params': {
+            u'skip_download': True,
+        },
+    }
+
+    # Additional example videos from different sites
+    # Youku: http://video.weibo.com/v/weishipin/t_zQGDWQ8.htm
+    # 56.com: http://video.weibo.com/v/weishipin/t_zQ44HxN.htm
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url, flags=re.VERBOSE)
+        video_id = mobj.group('id')
+        info_url = 'http://video.weibo.com/?s=v&a=play_list&format=json&mix_video_id=t_%s' % video_id
+        info_page = self._download_webpage(info_url, video_id)
+        info = json.loads(info_page)
+
+        videos_urls = map(lambda v: v['play_page_url'], info['result']['data'])
+        #Prefer sina video since they have thumbnails
+        videos_urls = sorted(videos_urls, key=lambda u: u'video.sina.com' in u)
+        player_url = videos_urls[-1]
+        m_sina = re.match(r'https?://video.sina.com.cn/v/b/(\d+)-\d+.html', player_url)
+        if m_sina is not None:
+            self.to_screen('Sina video detected')
+            sina_id = m_sina.group(1)
+            player_url = 'http://you.video.sina.com.cn/swf/quotePlayer.swf?vid=%s' % sina_id
+        return self.url_result(player_url)
+
--- a/youtube_dl/extractor/youku.py
+++ b/youtube_dl/extractor/youku.py
@@ -13,7 +13,7 @@ from ..utils import (


 class YoukuIE(InfoExtractor):
-    _VALID_URL =  r'(?:http://)?v\.youku\.com/v_show/id_(?P<ID>[A-Za-z0-9]+)\.html'
+    _VALID_URL =  r'(?:http://)?(v|player)\.youku\.com/(v_show/id_|player\.php/sid/)(?P<ID>[A-Za-z0-9]+)(\.html|/v.swf)'
    _TEST =   {
        u"url": u"http://v.youku.com/v_show/id_XNDgyMDQ2NTQw.html",
        u"file": u"XNDgyMDQ2NTQw_part00.flv",
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@@ -179,14 +179,18 @@ class YoutubeIE(InfoExtractor):
    def _decrypt_signature(self, s):
        """Turn the encrypted s field into a working signature"""

-        if len(s) == 88:
+        if len(s) == 92:
+            return s[25] + s[3:25] + s[0] + s[26:42] + s[79] + s[43:79] + s[91] + s[80:83]
+        elif len(s) == 90:
+            return s[25] + s[3:25] + s[2] + s[26:40] + s[77] + s[41:77] + s[89] + s[78:81]
+        elif len(s) == 88:
            return s[48] + s[81:67:-1] + s[82] + s[66:62:-1] + s[85] + s[61:48:-1] + s[67] + s[47:12:-1] + s[3] + s[11:3:-1] + s[2] + s[12]
        elif len(s) == 87:
            return s[62] + s[82:62:-1] + s[83] + s[61:52:-1] + s[0] + s[51:2:-1]
        elif len(s) == 86:
            return s[2:63] + s[82] + s[64:82] + s[63]
        elif len(s) == 85:
-            return s[76] + s[82:76:-1] + s[83] + s[75:60:-1] + s[0] + s[59:50:-1] + s[1] + s[49:2:-1]
+            return s[2:8] + s[0] + s[9:21] + s[65] + s[22:65] + s[84] + s[66:82] + s[21]
        elif len(s) == 84:
            return s[83:36:-1] + s[2] + s[35:26:-1] + s[3] + s[25:3:-1] + s[26]
        elif len(s) == 83:
@@ -195,8 +199,6 @@ class YoutubeIE(InfoExtractor):
            return s[36] + s[79:67:-1] + s[81] + s[66:40:-1] + s[33] + s[39:36:-1] + s[40] + s[35] + s[0] + s[67] + s[32:0:-1] + s[34]
        elif len(s) == 81:
            return s[6] + s[3:6] + s[33] + s[7:24] + s[0] + s[25:33] + s[2] + s[34:53] + s[24] + s[54:81]
-        elif len(s) == 92:
-            return s[25] + s[3:25] + s[0] + s[26:42] + s[79] + s[43:79] + s[91] + s[80:83];

        else:
            raise ExtractorError(u'Unable to decrypt signature, key length %d not supported; retrying might work' % (len(s)))
@@ -591,8 +593,9 @@ class YoutubeIE(InfoExtractor):
                            else:
                                player = u'html5 player %s' % self._search_regex(r'html5player-(.+?)\.js', video_webpage,
                                    'html5 player', fatal=False)
-                            self.to_screen('encrypted signature length %d (%d.%d), itag %s, %s' %
-                                (len(s), len(s.split('.')[0]), len(s.split('.')[1]), url_data['itag'][0], player))
+                            parts_sizes = u'.'.join(compat_str(len(part)) for part in s.split('.'))
+                            self.to_screen(u'encrypted signature length %d (%s), itag %s, %s' %
+                                (len(s), parts_sizes, url_data['itag'][0], player))
                        signature = self._decrypt_signature(url_data['s'][0])
                        url += '&signature=' + signature
                    if 'ratebypass' not in url:
@@ -728,7 +731,7 @@ class YoutubeChannelIE(InfoExtractor):
    _VALID_URL = r"^(?:https?://)?(?:youtu\.be|(?:\w+\.)?youtube(?:-nocookie)?\.com)/channel/([0-9A-Za-z_-]+)"
    _TEMPLATE_URL = 'http://www.youtube.com/channel/%s/videos?sort=da&flow=list&view=0&page=%s&gl=US&hl=en'
    _MORE_PAGES_INDICATOR = 'yt-uix-load-more'
-    _MORE_PAGES_URL = 'http://www.youtube.com/channel_ajax?action_load_more_videos=1&flow=list&paging=%s&view=0&sort=da&channel_id=%s'
+    _MORE_PAGES_URL = 'http://www.youtube.com/c4_browse_ajax?action_load_more_videos=1&flow=list&paging=%s&view=0&sort=da&channel_id=%s'
    IE_NAME = u'youtube:channel'

    def extract_videos_from_page(self, page):
@@ -895,12 +898,12 @@ class YoutubeShowIE(InfoExtractor):
        return [self.url_result('https://www.youtube.com' + season.group(1), 'YoutubePlaylist') for season in m_seasons]


-class YoutubeSubscriptionsIE(YoutubeIE):
-    """It's a subclass of YoutubeIE because we need to login"""
-    IE_DESC = u'YouTube.com subscriptions feed, "ytsubs" keyword(requires authentication)'
-    _VALID_URL = r'https?://www\.youtube\.com/feed/subscriptions|:ytsubs(?:criptions)?'
-    IE_NAME = u'youtube:subscriptions'
-    _FEED_TEMPLATE = 'http://www.youtube.com/feed_ajax?action_load_system_feed=1&feed_name=subscriptions&paging=%s'
+class YoutubeFeedsInfoExtractor(YoutubeIE):
+    """
+    Base class for extractors that fetch info from
+    http://www.youtube.com/feed_ajax
+    Subclasses must define the _FEED_NAME and _PLAYLIST_TITLE properties.
+    """
    _PAGING_STEP = 30

    # Overwrite YoutubeIE properties we don't want
@@ -909,18 +912,27 @@ class YoutubeSubscriptionsIE(YoutubeIE):
    def suitable(cls, url):
        return re.match(cls._VALID_URL, url) is not None

+    @property
+    def _FEED_TEMPLATE(self):
+        return 'http://www.youtube.com/feed_ajax?action_load_system_feed=1&feed_name=%s&paging=%%s' % self._FEED_NAME
+
+    @property
+    def IE_NAME(self):
+        return u'youtube:%s' % self._FEED_NAME
+
    def _real_initialize(self):
        (username, password) = self._get_login_info()
        if username is None:
            raise ExtractorError(u'No login info available, needed for downloading the Youtube subscriptions.', expected=True)
-        super(YoutubeSubscriptionsIE, self)._real_initialize()
+        super(YoutubeFeedsInfoExtractor, self)._real_initialize()

    def _real_extract(self, url):
        feed_entries = []
        # The step argument is available only in 2.7 or higher
        for i in itertools.count(0):
            paging = i*self._PAGING_STEP
-            info = self._download_webpage(self._FEED_TEMPLATE % paging, 'feed',
+            info = self._download_webpage(self._FEED_TEMPLATE % paging,
+                                          u'%s feed' % self._FEED_NAME,
                                          u'Downloading page %s' % i)
            info = json.loads(info)
            feed_html = info['feed_html']
@@ -929,4 +941,16 @@ class YoutubeSubscriptionsIE(YoutubeIE):
            feed_entries.extend(self.url_result(id, 'Youtube') for id in ids)
            if info['paging'] is None:
                break
-        return self.playlist_result(feed_entries, playlist_title='Youtube Subscriptions')
+        return self.playlist_result(feed_entries, playlist_title=self._PLAYLIST_TITLE)
+
+class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
+    IE_DESC = u'YouTube.com subscriptions feed, "ytsubs" keyword(requires authentication)'
+    _VALID_URL = r'https?://www\.youtube\.com/feed/subscriptions|:ytsubs(?:criptions)?'
+    _FEED_NAME = 'subscriptions'
+    _PLAYLIST_TITLE = u'Youtube Subscriptions'
+
+class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
+    IE_DESC = u'YouTube.com recommended videos, "ytrec" keyword (requires authentication)'
+    _VALID_URL = r'https?://www\.youtube\.com/feed/recommended|:ytrec(?:ommended)?'
+    _FEED_NAME = 'recommended'
+    _PLAYLIST_TITLE = u'Youtube Recommended videos'
--- a/youtube_dl/version.py
+++ b/youtube_dl/version.py
@@ -1,2 +1,2 @@

-__version__ = '2013.07.17.1'
+__version__ = '2013.07.23.1'
Author	SHA1	Message	Date
Philipp Hagemeister	fc492de31d	release 2013.07.23.1	2013-07-23 18:37:52 +02:00
Philipp Hagemeister	a9c0f9bc63	Merge branch 'master' of github.com:rg3/youtube-dl	2013-07-23 18:37:09 +02:00
Philipp Hagemeister	b7cc9f5026	[soundcloud] Support URLs with a slash at the end (Fixes #1104 )	2013-07-23 18:35:52 +02:00
Jaime Marquínez Ferrándiz	252580c561	YoutubeChannelE: switch ajax query from channel_ajax to c4_browse_ajax It wasn't detecting when there aren't more videos	2013-07-23 14:58:01 +02:00
Jaime Marquínez Ferrándiz	acc47c1a3f	Mark WatIE and TF1IE as broken (related #1103 )	2013-07-23 14:29:30 +02:00
Jaime Marquínez Ferrándiz	70fa830e4d	CollegeHumorIE: support Youtube videos and embed urls (fixes #1094 )	2013-07-23 14:29:29 +02:00
Philipp Hagemeister	a7af0ebaf5	release 2013.07.23	2013-07-23 14:20:52 +02:00
Jaime Marquínez Ferrándiz	67ae7b4760	Fix BreakIE Also detect videos that come from Youtube	2013-07-23 11:41:05 +02:00
Jaime Marquínez Ferrándiz	de48addae2	Fix CollegHumorIE Now it downloads the video over http in one file, it doesn't downloads in fragments Added a test and use the methods in InfoExtractor for downloading webpages	2013-07-23 11:14:11 +02:00
Jaime Marquínez Ferrándiz	ddbfd0f0c5	ComedyCentralIE: support the extended interviews urls (fixes #1079 )	2013-07-21 11:04:56 +02:00
Jaime Marquínez Ferrándiz	d7ae0639b4	[youtube] Add an extractor for Youtube recommended videos (":ytrec" keyword) (closes #476 ) The new extractor and YoutubeSubscriptionsIE are subclasses of YoutubeFeedsInfoExtractor, which allows to fetch videos from http://www.youtube.com/feed_ajax	2013-07-20 19:33:40 +02:00
Philipp Hagemeister	0382435990	[exfm] Add IE_* descriptions	2013-07-20 11:26:36 +02:00
Philipp Hagemeister	b390d85d95	Merge remote-tracking branch 'yasoob/master'	2013-07-20 11:23:56 +02:00
Philipp Hagemeister	be925dc64c	release 2013.07.19	2013-07-19 23:42:29 +02:00
Jaime Marquínez Ferrándiz	de7a91bfe3	WeiboIE: extract the player urls from a json webpage Also extract a Sina url that doesn't require to follow a redirection.	2013-07-19 20:43:44 +02:00
Jaime Marquínez Ferrándiz	a4358cbabd	YoutubeIE: new algo for length 85 (closes #1080 ), thanks to @patrickslin	2013-07-19 17:12:40 +02:00
Jaime Marquínez Ferrándiz	177ed935a9	TEDIE: fix the title extraction	2013-07-19 16:13:31 +02:00
Jaime Marquínez Ferrándiz	c364f15ff1	Add WeiboIE (closes #1039 ) It just embed video from other sites. Modified the _VALID_URL of Youku to catch embed urls.	2013-07-19 16:09:14 +02:00
Jaime Marquínez Ferrándiz	e1f6e61e6a	Add an extractor for 56.com (related #1039 )	2013-07-19 15:17:34 +02:00
Jaime Marquínez Ferrándiz	0932300e3a	Add SinaIE (related #1039 ): extractor for video.sina.com.cn	2013-07-18 15:31:50 +02:00
Jaime Marquínez Ferrándiz	3f40217704	InstagramIE: fix the extraction of the uploader_id and the title The page title is now 'Instagram', so we build it. Also extract the description	2013-07-18 13:12:27 +02:00
Philipp Hagemeister	f631c3311a	Hint that --update may need sudo	2013-07-18 12:53:24 +02:00
Philipp Hagemeister	ad433bb372	release 2013.07.18	2013-07-18 12:41:49 +02:00
Jaime Marquínez Ferrándiz	3e0b3a1428	Remove the test to signature of lengths 43,43 It's already covered by the test for length 87	2013-07-18 12:29:09 +02:00
Jaime Marquínez Ferrándiz	444b116597	YoutubeIE: add algo for length 90 (closes #1064 ) Order the cases from higher to lower length.	2013-07-18 12:25:41 +02:00
Jaime Marquínez Ferrándiz	2aea08eda1	Merge pull request #1068 from MiLk/genalgo-youtube-92 [youtube] Add generator for signature 92	2013-07-18 09:54:56 +02:00
M.Yasoob Khalid	8e5e059d7d	forgot to import json json	2013-07-18 12:40:56 +05:00
M.Yasoob Khalid	2b1b511f6b	removed some unnecessary imports	2013-07-18 12:37:47 +05:00
M.Yasoob Khalid	233ad24ecf	corrected a typo and added myself to travis notifications.	2013-07-18 12:37:02 +05:00
M.Yasoob Khalid	c4949c50f9	added test for ex.fm	2013-07-18 12:33:31 +05:00
M.Yasoob Khalid	b6ef402905	added an IE for ex.fm	2013-07-18 12:30:21 +05:00
Emilien Kenler	ccf365475a	[youtube] Add generator for signature 92	2013-07-17 17:43:44 +02:00
Jaime Marquínez Ferrándiz	e1fb245690	Add CondeNastIE It supports some of the websites of the Condé Nast group: WIRED, GQ, Vogue, Glamour, W Magazine and Vanity Fair.	2013-07-17 14:39:02 +02:00
Jaime Marquínez Ferrándiz	5a76c6517e	YoutubeIE: some encrypted signatures have more than two parts, print the size of all the parts	2013-07-17 12:08:10 +02:00