Compare commits: 2013.01.17 ... 2014.01.21 (50 commits)
Commits in this range (SHA1 prefixes):

2989501131, 7b0817e8e1, 9d4288b2d4, 3486df383b, b60016e831, 5aafe895fc,
b853d2e155, c91778f8c0, 5016f3eac8, efb1bb90a0, 4cf393bb4b, ce4e242a6f,
b27bec212f, 704519c7e3, 6b79f40c3d, dfa50793d8, 2a7c35dd46, f2ffd10bb2,
8da531359e, e2b944cf43, 3ec05685f7, e103fd46ca, 877bfd69d1, e0ef49f205,
f68cd00fe3, ca70d215cf, d0390a0c92, dd2535c38a, b78d180170, 26dca1661e,
f853f8594d, 8307aa73fb, d0da491e1e, 6e249060cf, fbcd7b5f83, 9ac0a67581,
befdc8f3b6, bb198c95e2, c1195541b7, 26844eb57b, a7732b672e, 677b3ce82f,
fabfe17d5e, 82696d5d5d, 9eea4fb835, 484aaeb204, 58c3c7ae38, c8650f7ecd,
14e7543a5a, bf6705f584
README.md
@@ -71,6 +71,7 @@ which means you can modify it, redistribute it or use it however you like.
    --download-archive FILE    Download only videos not listed in the archive
                               file. Record the IDs of all downloaded videos in
                               it.
    --include-ads              Download advertisements as well (experimental)

## Download Options:
    -r, --rate-limit LIMIT     maximum download rate in bytes per second (e.g.
test/test_download.py
@@ -148,7 +148,7 @@ def generator(test_case):
                    for key, value in info_dict.items()
                    if value and key in ('title', 'description', 'uploader', 'upload_date', 'uploader_id', 'location'))
                if not all(key in tc.get('info_dict', {}).keys() for key in test_info_dict.keys()):
                    sys.stderr.write(u'\n"info_dict": ' + json.dumps(test_info_dict, ensure_ascii=False, indent=2) + u'\n')
                    sys.stderr.write(u'\n"info_dict": ' + json.dumps(test_info_dict, ensure_ascii=False, indent=4) + u'\n')

                # Check for the presence of mandatory fields
                for key in ('id', 'url', 'title', 'ext'):
test/test_playlists.py
@@ -32,6 +32,7 @@ from youtube_dl.extractor import (
    IviCompilationIE,
    ImdbListIE,
    KhanAcademyIE,
    EveryonesMixtapeIE,
)

@@ -210,6 +211,15 @@ class TestPlaylists(unittest.TestCase):
        self.assertEqual(result['description'], 'How have humans protected their secret messages through history? What has changed today?')
        self.assertTrue(len(result['entries']) >= 3)

    def test_EveryonesMixtape(self):
        dl = FakeYDL()
        ie = EveryonesMixtapeIE(dl)
        result = ie.extract('http://everyonesmixtape.com/#/mix/m7m0jJAbMQi')
        self.assertIsPlaylist(result)
        self.assertEqual(result['id'], 'm7m0jJAbMQi')
        self.assertEqual(result['title'], 'Driving')
        self.assertEqual(len(result['entries']), 24)


if __name__ == '__main__':
    unittest.main()
test/test_subtitles.py
@@ -167,13 +167,13 @@ class TestTedSubtitles(BaseTestSubtitles):
    def test_subtitles(self):
        self.DL.params['writesubtitles'] = True
        subtitles = self.getSubtitles()
        self.assertEqual(md5(subtitles['en']), '2154f31ff9b9f89a0aa671537559c21d')
        self.assertEqual(md5(subtitles['en']), '4262c1665ff928a2dada178f62cb8d14')

    def test_subtitles_lang(self):
        self.DL.params['writesubtitles'] = True
        self.DL.params['subtitleslangs'] = ['fr']
        subtitles = self.getSubtitles()
        self.assertEqual(md5(subtitles['fr']), '7616cbc6df20ec2c1204083c83871cf6')
        self.assertEqual(md5(subtitles['fr']), '66a63f7f42c97a50f8c0e90bc7797bb5')

    def test_allsubtitles(self):
        self.DL.params['writesubtitles'] = True
test/test_utils.py
@@ -16,6 +16,7 @@ from youtube_dl.utils import (
    DateRange,
    encodeFilename,
    find_xpath_attr,
    fix_xml_ampersands,
    get_meta_content,
    orderedSet,
    parse_duration,
@@ -200,5 +201,18 @@ class TestUtil(unittest.TestCase):
        self.assertEqual(parse_duration('9:12:43'), 33163)
        self.assertEqual(parse_duration('x:y'), None)

    def test_fix_xml_ampersands(self):
        self.assertEqual(
            fix_xml_ampersands('"&x=y&z=a'), '"&amp;x=y&amp;z=a')
        self.assertEqual(
            fix_xml_ampersands('"&amp;x=y&wrong;&z=a'),
            '"&amp;x=y&wrong;&amp;z=a')
        self.assertEqual(
            fix_xml_ampersands('&amp;&apos;&gt;&lt;&quot;'),
            '&amp;&apos;&gt;&lt;&quot;')
        self.assertEqual(
            fix_xml_ampersands('&#1234;&#x1abC;'), '&#1234;&#x1abC;')
        self.assertEqual(fix_xml_ampersands('&#&#'), '&amp;#&amp;#')

if __name__ == '__main__':
    unittest.main()
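As a quick illustration of the helper these assertions cover (a sketch, not part of the diff; it assumes youtube_dl is importable), fix_xml_ampersands escapes bare '&' characters while leaving existing entities and numeric character references untouched:

from youtube_dl.utils import fix_xml_ampersands

# Bare ampersands get escaped...
print(fix_xml_ampersands('a=1&b=2'))      # a=1&amp;b=2
# ...while well-formed entities and numeric references are left alone.
print(fix_xml_ampersands('a=1&amp;b=2'))  # a=1&amp;b=2
print(fix_xml_ampersands('&#1234;'))      # &#1234;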
test/test_youtube_signature.py
@@ -27,12 +27,6 @@ _TESTS = [
        85,
        u'3456789a0cdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRS[UVWXYZ!"#$%&\'()*+,-./:;<=>?@',
    ),
    (
        u'https://s.ytimg.com/yts/swfbin/watch_as3-vflg5GhxU.swf',
        u'swf',
        82,
        u':/.-,+*)=\'&%$#"!ZYX0VUTSRQPONMLKJIHGFEDCBAzyxw>utsrqponmlkjihgfedcba987654321'
    ),
]
youtube_dl/YoutubeDL.py
@@ -151,6 +151,7 @@ class YoutubeDL(object):
    bidi_workaround:   Work around buggy terminals without bidirectional text
                       support, using fridibi
    debug_printtraffic:Print out sent and received HTTP traffic
    include_ads:       Download ads as well

    The following parameters are not used by YoutubeDL itself, they are used by
    the FileDownloader:
@@ -908,6 +909,14 @@ class YoutubeDL(object):
                    if info_dict.get('requested_formats') is not None:
                        downloaded = []
                        success = True
                        merger = FFmpegMergerPP(self)
                        if not merger._get_executable():
                            postprocessors = []
                            self.report_warning('You have requested multiple '
                                'formats but ffmpeg or avconv are not installed.'
                                ' The formats won\'t be merged')
                        else:
                            postprocessors = [merger]
                        for f in info_dict['requested_formats']:
                            new_info = dict(info_dict)
                            new_info.update(f)
@@ -916,7 +925,7 @@ class YoutubeDL(object):
                            downloaded.append(fname)
                            partial_success = dl(fname, new_info)
                            success = success and partial_success
                        info_dict['__postprocessors'] = [FFmpegMergerPP(self)]
                        info_dict['__postprocessors'] = postprocessors
                        info_dict['__files_to_merge'] = downloaded
                    else:
                        # Just a single file
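For context on the branch being patched (a sketch, assuming the '+' format-selection syntax of this era, e.g. '137+140' for a YouTube DASH video/audio pair): requesting two formats at once is what populates info_dict['requested_formats'], and with this change the merge step is skipped with a warning when neither ffmpeg nor avconv is installed, instead of failing later at post-processing.

from youtube_dl import YoutubeDL

# Hypothetical invocation; the format string and URL are placeholders.
with YoutubeDL({'format': '137+140'}) as ydl:
    ydl.download(['https://www.youtube.com/watch?v=EXAMPLE_ID'])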
youtube_dl/__init__.py
@@ -39,6 +39,7 @@ __authors__  = (
    'Sergey M.',
    'Michael Orlitzky',
    'Chris Gahan',
    'Saimadhav Heblikar',
)

__license__ = 'Public Domain'
@@ -237,7 +238,10 @@ def parseOpts(overrideArguments=None):
    selection.add_option('--download-archive', metavar='FILE',
                         dest='download_archive',
                         help='Download only videos not listed in the archive file. Record the IDs of all downloaded videos in it.')

    selection.add_option(
        '--include-ads', dest='include_ads',
        action='store_true',
        help='Download advertisements as well (experimental)')

    authentication.add_option('-u', '--username',
            dest='username', metavar='USERNAME', help='account username')
@@ -715,6 +719,7 @@ def _real_main(argv=None):
        'bidi_workaround': opts.bidi_workaround,
        'debug_printtraffic': opts.debug_printtraffic,
        'prefer_ffmpeg': opts.prefer_ffmpeg,
        'include_ads': opts.include_ads,
    }

    with YoutubeDL(ydl_opts) as ydl:
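The same pair of options is reachable from the embedding API as well; a minimal sketch (the URL is a placeholder, and 'include_ads' is the experimental flag wired up above):

from youtube_dl import YoutubeDL

ydl_opts = {
    'download_archive': 'archive.txt',  # skip anything whose ID is already recorded here
    'include_ads': True,                # experimental, mirrors --include-ads
}
with YoutubeDL(ydl_opts) as ydl:
    ydl.download(['http://www.example.com/some-video'])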
youtube_dl/extractor/__init__.py
@@ -47,11 +47,13 @@ from .depositfiles import DepositFilesIE
from .dotsub import DotsubIE
from .dreisat import DreiSatIE
from .defense import DefenseGouvFrIE
from .dropbox import DropboxIE
from .ebaumsworld import EbaumsWorldIE
from .ehow import EHowIE
from .eighttracks import EightTracksIE
from .eitb import EitbIE
from .escapist import EscapistIE
from .everyonesmixtape import EveryonesMixtapeIE
from .exfm import ExfmIE
from .extremetube import ExtremeTubeIE
from .facebook import FacebookIE
@@ -61,6 +63,7 @@ from .fktv import (
    FKTVPosteckeIE,
)
from .flickr import FlickrIE
from .franceinter import FranceInterIE
from .francetv import (
    PluzzIE,
    FranceTvInfoIE,
@@ -149,6 +152,7 @@ from .rottentomatoes import RottenTomatoesIE
from .roxwel import RoxwelIE
from .rtlnow import RTLnowIE
from .rutube import RutubeIE
from .servingsys import ServingSysIE
from .sina import SinaIE
from .slashdot import SlashdotIE
from .slideshare import SlideshareIE
@@ -9,9 +9,11 @@ from .common import InfoExtractor
 | 
			
		||||
from ..utils import (
 | 
			
		||||
    compat_urllib_parse,
 | 
			
		||||
    find_xpath_attr,
 | 
			
		||||
    fix_xml_ampersands,
 | 
			
		||||
    compat_urlparse,
 | 
			
		||||
    compat_str,
 | 
			
		||||
    compat_urllib_request,
 | 
			
		||||
    compat_parse_qs,
 | 
			
		||||
 | 
			
		||||
    ExtractorError,
 | 
			
		||||
    unsmuggle_url,
 | 
			
		||||
@@ -83,17 +85,30 @@ class BrightcoveIE(InfoExtractor):
 | 
			
		||||
                            lambda m: m.group(1) + '/>', object_str)
 | 
			
		||||
        # Fix up some stupid XML, see https://github.com/rg3/youtube-dl/issues/1608
 | 
			
		||||
        object_str = object_str.replace('<--', '<!--')
 | 
			
		||||
        object_str = fix_xml_ampersands(object_str)
 | 
			
		||||
 | 
			
		||||
        object_doc = xml.etree.ElementTree.fromstring(object_str)
 | 
			
		||||
        assert 'BrightcoveExperience' in object_doc.attrib['class']
 | 
			
		||||
        params = {
 | 
			
		||||
            'playerID': find_xpath_attr(object_doc, './param', 'name', 'playerID').attrib['value'],
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
        fv_el = find_xpath_attr(object_doc, './param', 'name', 'flashVars')
 | 
			
		||||
        flashvars = dict(
 | 
			
		||||
            (k, v[0])
 | 
			
		||||
            for k, v in compat_parse_qs(fv_el.attrib['value']).items())
 | 
			
		||||
 | 
			
		||||
        def find_param(name):
 | 
			
		||||
            if name in flashvars:
 | 
			
		||||
                return flashvars[name]
 | 
			
		||||
            node = find_xpath_attr(object_doc, './param', 'name', name)
 | 
			
		||||
            if node is not None:
 | 
			
		||||
                return node.attrib['value']
 | 
			
		||||
            return None
 | 
			
		||||
 | 
			
		||||
        params = {}
 | 
			
		||||
 | 
			
		||||
        playerID = find_param('playerID')
 | 
			
		||||
        if playerID is None:
 | 
			
		||||
            raise ExtractorError('Cannot find player ID')
 | 
			
		||||
        params['playerID'] = playerID
 | 
			
		||||
 | 
			
		||||
        playerKey = find_param('playerKey')
 | 
			
		||||
        # Not all pages define this value
 | 
			
		||||
        if playerKey is not None:
 | 
			
		||||
@@ -114,8 +129,12 @@ class BrightcoveIE(InfoExtractor):
 | 
			
		||||
        if it can't be found
 | 
			
		||||
        """
 | 
			
		||||
        m_brightcove = re.search(
 | 
			
		||||
            r'<object[^>]+?class=([\'"])[^>]*?BrightcoveExperience.*?\1.+?</object>',
 | 
			
		||||
            webpage, re.DOTALL)
 | 
			
		||||
            r'''(?sx)<object
 | 
			
		||||
            (?:
 | 
			
		||||
                :[^>]+?class=([\'"])[^>]*?BrightcoveExperience.*?\1 |
 | 
			
		||||
                [^>]*?>\s*<param\s+name="movie"\s+value="https?://[^/]*brightcove\.com/
 | 
			
		||||
            ).+?</object>''',
 | 
			
		||||
            webpage)
 | 
			
		||||
        if m_brightcove is not None:
 | 
			
		||||
            return cls._build_brighcove_url(m_brightcove.group())
 | 
			
		||||
        else:
 | 
			
		||||
@@ -156,6 +175,7 @@ class BrightcoveIE(InfoExtractor):
 | 
			
		||||
        info = self._search_regex(r'var experienceJSON = ({.*?});', webpage, 'json')
 | 
			
		||||
        info = json.loads(info)['data']
 | 
			
		||||
        video_info = info['programmedContent']['videoPlayer']['mediaDTO']
 | 
			
		||||
        video_info['_youtubedl_adServerURL'] = info.get('adServerURL')
 | 
			
		||||
 | 
			
		||||
        return self._extract_video_info(video_info)
 | 
			
		||||
 | 
			
		||||
@@ -193,6 +213,23 @@ class BrightcoveIE(InfoExtractor):
 | 
			
		||||
            info.update({
 | 
			
		||||
                'url': video_info['FLVFullLengthURL'],
 | 
			
		||||
            })
 | 
			
		||||
        else:
 | 
			
		||||
 | 
			
		||||
        if self._downloader.params.get('include_ads', False):
 | 
			
		||||
            adServerURL = video_info.get('_youtubedl_adServerURL')
 | 
			
		||||
            if adServerURL:
 | 
			
		||||
                ad_info = {
 | 
			
		||||
                    '_type': 'url',
 | 
			
		||||
                    'url': adServerURL,
 | 
			
		||||
                }
 | 
			
		||||
                if 'url' in info:
 | 
			
		||||
                    return {
 | 
			
		||||
                        '_type': 'playlist',
 | 
			
		||||
                        'title': info['title'],
 | 
			
		||||
                        'entries': [ad_info, info],
 | 
			
		||||
                    }
 | 
			
		||||
                else:
 | 
			
		||||
                    return ad_info
 | 
			
		||||
 | 
			
		||||
        if 'url' not in info:
 | 
			
		||||
            raise ExtractorError('Unable to extract video url for %s' % info['id'])
 | 
			
		||||
        return info
 | 
			
		||||
 
 | 
			
		||||
youtube_dl/extractor/clipsyndicate.py
@@ -3,7 +3,7 @@ import re
from .common import InfoExtractor
from ..utils import (
    find_xpath_attr,
    fix_xml_all_ampersand,
    fix_xml_ampersands
)

@@ -33,7 +33,7 @@ class ClipsyndicateIE(InfoExtractor):
        pdoc = self._download_xml(
            'http://eplayer.clipsyndicate.com/osmf/playlist?%s' % flvars,
            video_id, u'Downloading video info',
            transform_source=fix_xml_all_ampersand)
            transform_source=fix_xml_ampersands)

        track_doc = pdoc.find('trackList/track')
        def find_param(name):
youtube_dl/extractor/cnn.py
@@ -25,12 +25,13 @@ class CNNIE(InfoExtractor):
        },
    },
    {
        u"url": u"http://edition.cnn.com/video/?/video/us/2013/08/21/sot-student-gives-epic-speech.georgia-institute-of-technology&utm_source=feedburner&utm_medium=feed&utm_campaign=Feed%3A+rss%2Fcnn_topstories+%28RSS%3A+Top+Stories%29",
        u"file": u"us_2013_08_21_sot-student-gives-epic-speech.georgia-institute-of-technology.mp4",
        u"md5": u"b5cc60c60a3477d185af8f19a2a26f4e",
        u"info_dict": {
            u"title": "Student's epic speech stuns new freshmen",
            u"description": "A Georgia Tech student welcomes the incoming freshmen with an epic speech backed by music from \"2001: A Space Odyssey.\""
        "url": "http://edition.cnn.com/video/?/video/us/2013/08/21/sot-student-gives-epic-speech.georgia-institute-of-technology&utm_source=feedburner&utm_medium=feed&utm_campaign=Feed%3A+rss%2Fcnn_topstories+%28RSS%3A+Top+Stories%29",
        "file": "us_2013_08_21_sot-student-gives-epic-speech.georgia-institute-of-technology.mp4",
        "md5": "b5cc60c60a3477d185af8f19a2a26f4e",
        "info_dict": {
            "title": "Student's epic speech stuns new freshmen",
            "description": "A Georgia Tech student welcomes the incoming freshmen with an epic speech backed by music from \"2001: A Space Odyssey.\"",
            "upload_date": "20130821",
        }
    }]
youtube_dl/extractor/common.py
@@ -1,4 +1,5 @@
import base64
import hashlib
import json
import os
import re
@@ -219,6 +220,8 @@ class InfoExtractor(object):
                          webpage_bytes[:1024])
            if m:
                encoding = m.group(1).decode('ascii')
            elif webpage_bytes.startswith(b'\xff\xfe'):
                encoding = 'utf-16'
            else:
                encoding = 'utf-8'
        if self._downloader.params.get('dump_intermediate_pages', False):
@@ -234,6 +237,9 @@ class InfoExtractor(object):
                url = url_or_request.get_full_url()
            except AttributeError:
                url = url_or_request
            if len(url) > 200:
                h = u'___' + hashlib.md5(url).hexdigest()
                url = url[:200 - len(h)] + h
            raw_filename = ('%s_%s.dump' % (video_id, url))
            filename = sanitize_filename(raw_filename, restricted=True)
            self.to_screen(u'Saving request to ' + filename)
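The last hunk caps the length of saved page-dump filenames; the idea in isolation (a standalone sketch, with the explicit .encode() that Python 3 needs where the Python 2 code above hashes the str directly):

import hashlib

url = 'http://example.com/?' + 'x' * 300
if len(url) > 200:
    # keep the first part of the URL and append an md5 of the full thing
    h = '___' + hashlib.md5(url.encode('utf-8')).hexdigest()
    url = url[:200 - len(h)] + h
print(len(url))  # 200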
@@ -1,4 +1,5 @@
 | 
			
		||||
# coding: utf-8
 | 
			
		||||
from __future__ import unicode_literals
 | 
			
		||||
 | 
			
		||||
import re
 | 
			
		||||
import json
 | 
			
		||||
@@ -20,30 +21,31 @@ class CondeNastIE(InfoExtractor):
 | 
			
		||||
 | 
			
		||||
    # The keys are the supported sites and the values are the name to be shown
 | 
			
		||||
    # to the user and in the extractor description.
 | 
			
		||||
    _SITES = {'wired': u'WIRED',
 | 
			
		||||
              'gq': u'GQ',
 | 
			
		||||
              'vogue': u'Vogue',
 | 
			
		||||
              'glamour': u'Glamour',
 | 
			
		||||
              'wmagazine': u'W Magazine',
 | 
			
		||||
              'vanityfair': u'Vanity Fair',
 | 
			
		||||
              }
 | 
			
		||||
    _SITES = {
 | 
			
		||||
        'wired': 'WIRED',
 | 
			
		||||
        'gq': 'GQ',
 | 
			
		||||
        'vogue': 'Vogue',
 | 
			
		||||
        'glamour': 'Glamour',
 | 
			
		||||
        'wmagazine': 'W Magazine',
 | 
			
		||||
        'vanityfair': 'Vanity Fair',
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    _VALID_URL = r'http://(video|www).(?P<site>%s).com/(?P<type>watch|series|video)/(?P<id>.+)' % '|'.join(_SITES.keys())
 | 
			
		||||
    IE_DESC = u'Condé Nast media group: %s' % ', '.join(sorted(_SITES.values()))
 | 
			
		||||
    IE_DESC = 'Condé Nast media group: %s' % ', '.join(sorted(_SITES.values()))
 | 
			
		||||
 | 
			
		||||
    _TEST = {
 | 
			
		||||
        u'url': u'http://video.wired.com/watch/3d-printed-speakers-lit-with-led',
 | 
			
		||||
        u'file': u'5171b343c2b4c00dd0c1ccb3.mp4',
 | 
			
		||||
        u'md5': u'1921f713ed48aabd715691f774c451f7',
 | 
			
		||||
        u'info_dict': {
 | 
			
		||||
            u'title': u'3D Printed Speakers Lit With LED',
 | 
			
		||||
            u'description': u'Check out these beautiful 3D printed LED speakers.  You can\'t actually buy them, but LumiGeek is working on a board that will let you make you\'re own.',
 | 
			
		||||
        'url': 'http://video.wired.com/watch/3d-printed-speakers-lit-with-led',
 | 
			
		||||
        'file': '5171b343c2b4c00dd0c1ccb3.mp4',
 | 
			
		||||
        'md5': '1921f713ed48aabd715691f774c451f7',
 | 
			
		||||
        'info_dict': {
 | 
			
		||||
            'title': '3D Printed Speakers Lit With LED',
 | 
			
		||||
            'description': 'Check out these beautiful 3D printed LED speakers.  You can\'t actually buy them, but LumiGeek is working on a board that will let you make you\'re own.',
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    def _extract_series(self, url, webpage):
 | 
			
		||||
        title = self._html_search_regex(r'<div class="cne-series-info">.*?<h1>(.+?)</h1>',
 | 
			
		||||
                                        webpage, u'series title', flags=re.DOTALL)
 | 
			
		||||
                                        webpage, 'series title', flags=re.DOTALL)
 | 
			
		||||
        url_object = compat_urllib_parse_urlparse(url)
 | 
			
		||||
        base_url = '%s://%s' % (url_object.scheme, url_object.netloc)
 | 
			
		||||
        m_paths = re.finditer(r'<p class="cne-thumb-title">.*?<a href="(/watch/.+?)["\?]',
 | 
			
		||||
@@ -57,39 +59,41 @@ class CondeNastIE(InfoExtractor):
 | 
			
		||||
        description = self._html_search_regex([r'<div class="cne-video-description">(.+?)</div>',
 | 
			
		||||
                                               r'<div class="video-post-content">(.+?)</div>',
 | 
			
		||||
                                               ],
 | 
			
		||||
                                              webpage, u'description',
 | 
			
		||||
                                              webpage, 'description',
 | 
			
		||||
                                              fatal=False, flags=re.DOTALL)
 | 
			
		||||
        params = self._search_regex(r'var params = {(.+?)}[;,]', webpage,
 | 
			
		||||
                                    u'player params', flags=re.DOTALL)
 | 
			
		||||
        video_id = self._search_regex(r'videoId: [\'"](.+?)[\'"]', params, u'video id')
 | 
			
		||||
        player_id = self._search_regex(r'playerId: [\'"](.+?)[\'"]', params, u'player id')
 | 
			
		||||
        target = self._search_regex(r'target: [\'"](.+?)[\'"]', params, u'target')
 | 
			
		||||
                                    'player params', flags=re.DOTALL)
 | 
			
		||||
        video_id = self._search_regex(r'videoId: [\'"](.+?)[\'"]', params, 'video id')
 | 
			
		||||
        player_id = self._search_regex(r'playerId: [\'"](.+?)[\'"]', params, 'player id')
 | 
			
		||||
        target = self._search_regex(r'target: [\'"](.+?)[\'"]', params, 'target')
 | 
			
		||||
        data = compat_urllib_parse.urlencode({'videoId': video_id,
 | 
			
		||||
                                              'playerId': player_id,
 | 
			
		||||
                                              'target': target,
 | 
			
		||||
                                              })
 | 
			
		||||
        base_info_url = self._search_regex(r'url = [\'"](.+?)[\'"][,;]',
 | 
			
		||||
                                           webpage, u'base info url',
 | 
			
		||||
                                           webpage, 'base info url',
 | 
			
		||||
                                           default='http://player.cnevids.com/player/loader.js?')
 | 
			
		||||
        info_url = base_info_url + data
 | 
			
		||||
        info_page = self._download_webpage(info_url, video_id,
 | 
			
		||||
                                           u'Downloading video info')
 | 
			
		||||
        video_info = self._search_regex(r'var video = ({.+?});', info_page, u'video info')
 | 
			
		||||
                                           'Downloading video info')
 | 
			
		||||
        video_info = self._search_regex(r'var video = ({.+?});', info_page, 'video info')
 | 
			
		||||
        video_info = json.loads(video_info)
 | 
			
		||||
 | 
			
		||||
        def _formats_sort_key(f):
 | 
			
		||||
            type_ord = 1 if f['type'] == 'video/mp4' else 0
 | 
			
		||||
            quality_ord = 1 if f['quality'] == 'high' else 0
 | 
			
		||||
            return (quality_ord, type_ord)
 | 
			
		||||
        best_format = sorted(video_info['sources'][0], key=_formats_sort_key)[-1]
 | 
			
		||||
        formats = [{
 | 
			
		||||
            'format_id': '%s-%s' % (fdata['type'].split('/')[-1], fdata['quality']),
 | 
			
		||||
            'url': fdata['src'],
 | 
			
		||||
            'ext': fdata['type'].split('/')[-1],
 | 
			
		||||
            'quality': 1 if fdata['quality'] == 'high' else 0,
 | 
			
		||||
        } for fdata in video_info['sources'][0]]
 | 
			
		||||
        self._sort_formats(formats)
 | 
			
		||||
 | 
			
		||||
        return {'id': video_id,
 | 
			
		||||
                'url': best_format['src'],
 | 
			
		||||
                'ext': best_format['type'].split('/')[-1],
 | 
			
		||||
                'title': video_info['title'],
 | 
			
		||||
                'thumbnail': video_info['poster_frame'],
 | 
			
		||||
                'description': description,
 | 
			
		||||
                }
 | 
			
		||||
        return {
 | 
			
		||||
            'id': video_id,
 | 
			
		||||
            'formats': formats,
 | 
			
		||||
            'title': video_info['title'],
 | 
			
		||||
            'thumbnail': video_info['poster_frame'],
 | 
			
		||||
            'description': description,
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
    def _real_extract(self, url):
 | 
			
		||||
        mobj = re.match(self._VALID_URL, url)
 | 
			
		||||
 
 | 
			
		||||
							
								
								
									
youtube_dl/extractor/dropbox.py (new file, 31 lines)
@@ -0,0 +1,31 @@
# coding: utf-8
from __future__ import unicode_literals

import os.path
import re

from .common import InfoExtractor


class DropboxIE(InfoExtractor):
    _VALID_URL = r'https?://(?:www\.)?dropbox[.]com/s/(?P<id>[a-zA-Z0-9]{15})/(?P<title>[^?#]*)'
    _TEST = {
        'url': 'https://www.dropbox.com/s/mcnzehi9wo55th4/20131219_085616.mp4',
        'file': 'mcnzehi9wo55th4.mp4',
        'md5': 'f6d65b1b326e82fd7ab7720bea3dacae',
        'info_dict': {
            'title': '20131219_085616'
        }
    }

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')
        title = os.path.splitext(mobj.group('title'))[0]
        video_url = url + '?dl=1'

        return {
            'id': video_id,
            'title': title,
            'url': video_url,
        }
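A quick way to exercise the new extractor end-to-end (a sketch; it reuses the URL from the _TEST block above and only builds metadata, without downloading):

import youtube_dl

with youtube_dl.YoutubeDL({'quiet': True}) as ydl:
    info = ydl.extract_info(
        'https://www.dropbox.com/s/mcnzehi9wo55th4/20131219_085616.mp4',
        download=False)
    print(info['id'], info['title'])  # mcnzehi9wo55th4 20131219_085616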
							
								
								
									
youtube_dl/extractor/everyonesmixtape.py (new file, 69 lines)
@@ -0,0 +1,69 @@
 | 
			
		||||
from __future__ import unicode_literals
 | 
			
		||||
 | 
			
		||||
import re
 | 
			
		||||
 | 
			
		||||
from .common import InfoExtractor
 | 
			
		||||
from ..utils import (
 | 
			
		||||
    compat_urllib_request,
 | 
			
		||||
    ExtractorError,
 | 
			
		||||
)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class EveryonesMixtapeIE(InfoExtractor):
 | 
			
		||||
    _VALID_URL = r'https?://(?:www\.)?everyonesmixtape\.com/#/mix/(?P<id>[0-9a-zA-Z]+)(?:/(?P<songnr>[0-9]))?$'
 | 
			
		||||
 | 
			
		||||
    _TEST = {
 | 
			
		||||
        'url': 'http://everyonesmixtape.com/#/mix/m7m0jJAbMQi/5',
 | 
			
		||||
        'file': '5bfseWNmlds.mp4',
 | 
			
		||||
        "info_dict": {
 | 
			
		||||
            "title": "Passion Pit - \"Sleepyhead\" (Official Music Video)",
 | 
			
		||||
            "uploader": "FKR.TV",
 | 
			
		||||
            "uploader_id": "frenchkissrecords",
 | 
			
		||||
            "description": "Music video for \"Sleepyhead\" from Passion Pit's debut EP Chunk Of Change.\nBuy on iTunes: https://itunes.apple.com/us/album/chunk-of-change-ep/id300087641\n\nDirected by The Wilderness.\n\nhttp://www.passionpitmusic.com\nhttp://www.frenchkissrecords.com",
 | 
			
		||||
            "upload_date": "20081015"
 | 
			
		||||
        },
 | 
			
		||||
        'params': {
 | 
			
		||||
            'skip_download': True,  # This is simply YouTube
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    def _real_extract(self, url):
 | 
			
		||||
        mobj = re.match(self._VALID_URL, url)
 | 
			
		||||
        playlist_id = mobj.group('id')
 | 
			
		||||
 | 
			
		||||
        pllist_url = 'http://everyonesmixtape.com/mixtape.php?a=getMixes&u=-1&linked=%s&explore=' % playlist_id
 | 
			
		||||
        pllist_req = compat_urllib_request.Request(pllist_url)
 | 
			
		||||
        pllist_req.add_header('X-Requested-With', 'XMLHttpRequest')
 | 
			
		||||
 | 
			
		||||
        playlist_list = self._download_json(
 | 
			
		||||
            pllist_req, playlist_id, note='Downloading playlist metadata')
 | 
			
		||||
        try:
 | 
			
		||||
            playlist_no = next(playlist['id']
 | 
			
		||||
                               for playlist in playlist_list
 | 
			
		||||
                               if playlist['code'] == playlist_id)
 | 
			
		||||
        except StopIteration:
 | 
			
		||||
            raise ExtractorError('Playlist id not found')
 | 
			
		||||
 | 
			
		||||
        pl_url = 'http://everyonesmixtape.com/mixtape.php?a=getMix&id=%s&userId=null&code=' % playlist_no
 | 
			
		||||
        pl_req = compat_urllib_request.Request(pl_url)
 | 
			
		||||
        pl_req.add_header('X-Requested-With', 'XMLHttpRequest')
 | 
			
		||||
        playlist = self._download_json(
 | 
			
		||||
            pl_req, playlist_id, note='Downloading playlist info')
 | 
			
		||||
 | 
			
		||||
        entries = [{
 | 
			
		||||
            '_type': 'url',
 | 
			
		||||
            'url': t['url'],
 | 
			
		||||
            'title': t['title'],
 | 
			
		||||
        } for t in playlist['tracks']]
 | 
			
		||||
 | 
			
		||||
        if mobj.group('songnr'):
 | 
			
		||||
            songnr = int(mobj.group('songnr')) - 1
 | 
			
		||||
            return entries[songnr]
 | 
			
		||||
 | 
			
		||||
        playlist_title = playlist['mixData']['name']
 | 
			
		||||
        return {
 | 
			
		||||
            '_type': 'playlist',
 | 
			
		||||
            'id': playlist_id,
 | 
			
		||||
            'title': playlist_title,
 | 
			
		||||
            'entries': entries,
 | 
			
		||||
        }
 | 
			
		||||
@@ -1,3 +1,5 @@
 | 
			
		||||
from __future__ import unicode_literals
 | 
			
		||||
 | 
			
		||||
import re
 | 
			
		||||
 | 
			
		||||
from .common import InfoExtractor
 | 
			
		||||
@@ -11,13 +13,13 @@ class FlickrIE(InfoExtractor):
 | 
			
		||||
    """Information Extractor for Flickr videos"""
 | 
			
		||||
    _VALID_URL = r'(?:https?://)?(?:www\.|secure\.)?flickr\.com/photos/(?P<uploader_id>[\w\-_@]+)/(?P<id>\d+).*'
 | 
			
		||||
    _TEST = {
 | 
			
		||||
        u'url': u'http://www.flickr.com/photos/forestwander-nature-pictures/5645318632/in/photostream/',
 | 
			
		||||
        u'file': u'5645318632.mp4',
 | 
			
		||||
        u'md5': u'6fdc01adbc89d72fc9c4f15b4a4ba87b',
 | 
			
		||||
        u'info_dict': {
 | 
			
		||||
            u"description": u"Waterfalls in the Springtime at Dark Hollow Waterfalls. These are located just off of Skyline Drive in Virginia. They are only about 6/10 of a mile hike but it is a pretty steep hill and a good climb back up.", 
 | 
			
		||||
            u"uploader_id": u"forestwander-nature-pictures", 
 | 
			
		||||
            u"title": u"Dark Hollow Waterfalls"
 | 
			
		||||
        'url': 'http://www.flickr.com/photos/forestwander-nature-pictures/5645318632/in/photostream/',
 | 
			
		||||
        'file': '5645318632.mp4',
 | 
			
		||||
        'md5': '6fdc01adbc89d72fc9c4f15b4a4ba87b',
 | 
			
		||||
        'info_dict': {
 | 
			
		||||
            "description": "Waterfalls in the Springtime at Dark Hollow Waterfalls. These are located just off of Skyline Drive in Virginia. They are only about 6/10 of a mile hike but it is a pretty steep hill and a good climb back up.", 
 | 
			
		||||
            "uploader_id": "forestwander-nature-pictures", 
 | 
			
		||||
            "title": "Dark Hollow Waterfalls"
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
@@ -29,13 +31,13 @@ class FlickrIE(InfoExtractor):
 | 
			
		||||
        webpage_url = 'http://www.flickr.com/photos/' + video_uploader_id + '/' + video_id
 | 
			
		||||
        webpage = self._download_webpage(webpage_url, video_id)
 | 
			
		||||
 | 
			
		||||
        secret = self._search_regex(r"photo_secret: '(\w+)'", webpage, u'secret')
 | 
			
		||||
        secret = self._search_regex(r"photo_secret: '(\w+)'", webpage, 'secret')
 | 
			
		||||
 | 
			
		||||
        first_url = 'https://secure.flickr.com/apps/video/video_mtl_xml.gne?v=x&photo_id=' + video_id + '&secret=' + secret + '&bitrate=700&target=_self'
 | 
			
		||||
        first_xml = self._download_webpage(first_url, video_id, 'Downloading first data webpage')
 | 
			
		||||
 | 
			
		||||
        node_id = self._html_search_regex(r'<Item id="id">(\d+-\d+)</Item>',
 | 
			
		||||
            first_xml, u'node_id')
 | 
			
		||||
            first_xml, 'node_id')
 | 
			
		||||
 | 
			
		||||
        second_url = 'https://secure.flickr.com/video_playlist.gne?node_id=' + node_id + '&tech=flash&mode=playlist&bitrate=700&secret=' + secret + '&rd=video.yahoo.com&noad=1'
 | 
			
		||||
        second_xml = self._download_webpage(second_url, video_id, 'Downloading second data webpage')
 | 
			
		||||
@@ -44,7 +46,7 @@ class FlickrIE(InfoExtractor):
 | 
			
		||||
 | 
			
		||||
        mobj = re.search(r'<STREAM APP="(.+?)" FULLPATH="(.+?)"', second_xml)
 | 
			
		||||
        if mobj is None:
 | 
			
		||||
            raise ExtractorError(u'Unable to extract video url')
 | 
			
		||||
            raise ExtractorError('Unable to extract video url')
 | 
			
		||||
        video_url = mobj.group(1) + unescapeHTML(mobj.group(2))
 | 
			
		||||
 | 
			
		||||
        return [{
 | 
			
		||||
 
 | 
			
		||||
							
								
								
									
youtube_dl/extractor/franceinter.py (new file, 38 lines)
@@ -0,0 +1,38 @@
 | 
			
		||||
# coding: utf-8
 | 
			
		||||
from __future__ import unicode_literals
 | 
			
		||||
 | 
			
		||||
import re
 | 
			
		||||
 | 
			
		||||
from .common import InfoExtractor
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class FranceInterIE(InfoExtractor):
 | 
			
		||||
    _VALID_URL = r'http://(?:www\.)?franceinter\.fr/player/reecouter\?play=(?P<id>[0-9]{6})'
 | 
			
		||||
    _TEST = {
 | 
			
		||||
        'url': 'http://www.franceinter.fr/player/reecouter?play=793962',
 | 
			
		||||
        'file': '793962.mp3',
 | 
			
		||||
        'md5': '4764932e466e6f6c79c317d2e74f6884',
 | 
			
		||||
        "info_dict": {
 | 
			
		||||
            "title": "L’Histoire dans les jeux vidéo",
 | 
			
		||||
        },
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    def _real_extract(self, url):
 | 
			
		||||
        mobj = re.match(self._VALID_URL, url)
 | 
			
		||||
        video_id = mobj.group('id')
 | 
			
		||||
 | 
			
		||||
        webpage = self._download_webpage(url, video_id)
 | 
			
		||||
        title = self._html_search_regex(
 | 
			
		||||
            r'<span class="roll_overflow">(.*?)</span></h1>', webpage, 'title')
 | 
			
		||||
        path = self._search_regex(
 | 
			
		||||
            r'&urlAOD=(.*?)&startTime', webpage, 'video url')
 | 
			
		||||
        video_url = 'http://www.franceinter.fr/' + path
 | 
			
		||||
 | 
			
		||||
        return {
 | 
			
		||||
            'id': video_id,
 | 
			
		||||
            'formats': [{
 | 
			
		||||
                'url': video_url,
 | 
			
		||||
                'vcodec': 'none',
 | 
			
		||||
            }],
 | 
			
		||||
            'title': title,
 | 
			
		||||
        }
 | 
			
		||||
@@ -1,3 +1,5 @@
 | 
			
		||||
from __future__ import unicode_literals
 | 
			
		||||
 | 
			
		||||
import re
 | 
			
		||||
import json
 | 
			
		||||
 | 
			
		||||
@@ -13,12 +15,12 @@ from ..utils import (
 | 
			
		||||
class GameSpotIE(InfoExtractor):
 | 
			
		||||
    _VALID_URL = r'(?:http://)?(?:www\.)?gamespot\.com/.*-(?P<page_id>\d+)/?'
 | 
			
		||||
    _TEST = {
 | 
			
		||||
        u"url": u"http://www.gamespot.com/arma-iii/videos/arma-iii-community-guide-sitrep-i-6410818/",
 | 
			
		||||
        u"file": u"gs-2300-6410818.mp4",
 | 
			
		||||
        u"md5": u"b2a30deaa8654fcccd43713a6b6a4825",
 | 
			
		||||
        u"info_dict": {
 | 
			
		||||
            u"title": u"Arma 3 - Community Guide: SITREP I",
 | 
			
		||||
            u'description': u'Check out this video where some of the basics of Arma 3 is explained.',
 | 
			
		||||
        "url": "http://www.gamespot.com/arma-iii/videos/arma-iii-community-guide-sitrep-i-6410818/",
 | 
			
		||||
        "file": "gs-2300-6410818.mp4",
 | 
			
		||||
        "md5": "b2a30deaa8654fcccd43713a6b6a4825",
 | 
			
		||||
        "info_dict": {
 | 
			
		||||
            "title": "Arma 3 - Community Guide: SITREP I",
 | 
			
		||||
            'description': 'Check out this video where some of the basics of Arma 3 is explained.',
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
 
 | 
			
		||||
youtube_dl/extractor/generic.py
@@ -92,11 +92,12 @@ class GenericIE(InfoExtractor):
        # ooyala video
        {
            'url': 'http://www.rollingstone.com/music/videos/norwegian-dj-cashmere-cat-goes-spartan-on-with-me-premiere-20131219',
            'file': 'BwY2RxaTrTkslxOfcan0UCf0YqyvWysJ.mp4',
            'md5': '5644c6ca5d5782c1d0d350dad9bd840c',
            'info_dict': {
                'id': 'BwY2RxaTrTkslxOfcan0UCf0YqyvWysJ',
                'ext': 'mp4',
                'title': '2cc213299525360.mov', #that's what we get
                'title': '2cc213299525360.mov',  # that's what we get
            },
        },
    ]
@@ -1,21 +1,24 @@
 | 
			
		||||
from __future__ import unicode_literals
 | 
			
		||||
 | 
			
		||||
import re
 | 
			
		||||
import hashlib
 | 
			
		||||
 | 
			
		||||
from .common import InfoExtractor
 | 
			
		||||
from ..utils import determine_ext
 | 
			
		||||
 | 
			
		||||
_md5 = lambda s: hashlib.md5(s.encode('utf-8')).hexdigest()
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class KankanIE(InfoExtractor):
 | 
			
		||||
    _VALID_URL = r'https?://(?:.*?\.)?kankan\.com/.+?/(?P<id>\d+)\.shtml'
 | 
			
		||||
    
 | 
			
		||||
    _TEST = {
 | 
			
		||||
        u'url': u'http://yinyue.kankan.com/vod/48/48863.shtml',
 | 
			
		||||
        u'file': u'48863.flv',
 | 
			
		||||
        u'md5': u'29aca1e47ae68fc28804aca89f29507e',
 | 
			
		||||
        u'info_dict': {
 | 
			
		||||
            u'title': u'Ready To Go',
 | 
			
		||||
        'url': 'http://yinyue.kankan.com/vod/48/48863.shtml',
 | 
			
		||||
        'file': '48863.flv',
 | 
			
		||||
        'md5': '29aca1e47ae68fc28804aca89f29507e',
 | 
			
		||||
        'info_dict': {
 | 
			
		||||
            'title': 'Ready To Go',
 | 
			
		||||
        },
 | 
			
		||||
        'skip': 'Only available from China',
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    def _real_extract(self, url):
 | 
			
		||||
@@ -23,22 +26,23 @@ class KankanIE(InfoExtractor):
 | 
			
		||||
        video_id = mobj.group('id')
 | 
			
		||||
        webpage = self._download_webpage(url, video_id)
 | 
			
		||||
 | 
			
		||||
        title = self._search_regex(r'(?:G_TITLE=|G_MOVIE_TITLE = )[\'"](.+?)[\'"]', webpage, u'video title')
 | 
			
		||||
        title = self._search_regex(r'(?:G_TITLE=|G_MOVIE_TITLE = )[\'"](.+?)[\'"]', webpage, 'video title')
 | 
			
		||||
        surls = re.search(r'surls:\[\'.+?\'\]|lurl:\'.+?\.flv\'', webpage).group(0)
 | 
			
		||||
        gcids = re.findall(r"http://.+?/.+?/(.+?)/", surls)
 | 
			
		||||
        gcid = gcids[-1]
 | 
			
		||||
 | 
			
		||||
        video_info_page = self._download_webpage('http://p2s.cl.kankan.com/getCdnresource_flv?gcid=%s' % gcid,
 | 
			
		||||
                                                 video_id, u'Downloading video url info')
 | 
			
		||||
        ip = self._search_regex(r'ip:"(.+?)"', video_info_page, u'video url ip')
 | 
			
		||||
        path = self._search_regex(r'path:"(.+?)"', video_info_page, u'video url path')
 | 
			
		||||
        param1 = self._search_regex(r'param1:(\d+)', video_info_page, u'param1')
 | 
			
		||||
        param2 = self._search_regex(r'param2:(\d+)', video_info_page, u'param2')
 | 
			
		||||
        info_url = 'http://p2s.cl.kankan.com/getCdnresource_flv?gcid=%s' % gcid
 | 
			
		||||
        video_info_page = self._download_webpage(
 | 
			
		||||
            info_url, video_id, 'Downloading video url info')
 | 
			
		||||
        ip = self._search_regex(r'ip:"(.+?)"', video_info_page, 'video url ip')
 | 
			
		||||
        path = self._search_regex(r'path:"(.+?)"', video_info_page, 'video url path')
 | 
			
		||||
        param1 = self._search_regex(r'param1:(\d+)', video_info_page, 'param1')
 | 
			
		||||
        param2 = self._search_regex(r'param2:(\d+)', video_info_page, 'param2')
 | 
			
		||||
        key = _md5('xl_mp43651' + param1 + param2)
 | 
			
		||||
        video_url = 'http://%s%s?key=%s&key1=%s' % (ip, path, key, param2)
 | 
			
		||||
 | 
			
		||||
        return {'id': video_id,
 | 
			
		||||
                'title': title,
 | 
			
		||||
                'url': video_url,
 | 
			
		||||
                'ext': determine_ext(video_url),
 | 
			
		||||
                }
 | 
			
		||||
        return {
 | 
			
		||||
            'id': video_id,
 | 
			
		||||
            'title': title,
 | 
			
		||||
            'url': video_url,
 | 
			
		||||
        }
 | 
			
		||||
 
 | 
			
		||||
youtube_dl/extractor/metacritic.py
@@ -4,7 +4,7 @@ import re

from .common import InfoExtractor
from ..utils import (
    fix_xml_all_ampersand,
    fix_xml_ampersands,
)

@@ -27,7 +27,7 @@ class MetacriticIE(InfoExtractor):
        webpage = self._download_webpage(url, video_id)
        # The xml is not well formatted, there are raw '&'
        info = self._download_xml('http://www.metacritic.com/video_data?video=' + video_id,
            video_id, 'Downloading info xml', transform_source=fix_xml_all_ampersand)
            video_id, 'Downloading info xml', transform_source=fix_xml_ampersands)

        clip = next(c for c in info.findall('playList/clip') if c.find('id').text == video_id)
        formats = []
@@ -1,4 +1,5 @@
 | 
			
		||||
import json
 | 
			
		||||
from __future__ import unicode_literals
 | 
			
		||||
 | 
			
		||||
import re
 | 
			
		||||
 | 
			
		||||
from .common import InfoExtractor
 | 
			
		||||
@@ -10,17 +11,17 @@ from ..utils import (
 | 
			
		||||
 | 
			
		||||
class MixcloudIE(InfoExtractor):
 | 
			
		||||
    _VALID_URL = r'^(?:https?://)?(?:www\.)?mixcloud\.com/([\w\d-]+)/([\w\d-]+)'
 | 
			
		||||
    IE_NAME = u'mixcloud'
 | 
			
		||||
    IE_NAME = 'mixcloud'
 | 
			
		||||
 | 
			
		||||
    _TEST = {
 | 
			
		||||
        u'url': u'http://www.mixcloud.com/dholbach/cryptkeeper/',
 | 
			
		||||
        u'file': u'dholbach-cryptkeeper.mp3',
 | 
			
		||||
        u'info_dict': {
 | 
			
		||||
            u'title': u'Cryptkeeper',
 | 
			
		||||
            u'description': u'After quite a long silence from myself, finally another Drum\'n\'Bass mix with my favourite current dance floor bangers.',
 | 
			
		||||
            u'uploader': u'Daniel Holbach',
 | 
			
		||||
            u'uploader_id': u'dholbach',
 | 
			
		||||
            u'upload_date': u'20111115',
 | 
			
		||||
        'url': 'http://www.mixcloud.com/dholbach/cryptkeeper/',
 | 
			
		||||
        'file': 'dholbach-cryptkeeper.mp3',
 | 
			
		||||
        'info_dict': {
 | 
			
		||||
            'title': 'Cryptkeeper',
 | 
			
		||||
            'description': 'After quite a long silence from myself, finally another Drum\'n\'Bass mix with my favourite current dance floor bangers.',
 | 
			
		||||
            'uploader': 'Daniel Holbach',
 | 
			
		||||
            'uploader_id': 'dholbach',
 | 
			
		||||
            'upload_date': '20111115',
 | 
			
		||||
        },
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
@@ -42,17 +43,18 @@ class MixcloudIE(InfoExtractor):
 | 
			
		||||
 | 
			
		||||
    def _real_extract(self, url):
 | 
			
		||||
        mobj = re.match(self._VALID_URL, url)
 | 
			
		||||
 | 
			
		||||
        uploader = mobj.group(1)
 | 
			
		||||
        cloudcast_name = mobj.group(2)
 | 
			
		||||
        track_id = '-'.join((uploader, cloudcast_name))
 | 
			
		||||
        api_url = 'http://api.mixcloud.com/%s/%s/' % (uploader, cloudcast_name)
 | 
			
		||||
        webpage = self._download_webpage(url, track_id)
 | 
			
		||||
        json_data = self._download_webpage(api_url, track_id,
 | 
			
		||||
            u'Downloading cloudcast info')
 | 
			
		||||
        info = json.loads(json_data)
 | 
			
		||||
 | 
			
		||||
        preview_url = self._search_regex(r'data-preview-url="(.+?)"', webpage, u'preview url')
 | 
			
		||||
        webpage = self._download_webpage(url, track_id)
 | 
			
		||||
 | 
			
		||||
        api_url = 'http://api.mixcloud.com/%s/%s/' % (uploader, cloudcast_name)
 | 
			
		||||
        info = self._download_json(
 | 
			
		||||
            api_url, track_id, 'Downloading cloudcast info')
 | 
			
		||||
 | 
			
		||||
        preview_url = self._search_regex(
 | 
			
		||||
            r'\s(?:data-preview-url|m-preview)="(.+?)"', webpage, 'preview url')
 | 
			
		||||
        song_url = preview_url.replace('/previews/', '/c/originals/')
 | 
			
		||||
        template_url = re.sub(r'(stream\d*)', 'stream%d', song_url)
 | 
			
		||||
        final_song_url = self._get_url(template_url)
 | 
			
		||||
 
 | 
			
		||||
youtube_dl/extractor/mpora.py
@@ -34,7 +34,7 @@ class MporaIE(InfoExtractor):

        data = json.loads(data_json)

        uploader = data['info_overlay']['name']
        uploader = data['info_overlay'].get('username')
        duration = data['video']['duration'] // 1000
        thumbnail = data['video']['encodings']['sd']['poster']
        title = data['info_overlay']['title']
youtube_dl/extractor/mtv.py
@@ -5,6 +5,7 @@ from .common import InfoExtractor
from ..utils import (
    compat_urllib_parse,
    ExtractorError,
    fix_xml_ampersands,
)

def _media_xml_tag(tag):
@@ -83,12 +84,9 @@ class MTVServicesInfoExtractor(InfoExtractor):
        video_id = self._id_from_uri(uri)
        data = compat_urllib_parse.urlencode({'uri': uri})

        def fix_ampersand(s):
            """ Fix unencoded ampersand in XML """
            return s.replace(u'& ', '&amp; ')
        idoc = self._download_xml(
            self._FEED_URL + '?' + data, video_id,
            u'Downloading info', transform_source=fix_ampersand)
            u'Downloading info', transform_source=fix_xml_ampersands)
        return [self._get_video_info(item) for item in idoc.findall('.//item')]
@@ -1,3 +1,5 @@
 | 
			
		||||
from __future__ import unicode_literals
 | 
			
		||||
 | 
			
		||||
import re
 | 
			
		||||
import json
 | 
			
		||||
 | 
			
		||||
@@ -8,41 +10,75 @@ from ..utils import (
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class MySpaceIE(InfoExtractor):
 | 
			
		||||
    _VALID_URL = r'https?://myspace\.com/([^/]+)/video/[^/]+/(?P<id>\d+)'
 | 
			
		||||
    _VALID_URL = r'https?://myspace\.com/([^/]+)/(?P<mediatype>video/[^/]+/|music/song/.*?)(?P<id>\d+)'
 | 
			
		||||
 | 
			
		||||
    _TEST = {
 | 
			
		||||
        u'url': u'https://myspace.com/coldplay/video/viva-la-vida/100008689',
 | 
			
		||||
        u'info_dict': {
 | 
			
		||||
            u'id': u'100008689',
 | 
			
		||||
            u'ext': u'flv',
            u'title': u'Viva La Vida',
            u'description': u'The official Viva La Vida video, directed by Hype Williams',
            u'uploader': u'Coldplay',
            u'uploader_id': u'coldplay',
    _TESTS = [
        {
            'url': 'https://myspace.com/coldplay/video/viva-la-vida/100008689',
            'info_dict': {
                'id': '100008689',
                'ext': 'flv',
                'title': 'Viva La Vida',
                'description': 'The official Viva La Vida video, directed by Hype Williams',
                'uploader': 'Coldplay',
                'uploader_id': 'coldplay',
            },
            'params': {
                # rtmp download
                'skip_download': True,
            },
        },
        u'params': {
            # rtmp download
            u'skip_download': True,
        # song
        {
            'url': 'https://myspace.com/spiderbags/music/song/darkness-in-my-heart-39008454-27041242',
            'info_dict': {
                'id': '39008454',
                'ext': 'flv',
                'title': 'Darkness In My Heart',
                'uploader_id': 'spiderbags',
            },
            'params': {
                # rtmp download
                'skip_download': True,
            },
        },
    }
    ]

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')
        webpage = self._download_webpage(url, video_id)
        context = json.loads(self._search_regex(r'context = ({.*?});', webpage,
            u'context'))
        video = context['video']
        rtmp_url, play_path = video['streamUrl'].split(';', 1)

        return {
            'id': compat_str(video['mediaId']),
            'title': video['title'],
        if mobj.group('mediatype').startswith('music/song'):
            # songs don't store any useful info in the 'context' variable
            def search_data(name):
                return self._search_regex(r'data-%s="(.*?)"' % name, webpage,
                    name)
            streamUrl = search_data('stream-url')
            info = {
                'id': video_id,
                'title': self._og_search_title(webpage),
                'uploader_id': search_data('artist-username'),
                'thumbnail': self._og_search_thumbnail(webpage),
            }
        else:
            context = json.loads(self._search_regex(r'context = ({.*?});', webpage,
                u'context'))
            video = context['video']
            streamUrl = video['streamUrl']
            info = {
                'id': compat_str(video['mediaId']),
                'title': video['title'],
                'description': video['description'],
                'thumbnail': video['imageUrl'],
                'uploader': video['artistName'],
                'uploader_id': video['artistUsername'],
            }

        rtmp_url, play_path = streamUrl.split(';', 1)
        info.update({
            'url': rtmp_url,
            'play_path': play_path,
            'ext': 'flv',
            'description': video['description'],
            'thumbnail': video['imageUrl'],
            'uploader': video['artistName'],
            'uploader_id': video['artistUsername'],
        }
        })
        return info
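
A minimal standalone sketch (not part of the diff) of the data-attribute scraping that the new song branch of MySpaceIE relies on: song pages expose the stream URL and artist name in data-* attributes, and the stream URL splits into an RTMP URL plus a play path. The HTML snippet and attribute values below are invented for illustration, and plain re stands in for the extractor's _search_regex helper.

import re

# Invented HTML fragment carrying the data-* attributes the extractor looks for
webpage = '<button data-stream-url="rtmp://cdn.example.com/app;mp3:song" data-artist-username="spiderbags">Play</button>'

def search_data(name):
    # same idea as the helper added in the diff: read one data-* attribute
    return re.search(r'data-%s="(.*?)"' % name, webpage).group(1)

stream_url = search_data('stream-url')
rtmp_url, play_path = stream_url.split(';', 1)
print(rtmp_url, play_path)   # rtmp://cdn.example.com/app mp3:song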
 
70  youtube_dl/extractor/servingsys.py  Normal file
@@ -0,0 +1,70 @@
from __future__ import unicode_literals

import re

from .common import InfoExtractor
from ..utils import (
    int_or_none,
)


class ServingSysIE(InfoExtractor):
    _VALID_URL = r'https?://(?:[^.]+\.)?serving-sys\.com/BurstingPipe/adServer\.bs\?.*?&pli=(?P<id>[0-9]+)'

    _TEST = {
        'url': 'http://bs.serving-sys.com/BurstingPipe/adServer.bs?cn=is&c=23&pl=VAST&pli=5349193&PluID=0&pos=7135&ord=[timestamp]&cim=1?',
        'playlist': [{
            'file': '29955898.flv',
            'md5': 'baed851342df6846eb8677a60a011a0f',
            'info_dict': {
                'title': 'AdAPPter_Hyundai_demo (1)',
                'duration': 74,
                'tbr': 1378,
                'width': 640,
                'height': 400,
            },
        }, {
            'file': '29907998.flv',
            'md5': '979b4da2655c4bc2d81aeb915a8c5014',
            'info_dict': {
                'title': 'AdAPPter_Hyundai_demo (2)',
                'duration': 34,
                'width': 854,
                'height': 480,
                'tbr': 516,
            },
        }],
        'params': {
            'playlistend': 2,
        }
    }

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        pl_id = mobj.group('id')

        vast_doc = self._download_xml(url, pl_id)
        title = vast_doc.find('.//AdTitle').text
        media = vast_doc.find('.//MediaFile').text
        info_url = self._search_regex(r'&adData=([^&]+)&', media, 'info URL')

        doc = self._download_xml(info_url, pl_id, 'Downloading video info')
        entries = [{
            '_type': 'video',
            'id': a.attrib['id'],
            'title': '%s (%s)' % (title, a.attrib['assetID']),
            'url': a.attrib['URL'],
            'duration': int_or_none(a.attrib.get('length')),
            'tbr': int_or_none(a.attrib.get('bitrate')),
            'height': int_or_none(a.attrib.get('height')),
            'width': int_or_none(a.attrib.get('width')),
        } for a in doc.findall('.//AdditionalAssets/asset')]

        return {
            '_type': 'playlist',
            'id': pl_id,
            'title': title,
            'entries': entries,
        }

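
For reference, a self-contained sketch of how the new ServingSysIE turns the asset list of the downloaded info XML into playlist entries. The XML document and the simplified int_or_none helper below are assumptions for illustration only, not the library's code.

import xml.etree.ElementTree as ET

def int_or_none(value):
    # simplified stand-in for youtube_dl.utils.int_or_none
    return int(value) if value is not None and str(value).isdigit() else None

# Invented document with the same shape the extractor queries
doc = ET.fromstring(
    '<Ad><AdditionalAssets>'
    '<asset id="29955898" assetID="1" URL="http://example.com/1.flv"'
    ' length="74" bitrate="1378" width="640" height="400"/>'
    '</AdditionalAssets></Ad>')

entries = [{
    '_type': 'video',
    'id': a.attrib['id'],
    'title': 'AdAPPter_Hyundai_demo (%s)' % a.attrib['assetID'],
    'url': a.attrib['URL'],
    'duration': int_or_none(a.attrib.get('length')),
    'tbr': int_or_none(a.attrib.get('bitrate')),
    'height': int_or_none(a.attrib.get('height')),
    'width': int_or_none(a.attrib.get('width')),
} for a in doc.findall('.//AdditionalAssets/asset')]
print(entries)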
 
@@ -1,4 +1,6 @@
# encoding: utf-8
from __future__ import unicode_literals

import json
import re
import itertools
@@ -32,58 +34,58 @@ class SoundcloudIE(InfoExtractor):
                       |(?P<player>(?:w|player|p.)\.soundcloud\.com/player/?.*?url=.*)
                    )
                    '''
    IE_NAME = u'soundcloud'
    IE_NAME = 'soundcloud'
    _TESTS = [
        {
            u'url': u'http://soundcloud.com/ethmusic/lostin-powers-she-so-heavy',
            u'file': u'62986583.mp3',
            u'md5': u'ebef0a451b909710ed1d7787dddbf0d7',
            u'info_dict': {
                u"upload_date": u"20121011",
                u"description": u"No Downloads untill we record the finished version this weekend, i was too pumped n i had to post it , earl is prolly gonna b hella p.o'd",
                u"uploader": u"E.T. ExTerrestrial Music",
                u"title": u"Lostin Powers - She so Heavy (SneakPreview) Adrian Ackers Blueprint 1"
            'url': 'http://soundcloud.com/ethmusic/lostin-powers-she-so-heavy',
            'file': '62986583.mp3',
            'md5': 'ebef0a451b909710ed1d7787dddbf0d7',
            'info_dict': {
                "upload_date": "20121011",
                "description": "No Downloads untill we record the finished version this weekend, i was too pumped n i had to post it , earl is prolly gonna b hella p.o'd",
                "uploader": "E.T. ExTerrestrial Music",
                "title": "Lostin Powers - She so Heavy (SneakPreview) Adrian Ackers Blueprint 1"
            }
        },
        # not streamable song
        {
            u'url': u'https://soundcloud.com/the-concept-band/goldrushed-mastered?in=the-concept-band/sets/the-royal-concept-ep',
            u'info_dict': {
                u'id': u'47127627',
                u'ext': u'mp3',
                u'title': u'Goldrushed',
                u'uploader': u'The Royal Concept',
                u'upload_date': u'20120521',
            'url': 'https://soundcloud.com/the-concept-band/goldrushed-mastered?in=the-concept-band/sets/the-royal-concept-ep',
            'info_dict': {
                'id': '47127627',
                'ext': 'mp3',
                'title': 'Goldrushed',
                'uploader': 'The Royal Concept',
                'upload_date': '20120521',
            },
            u'params': {
            'params': {
                # rtmp
                u'skip_download': True,
                'skip_download': True,
            },
        },
        # private link
        {
            u'url': u'https://soundcloud.com/jaimemf/youtube-dl-test-video-a-y-baw/s-8Pjrp',
            u'md5': u'aa0dd32bfea9b0c5ef4f02aacd080604',
            u'info_dict': {
                u'id': u'123998367',
                u'ext': u'mp3',
                u'title': u'Youtube - Dl Test Video \'\' Ä↭',
                u'uploader': u'jaimeMF',
                u'description': u'test chars:  \"\'/\\ä↭',
                u'upload_date': u'20131209',
            'url': 'https://soundcloud.com/jaimemf/youtube-dl-test-video-a-y-baw/s-8Pjrp',
            'md5': 'aa0dd32bfea9b0c5ef4f02aacd080604',
            'info_dict': {
                'id': '123998367',
                'ext': 'mp3',
                'title': 'Youtube - Dl Test Video \'\' Ä↭',
                'uploader': 'jaimeMF',
                'description': 'test chars:  \"\'/\\ä↭',
                'upload_date': '20131209',
            },
        },
        # downloadable song
        {
            u'url': u'https://soundcloud.com/simgretina/just-your-problem-baby-1',
            u'md5': u'56a8b69568acaa967b4c49f9d1d52d19',
            u'info_dict': {
                u'id': u'105614606',
                u'ext': u'wav',
                u'title': u'Just Your Problem Baby (Acapella)',
                u'description': u'Vocals',
                u'uploader': u'Sim Gretina',
                u'upload_date': u'20130815',
            'url': 'https://soundcloud.com/simgretina/just-your-problem-baby-1',
            'md5': '56a8b69568acaa967b4c49f9d1d52d19',
            'info_dict': {
                'id': '105614606',
                'ext': 'wav',
                'title': 'Just Your Problem Baby (Acapella)',
                'description': 'Vocals',
                'uploader': 'Sim Gretina',
                'upload_date': '20130815',
            },
        },
    ]
@@ -112,7 +114,7 @@ class SoundcloudIE(InfoExtractor):
        thumbnail = info['artwork_url']
        if thumbnail is not None:
            thumbnail = thumbnail.replace('-large', '-t500x500')
        ext = u'mp3'
        ext = 'mp3'
        result = {
            'id': track_id,
            'uploader': info['user']['username'],
@@ -124,11 +126,11 @@ class SoundcloudIE(InfoExtractor):
        if info.get('downloadable', False):
            # We can build a direct link to the song
            format_url = (
                u'https://api.soundcloud.com/tracks/{0}/download?client_id={1}'.format(
                'https://api.soundcloud.com/tracks/{0}/download?client_id={1}'.format(
                    track_id, self._CLIENT_ID))
            result['formats'] = [{
                'format_id': 'download',
                'ext': info.get('original_format', u'mp3'),
                'ext': info.get('original_format', 'mp3'),
                'url': format_url,
                'vcodec': 'none',
            }]
@@ -138,7 +140,7 @@ class SoundcloudIE(InfoExtractor):
                'client_id={1}&secret_token={2}'.format(track_id, self._IPHONE_CLIENT_ID, secret_token))
            stream_json = self._download_webpage(
                streams_url,
                track_id, u'Downloading track url')
                track_id, 'Downloading track url')

            formats = []
            format_dict = json.loads(stream_json)
@@ -165,20 +167,19 @@ class SoundcloudIE(InfoExtractor):
                # We fallback to the stream_url in the original info, this
                # cannot be always used, sometimes it can give an HTTP 404 error
                formats.append({
                    'format_id': u'fallback',
                    'format_id': 'fallback',
                    'url': info['stream_url'] + '?client_id=' + self._CLIENT_ID,
                    'ext': ext,
                    'vcodec': 'none',
                })

            def format_pref(f):
            for f in formats:
                if f['format_id'].startswith('http'):
                    return 2
                    f['protocol'] = 'http'
                if f['format_id'].startswith('rtmp'):
                    return 1
                return 0
                    f['protocol'] = 'rtmp'

            formats.sort(key=format_pref)
            self._sort_formats(formats)
            result['formats'] = formats

        return result
@@ -210,14 +211,14 @@ class SoundcloudIE(InfoExtractor):

            url = 'http://soundcloud.com/%s' % resolve_title
            info_json_url = self._resolv_url(url)
        info_json = self._download_webpage(info_json_url, full_title, u'Downloading info JSON')
        info_json = self._download_webpage(info_json_url, full_title, 'Downloading info JSON')

        info = json.loads(info_json)
        return self._extract_info_dict(info, full_title, secret_token=token)

class SoundcloudSetIE(SoundcloudIE):
    _VALID_URL = r'^(?:https?://)?(?:www\.)?soundcloud\.com/([\w\d-]+)/sets/([\w\d-]+)(?:[?].*)?$'
    IE_NAME = u'soundcloud:set'
    IE_NAME = 'soundcloud:set'
    # it's in tests/test_playlists.py
    _TESTS = []

@@ -254,7 +255,7 @@ class SoundcloudSetIE(SoundcloudIE):

class SoundcloudUserIE(SoundcloudIE):
    _VALID_URL = r'https?://(www\.)?soundcloud\.com/(?P<user>[^/]+)(/?(tracks/)?)?(\?.*)?$'
    IE_NAME = u'soundcloud:user'
    IE_NAME = 'soundcloud:user'

    # it's in tests/test_playlists.py
    _TESTS = []
@@ -266,7 +267,7 @@ class SoundcloudUserIE(SoundcloudIE):
        url = 'http://soundcloud.com/%s/' % uploader
        resolv_url = self._resolv_url(url)
        user_json = self._download_webpage(resolv_url, uploader,
            u'Downloading user info')
            'Downloading user info')
        user = json.loads(user_json)

        tracks = []
@@ -276,7 +277,7 @@ class SoundcloudUserIE(SoundcloudIE):
                                                  })
            tracks_url = 'http://api.soundcloud.com/users/%s/tracks.json?' % user['id'] + data
            response = self._download_webpage(tracks_url, uploader,
                u'Downloading tracks page %s' % (i+1))
                'Downloading tracks page %s' % (i+1))
            new_tracks = json.loads(response)
            tracks.extend(self._extract_info_dict(track, quiet=True) for track in new_tracks)
            if len(new_tracks) < 50:
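
The SoundCloud change above replaces the hand-written format_pref sort with protocol tags plus the shared _sort_formats helper. A tiny sketch of the tagging step, using an invented format list:

formats = [
    {'format_id': 'rtmp_mp3_128_url', 'url': 'rtmp://example.com/stream'},
    {'format_id': 'http_mp3_128_url', 'url': 'http://example.com/track.mp3'},
    {'format_id': 'fallback', 'url': 'http://example.com/stream?client_id=xyz'},
]
for f in formats:
    # same loop as in the diff: derive the protocol from the format_id prefix
    if f['format_id'].startswith('http'):
        f['protocol'] = 'http'
    if f['format_id'].startswith('rtmp'):
        f['protocol'] = 'rtmp'
# InfoExtractor._sort_formats (not reproduced here) then ranks the tagged formats.
print([(f['format_id'], f.get('protocol')) for f in formats])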
 
@@ -1,3 +1,5 @@
from __future__ import unicode_literals

import os
import re

@@ -11,17 +13,18 @@ from ..aes import (
    aes_decrypt_text
)


class SpankwireIE(InfoExtractor):
    _VALID_URL = r'^(?:https?://)?(?:www\.)?(?P<url>spankwire\.com/[^/]*/video(?P<videoid>[0-9]+)/?)'
    _TEST = {
        u'url': u'http://www.spankwire.com/Buckcherry-s-X-Rated-Music-Video-Crazy-Bitch/video103545/',
        u'file': u'103545.mp4',
        u'md5': u'1b3f55e345500552dbc252a3e9c1af43',
        u'info_dict': {
            u"uploader": u"oreusz",
            u"title": u"Buckcherry`s X Rated Music Video Crazy Bitch",
            u"description": u"Crazy Bitch X rated music video.",
            u"age_limit": 18,
        'url': 'http://www.spankwire.com/Buckcherry-s-X-Rated-Music-Video-Crazy-Bitch/video103545/',
        'file': '103545.mp4',
        'md5': '1b3f55e345500552dbc252a3e9c1af43',
        'info_dict': {
            "uploader": "oreusz",
            "title": "Buckcherry`s X Rated Music Video Crazy Bitch",
            "description": "Crazy Bitch X rated music video.",
            "age_limit": 18,
        }
    }

@@ -34,17 +37,17 @@ class SpankwireIE(InfoExtractor):
        req.add_header('Cookie', 'age_verified=1')
        webpage = self._download_webpage(req, video_id)

        video_title = self._html_search_regex(r'<h1>([^<]+)', webpage, u'title')
        video_title = self._html_search_regex(r'<h1>([^<]+)', webpage, 'title')
        video_uploader = self._html_search_regex(
            r'by:\s*<a [^>]*>(.+?)</a>', webpage, u'uploader', fatal=False)
            r'by:\s*<a [^>]*>(.+?)</a>', webpage, 'uploader', fatal=False)
        thumbnail = self._html_search_regex(
            r'flashvars\.image_url = "([^"]+)', webpage, u'thumbnail', fatal=False)
            r'flashvars\.image_url = "([^"]+)', webpage, 'thumbnail', fatal=False)
        description = self._html_search_regex(
            r'<div\s+id="descriptionContent">([^<]+)<', webpage, u'description', fatal=False)
            r'<div\s+id="descriptionContent">([^<]+)<', webpage, 'description', fatal=False)

        video_urls = list(map(compat_urllib_parse.unquote , re.findall(r'flashvars\.quality_[0-9]{3}p = "([^"]+)', webpage)))
        if webpage.find('flashvars\.encrypted = "true"') != -1:
            password = self._html_search_regex(r'flashvars\.video_title = "([^"]+)', webpage, u'password').replace('+', ' ')
            password = self._html_search_regex(r'flashvars\.video_title = "([^"]+)', webpage, 'password').replace('+', ' ')
            video_urls = list(map(lambda s: aes_decrypt_text(s, password, 32).decode('utf-8'), video_urls))

        formats = []
@@ -52,14 +55,21 @@ class SpankwireIE(InfoExtractor):
            path = compat_urllib_parse_urlparse(video_url).path
            extension = os.path.splitext(path)[1][1:]
            format = path.split('/')[4].split('_')[:2]
            resolution, bitrate_str = format
            format = "-".join(format)
            height = int(resolution.rstrip('P'))
            tbr = int(bitrate_str.rstrip('K'))

            formats.append({
                'url': video_url,
                'ext': extension,
                'resolution': resolution,
                'format': format,
                'tbr': tbr,
                'height': height,
                'format_id': format,
            })
        formats.sort(key=lambda format: list(map(lambda s: s.zfill(6), format['format'].split('-'))))
        self._sort_formats(formats)

        age_limit = self._rta_search(webpage)

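
A standalone sketch of the height/tbr parsing added to SpankwireIE. The video URL below is fabricated but follows the path layout the code expects (a fifth path segment of the form <RES>P_<BITRATE>K_...), and urllib.parse stands in for youtube-dl's compat_urllib_parse_urlparse.

from urllib.parse import urlparse

video_url = 'http://cdn.example.com/videos/201312/17/720P_1500K_103545.mp4'
path = urlparse(video_url).path            # /videos/201312/17/720P_1500K_103545.mp4
fmt = path.split('/')[4].split('_')[:2]    # ['720P', '1500K']
resolution, bitrate_str = fmt
height = int(resolution.rstrip('P'))       # 720
tbr = int(bitrate_str.rstrip('K'))         # 1500
print(height, tbr, '-'.join(fmt))          # 720 1500 720P-1500K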
 
@@ -1,3 +1,5 @@
from __future__ import unicode_literals

import re

from .common import InfoExtractor
@@ -9,61 +11,66 @@ from ..utils import (
class TeamcocoIE(InfoExtractor):
    _VALID_URL = r'http://teamcoco\.com/video/(?P<url_title>.*)'
    _TEST = {
        u'url': u'http://teamcoco.com/video/louis-ck-interview-george-w-bush',
        u'file': u'19705.mp4',
        u'md5': u'cde9ba0fa3506f5f017ce11ead928f9a',
        u'info_dict': {
            u"description": u"Louis C.K. got starstruck by George W. Bush, so what? Part one.",
            u"title": u"Louis C.K. Interview Pt. 1 11/3/11"
        'url': 'http://teamcoco.com/video/louis-ck-interview-george-w-bush',
        'file': '19705.mp4',
        'md5': 'cde9ba0fa3506f5f017ce11ead928f9a',
        'info_dict': {
            "description": "Louis C.K. got starstruck by George W. Bush, so what? Part one.",
            "title": "Louis C.K. Interview Pt. 1 11/3/11"
        }
    }

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        if mobj is None:
            raise ExtractorError(u'Invalid URL: %s' % url)
            raise ExtractorError('Invalid URL: %s' % url)
        url_title = mobj.group('url_title')
        webpage = self._download_webpage(url, url_title)

        video_id = self._html_search_regex(r'<article class="video" data-id="(\d+?)"',
            webpage, u'video id')
        video_id = self._html_search_regex(
            r'<article class="video" data-id="(\d+?)"',
            webpage, 'video id')

        self.report_extraction(video_id)

        data_url = 'http://teamcoco.com/cvp/2.0/%s.xml' % video_id
        data = self._download_xml(data_url, video_id, 'Downloading data webpage')


        qualities = ['500k', '480p', '1000k', '720p', '1080p']
        formats = []
        for file in data.findall('files/file'):
            if file.attrib.get('playmode') == 'all':
        for filed in data.findall('files/file'):
            if filed.attrib.get('playmode') == 'all':
                # it just duplicates one of the entries
                break
            file_url = file.text
            file_url = filed.text
            m_format = re.search(r'(\d+(k|p))\.mp4', file_url)
            if m_format is not None:
                format_id = m_format.group(1)
            else:
                format_id = file.attrib['bitrate']
                format_id = filed.attrib['bitrate']
            tbr = (
                int(filed.attrib['bitrate'])
                if filed.attrib['bitrate'].isdigit()
                else None)

            try:
                quality = qualities.index(format_id)
            except ValueError:
                quality = -1
            formats.append({
                'url': file_url,
                'ext': 'mp4',
                'tbr': tbr,
                'format_id': format_id,
                'quality': quality,
            })
        def sort_key(f):
            try:
                return qualities.index(f['format_id'])
            except ValueError:
                return -1
        formats.sort(key=sort_key)
        if not formats:
            raise ExtractorError(u'Unable to extract video URL')

        self._sort_formats(formats)

        return {
            'id':          video_id,
            'id': video_id,
            'formats': formats,
            'title':       self._og_search_title(webpage),
            'thumbnail':   self._og_search_thumbnail(webpage),
            'title': self._og_search_title(webpage),
            'thumbnail': self._og_search_thumbnail(webpage),
            'description': self._og_search_description(webpage),
        }
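
The Teamcoco rewrite keeps the qualities list but now stores the rank directly on each format as 'quality' (unknown format ids fall back to -1) and defers ordering to _sort_formats. A minimal illustration with made-up format ids:

qualities = ['500k', '480p', '1000k', '720p', '1080p']

def quality(format_id):
    # same ranking rule as in the diff: later entries in the list are better
    try:
        return qualities.index(format_id)
    except ValueError:
        return -1

for format_id in ['480p', '1080p', '64k']:
    print(format_id, quality(format_id))   # 480p 1, 1080p 4, 64k -1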
 
@@ -1,3 +1,5 @@
from __future__ import unicode_literals

import json
import re

@@ -7,6 +9,7 @@ from ..utils import (
    RegexNotFoundError,
)


class TEDIE(SubtitlesInfoExtractor):
    _VALID_URL=r'''http://www\.ted\.com/
                   (
@@ -18,12 +21,12 @@ class TEDIE(SubtitlesInfoExtractor):
                   /(?P<name>\w+) # Here goes the name and then ".html"
                   '''
    _TEST = {
        u'url': u'http://www.ted.com/talks/dan_dennett_on_our_consciousness.html',
        u'file': u'102.mp4',
        u'md5': u'2d76ee1576672e0bd8f187513267adf6',
        u'info_dict': {
            u"description": u"md5:c6fa72e6eedbd938c9caf6b2702f5922",
            u"title": u"Dan Dennett: The illusion of consciousness"
        'url': 'http://www.ted.com/talks/dan_dennett_on_our_consciousness.html',
        'file': '102.mp4',
        'md5': '4ea1dada91e4174b53dac2bb8ace429d',
        'info_dict': {
            "description": "md5:c6fa72e6eedbd938c9caf6b2702f5922",
            "title": "Dan Dennett: The illusion of consciousness"
        }
    }

@@ -47,7 +50,7 @@ class TEDIE(SubtitlesInfoExtractor):
        '''Returns the videos of the playlist'''

        webpage = self._download_webpage(
            url, playlist_id, u'Downloading playlist webpage')
            url, playlist_id, 'Downloading playlist webpage')
        matches = re.finditer(
            r'<p\s+class="talk-title[^"]*"><a\s+href="(?P<talk_url>/talks/[^"]+\.html)">[^<]*</a></p>',
            webpage)
 
@@ -27,6 +27,7 @@ from ..utils import (
    get_element_by_id,
    get_element_by_attribute,
    ExtractorError,
    RegexNotFoundError,
    unescapeHTML,
    unified_strdate,
    orderedSet,
@@ -131,6 +132,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
                     (
                         (?:https?://|//)?                                    # http(s):// or protocol-independent URL (optional)
                         (?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/|
                            (?:www\.)?deturl\.com/www\.youtube\.com/|
                            (?:www\.)?pwnyoutube\.com|
                            tube\.majestyc\.net/|
                            youtube\.googleapis\.com/)                        # the various hostnames, with wildcard subdomains
                         (?:.*?\#/)?                                          # handle anchor (#/) redirect urls
@@ -1446,7 +1449,14 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
            if re.search(self._MORE_PAGES_INDICATOR, page) is None:
                break

        playlist_title = self._og_search_title(page)
        try:
            playlist_title = self._og_search_title(page)
        except RegexNotFoundError:
            self.report_warning(
                u'Playlist page is missing OpenGraph title, falling back ...',
                playlist_id)
            playlist_title = self._html_search_regex(
                r'<h1 class="pl-header-title">(.*?)</h1>', page, u'title')

        url_results = self._ids_to_results(ids)
        return self.playlist_result(url_results, playlist_id, playlist_title)
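
The playlist change wraps the OpenGraph lookup in a try/except and falls back to the page heading. A self-contained sketch of that fallback; the HTML snippet and the og:title regex below are simplified assumptions for illustration, not YoutubeIE's actual helpers.

import re

page = '<h1 class="pl-header-title">Watch later</h1>'    # invented playlist page snippet

m = re.search(r'<meta property="og:title" content="([^"]*)"', page)
if m:
    playlist_title = m.group(1)
else:
    # same fallback regex as in the diff
    playlist_title = re.search(r'<h1 class="pl-header-title">(.*?)</h1>', page).group(1)
print(playlist_title)                                     # Watch later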
 
@@ -479,6 +479,7 @@ class FFmpegMergerPP(FFmpegPostProcessor):
    def run(self, info):
        filename = info['filepath']
        args = ['-c', 'copy']
        self._downloader.to_screen(u'[ffmpeg] Merging formats into "%s"' % filename)
        self.run_ffmpeg_multiple_files(info['__files_to_merge'], filename, args)
        return True, info

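
FFmpegMergerPP now prints a status line before merging; the merge itself copies every input stream into one container. Roughly the kind of ffmpeg command involved, as a standalone illustration rather than the postprocessor's exact invocation; the function name and file names are made up.

import subprocess

def merge_formats(input_paths, out_path):
    # each downloaded format becomes an -i input; -c copy avoids re-encoding
    cmd = ['ffmpeg', '-y']
    for path in input_paths:
        cmd += ['-i', path]
    cmd += ['-c', 'copy', out_path]
    subprocess.check_call(cmd)

# merge_formats(['video.f137.mp4', 'audio.f140.m4a'], 'video.mp4')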
 
@@ -1092,9 +1092,12 @@ def month_by_name(name):
        return None


def fix_xml_all_ampersand(xml_str):
def fix_xml_ampersands(xml_str):
    """Replace all the '&' by '&amp;' in XML"""
    return xml_str.replace(u'&', u'&amp;')
    return re.sub(
        r'&(?!amp;|lt;|gt;|apos;|quot;|#x[0-9a-fA-F]{,4};|#[0-9]{,4};)',
        u'&amp;',
        xml_str)


def setproctitle(title):
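
A quick check (not from the diff) of what the renamed fix_xml_ampersands now does: bare '&' characters are escaped while existing entities and numeric character references are left untouched. The sample string is invented.

import re

def fix_xml_ampersands(xml_str):
    # same regex as the new utils helper
    return re.sub(
        r'&(?!amp;|lt;|gt;|apos;|quot;|#x[0-9a-fA-F]{,4};|#[0-9]{,4};)',
        '&amp;',
        xml_str)

print(fix_xml_ampersands('<a title="Tom & Jerry &amp; &#38; friends"/>'))
# <a title="Tom &amp; Jerry &amp; &#38; friends"/>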
 
@@ -1,2 +1,2 @@

__version__ = '2013.01.17'
__version__ = '2014.01.21'
 