Compare commits

116 commits, 2014.03.04 ... 2014.03.17
Commit SHA1s (in the order listed; author and date columns omitted):

9d840c43b5, 6f50f63382, ff14fc4964, e125c21531, 93d020dd65, a7515ec265, b6c1ceccc2, 4056ad8f36,
6563837ee1, fd5e6f7ef2, 15fd51b37c, f1cef7a9ff, 8264223511, bc6d597828, aba77bbfc2, 955c451456,
e5de3f6c89, 2a1db721d4, 1e0eb60f1a, 87a29e6f25, c3d36f134f, 84769e708c, 9d2ecdbc71, 9b69af5342,
c21215b421, cddcfd90b4, f36aacba0f, 355271fb61, 2a5b502364, 98ff9d82d4, b1ff87224c, b461641fb9,
b047de6f6e, 34ca5d9ba0, 60cc4dc4b4, db95dc13a1, 777ac90791, 04f9bebbcb, 4ea3137e41, a0792b738e,
19a41fc613, 3ee52157fb, c4d197ee2d, a33932cfe3, bcf89ce62c, e3899d0e00, dcb00da49c, aa51d20d19,
ae7ed92057, e45b31d9bd, 5a25f39653, 963d7ec412, e712d94adf, 6a72423955, 4126826b10, b773ead7fd,
855e2750bc, 805ef3c60b, fbc2dcb40b, 5375d7ad84, 90f3476180, ee95c09333, 75d06db9fc, 439a1fffcb,
9d9d70c462, b4a186b7be, bdebf51c8f, 264b86f9b4, 9e55e37a2e, 1471956573, 27865b2169, 6d07ce0162,
edb7fc5435, 31f77343f2, 63ad031583, 957688cee6, 806d6c2e8c, 0ef68e04d9, a496524db2, 935c7360cc,
340b046876, cc1db7f9b7, a4ff6c4762, 1060425cbb, e9c092f125, 22ff5d2105, 136db7881b, dae313e725,
b74fa8cd2c, 94eae04c94, 16ff7ebc77, c361c505b0, d37c07c575, 9d6105c9f0, 8dec03ecba, 826547870b,
52d6a9a61d, ad242b5fbc, 3524175625, 7b9965ea93, 0a5bce566f, 8012bd2424, f55a1f0a88, bacac173a9,
ca1fee34f2, 6dadaa9930, 553f6e4633, 652bee05f0, d63516e9cd, e477dcf649, 9d3f7781f3, c7095dada3,
607dbbad76, 17b75c0de1, ab24f4f3be, 1b86cc41cf
README.md (14 changed lines)

```
@@ -124,8 +124,12 @@ which means you can modify it, redistribute it or use it however you like.
                                     video id, %(playlist)s for the playlist the
                                     video is in, %(playlist_index)s for the
                                     position in the playlist and %% for a
                                     literal percent. Use - to output to stdout.
                                     Can also be used to download to a different
                                     literal percent. %(height)s and %(width)s
                                     for the width and height of the video
                                     format. %(resolution)s for a textual
                                     description of the resolution of the video
                                     format. Use - to output to stdout. Can also
                                     be used to download to a different
                                     directory, for example with -o '/my/downloa
                                     ds/%(uploader)s/%(title)s-%(id)s.%(ext)s' .
    --autonumber-size NUMBER         Specifies the number of digits in
@@ -187,9 +191,9 @@ which means you can modify it, redistribute it or use it however you like.
                                     preference using slashes: "-f 22/17/18".
                                     "-f mp4" and "-f flv" are also supported.
                                     You can also use the special names "best",
                                     "bestaudio", "worst", and "worstaudio". By
                                     default, youtube-dl will pick the best
                                     quality.
                                     "bestvideo", "bestaudio", "worst",
                                     "worstvideo" and "worstaudio". By default,
                                     youtube-dl will pick the best quality.
    --all-formats                    download all available video formats
    --prefer-free-formats            prefer free video formats unless a specific
                                     one is requested
```
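The two README hunks above document the new output-template fields (`%(height)s`, `%(width)s`, `%(resolution)s`) and the new `-f` names (`bestvideo`, `worstvideo`). A minimal sketch of exercising them through the Python API; the `outtmpl` key is an assumption here, while `format` and the template fields come from the hunks above:

```python
# Hedged sketch, not part of the changeset: uses the new "bestvideo" format name
# and the new %(resolution)s output-template field documented above.
import youtube_dl

ydl_opts = {
    'format': 'bestvideo',  # "worstvideo" is the other new special name
    # 'outtmpl' is assumed to be the option key backing the -o output template
    'outtmpl': '%(title)s-%(resolution)s-%(id)s.%(ext)s',
}

ydl = youtube_dl.YoutubeDL(ydl_opts)
ydl.download(['http://www.youtube.com/watch?v=BaW_jenozKc'])
```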
```
@@ -97,7 +97,7 @@ rm -rf build

make pypi-files
echo "Uploading to PyPi ..."
python setup.py sdist upload
python setup.py sdist bdist_wheel upload
make clean

/bin/echo -e "\n### DONE!"
```
```
@@ -71,7 +71,7 @@ class FakeYDL(YoutubeDL):
            old_report_warning(message)
        self.report_warning = types.MethodType(report_warning, self)

def get_testcases():
def gettestcases():
    for ie in youtube_dl.extractor.gen_extractors():
        t = getattr(ie, '_TEST', None)
        if t:
```
test/test_InfoExtractor.py (new file, 44 lines)

```
@@ -0,0 +1,44 @@
#!/usr/bin/env python

from __future__ import unicode_literals

# Allow direct execution
import os
import sys
import unittest
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

from test.helper import FakeYDL
from youtube_dl.extractor.common import InfoExtractor
from youtube_dl.extractor import YoutubeIE, get_info_extractor


class TestIE(InfoExtractor):
    pass


class TestInfoExtractor(unittest.TestCase):
    def setUp(self):
        self.ie = TestIE(FakeYDL())

    def test_ie_key(self):
        self.assertEqual(get_info_extractor(YoutubeIE.ie_key()), YoutubeIE)

    def test_html_search_regex(self):
        html = '<p id="foo">Watch this <a href="http://www.youtube.com/watch?v=BaW_jenozKc">video</a></p>'
        search = lambda re, *args: self.ie._html_search_regex(re, html, *args)
        self.assertEqual(search(r'<p id="foo">(.+?)</p>', 'foo'), 'Watch this video')

    def test_opengraph(self):
        ie = self.ie
        html = '''
            <meta name="og:title" content='Foo'/>
            <meta content="Some video's description " name="og:description"/>
            <meta property='og:image' content='http://domain.com/pic.jpg?key1=val1&key2=val2'/>
            '''
        self.assertEqual(ie._og_search_title(html), 'Foo')
        self.assertEqual(ie._og_search_description(html), 'Some video\'s description ')
        self.assertEqual(ie._og_search_thumbnail(html), 'http://domain.com/pic.jpg?key1=val1&key2=val2')

if __name__ == '__main__':
    unittest.main()
```
```
@@ -182,6 +182,24 @@ class TestFormatSelection(unittest.TestCase):
        downloaded = ydl.downloaded_info_dicts[0]
        self.assertEqual(downloaded['format_id'], 'vid-high')

    def test_format_selection_video(self):
        formats = [
            {'format_id': 'dash-video-low', 'ext': 'mp4', 'preference': 1, 'acodec': 'none'},
            {'format_id': 'dash-video-high', 'ext': 'mp4', 'preference': 2, 'acodec': 'none'},
            {'format_id': 'vid', 'ext': 'mp4', 'preference': 3},
        ]
        info_dict = {'formats': formats, 'extractor': 'test'}

        ydl = YDL({'format': 'bestvideo'})
        ydl.process_ie_result(info_dict.copy())
        downloaded = ydl.downloaded_info_dicts[0]
        self.assertEqual(downloaded['format_id'], 'dash-video-high')

        ydl = YDL({'format': 'worstvideo'})
        ydl.process_ie_result(info_dict.copy())
        downloaded = ydl.downloaded_info_dicts[0]
        self.assertEqual(downloaded['format_id'], 'dash-video-low')

    def test_youtube_format_selection(self):
        order = [
            '38', '37', '46', '22', '45', '35', '44', '18', '34', '43', '6', '5', '36', '17', '13',
```
```
@@ -9,7 +9,7 @@ import unittest
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))


from test.helper import get_testcases
from test.helper import gettestcases

from youtube_dl.extractor import (
    FacebookIE,
@@ -105,7 +105,7 @@ class TestAllURLsMatching(unittest.TestCase):

    def test_no_duplicates(self):
        ies = gen_extractors()
        for tc in get_testcases():
        for tc in gettestcases():
            url = tc['url']
            for ie in ies:
                if type(ie).__name__ in ('GenericIE', tc['name'] + 'IE'):
@@ -124,6 +124,8 @@ class TestAllURLsMatching(unittest.TestCase):

    def test_vimeo_matching(self):
        self.assertMatch('http://vimeo.com/channels/tributes', ['vimeo:channel'])
        self.assertMatch('http://vimeo.com/channels/31259', ['vimeo:channel'])
        self.assertMatch('http://vimeo.com/channels/31259/53576664', ['vimeo'])
        self.assertMatch('http://vimeo.com/user7108434', ['vimeo:user'])
        self.assertMatch('http://vimeo.com/user7108434/videos', ['vimeo:user'])
        self.assertMatch('https://vimeo.com/user21297594/review/75524534/3c257a1b5d', ['vimeo:review'])
```
```
@@ -8,7 +8,7 @@ sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

from test.helper import (
    get_params,
    get_testcases,
    gettestcases,
    try_rm,
    md5,
    report_warning
@@ -51,7 +51,7 @@ def _file_md5(fn):
    with open(fn, 'rb') as f:
        return hashlib.md5(f.read()).hexdigest()

defs = get_testcases()
defs = gettestcases()


class TestDownload(unittest.TestCase):
@@ -144,6 +144,10 @@ def generator(test_case):
                        self.assertTrue(
                            isinstance(got, compat_str) and match_rex.match(got),
                            u'field %s (value: %r) should match %r' % (info_field, got, match_str))
                    elif isinstance(expected, type):
                        got = info_dict.get(info_field)
                        self.assertTrue(isinstance(got, expected),
                            u'Expected type %r, but got value %r of type %r' % (expected, got, type(got)))
                    else:
                        if isinstance(expected, compat_str) and expected.startswith('md5:'):
                            got = 'md5:' + md5(info_dict.get(info_field))
@@ -152,19 +156,19 @@ def generator(test_case):
                        self.assertEqual(expected, got,
                            u'invalid value for field %s, expected %r, got %r' % (info_field, expected, got))

                # If checkable fields are missing from the test case, print the info_dict
                test_info_dict = dict((key, value if not isinstance(value, compat_str) or len(value) < 250 else 'md5:' + md5(value))
                    for key, value in info_dict.items()
                    if value and key in ('title', 'description', 'uploader', 'upload_date', 'uploader_id', 'location'))
                if not all(key in tc.get('info_dict', {}).keys() for key in test_info_dict.keys()):
                    sys.stderr.write(u'\n"info_dict": ' + json.dumps(test_info_dict, ensure_ascii=False, indent=4) + u'\n')

                # Check for the presence of mandatory fields
                for key in ('id', 'url', 'title', 'ext'):
                    self.assertTrue(key in info_dict.keys() and info_dict[key])
                # Check for mandatory fields that are automatically set by YoutubeDL
                for key in ['webpage_url', 'extractor', 'extractor_key']:
                    self.assertTrue(info_dict.get(key), u'Missing field: %s' % key)

                # If checkable fields are missing from the test case, print the info_dict
                test_info_dict = dict((key, value if not isinstance(value, compat_str) or len(value) < 250 else 'md5:' + md5(value))
                    for key, value in info_dict.items()
                    if value and key in ('title', 'description', 'uploader', 'upload_date', 'timestamp', 'uploader_id', 'location'))
                if not all(key in tc.get('info_dict', {}).keys() for key in test_info_dict.keys()):
                    sys.stderr.write(u'\n"info_dict": ' + json.dumps(test_info_dict, ensure_ascii=False, indent=4) + u'\n')
        finally:
            try_rm_tcs_files()
```
```
@@ -36,6 +36,7 @@ from youtube_dl.extractor import (
    RutubeChannelIE,
    GoogleSearchIE,
    GenericIE,
    TEDIE,
)


@@ -98,7 +99,7 @@ class TestPlaylists(unittest.TestCase):
        result = ie.extract('http://www.ustream.tv/channel/young-americans-for-liberty')
        self.assertIsPlaylist(result)
        self.assertEqual(result['id'], '5124905')
        self.assertTrue(len(result['entries']) >= 11)
        self.assertTrue(len(result['entries']) >= 6)

    def test_soundcloud_set(self):
        dl = FakeYDL()
@@ -248,16 +249,25 @@ class TestPlaylists(unittest.TestCase):
        self.assertIsPlaylist(result)
        self.assertEqual(result['id'], 'python language')
        self.assertEqual(result['title'], 'python language')
        self.assertTrue(len(result['entries']) == 15)
        self.assertEqual(len(result['entries']), 15)

    def test_generic_rss_feed(self):
        dl = FakeYDL()
        ie = GenericIE(dl)
        result = ie.extract('http://www.escapistmagazine.com/rss/videos/list/1.xml')
        result = ie.extract('http://phihag.de/2014/youtube-dl/rss.xml')
        self.assertIsPlaylist(result)
        self.assertEqual(result['id'], 'http://www.escapistmagazine.com/rss/videos/list/1.xml')
        self.assertEqual(result['id'], 'http://phihag.de/2014/youtube-dl/rss.xml')
        self.assertEqual(result['title'], 'Zero Punctuation')
        self.assertTrue(len(result['entries']) > 10)

    def test_ted_playlist(self):
        dl = FakeYDL()
        ie = TEDIE(dl)
        result = ie.extract('http://www.ted.com/playlists/who_are_the_hackers')
        self.assertIsPlaylist(result)
        self.assertEqual(result['id'], '10')
        self.assertEqual(result['title'], 'Who are the hackers?')
        self.assertTrue(len(result['entries']) >= 6)

if __name__ == '__main__':
    unittest.main()
```
```
@@ -33,6 +33,7 @@ from youtube_dl.utils import (
    unified_strdate,
    unsmuggle_url,
    url_basename,
    urlencode_postdata,
    xpath_with_ns,
)

@@ -261,5 +262,9 @@ class TestUtil(unittest.TestCase):
            bam''')
        self.assertEqual(read_batch_urls(f), [u'foo', u'bar', u'baz', u'bam'])

    def test_urlencode_postdata(self):
        data = urlencode_postdata({'username': 'foo@bar.com', 'password': '1234'})
        self.assertTrue(isinstance(data, bytes))

if __name__ == '__main__':
    unittest.main()
```
```
@@ -4,6 +4,7 @@

from __future__ import absolute_import, unicode_literals

import collections
import datetime
import errno
import io
import json
@@ -370,12 +371,15 @@ class YoutubeDL(object):
        Print the message to stderr, it will be prefixed with 'WARNING:'
        If stderr is a tty file the 'WARNING:' will be colored
        '''
        if self._err_file.isatty() and os.name != 'nt':
            _msg_header = '\033[0;33mWARNING:\033[0m'
        if self.params.get('logger') is not None:
            self.params['logger'].warning(message)
        else:
            _msg_header = 'WARNING:'
        warning_message = '%s %s' % (_msg_header, message)
        self.to_stderr(warning_message)
            if self._err_file.isatty() and os.name != 'nt':
                _msg_header = '\033[0;33mWARNING:\033[0m'
            else:
                _msg_header = 'WARNING:'
            warning_message = '%s %s' % (_msg_header, message)
            self.to_stderr(warning_message)

    def report_error(self, message, tb=None):
        '''
@@ -409,6 +413,13 @@ class YoutubeDL(object):
            template_dict['autonumber'] = autonumber_templ % self._num_downloads
            if template_dict.get('playlist_index') is not None:
                template_dict['playlist_index'] = '%05d' % template_dict['playlist_index']
            if template_dict.get('resolution') is None:
                if template_dict.get('width') and template_dict.get('height'):
                    template_dict['resolution'] = '%dx%d' % (template_dict['width'], template_dict['height'])
                elif template_dict.get('height'):
                    template_dict['resolution'] = '%sp' % template_dict['height']
                elif template_dict.get('width'):
                    template_dict['resolution'] = '?x%d' % template_dict['width']

            sanitize = lambda k, v: sanitize_filename(
                compat_str(v),
@@ -656,6 +667,18 @@ class YoutubeDL(object):
                if f.get('vcodec') == 'none']
            if audio_formats:
                return audio_formats[0]
        elif format_spec == 'bestvideo':
            video_formats = [
                f for f in available_formats
                if f.get('acodec') == 'none']
            if video_formats:
                return video_formats[-1]
        elif format_spec == 'worstvideo':
            video_formats = [
                f for f in available_formats
                if f.get('acodec') == 'none']
            if video_formats:
                return video_formats[0]
        else:
            extensions = ['mp4', 'flv', 'webm', '3gp']
            if format_spec in extensions:
@@ -678,6 +701,11 @@ class YoutubeDL(object):
        if 'display_id' not in info_dict and 'id' in info_dict:
            info_dict['display_id'] = info_dict['id']

        if info_dict.get('upload_date') is None and info_dict.get('timestamp') is not None:
            upload_date = datetime.datetime.utcfromtimestamp(
                info_dict['timestamp'])
            info_dict['upload_date'] = upload_date.strftime('%Y%m%d')

        # This extractors handle format selection themselves
        if info_dict['extractor'] in ['Youku']:
            if download:
@@ -691,8 +719,11 @@ class YoutubeDL(object):
        else:
            formats = info_dict['formats']

        if not formats:
            raise ExtractorError('No video formats found!')

        # We check that all the formats have the format and format_id fields
        for (i, format) in enumerate(formats):
        for i, format in enumerate(formats):
            if format.get('format_id') is None:
                format['format_id'] = compat_str(i)
            if format.get('format') is None:
@@ -911,7 +942,7 @@ class YoutubeDL(object):
                    self.to_screen('[%s] %s: Downloading thumbnail ...' %
                                   (info_dict['extractor'], info_dict['id']))
                    try:
                        uf = compat_urllib_request.urlopen(info_dict['thumbnail'])
                        uf = self.urlopen(info_dict['thumbnail'])
                        with open(thumb_filename, 'wb') as thumbf:
                            shutil.copyfileobj(uf, thumbf)
                        self.to_screen('[%s] %s: Writing thumbnail to: %s' %
@@ -1157,7 +1188,7 @@ class YoutubeDL(object):

    def urlopen(self, req):
        """ Start an HTTP download """
        return self._opener.open(req)
        return self._opener.open(req, timeout=self._socket_timeout)

    def print_debug_header(self):
        if not self.params.get('verbose'):
@@ -1188,7 +1219,7 @@ class YoutubeDL(object):

    def _setup_opener(self):
        timeout_val = self.params.get('socket_timeout')
        timeout = 600 if timeout_val is None else float(timeout_val)
        self._socket_timeout = 600 if timeout_val is None else float(timeout_val)

        opts_cookiefile = self.params.get('cookiefile')
        opts_proxy = self.params.get('proxy')
@@ -1226,7 +1257,3 @@ class YoutubeDL(object):
        # (See https://github.com/rg3/youtube-dl/issues/1309 for details)
        opener.addheaders = []
        self._opener = opener

        # TODO remove this global modification
        compat_urllib_request.install_opener(opener)
        socket.setdefaulttimeout(timeout)
```
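The YoutubeDL hunks above add a `logger` parameter (warnings are routed to it instead of stderr), per-request socket timeouts via `self.urlopen()`, the computed `%(resolution)s` template value, the `bestvideo`/`worstvideo` format specs, and `upload_date` derivation from `timestamp`. A rough sketch of passing the new parameters in, using only the keys visible in the diff (`logger`, `format`, `socket_timeout`):

```python
# Hedged sketch, not part of the changeset: passes the parameters that the
# hunks above start honouring ('logger', 'format', 'socket_timeout').
import logging
import youtube_dl

logging.basicConfig(level=logging.WARNING)

ydl = youtube_dl.YoutubeDL({
    'logger': logging.getLogger('youtube-dl'),  # report_warning() now calls .warning() on this
    'format': 'worstvideo',                     # handled by the new format_spec branches
    'socket_timeout': 10,                       # now applied per request in self.urlopen()
})
ydl.download(['http://www.youtube.com/watch?v=BaW_jenozKc'])
```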
```
@@ -50,6 +50,7 @@ __authors__  = (
    'Anthony Weems',
    'David Wagner',
    'Juan C. Olivares',
    'Mattias Harrysson',
)

__license__ = 'Public Domain'
@@ -315,7 +316,7 @@ def parseOpts(overrideArguments=None):

    video_format.add_option('-f', '--format',
            action='store', dest='format', metavar='FORMAT', default=None,
            help='video format code, specify the order of preference using slashes: "-f 22/17/18". "-f mp4" and "-f flv" are also supported. You can also use the special names "best", "bestaudio", "worst", and "worstaudio". By default, youtube-dl will pick the best quality.')
            help='video format code, specify the order of preference using slashes: "-f 22/17/18". "-f mp4" and "-f flv" are also supported. You can also use the special names "best", "bestvideo", "bestaudio", "worst", "worstvideo" and "worstaudio". By default, youtube-dl will pick the best quality.')
    video_format.add_option('--all-formats',
            action='store_const', dest='format', help='download all available video formats', const='all')
    video_format.add_option('--prefer-free-formats',
@@ -430,6 +431,8 @@ def parseOpts(overrideArguments=None):
                  '%(extractor)s for the provider (youtube, metacafe, etc), '
                  '%(id)s for the video id, %(playlist)s for the playlist the video is in, '
                  '%(playlist_index)s for the position in the playlist and %% for a literal percent. '
                  '%(height)s and %(width)s for the width and height of the video format. '
                  '%(resolution)s for a textual description of the resolution of the video format. '
                  'Use - to output to stdout. Can also be used to download to a different directory, '
                  'for example with -o \'/my/downloads/%(uploader)s/%(title)s-%(id)s.%(ext)s\' .'))
    filesystem.add_option('--autonumber-size',
```
```
@@ -49,7 +49,7 @@ class HttpFD(FileDownloader):
        while count <= retries:
            # Establish connection
            try:
                data = compat_urllib_request.urlopen(request)
                data = self.ydl.urlopen(request)
                break
            except (compat_urllib_error.HTTPError, ) as err:
                if (err.code < 500 or err.code >= 600) and err.code != 416:
@@ -59,7 +59,7 @@ class HttpFD(FileDownloader):
                    # Unable to resume (requested range not satisfiable)
                    try:
                        # Open the connection again without the range header
                        data = compat_urllib_request.urlopen(basic_request)
                        data = self.ydl.urlopen(basic_request)
                        content_length = data.info()['Content-Length']
                    except (compat_urllib_error.HTTPError, ) as err:
                        if err.code < 500 or err.code >= 600:
```
```
@@ -1,3 +1,5 @@
from __future__ import unicode_literals

import os
import re
import subprocess
@@ -22,7 +24,7 @@ class RtmpFD(FileDownloader):
            proc_stderr_closed = False
            while not proc_stderr_closed:
                # read line from stderr
                line = u''
                line = ''
                while True:
                    char = proc.stderr.read(1)
                    if not char:
@@ -46,7 +48,7 @@ class RtmpFD(FileDownloader):
                    data_len = None
                    if percent > 0:
                        data_len = int(downloaded_data_len * 100 / percent)
                    data_len_str = u'~' + format_bytes(data_len)
                    data_len_str = '~' + format_bytes(data_len)
                    self.report_progress(percent, data_len_str, speed, eta)
                    cursor_in_new_line = False
                    self._hook_progress({
@@ -76,12 +78,12 @@ class RtmpFD(FileDownloader):
                        })
                    elif self.params.get('verbose', False):
                        if not cursor_in_new_line:
                            self.to_screen(u'')
                            self.to_screen('')
                        cursor_in_new_line = True
                        self.to_screen(u'[rtmpdump] '+line)
                        self.to_screen('[rtmpdump] '+line)
            proc.wait()
            if not cursor_in_new_line:
                self.to_screen(u'')
                self.to_screen('')
            return proc.returncode

        url = info_dict['url']
@@ -102,7 +104,7 @@ class RtmpFD(FileDownloader):
        try:
            subprocess.call(['rtmpdump', '-h'], stdout=(open(os.path.devnull, 'w')), stderr=subprocess.STDOUT)
        except (OSError, IOError):
            self.report_error(u'RTMP download detected but "rtmpdump" could not be run')
            self.report_error('RTMP download detected but "rtmpdump" could not be run')
            return False

        # Download using rtmpdump. rtmpdump returns exit code 2 when
@@ -127,7 +129,7 @@ class RtmpFD(FileDownloader):
            basic_args += ['--live']
        if conn:
            basic_args += ['--conn', conn]
        args = basic_args + [[], ['--resume', '--skip', '1']][self.params.get('continuedl', False)]
        args = basic_args + [[], ['--resume', '--skip', '1']][not live and self.params.get('continuedl', False)]

        if sys.platform == 'win32' and sys.version_info < (3, 0):
            # Windows subprocess module does not actually support Unicode
@@ -150,26 +152,35 @@ class RtmpFD(FileDownloader):
                shell_quote = lambda args: ' '.join(map(pipes.quote, str_args))
            except ImportError:
                shell_quote = repr
            self.to_screen(u'[debug] rtmpdump command line: ' + shell_quote(str_args))
            self.to_screen('[debug] rtmpdump command line: ' + shell_quote(str_args))

        RD_SUCCESS = 0
        RD_FAILED = 1
        RD_INCOMPLETE = 2
        RD_NO_CONNECT = 3

        retval = run_rtmpdump(args)

        while (retval == 2 or retval == 1) and not test:
        if retval == RD_NO_CONNECT:
            self.report_error('[rtmpdump] Could not connect to RTMP server.')
            return False

        while (retval == RD_INCOMPLETE or retval == RD_FAILED) and not test and not live:
            prevsize = os.path.getsize(encodeFilename(tmpfilename))
            self.to_screen(u'[rtmpdump] %s bytes' % prevsize)
            self.to_screen('[rtmpdump] %s bytes' % prevsize)
            time.sleep(5.0) # This seems to be needed
            retval = run_rtmpdump(basic_args + ['-e'] + [[], ['-k', '1']][retval == 1])
            retval = run_rtmpdump(basic_args + ['-e'] + [[], ['-k', '1']][retval == RD_FAILED])
            cursize = os.path.getsize(encodeFilename(tmpfilename))
            if prevsize == cursize and retval == 1:
            if prevsize == cursize and retval == RD_FAILED:
                break
             # Some rtmp streams seem abort after ~ 99.8%. Don't complain for those
            if prevsize == cursize and retval == 2 and cursize > 1024:
                self.to_screen(u'[rtmpdump] Could not download the whole video. This can happen for some advertisements.')
                retval = 0
            if prevsize == cursize and retval == RD_INCOMPLETE and cursize > 1024:
                self.to_screen('[rtmpdump] Could not download the whole video. This can happen for some advertisements.')
                retval = RD_SUCCESS
                break
        if retval == 0 or (test and retval == 2):
        if retval == RD_SUCCESS or (test and retval == RD_INCOMPLETE):
            fsize = os.path.getsize(encodeFilename(tmpfilename))
            self.to_screen(u'[rtmpdump] %s bytes' % fsize)
            self.to_screen('[rtmpdump] %s bytes' % fsize)
            self.try_rename(tmpfilename, filename)
            self._hook_progress({
                'downloaded_bytes': fsize,
@@ -179,6 +190,6 @@ class RtmpFD(FileDownloader):
            })
            return True
        else:
            self.to_stderr(u"\n")
            self.report_error(u'rtmpdump exited with code %d' % retval)
            self.to_stderr('\n')
            self.report_error('rtmpdump exited with code %d' % retval)
            return False
```
```
@@ -1,5 +1,6 @@
from .academicearth import AcademicEarthCourseIE
from .addanime import AddAnimeIE
from .aftonbladet import AftonbladetIE
from .anitube import AnitubeIE
from .aparat import AparatIE
from .appletrailers import AppleTrailersIE
@@ -52,7 +53,6 @@ from .dailymotion import (
    DailymotionUserIE,
)
from .daum import DaumIE
from .depositfiles import DepositFilesIE
from .dotsub import DotsubIE
from .dreisat import DreiSatIE
from .defense import DefenseGouvFrIE
@@ -175,6 +175,7 @@ from .ooyala import OoyalaIE
from .orf import ORFIE
from .pbs import PBSIE
from .photobucket import PhotobucketIE
from .playvid import PlayvidIE
from .podomatic import PodomaticIE
from .pornhd import PornHdIE
from .pornhub import PornHubIE
@@ -195,6 +196,7 @@ from .rutube import (
    RutubeMovieIE,
    RutubePersonIE,
)
from .rutv import RUTVIE
from .savefrom import SaveFromIE
from .servingsys import ServingSysIE
from .sina import SinaIE
@@ -241,6 +243,10 @@ from .tumblr import TumblrIE
from .tutv import TutvIE
from .tvigle import TvigleIE
from .tvp import TvpIE
from .udemy import (
    UdemyIE,
    UdemyCourseIE
)
from .unistra import UnistraIE
from .ustream import UstreamIE, UstreamChannelIE
from .vbox7 import Vbox7IE
@@ -267,6 +273,7 @@ from .viki import VikiIE
from .vk import VKIE
from .vube import VubeIE
from .wat import WatIE
from .wdr import WDRIE
from .weibo import WeiboIE
from .wimp import WimpIE
from .wistia import WistiaIE
```
youtube_dl/extractor/aftonbladet.py (new file, 69 lines)

```
@@ -0,0 +1,69 @@
# encoding: utf-8
from __future__ import unicode_literals

import datetime
import re

from .common import InfoExtractor


class AftonbladetIE(InfoExtractor):
    _VALID_URL = r'^http://tv\.aftonbladet\.se/webbtv.+?(?P<video_id>article[0-9]+)\.ab(?:$|[?#])'
    _TEST = {
        'url': 'http://tv.aftonbladet.se/webbtv/nyheter/vetenskap/rymden/article36015.ab',
        'info_dict': {
            'id': 'article36015',
            'ext': 'mp4',
            'title': 'Vulkanutbrott i rymden - nu släpper NASA bilderna',
            'description': 'Jupiters måne mest aktiv av alla himlakroppar',
            'upload_date': '20140306',
        },
    }

    def _real_extract(self, url):
        mobj = re.search(self._VALID_URL, url)

        video_id = mobj.group('video_id')
        webpage = self._download_webpage(url, video_id)

        # find internal video meta data
        META_URL = 'http://aftonbladet-play.drlib.aptoma.no/video/%s.json'
        internal_meta_id = self._html_search_regex(
            r'data-aptomaId="([\w\d]+)"', webpage, 'internal_meta_id')
        internal_meta_url = META_URL % internal_meta_id
        internal_meta_json = self._download_json(
            internal_meta_url, video_id, 'Downloading video meta data')

        # find internal video formats
        FORMATS_URL = 'http://aftonbladet-play.videodata.drvideo.aptoma.no/actions/video/?id=%s'
        internal_video_id = internal_meta_json['videoId']
        internal_formats_url = FORMATS_URL % internal_video_id
        internal_formats_json = self._download_json(
            internal_formats_url, video_id, 'Downloading video formats')

        formats = []
        for fmt in internal_formats_json['formats']['http']['pseudostreaming']['mp4']:
            p = fmt['paths'][0]
            formats.append({
                'url': 'http://%s:%d/%s/%s' % (p['address'], p['port'], p['path'], p['filename']),
                'ext': 'mp4',
                'width': fmt['width'],
                'height': fmt['height'],
                'tbr': fmt['bitrate'],
                'protocol': 'http',
            })
        self._sort_formats(formats)

        timestamp = datetime.datetime.fromtimestamp(internal_meta_json['timePublished'])
        upload_date = timestamp.strftime('%Y%m%d')

        return {
            'id': video_id,
            'title': internal_meta_json['title'],
            'formats': formats,
            'thumbnail': internal_meta_json['imageUrl'],
            'description': internal_meta_json['shortPreamble'],
            'upload_date': upload_date,
            'duration': internal_meta_json['duration'],
            'view_count': internal_meta_json['views'],
        }
```
```
@@ -72,18 +72,22 @@ class ArteTvIE(InfoExtractor):
            return self._extract_liveweb(url, name, lang)

        if re.search(self._LIVE_URL, url) is not None:
            raise ExtractorError(u'Arte live streams are not yet supported, sorry')
            raise ExtractorError('Arte live streams are not yet supported, sorry')
            # self.extractLiveStream(url)
            # return

        raise ExtractorError('No video found')

    def _extract_video(self, url, video_id, lang):
        """Extract from videos.arte.tv"""
        ref_xml_url = url.replace('/videos/', '/do_delegate/videos/')
        ref_xml_url = ref_xml_url.replace('.html', ',view,asPlayerXml.xml')
        ref_xml_doc = self._download_xml(ref_xml_url, video_id, note=u'Downloading metadata')
        ref_xml_doc = self._download_xml(
            ref_xml_url, video_id, note='Downloading metadata')
        config_node = find_xpath_attr(ref_xml_doc, './/video', 'lang', lang)
        config_xml_url = config_node.attrib['ref']
        config_xml = self._download_webpage(config_xml_url, video_id, note=u'Downloading configuration')
        config_xml = self._download_webpage(
            config_xml_url, video_id, note='Downloading configuration')

        video_urls = list(re.finditer(r'<url quality="(?P<quality>.*?)">(?P<url>.*?)</url>', config_xml))
        def _key(m):
```
```
@@ -9,21 +9,35 @@ from ..utils import ExtractorError

class BRIE(InfoExtractor):
    IE_DESC = "Bayerischer Rundfunk Mediathek"
    _VALID_URL = r"^https?://(?:www\.)?br\.de/mediathek/video/(?:sendungen/)?(?P<id>[a-z0-9\-]+)\.html$"
    _VALID_URL = r"^https?://(?:www\.)?br\.de/mediathek/video/(?:sendungen/)?(?:[a-z0-9\-/]+/)?(?P<id>[a-z0-9\-]+)\.html$"
    _BASE_URL = "http://www.br.de"

    _TEST = {
        "url": "http://www.br.de/mediathek/video/anselm-gruen-114.html",
        "md5": "c4f83cf0f023ba5875aba0bf46860df2",
        "info_dict": {
            "id": "2c8d81c5-6fb7-4a74-88d4-e768e5856532",
            "ext": "mp4",
            "title": "Feiern und Verzichten",
            "description": "Anselm Grün: Feiern und Verzichten",
            "uploader": "BR/Birgit Baier",
            "upload_date": "20140301"
    _TESTS = [
        {
            "url": "http://www.br.de/mediathek/video/anselm-gruen-114.html",
            "md5": "c4f83cf0f023ba5875aba0bf46860df2",
            "info_dict": {
                "id": "2c8d81c5-6fb7-4a74-88d4-e768e5856532",
                "ext": "mp4",
                "title": "Feiern und Verzichten",
                "description": "Anselm Grün: Feiern und Verzichten",
                "uploader": "BR/Birgit Baier",
                "upload_date": "20140301"
            }
        },
        {
            "url": "http://www.br.de/mediathek/video/sendungen/unter-unserem-himmel/unter-unserem-himmel-alpen-ueber-den-pass-100.html",
            "md5": "ab451b09d861dbed7d7cc9ab0be19ebe",
            "info_dict": {
                "id": "2c060e69-3a27-4e13-b0f0-668fac17d812",
                "ext": "mp4",
                "title": "Über den Pass",
                "description": "Die Eroberung der Alpen: Über den Pass",
                "uploader": None,
                "upload_date": None
            }
        }
    }
    ]

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
@@ -33,16 +47,21 @@ class BRIE(InfoExtractor):
            r"return BRavFramework\.register\(BRavFramework\('avPlayer_(?:[a-f0-9-]{36})'\)\.setup\({dataURL:'(/mediathek/video/[a-z0-9/~_.-]+)'}\)\);", page, "XMLURL")
        xml = self._download_xml(self._BASE_URL + xml_url, None)

        videos = [{
            "id": xml_video.get("externalId"),
            "title": xml_video.find("title").text,
            "formats": self._extract_formats(xml_video.find("assets")),
            "thumbnails": self._extract_thumbnails(xml_video.find("teaserImage/variants")),
            "description": " ".join(xml_video.find("shareTitle").text.splitlines()),
            "uploader": xml_video.find("author").text,
            "upload_date": "".join(reversed(xml_video.find("broadcastDate").text.split("."))),
            "webpage_url": xml_video.find("permalink").text,
        } for xml_video in xml.findall("video")]
        videos = []
        for xml_video in xml.findall("video"):
            video = {
                "id": xml_video.get("externalId"),
                "title": xml_video.find("title").text,
                "formats": self._extract_formats(xml_video.find("assets")),
                "thumbnails": self._extract_thumbnails(xml_video.find("teaserImage/variants")),
                "description": " ".join(xml_video.find("shareTitle").text.splitlines()),
                "webpage_url": xml_video.find("permalink").text
            }
            if xml_video.find("author").text:
                video["uploader"] = xml_video.find("author").text
            if xml_video.find("broadcastDate").text:
                video["upload_date"] =  "".join(reversed(xml_video.find("broadcastDate").text.split(".")))
            videos.append(video)

        if len(videos) > 1:
            self._downloader.report_warning(
```
```
@@ -17,8 +17,9 @@ class CollegeHumorIE(InfoExtractor):
            'id': '6902724',
            'ext': 'mp4',
            'title': 'Comic-Con Cosplay Catastrophe',
            'description': 'Fans get creative this year',
            'description': "Fans get creative this year at San Diego.  Too creative.  And yes, that's really Joss Whedon.",
            'age_limit': 13,
            'duration': 187,
        },
    },
    {
@@ -28,22 +29,22 @@ class CollegeHumorIE(InfoExtractor):
            'id': '3505939',
            'ext': 'mp4',
            'title': 'Font Conference',
            'description': 'This video wasn\'t long enough,',
            'description': "This video wasn't long enough, so we made it double-spaced.",
            'age_limit': 10,
            'duration': 179,
        },
    },
    # embedded youtube video
    {
        'url': 'http://www.collegehumor.com/embed/6950457',
        'url': 'http://www.collegehumor.com/embed/6950306',
        'info_dict': {
            'id': 'W5gMp3ZjYg4',
            'id': 'Z-bao9fg6Yc',
            'ext': 'mp4',
            'title': 'Funny Dogs Protecting Babies Compilation 2014 [NEW HD]',
            'uploader': 'FunnyPlox TV',
            'uploader_id': 'funnyploxtv',
            'description': 'md5:7ded37421526d54afdf005e25bc2b7a3',
            'upload_date': '20140128',
            'title': 'Young Americans Think President John F. Kennedy Died THIS MORNING IN A CAR ACCIDENT!!!',
            'uploader': 'Mark Dice',
            'uploader_id': 'MarkDice',
            'description': 'md5:62c3dab9351fac7bb44b53b69511d87f',
            'upload_date': '20140127',
        },
        'params': {
            'skip_download': True,
@@ -87,6 +88,7 @@ class CollegeHumorIE(InfoExtractor):
        self._sort_formats(formats)

        duration = int_or_none(vdata.get('duration'), 1000)
        like_count = int_or_none(vdata.get('likes'))

        return {
            'id': video_id,
@@ -96,4 +98,5 @@ class CollegeHumorIE(InfoExtractor):
            'formats': formats,
            'age_limit': age_limit,
            'duration': duration,
            'like_count': like_count,
        }
```
```
@@ -97,7 +97,9 @@ class InfoExtractor(object):
    thumbnail:      Full URL to a video thumbnail image.
    description:    One-line video description.
    uploader:       Full name of the video uploader.
    timestamp:      UNIX timestamp of the moment the video became available.
    upload_date:    Video upload date (YYYYMMDD).
                    If not explicitly set, calculated from timestamp.
    uploader_id:    Nickname or id of the video uploader.
    location:       Physical location of the video.
    subtitles:      The subtitle file contents as a dictionary in the format
@@ -118,9 +120,6 @@ class InfoExtractor(object):
    _real_extract() methods and define a _VALID_URL regexp.
    Probably, they should also be added to the list of extractors.

    _real_extract() must return a *list* of information dictionaries as
    described above.

    Finally, the _WORKING attribute should be set to False for broken IEs
    in order to warn the users and skip the tests.
    """
```
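The docstring hunk above introduces the `timestamp` field; as the `@@ -678,6 +701,11 @@` hunk in YoutubeDL.py earlier in this compare shows, `upload_date` is now derived from it when an extractor does not set it explicitly. A small illustration with a hypothetical timestamp value:

```python
# Illustration only: how a UNIX 'timestamp' maps to the derived 'upload_date',
# mirroring the utcfromtimestamp/strftime logic added in YoutubeDL.py.
import datetime

info_dict = {
    'id': '36015',            # hypothetical values
    'title': 'Example video',
    'timestamp': 1394064000,  # 2014-03-06 00:00:00 UTC
}

upload_date = datetime.datetime.utcfromtimestamp(info_dict['timestamp']).strftime('%Y%m%d')
print(upload_date)  # '20140306'
```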
```
@@ -12,6 +12,7 @@ from ..utils import (
    get_element_by_id,
    orderedSet,
    str_to_int,
    int_or_none,

    ExtractorError,
)
@@ -124,7 +125,7 @@ class DailymotionIE(DailymotionBaseInfoExtractor, SubtitlesInfoExtractor):
            if video_url is not None:
                m_size = re.search(r'H264-(\d+)x(\d+)', video_url)
                if m_size is not None:
                    width, height = m_size.group(1), m_size.group(2)
                    width, height = map(int_or_none, (m_size.group(1), m_size.group(2)))
                else:
                    width, height = None, None
                formats.append({
```
@@ -1,60 +0,0 @@
 | 
			
		||||
import re
 | 
			
		||||
import os
 | 
			
		||||
import socket
 | 
			
		||||
 | 
			
		||||
from .common import InfoExtractor
 | 
			
		||||
from ..utils import (
 | 
			
		||||
    compat_http_client,
 | 
			
		||||
    compat_str,
 | 
			
		||||
    compat_urllib_error,
 | 
			
		||||
    compat_urllib_parse,
 | 
			
		||||
    compat_urllib_request,
 | 
			
		||||
 | 
			
		||||
    ExtractorError,
 | 
			
		||||
)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class DepositFilesIE(InfoExtractor):
 | 
			
		||||
    """Information extractor for depositfiles.com"""
 | 
			
		||||
 | 
			
		||||
    _VALID_URL = r'(?:http://)?(?:\w+\.)?depositfiles\.com/(?:../(?#locale))?files/(.+)'
 | 
			
		||||
 | 
			
		||||
    def _real_extract(self, url):
 | 
			
		||||
        file_id = url.split('/')[-1]
 | 
			
		||||
        # Rebuild url in english locale
 | 
			
		||||
        url = 'http://depositfiles.com/en/files/' + file_id
 | 
			
		||||
 | 
			
		||||
        # Retrieve file webpage with 'Free download' button pressed
 | 
			
		||||
        free_download_indication = {'gateway_result' : '1'}
 | 
			
		||||
        request = compat_urllib_request.Request(url, compat_urllib_parse.urlencode(free_download_indication))
 | 
			
		||||
        try:
 | 
			
		||||
            self.report_download_webpage(file_id)
 | 
			
		||||
            webpage = compat_urllib_request.urlopen(request).read()
 | 
			
		||||
        except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
 | 
			
		||||
            raise ExtractorError(u'Unable to retrieve file webpage: %s' % compat_str(err))
 | 
			
		||||
 | 
			
		||||
        # Search for the real file URL
 | 
			
		||||
        mobj = re.search(r'<form action="(http://fileshare.+?)"', webpage)
 | 
			
		||||
        if (mobj is None) or (mobj.group(1) is None):
 | 
			
		||||
            # Try to figure out reason of the error.
 | 
			
		||||
            mobj = re.search(r'<strong>(Attention.*?)</strong>', webpage, re.DOTALL)
 | 
			
		||||
            if (mobj is not None) and (mobj.group(1) is not None):
 | 
			
		||||
                restriction_message = re.sub('\s+', ' ', mobj.group(1)).strip()
 | 
			
		||||
                raise ExtractorError(u'%s' % restriction_message)
 | 
			
		||||
            else:
 | 
			
		||||
                raise ExtractorError(u'Unable to extract download URL from: %s' % url)
 | 
			
		||||
 | 
			
		||||
        file_url = mobj.group(1)
 | 
			
		||||
        file_extension = os.path.splitext(file_url)[1][1:]
 | 
			
		||||
 | 
			
		||||
        # Search for file title
 | 
			
		||||
        file_title = self._search_regex(r'<b title="(.*?)">', webpage, u'title')
 | 
			
		||||
 | 
			
		||||
        return [{
 | 
			
		||||
            'id':       file_id.decode('utf-8'),
 | 
			
		||||
            'url':      file_url.decode('utf-8'),
 | 
			
		||||
            'uploader': None,
 | 
			
		||||
            'upload_date':  None,
 | 
			
		||||
            'title':    file_title,
 | 
			
		||||
            'ext':      file_extension.decode('utf-8'),
 | 
			
		||||
        }]
 | 
			
		||||
@@ -11,16 +11,15 @@ from ..utils import (
 | 
			
		||||
    compat_urllib_error,
 | 
			
		||||
    compat_urllib_parse,
 | 
			
		||||
    compat_urllib_request,
 | 
			
		||||
    urlencode_postdata,
 | 
			
		||||
 | 
			
		||||
    ExtractorError,
 | 
			
		||||
)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class FacebookIE(InfoExtractor):
 | 
			
		||||
    """Information Extractor for Facebook"""
 | 
			
		||||
 | 
			
		||||
    _VALID_URL = r'''(?x)
 | 
			
		||||
        (?:https?://)?(?:\w+\.)?facebook\.com/
 | 
			
		||||
        https?://(?:\w+\.)?facebook\.com/
 | 
			
		||||
        (?:[^#?]*\#!/)?
 | 
			
		||||
        (?:video/video\.php|photo\.php|video/embed)\?(?:.*?)
 | 
			
		||||
        (?:v|video_id)=(?P<id>[0-9]+)
 | 
			
		||||
@@ -35,15 +34,11 @@ class FacebookIE(InfoExtractor):
 | 
			
		||||
        'info_dict': {
 | 
			
		||||
            'id': '120708114770723',
 | 
			
		||||
            'ext': 'mp4',
 | 
			
		||||
            u"duration": 279,
 | 
			
		||||
            u"title": u"PEOPLE ARE AWESOME 2013"
 | 
			
		||||
            'duration': 279,
 | 
			
		||||
            'title': 'PEOPLE ARE AWESOME 2013',
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    def report_login(self):
 | 
			
		||||
        """Report attempt to log in."""
 | 
			
		||||
        self.to_screen('Logging in')
 | 
			
		||||
 | 
			
		||||
    def _login(self):
 | 
			
		||||
        (useremail, password) = self._get_login_info()
 | 
			
		||||
        if useremail is None:
 | 
			
		||||
@@ -51,8 +46,8 @@ class FacebookIE(InfoExtractor):
 | 
			
		||||
 | 
			
		||||
        login_page_req = compat_urllib_request.Request(self._LOGIN_URL)
 | 
			
		||||
        login_page_req.add_header('Cookie', 'locale=en_US')
 | 
			
		||||
        self.report_login()
 | 
			
		||||
        login_page = self._download_webpage(login_page_req, None, note=False,
 | 
			
		||||
        login_page = self._download_webpage(login_page_req, None,
 | 
			
		||||
            note='Downloading login page',
 | 
			
		||||
            errnote='Unable to download login page')
 | 
			
		||||
        lsd = self._search_regex(
 | 
			
		||||
            r'<input type="hidden" name="lsd" value="([^"]*)"',
 | 
			
		||||
@@ -70,23 +65,25 @@ class FacebookIE(InfoExtractor):
 | 
			
		||||
            'timezone': '-60',
 | 
			
		||||
            'trynum': '1',
 | 
			
		||||
            }
 | 
			
		||||
        request = compat_urllib_request.Request(self._LOGIN_URL, compat_urllib_parse.urlencode(login_form))
 | 
			
		||||
        request = compat_urllib_request.Request(self._LOGIN_URL, urlencode_postdata(login_form))
 | 
			
		||||
        request.add_header('Content-Type', 'application/x-www-form-urlencoded')
 | 
			
		||||
        try:
 | 
			
		||||
            login_results = compat_urllib_request.urlopen(request).read()
 | 
			
		||||
            login_results = self._download_webpage(request, None,
 | 
			
		||||
                note='Logging in', errnote='unable to fetch login page')
 | 
			
		||||
            if re.search(r'<form(.*)name="login"(.*)</form>', login_results) is not None:
 | 
			
		||||
                self._downloader.report_warning('unable to log in: bad username/password, or exceeded login rate limit (~3/min). Check credentials or wait.')
 | 
			
		||||
                return
 | 
			
		||||
 | 
			
		||||
            check_form = {
 | 
			
		||||
                'fb_dtsg': self._search_regex(r'"fb_dtsg":"(.*?)"', login_results, 'fb_dtsg'),
 | 
			
		||||
                'fb_dtsg': self._search_regex(r'name="fb_dtsg" value="(.+?)"', login_results, 'fb_dtsg'),
 | 
			
		||||
                'nh': self._search_regex(r'name="nh" value="(\w*?)"', login_results, 'nh'),
 | 
			
		||||
                'name_action_selected': 'dont_save',
 | 
			
		||||
                'submit[Continue]': self._search_regex(r'<input value="(.*?)" name="submit\[Continue\]"', login_results, 'continue'),
 | 
			
		||||
                'submit[Continue]': self._search_regex(r'<button[^>]+value="(.*?)"[^>]+name="submit\[Continue\]"', login_results, 'continue'),
 | 
			
		||||
            }
 | 
			
		||||
            check_req = compat_urllib_request.Request(self._CHECKPOINT_URL, compat_urllib_parse.urlencode(check_form))
 | 
			
		||||
            check_req = compat_urllib_request.Request(self._CHECKPOINT_URL, urlencode_postdata(check_form))
 | 
			
		||||
            check_req.add_header('Content-Type', 'application/x-www-form-urlencoded')
 | 
			
		||||
            check_response = compat_urllib_request.urlopen(check_req).read()
 | 
			
		||||
            check_response = self._download_webpage(check_req, None,
 | 
			
		||||
                note='Confirming login')
 | 
			
		||||
            if re.search(r'id="checkpointSubmitButton"', check_response) is not None:
 | 
			
		||||
                self._downloader.report_warning('Unable to confirm login, you have to log in in your browser and authorize the login.')
 | 
			
		||||
        except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
 | 
			
		||||
@@ -98,8 +95,6 @@ class FacebookIE(InfoExtractor):
 | 
			
		||||
 | 
			
		||||
    def _real_extract(self, url):
 | 
			
		||||
        mobj = re.match(self._VALID_URL, url)
 | 
			
		||||
        if mobj is None:
 | 
			
		||||
            raise ExtractorError('Invalid URL: %s' % url)
 | 
			
		||||
        video_id = mobj.group('id')
 | 
			
		||||
 | 
			
		||||
        url = 'https://www.facebook.com/video/video.php?v=%s' % video_id
 | 
			
		||||
@@ -125,18 +120,14 @@ class FacebookIE(InfoExtractor):
 | 
			
		||||
            video_url = video_data['sd_src']
 | 
			
		||||
        if not video_url:
 | 
			
		||||
            raise ExtractorError('Cannot find video URL')
 | 
			
		||||
        video_duration = int(video_data['video_duration'])
 | 
			
		||||
        thumbnail = video_data['thumbnail_src']
 | 
			
		||||
 | 
			
		||||
        video_title = self._html_search_regex(
 | 
			
		||||
            r'<h2 class="uiHeaderTitle">([^<]*)</h2>', webpage, 'title')
 | 
			
		||||
 | 
			
		||||
        info = {
 | 
			
		||||
        return {
 | 
			
		||||
            'id': video_id,
 | 
			
		||||
            'title': video_title,
 | 
			
		||||
            'url': video_url,
 | 
			
		||||
            'ext': 'mp4',
 | 
			
		||||
            'duration': video_duration,
 | 
			
		||||
            'thumbnail': thumbnail,
 | 
			
		||||
            'duration': int(video_data['video_duration']),
 | 
			
		||||
            'thumbnail': video_data['thumbnail_src'],
 | 
			
		||||
        }
 | 
			
		||||
        return [info]
 | 
			
		||||
 
 | 
			
		||||
@@ -1,12 +1,13 @@
 | 
			
		||||
from __future__ import unicode_literals
 | 
			
		||||
 | 
			
		||||
import json
 | 
			
		||||
import re
 | 
			
		||||
 | 
			
		||||
from .common import InfoExtractor
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class FunnyOrDieIE(InfoExtractor):
 | 
			
		||||
    _VALID_URL = r'^(?:https?://)?(?:www\.)?funnyordie\.com/videos/(?P<id>[0-9a-f]+)/.*$'
 | 
			
		||||
    _VALID_URL = r'https?://(?:www\.)?funnyordie\.com/(?P<type>embed|videos)/(?P<id>[0-9a-f]+)(?:$|[?#/])'
 | 
			
		||||
    _TEST = {
 | 
			
		||||
        'url': 'http://www.funnyordie.com/videos/0732f586d7/heart-shaped-box-literal-video-version',
 | 
			
		||||
        'file': '0732f586d7.mp4',
 | 
			
		||||
@@ -30,10 +31,23 @@ class FunnyOrDieIE(InfoExtractor):
 | 
			
		||||
            [r'type="video/mp4" src="(.*?)"', r'src="([^>]*?)" type=\'video/mp4\''],
 | 
			
		||||
            webpage, 'video URL', flags=re.DOTALL)
 | 
			
		||||
 | 
			
		||||
        if mobj.group('type') == 'embed':
 | 
			
		||||
            post_json = self._search_regex(
 | 
			
		||||
                r'fb_post\s*=\s*(\{.*?\});', webpage, 'post details')
 | 
			
		||||
            post = json.loads(post_json)
 | 
			
		||||
            title = post['name']
 | 
			
		||||
            description = post.get('description')
 | 
			
		||||
            thumbnail = post.get('picture')
 | 
			
		||||
        else:
 | 
			
		||||
            title = self._og_search_title(webpage)
 | 
			
		||||
            description = self._og_search_description(webpage)
 | 
			
		||||
            thumbnail = None
 | 
			
		||||
 | 
			
		||||
        return {
 | 
			
		||||
            'id': video_id,
 | 
			
		||||
            'url': video_url,
 | 
			
		||||
            'ext': 'mp4',
 | 
			
		||||
            'title': self._og_search_title(webpage),
 | 
			
		||||
            'description': self._og_search_description(webpage),
 | 
			
		||||
            'title': title,
 | 
			
		||||
            'description': description,
 | 
			
		||||
            'thumbnail': thumbnail,
 | 
			
		||||
        }
 | 
			
		||||
 
 | 
			
		||||
@@ -1,3 +1,5 @@
from __future__ import unicode_literals

import re

from .common import InfoExtractor
@@ -6,13 +8,14 @@ from .common import InfoExtractor
class GamekingsIE(InfoExtractor):
    _VALID_URL = r'http://www\.gamekings\.tv/videos/(?P<name>[0-9a-z\-]+)'
    _TEST = {
        u"url": u"http://www.gamekings.tv/videos/phoenix-wright-ace-attorney-dual-destinies-review/",
        u'file': u'20130811.mp4',
        'url': 'http://www.gamekings.tv/videos/phoenix-wright-ace-attorney-dual-destinies-review/',
        # MD5 is flaky, seems to change regularly
        #u'md5': u'2f32b1f7b80fdc5cb616efb4f387f8a3',
        # 'md5': '2f32b1f7b80fdc5cb616efb4f387f8a3',
        u'info_dict': {
            u"title": u"Phoenix Wright: Ace Attorney \u2013 Dual Destinies Review",
            u"description": u"Melle en Steven hebben voor de review een week in de rechtbank doorbracht met Phoenix Wright: Ace Attorney - Dual Destinies.",
            'id': '20130811',
            'ext': 'mp4',
            'title': 'Phoenix Wright: Ace Attorney \u2013 Dual Destinies Review',
            'description': 'md5:632e61a9f97d700e83f43d77ddafb6a4',
        }
    }

 
 | 
			
		||||
@@ -4,7 +4,6 @@ from __future__ import unicode_literals
 | 
			
		||||
 | 
			
		||||
import os
 | 
			
		||||
import re
 | 
			
		||||
import xml.etree.ElementTree
 | 
			
		||||
 | 
			
		||||
from .common import InfoExtractor
 | 
			
		||||
from .youtube import YoutubeIE
 | 
			
		||||
@@ -17,6 +16,7 @@ from ..utils import (
 | 
			
		||||
 | 
			
		||||
    ExtractorError,
 | 
			
		||||
    HEADRequest,
 | 
			
		||||
    parse_xml,
 | 
			
		||||
    smuggle_url,
 | 
			
		||||
    unescapeHTML,
 | 
			
		||||
    unified_strdate,
 | 
			
		||||
@@ -24,6 +24,7 @@ from ..utils import (
 | 
			
		||||
)
 | 
			
		||||
from .brightcove import BrightcoveIE
 | 
			
		||||
from .ooyala import OoyalaIE
 | 
			
		||||
from .rutv import RUTVIE
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class GenericIE(InfoExtractor):
 | 
			
		||||
@@ -116,6 +117,48 @@ class GenericIE(InfoExtractor):
 | 
			
		||||
            'params': {
 | 
			
		||||
                'skip_download': False,
 | 
			
		||||
            }
 | 
			
		||||
        },
 | 
			
		||||
        # embed.ly video
 | 
			
		||||
        {
 | 
			
		||||
            'url': 'http://www.tested.com/science/weird/460206-tested-grinding-coffee-2000-frames-second/',
 | 
			
		||||
            'info_dict': {
 | 
			
		||||
                'id': '9ODmcdjQcHQ',
 | 
			
		||||
                'ext': 'mp4',
 | 
			
		||||
                'title': 'Tested: Grinding Coffee at 2000 Frames Per Second',
 | 
			
		||||
                'upload_date': '20140225',
 | 
			
		||||
                'description': 'md5:06a40fbf30b220468f1e0957c0f558ff',
 | 
			
		||||
                'uploader': 'Tested',
 | 
			
		||||
                'uploader_id': 'testedcom',
 | 
			
		||||
            },
 | 
			
		||||
            # No need to test YoutubeIE here
 | 
			
		||||
            'params': {
 | 
			
		||||
                'skip_download': True,
 | 
			
		||||
            },
 | 
			
		||||
        },
 | 
			
		||||
        # funnyordie embed
 | 
			
		||||
        {
 | 
			
		||||
            'url': 'http://www.theguardian.com/world/2014/mar/11/obama-zach-galifianakis-between-two-ferns',
 | 
			
		||||
            'md5': '7cf780be104d40fea7bae52eed4a470e',
 | 
			
		||||
            'info_dict': {
 | 
			
		||||
                'id': '18e820ec3f',
 | 
			
		||||
                'ext': 'mp4',
 | 
			
		||||
                'title': 'Between Two Ferns with Zach Galifianakis: President Barack Obama',
 | 
			
		||||
                'description': 'Episode 18: President Barack Obama sits down with Zach Galifianakis for his most memorable interview yet.',
 | 
			
		||||
            },
 | 
			
		||||
        },
 | 
			
		||||
        # RUTV embed
 | 
			
		||||
        {
 | 
			
		||||
            'url': 'http://www.rg.ru/2014/03/15/reg-dfo/anklav-anons.html',
 | 
			
		||||
            'info_dict': {
 | 
			
		||||
                'id': '776940',
 | 
			
		||||
                'ext': 'mp4',
 | 
			
		||||
                'title': 'Охотское море стало целиком российским',
 | 
			
		||||
                'description': 'md5:5ed62483b14663e2a95ebbe115eb8f43',
 | 
			
		||||
            },
 | 
			
		||||
            'params': {
 | 
			
		||||
                # m3u8 download
 | 
			
		||||
                'skip_download': True,
 | 
			
		||||
            },
 | 
			
		||||
        }
 | 
			
		||||
    ]
 | 
			
		||||
 | 
			
		||||
@@ -211,7 +254,7 @@ class GenericIE(InfoExtractor):
 | 
			
		||||
            else:
 | 
			
		||||
                assert ':' in default_search
 | 
			
		||||
                return self.url_result(default_search + url)
 | 
			
		||||
        video_id = os.path.splitext(url.split('/')[-1])[0]
 | 
			
		||||
        video_id = os.path.splitext(url.rstrip('/').split('/')[-1])[0]
 | 
			
		||||
 | 
			
		||||
        self.to_screen('%s: Requesting header' % video_id)
 | 
			
		||||
 | 
			
		||||
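The rstrip('/') added here matters for page URLs that end in a slash, where the old expression yields an empty video_id. A quick sketch with a hypothetical URL:

import os

url = 'http://example.com/some-video/'  # hypothetical URL with a trailing slash

os.path.splitext(url.split('/')[-1])[0]              # old expression -> ''
os.path.splitext(url.rstrip('/').split('/')[-1])[0]  # new expression -> 'some-video'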
@@ -257,7 +300,7 @@ class GenericIE(InfoExtractor):
 | 
			
		||||
 | 
			
		||||
        # Is it an RSS feed?
 | 
			
		||||
        try:
 | 
			
		||||
            doc = xml.etree.ElementTree.fromstring(webpage.encode('utf-8'))
 | 
			
		||||
            doc = parse_xml(webpage)
 | 
			
		||||
            if doc.tag == 'rss':
 | 
			
		||||
                return self._extract_rss(url, video_id, doc)
 | 
			
		||||
        except compat_xml_parse_error:
 | 
			
		||||
@@ -296,9 +339,9 @@ class GenericIE(InfoExtractor):
 | 
			
		||||
 | 
			
		||||
        # Look for embedded (iframe) Vimeo player
 | 
			
		||||
        mobj = re.search(
 | 
			
		||||
            r'<iframe[^>]+?src="((?:https?:)?//player\.vimeo\.com/video/.+?)"', webpage)
 | 
			
		||||
            r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//player\.vimeo\.com/video/.+?)\1', webpage)
 | 
			
		||||
        if mobj:
 | 
			
		||||
            player_url = unescapeHTML(mobj.group(1))
 | 
			
		||||
            player_url = unescapeHTML(mobj.group('url'))
 | 
			
		||||
            surl = smuggle_url(player_url, {'Referer': url})
 | 
			
		||||
            return self.url_result(surl, 'Vimeo')
 | 
			
		||||
 | 
			
		||||
@@ -407,6 +450,27 @@ class GenericIE(InfoExtractor):
 | 
			
		||||
        if mobj is not None:
 | 
			
		||||
            return self.url_result(mobj.group('url'), 'HuffPost')
 | 
			
		||||
 | 
			
		||||
        # Look for embed.ly
 | 
			
		||||
        mobj = re.search(r'class=["\']embedly-card["\'][^>]href=["\'](?P<url>[^"\']+)', webpage)
 | 
			
		||||
        if mobj is not None:
 | 
			
		||||
            return self.url_result(mobj.group('url'))
 | 
			
		||||
        mobj = re.search(r'class=["\']embedly-embed["\'][^>]src=["\'][^"\']*url=(?P<url>[^&]+)', webpage)
 | 
			
		||||
        if mobj is not None:
 | 
			
		||||
            return self.url_result(compat_urllib_parse.unquote(mobj.group('url')))
 | 
			
		||||
 | 
			
		||||
        # Look for funnyordie embed
 | 
			
		||||
        matches = re.findall(r'<iframe[^>]+?src="(https?://(?:www\.)?funnyordie\.com/embed/[^"]+)"', webpage)
 | 
			
		||||
        if matches:
 | 
			
		||||
            urlrs = [self.url_result(unescapeHTML(eurl), 'FunnyOrDie')
 | 
			
		||||
                     for eurl in matches]
 | 
			
		||||
            return self.playlist_result(
 | 
			
		||||
                urlrs, playlist_id=video_id, playlist_title=video_title)
 | 
			
		||||
 | 
			
		||||
        # Look for embedded RUTV player
 | 
			
		||||
        rutv_url = RUTVIE._extract_url(webpage)
 | 
			
		||||
        if rutv_url:
 | 
			
		||||
            return self.url_result(rutv_url, 'RUTV')
 | 
			
		||||
 | 
			
		||||
        # Start with something easy: JW Player in SWFObject
 | 
			
		||||
        mobj = re.search(r'flashvars: [\'"](?:.*&)?file=(http[^\'"&]*)', webpage)
 | 
			
		||||
        if mobj is None:
 | 
			
		||||
 
 | 
			
		||||
@@ -46,6 +46,6 @@ class GoogleSearchIE(SearchInfoExtractor):
                    'url': mobj.group(1)
                })

            if (len(entries) >= n) or not re.search(r'class="pn" id="pnnext"', webpage):
            if (len(entries) >= n) or not re.search(r'id="pnnext"', webpage):
                res['entries'] = entries[:n]
                return res
 
 | 
			
		||||
@@ -6,7 +6,10 @@ from random import random
from math import floor

from .common import InfoExtractor
from ..utils import compat_urllib_request
from ..utils import (
    compat_urllib_request,
    ExtractorError,
)


class IPrimaIE(InfoExtractor):
@@ -36,6 +39,7 @@ class IPrimaIE(InfoExtractor):
        'params': {
            'skip_download': True,  # requires rtmpdump
        },
        'skip': 'Do not have permission to access this page',
    }]

    def _real_extract(self, url):
@@ -44,6 +48,10 @@ class IPrimaIE(InfoExtractor):

        webpage = self._download_webpage(url, video_id)

        if re.search(r'Nemáte oprávnění přistupovat na tuto stránku.\s*</div>', webpage):
            raise ExtractorError(
                '%s said: You do not have permission to access this page' % self.IE_NAME, expected=True)

        player_url = (
            'http://embed.livebox.cz/iprimaplay/player-embed-v2.js?__tok%s__=%s' %
            (floor(random()*1073741824), floor(random()*1073741824))
 
 | 
			
		||||
@@ -1,56 +1,61 @@
 | 
			
		||||
# coding: utf-8
 | 
			
		||||
from __future__ import unicode_literals
 | 
			
		||||
 | 
			
		||||
import re
 | 
			
		||||
 | 
			
		||||
from .common import InfoExtractor
 | 
			
		||||
from ..utils import (
 | 
			
		||||
    ExtractorError,
 | 
			
		||||
    RegexNotFoundError,
 | 
			
		||||
    unescapeHTML,
 | 
			
		||||
)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class JukeboxIE(InfoExtractor):
 | 
			
		||||
    _VALID_URL = r'^http://www\.jukebox?\..+?\/.+[,](?P<video_id>[a-z0-9\-]+)\.html'
 | 
			
		||||
    _IFRAME = r'<iframe .*src="(?P<iframe>[^"]*)".*>'
 | 
			
		||||
    _VIDEO_URL = r'"config":{"file":"(?P<video_url>http:[^"]+[.](?P<video_ext>[^.?]+)[?]mdtk=[0-9]+)"'
 | 
			
		||||
    _TITLE = r'<h1 class="inline">(?P<title>[^<]+)</h1>.*<span id="infos_article_artist">(?P<artist>[^<]+)</span>'
 | 
			
		||||
    _IS_YOUTUBE = r'config":{"file":"(?P<youtube_url>http:[\\][/][\\][/]www[.]youtube[.]com[\\][/]watch[?]v=[^"]+)"'
 | 
			
		||||
    _TEST = {
 | 
			
		||||
        'url': 'http://www.jukebox.es/kosheen/videoclip,pride,r303r.html',
 | 
			
		||||
        'md5': '5dc6477e74b1e37042ac5acedd8413e5',
 | 
			
		||||
        'info_dict': {
 | 
			
		||||
            'id': 'r303r',
 | 
			
		||||
            'ext': 'flv',
 | 
			
		||||
            'title': 'Kosheen-En Vivo Pride',
 | 
			
		||||
            'uploader': 'Kosheen',
 | 
			
		||||
        },
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    def _real_extract(self, url):
 | 
			
		||||
        mobj = re.match(self._VALID_URL, url)
 | 
			
		||||
        video_id = mobj.group('video_id')
 | 
			
		||||
 | 
			
		||||
        html = self._download_webpage(url, video_id)
 | 
			
		||||
 | 
			
		||||
        mobj = re.search(self._IFRAME, html)
 | 
			
		||||
        if mobj is None:
 | 
			
		||||
            raise ExtractorError(u'Cannot extract iframe url')
 | 
			
		||||
        iframe_url = unescapeHTML(mobj.group('iframe'))
 | 
			
		||||
        iframe_url = unescapeHTML(self._search_regex(r'<iframe .*src="([^"]*)"', html, 'iframe url'))
 | 
			
		||||
 | 
			
		||||
        iframe_html = self._download_webpage(iframe_url, video_id, 'Downloading iframe')
 | 
			
		||||
        mobj = re.search(r'class="jkb_waiting"', iframe_html)
 | 
			
		||||
        if mobj is not None:
 | 
			
		||||
            raise ExtractorError(u'Video is not available(in your country?)!')
 | 
			
		||||
        if re.search(r'class="jkb_waiting"', iframe_html) is not None:
 | 
			
		||||
            raise ExtractorError('Video is not available(in your country?)!')
 | 
			
		||||
 | 
			
		||||
        self.report_extraction(video_id)
 | 
			
		||||
 | 
			
		||||
        mobj = re.search(self._VIDEO_URL, iframe_html)
 | 
			
		||||
        if mobj is None:
 | 
			
		||||
            mobj = re.search(self._IS_YOUTUBE, iframe_html)
 | 
			
		||||
            if mobj is None:
 | 
			
		||||
                raise ExtractorError(u'Cannot extract video url')
 | 
			
		||||
            youtube_url = unescapeHTML(mobj.group('youtube_url')).replace('\/','/')
 | 
			
		||||
            self.to_screen(u'Youtube video detected')
 | 
			
		||||
            return self.url_result(youtube_url,ie='Youtube')
 | 
			
		||||
        video_url = unescapeHTML(mobj.group('video_url')).replace('\/','/')
 | 
			
		||||
        video_ext = unescapeHTML(mobj.group('video_ext'))
 | 
			
		||||
        try:
 | 
			
		||||
            video_url = self._search_regex(r'"config":{"file":"(?P<video_url>http:[^"]+\?mdtk=[0-9]+)"',
 | 
			
		||||
                iframe_html, 'video url')
 | 
			
		||||
            video_url = unescapeHTML(video_url).replace('\/', '/')
 | 
			
		||||
        except RegexNotFoundError:
 | 
			
		||||
            youtube_url = self._search_regex(
 | 
			
		||||
                r'config":{"file":"(http:\\/\\/www\.youtube\.com\\/watch\?v=[^"]+)"',
 | 
			
		||||
                iframe_html, 'youtube url')
 | 
			
		||||
            youtube_url = unescapeHTML(youtube_url).replace('\/', '/')
 | 
			
		||||
            self.to_screen('Youtube video detected')
 | 
			
		||||
            return self.url_result(youtube_url, ie='Youtube')
 | 
			
		||||
 | 
			
		||||
        mobj = re.search(self._TITLE, html)
 | 
			
		||||
        if mobj is None:
 | 
			
		||||
            raise ExtractorError(u'Cannot extract title')
 | 
			
		||||
        title = unescapeHTML(mobj.group('title'))
 | 
			
		||||
        artist = unescapeHTML(mobj.group('artist'))
 | 
			
		||||
        title = self._html_search_regex(r'<h1 class="inline">([^<]+)</h1>',
 | 
			
		||||
            html, 'title')
 | 
			
		||||
        artist = self._html_search_regex(r'<span id="infos_article_artist">([^<]+)</span>',
 | 
			
		||||
            html, 'artist')
 | 
			
		||||
 | 
			
		||||
        return [{'id': video_id,
 | 
			
		||||
                 'url': video_url,
 | 
			
		||||
                 'title': artist + '-' + title,
 | 
			
		||||
                 'ext': video_ext
 | 
			
		||||
                 }]
 | 
			
		||||
        return {
 | 
			
		||||
            'id': video_id,
 | 
			
		||||
            'url': video_url,
 | 
			
		||||
            'title': artist + '-' + title,
 | 
			
		||||
            'uploader': artist,
 | 
			
		||||
        }
 | 
			
		||||
 
 | 
			
		||||
@@ -8,7 +8,9 @@ from .common import InfoExtractor
 | 
			
		||||
from ..utils import (
 | 
			
		||||
    compat_urllib_parse,
 | 
			
		||||
    compat_urllib_request,
 | 
			
		||||
    ExtractorError
 | 
			
		||||
    ExtractorError,
 | 
			
		||||
    int_or_none,
 | 
			
		||||
    compat_str,
 | 
			
		||||
)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@@ -19,16 +21,17 @@ class LyndaIE(SubtitlesInfoExtractor):
 | 
			
		||||
    _LOGIN_URL = 'https://www.lynda.com/login/login.aspx'
 | 
			
		||||
    _NETRC_MACHINE = 'lynda'
 | 
			
		||||
 | 
			
		||||
    _SUCCESSFUL_LOGIN_REGEX = r'<a href="https://www.lynda.com/home/userAccount/ChangeContactInfo.aspx" data-qa="eyebrow_account_menu">My account'
 | 
			
		||||
    _SUCCESSFUL_LOGIN_REGEX = r'isLoggedIn: true'
 | 
			
		||||
    _TIMECODE_REGEX = r'\[(?P<timecode>\d+:\d+:\d+[\.,]\d+)\]'
 | 
			
		||||
 | 
			
		||||
    ACCOUNT_CREDENTIALS_HINT = 'Use --username and --password options to provide lynda.com account credentials.'
 | 
			
		||||
 | 
			
		||||
    _TEST = {
 | 
			
		||||
        'url': 'http://www.lynda.com/Bootstrap-tutorials/Using-exercise-files/110885/114408-4.html',
 | 
			
		||||
        'file': '114408.mp4',
 | 
			
		||||
        'md5': 'ecfc6862da89489161fb9cd5f5a6fac1',
 | 
			
		||||
        'info_dict': {
 | 
			
		||||
            'id': '114408',
 | 
			
		||||
            'ext': 'mp4',
 | 
			
		||||
            'title': 'Using the exercise files',
 | 
			
		||||
            'duration': 68
 | 
			
		||||
        }
 | 
			
		||||
@@ -41,27 +44,44 @@ class LyndaIE(SubtitlesInfoExtractor):
 | 
			
		||||
        mobj = re.match(self._VALID_URL, url)
 | 
			
		||||
        video_id = mobj.group(1)
 | 
			
		||||
 | 
			
		||||
        page = self._download_webpage('http://www.lynda.com/ajax/player?videoId=%s&type=video' % video_id,
 | 
			
		||||
                                      video_id, 'Downloading video JSON')
 | 
			
		||||
        page = self._download_webpage('http://www.lynda.com/ajax/player?videoId=%s&type=video' % video_id, video_id,
 | 
			
		||||
            'Downloading video JSON')
 | 
			
		||||
        video_json = json.loads(page)
 | 
			
		||||
 | 
			
		||||
        if 'Status' in video_json:
 | 
			
		||||
            raise ExtractorError('lynda returned error: %s' % video_json['Message'], expected=True)
 | 
			
		||||
 | 
			
		||||
        if video_json['HasAccess'] is False:
 | 
			
		||||
            raise ExtractorError('Video %s is only available for members. ' % video_id + self.ACCOUNT_CREDENTIALS_HINT, expected=True)
 | 
			
		||||
            raise ExtractorError(
 | 
			
		||||
                'Video %s is only available for members. ' % video_id + self.ACCOUNT_CREDENTIALS_HINT, expected=True)
 | 
			
		||||
 | 
			
		||||
        video_id = video_json['ID']
 | 
			
		||||
        video_id = compat_str(video_json['ID'])
 | 
			
		||||
        duration = video_json['DurationInSeconds']
 | 
			
		||||
        title = video_json['Title']
 | 
			
		||||
 | 
			
		||||
        formats = [{'url': fmt['Url'],
 | 
			
		||||
        formats = []
 | 
			
		||||
 | 
			
		||||
        fmts = video_json.get('Formats')
 | 
			
		||||
        if fmts:
 | 
			
		||||
            formats.extend([
 | 
			
		||||
                {
 | 
			
		||||
                    'url': fmt['Url'],
 | 
			
		||||
                    'ext': fmt['Extension'],
 | 
			
		||||
                    'width': fmt['Width'],
 | 
			
		||||
                    'height': fmt['Height'],
 | 
			
		||||
                    'filesize': fmt['FileSize'],
 | 
			
		||||
                    'format_id': str(fmt['Resolution'])
 | 
			
		||||
                    } for fmt in video_json['Formats']]
 | 
			
		||||
                } for fmt in fmts])
 | 
			
		||||
 | 
			
		||||
        prioritized_streams = video_json.get('PrioritizedStreams')
 | 
			
		||||
        if prioritized_streams:
 | 
			
		||||
            formats.extend([
 | 
			
		||||
                {
 | 
			
		||||
                    'url': video_url,
 | 
			
		||||
                    'width': int_or_none(format_id),
 | 
			
		||||
                    'format_id': format_id,
 | 
			
		||||
                } for format_id, video_url in prioritized_streams['0'].items()
 | 
			
		||||
            ])
 | 
			
		||||
 | 
			
		||||
        self._sort_formats(formats)
 | 
			
		||||
 | 
			
		||||
@@ -91,7 +111,7 @@ class LyndaIE(SubtitlesInfoExtractor):
 | 
			
		||||
            'stayPut': 'false'
 | 
			
		||||
        }        
 | 
			
		||||
        request = compat_urllib_request.Request(self._LOGIN_URL, compat_urllib_parse.urlencode(login_form))
 | 
			
		||||
        login_page = self._download_webpage(request, None, note='Logging in as %s' % username)
 | 
			
		||||
        login_page = self._download_webpage(request, None, 'Logging in as %s' % username)
 | 
			
		||||
 | 
			
		||||
        # Not (yet) logged in
 | 
			
		||||
        m = re.search(r'loginResultJson = \'(?P<json>[^\']+)\';', login_page)
 | 
			
		||||
@@ -116,7 +136,7 @@ class LyndaIE(SubtitlesInfoExtractor):
 | 
			
		||||
                    'stayPut': 'false',
 | 
			
		||||
                }
 | 
			
		||||
                request = compat_urllib_request.Request(self._LOGIN_URL, compat_urllib_parse.urlencode(confirm_form))
 | 
			
		||||
                login_page = self._download_webpage(request, None, note='Confirming log in and log out from another device')
 | 
			
		||||
                login_page = self._download_webpage(request, None, 'Confirming log in and log out from another device')
 | 
			
		||||
 | 
			
		||||
        if re.search(self._SUCCESSFUL_LOGIN_REGEX, login_page) is None:
 | 
			
		||||
            raise ExtractorError('Unable to log in')
 | 
			
		||||
@@ -150,7 +170,7 @@ class LyndaIE(SubtitlesInfoExtractor):
 | 
			
		||||
 | 
			
		||||
    def _get_available_subtitles(self, video_id, webpage):
 | 
			
		||||
        url = 'http://www.lynda.com/ajax/player?videoId=%s&type=transcript' % video_id
 | 
			
		||||
        sub = self._download_webpage(url, None, note=False)
 | 
			
		||||
        sub = self._download_webpage(url, None, False)
 | 
			
		||||
        sub_json = json.loads(sub)
 | 
			
		||||
        return {'en': url} if len(sub_json) > 0 else {}
 | 
			
		||||
 | 
			
		||||
@@ -179,6 +199,9 @@ class LyndaCourseIE(InfoExtractor):
 | 
			
		||||
        videos = []
 | 
			
		||||
        (username, _) = self._get_login_info()
 | 
			
		||||
 | 
			
		||||
        # Might want to extract videos right here from video['Formats'] as it seems 'Formats' is not provided
 | 
			
		||||
        # by single video API anymore
 | 
			
		||||
 | 
			
		||||
        for chapter in course_json['Chapters']:
 | 
			
		||||
            for video in chapter['Videos']:
 | 
			
		||||
                if username is None and video['HasAccess'] is False:
 | 
			
		||||
 
 | 
			
		||||
@@ -5,9 +5,12 @@ import re
 | 
			
		||||
from .common import InfoExtractor
 | 
			
		||||
from ..utils import (
 | 
			
		||||
    compat_urllib_parse,
 | 
			
		||||
    compat_urllib_request,
 | 
			
		||||
    ExtractorError,
 | 
			
		||||
    find_xpath_attr,
 | 
			
		||||
    fix_xml_ampersands,
 | 
			
		||||
    HEADRequest,
 | 
			
		||||
    unescapeHTML,
 | 
			
		||||
    url_basename,
 | 
			
		||||
    RegexNotFoundError,
 | 
			
		||||
)
 | 
			
		||||
@@ -18,6 +21,7 @@ def _media_xml_tag(tag):
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class MTVServicesInfoExtractor(InfoExtractor):
 | 
			
		||||
    _MOBILE_TEMPLATE = None
 | 
			
		||||
    @staticmethod
 | 
			
		||||
    def _id_from_uri(uri):
 | 
			
		||||
        return uri.split(':')[-1]
 | 
			
		||||
@@ -39,9 +43,29 @@ class MTVServicesInfoExtractor(InfoExtractor):
 | 
			
		||||
        else:
 | 
			
		||||
            return thumb_node.attrib['url']
 | 
			
		||||
 | 
			
		||||
    def _extract_video_formats(self, mdoc):
 | 
			
		||||
        if re.match(r'.*/error_country_block\.swf$', mdoc.find('.//src').text) is not None:
 | 
			
		||||
            raise ExtractorError('This video is not available from your country.', expected=True)
 | 
			
		||||
    def _extract_mobile_video_formats(self, mtvn_id):
 | 
			
		||||
        webpage_url = self._MOBILE_TEMPLATE % mtvn_id
 | 
			
		||||
        req = compat_urllib_request.Request(webpage_url)
 | 
			
		||||
        # Otherwise we get a webpage that would execute some javascript
 | 
			
		||||
        req.add_header('Youtubedl-user-agent', 'curl/7')
 | 
			
		||||
        webpage = self._download_webpage(req, mtvn_id,
 | 
			
		||||
            'Downloading mobile page')
 | 
			
		||||
        metrics_url = unescapeHTML(self._search_regex(r'<a href="(http://metrics.+?)"', webpage, 'url'))
 | 
			
		||||
        req = HEADRequest(metrics_url)
 | 
			
		||||
        response = self._request_webpage(req, mtvn_id, 'Resolving url')
 | 
			
		||||
        url = response.geturl()
 | 
			
		||||
        # Transform the url to get the best quality:
 | 
			
		||||
        url = re.sub(r'.+pxE=mp4', 'http://mtvnmobile.vo.llnwd.net/kip0/_pxn=0+_pxK=18639+_pxE=mp4', url, 1)
 | 
			
		||||
        return [{'url': url,'ext': 'mp4'}]
 | 
			
		||||
 | 
			
		||||
    def _extract_video_formats(self, mdoc, mtvn_id):
 | 
			
		||||
        if re.match(r'.*/(error_country_block\.swf|geoblock\.mp4)$', mdoc.find('.//src').text) is not None:
 | 
			
		||||
            if mtvn_id is not None and self._MOBILE_TEMPLATE is not None:
 | 
			
		||||
                self.to_screen('The normal version is not available from your '
 | 
			
		||||
                    'country, trying with the mobile version')
 | 
			
		||||
                return self._extract_mobile_video_formats(mtvn_id)
 | 
			
		||||
            raise ExtractorError('This video is not available from your country.',
 | 
			
		||||
                expected=True)
 | 
			
		||||
 | 
			
		||||
        formats = []
 | 
			
		||||
        for rendition in mdoc.findall('.//rendition'):
 | 
			
		||||
@@ -94,9 +118,16 @@ class MTVServicesInfoExtractor(InfoExtractor):
 | 
			
		||||
            raise ExtractorError('Could not find video title')
 | 
			
		||||
        title = title.strip()
 | 
			
		||||
 | 
			
		||||
        # This a short id that's used in the webpage urls
 | 
			
		||||
        mtvn_id = None
 | 
			
		||||
        mtvn_id_node = find_xpath_attr(itemdoc, './/{http://search.yahoo.com/mrss/}category',
 | 
			
		||||
                'scheme', 'urn:mtvn:id')
 | 
			
		||||
        if mtvn_id_node is not None:
 | 
			
		||||
            mtvn_id = mtvn_id_node.text
 | 
			
		||||
 | 
			
		||||
        return {
 | 
			
		||||
            'title': title,
 | 
			
		||||
            'formats': self._extract_video_formats(mediagen_doc),
 | 
			
		||||
            'formats': self._extract_video_formats(mediagen_doc, mtvn_id),
 | 
			
		||||
            'id': video_id,
 | 
			
		||||
            'thumbnail': self._get_thumbnail_url(uri, itemdoc),
 | 
			
		||||
            'description': description,
 | 
			
		||||
 
 | 
			
		||||
@@ -1,3 +1,5 @@
 | 
			
		||||
from __future__ import unicode_literals
 | 
			
		||||
 | 
			
		||||
import binascii
 | 
			
		||||
import base64
 | 
			
		||||
import hashlib
 | 
			
		||||
@@ -14,18 +16,16 @@ from ..utils import (
 | 
			
		||||
)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class MyVideoIE(InfoExtractor):
 | 
			
		||||
    """Information Extractor for myvideo.de."""
 | 
			
		||||
 | 
			
		||||
    _VALID_URL = r'(?:http://)?(?:www\.)?myvideo\.de/(?:[^/]+/)?watch/([0-9]+)/([^?/]+).*'
 | 
			
		||||
    IE_NAME = u'myvideo'
 | 
			
		||||
    _VALID_URL = r'http://(?:www\.)?myvideo\.de/(?:[^/]+/)?watch/(?P<id>[0-9]+)/[^?/]+.*'
 | 
			
		||||
    IE_NAME = 'myvideo'
 | 
			
		||||
    _TEST = {
 | 
			
		||||
        u'url': u'http://www.myvideo.de/watch/8229274/bowling_fail_or_win',
 | 
			
		||||
        u'file': u'8229274.flv',
 | 
			
		||||
        u'md5': u'2d2753e8130479ba2cb7e0a37002053e',
 | 
			
		||||
        u'info_dict': {
 | 
			
		||||
            u"title": u"bowling-fail-or-win"
 | 
			
		||||
        'url': 'http://www.myvideo.de/watch/8229274/bowling_fail_or_win',
 | 
			
		||||
        'md5': '2d2753e8130479ba2cb7e0a37002053e',
 | 
			
		||||
        'info_dict': {
 | 
			
		||||
            'id': '8229274',
 | 
			
		||||
            'ext': 'flv',
 | 
			
		||||
            'title': 'bowling-fail-or-win',
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
@@ -53,10 +53,7 @@ class MyVideoIE(InfoExtractor):
 | 
			
		||||
 | 
			
		||||
    def _real_extract(self,url):
 | 
			
		||||
        mobj = re.match(self._VALID_URL, url)
 | 
			
		||||
        if mobj is None:
 | 
			
		||||
            raise ExtractorError(u'invalid URL: %s' % url)
 | 
			
		||||
 | 
			
		||||
        video_id = mobj.group(1)
 | 
			
		||||
        video_id = mobj.group('id')
 | 
			
		||||
 | 
			
		||||
        GK = (
 | 
			
		||||
          b'WXpnME1EZGhNRGhpTTJNM01XVmhOREU0WldNNVpHTTJOakpt'
 | 
			
		||||
@@ -74,37 +71,33 @@ class MyVideoIE(InfoExtractor):
 | 
			
		||||
            video_url = mobj.group(1) + '.flv'
 | 
			
		||||
 | 
			
		||||
            video_title = self._html_search_regex('<title>([^<]+)</title>',
 | 
			
		||||
                webpage, u'title')
 | 
			
		||||
                webpage, 'title')
 | 
			
		||||
 | 
			
		||||
            video_ext = self._search_regex('[.](.+?)$', video_url, u'extension')
 | 
			
		||||
 | 
			
		||||
            return [{
 | 
			
		||||
                'id':       video_id,
 | 
			
		||||
                'url':      video_url,
 | 
			
		||||
                'uploader': None,
 | 
			
		||||
                'upload_date':  None,
 | 
			
		||||
                'title':    video_title,
 | 
			
		||||
                'ext':      video_ext,
 | 
			
		||||
            }]
 | 
			
		||||
            return {
 | 
			
		||||
                'id': video_id,
 | 
			
		||||
                'url': video_url,
 | 
			
		||||
                'title': video_title,
 | 
			
		||||
            }
 | 
			
		||||
 | 
			
		||||
        mobj = re.search(r'data-video-service="/service/data/video/%s/config' % video_id, webpage)
 | 
			
		||||
        if mobj is not None:
 | 
			
		||||
            request = compat_urllib_request.Request('http://www.myvideo.de/service/data/video/%s/config' % video_id, '')
 | 
			
		||||
            response = self._download_webpage(request, video_id,
 | 
			
		||||
                                              u'Downloading video info')
 | 
			
		||||
                                              'Downloading video info')
 | 
			
		||||
            info = json.loads(base64.b64decode(response).decode('utf-8'))
 | 
			
		||||
            return {'id': video_id,
 | 
			
		||||
                    'title': info['title'],
 | 
			
		||||
                    'url': info['streaming_url'].replace('rtmpe', 'rtmpt'),
 | 
			
		||||
                    'play_path': info['filename'],
 | 
			
		||||
                    'ext': 'flv',
 | 
			
		||||
                    'thumbnail': info['thumbnail'][0]['url'],
 | 
			
		||||
                    }
 | 
			
		||||
            return {
 | 
			
		||||
                'id': video_id,
 | 
			
		||||
                'title': info['title'],
 | 
			
		||||
                'url': info['streaming_url'].replace('rtmpe', 'rtmpt'),
 | 
			
		||||
                'play_path': info['filename'],
 | 
			
		||||
                'ext': 'flv',
 | 
			
		||||
                'thumbnail': info['thumbnail'][0]['url'],
 | 
			
		||||
            }
 | 
			
		||||
 | 
			
		||||
        # try encxml
 | 
			
		||||
        mobj = re.search('var flashvars={(.+?)}', webpage)
 | 
			
		||||
        if mobj is None:
 | 
			
		||||
            raise ExtractorError(u'Unable to extract video')
 | 
			
		||||
            raise ExtractorError('Unable to extract video')
 | 
			
		||||
 | 
			
		||||
        params = {}
 | 
			
		||||
        encxml = ''
 | 
			
		||||
@@ -118,7 +111,7 @@ class MyVideoIE(InfoExtractor):
 | 
			
		||||
            params['domain'] = 'www.myvideo.de'
 | 
			
		||||
        xmldata_url = '%s?%s' % (encxml, compat_urllib_parse.urlencode(params))
 | 
			
		||||
        if 'flash_playertype=MTV' in xmldata_url:
 | 
			
		||||
            self._downloader.report_warning(u'avoiding MTV player')
 | 
			
		||||
            self._downloader.report_warning('avoiding MTV player')
 | 
			
		||||
            xmldata_url = (
 | 
			
		||||
                'http://www.myvideo.de/dynamic/get_player_video_xml.php'
 | 
			
		||||
                '?flash_playertype=D&ID=%s&_countlimit=4&autorun=yes'
 | 
			
		||||
@@ -144,7 +137,7 @@ class MyVideoIE(InfoExtractor):
 | 
			
		||||
            video_url = compat_urllib_parse.unquote(mobj.group(1))
 | 
			
		||||
            if 'myvideo2flash' in video_url:
 | 
			
		||||
                self.report_warning(
 | 
			
		||||
                    u'Rewriting URL to use unencrypted rtmp:// ...',
 | 
			
		||||
                    'Rewriting URL to use unencrypted rtmp:// ...',
 | 
			
		||||
                    video_id)
 | 
			
		||||
                video_url = video_url.replace('rtmpe://', 'rtmp://')
 | 
			
		||||
 | 
			
		||||
@@ -152,39 +145,31 @@ class MyVideoIE(InfoExtractor):
 | 
			
		||||
            # extract non rtmp videos
 | 
			
		||||
            mobj = re.search('path=\'(http.*?)\' source=\'(.*?)\'', dec_data)
 | 
			
		||||
            if mobj is None:
 | 
			
		||||
                raise ExtractorError(u'unable to extract url')
 | 
			
		||||
                raise ExtractorError('unable to extract url')
 | 
			
		||||
            video_url = compat_urllib_parse.unquote(mobj.group(1)) + compat_urllib_parse.unquote(mobj.group(2))
 | 
			
		||||
 | 
			
		||||
        video_file = self._search_regex('source=\'(.*?)\'', dec_data, u'video file')
 | 
			
		||||
        video_file = self._search_regex('source=\'(.*?)\'', dec_data, 'video file')
 | 
			
		||||
        video_file = compat_urllib_parse.unquote(video_file)
 | 
			
		||||
 | 
			
		||||
        if not video_file.endswith('f4m'):
 | 
			
		||||
            ppath, prefix = video_file.split('.')
 | 
			
		||||
            video_playpath = '%s:%s' % (prefix, ppath)
 | 
			
		||||
            video_hls_playlist = ''
 | 
			
		||||
        else:
 | 
			
		||||
            video_playpath = ''
 | 
			
		||||
            video_hls_playlist = (
 | 
			
		||||
                video_file
 | 
			
		||||
            ).replace('.f4m', '.m3u8')
 | 
			
		||||
 | 
			
		||||
        video_swfobj = self._search_regex('swfobject.embedSWF\(\'(.+?)\'', webpage, u'swfobj')
 | 
			
		||||
        video_swfobj = self._search_regex('swfobject.embedSWF\(\'(.+?)\'', webpage, 'swfobj')
 | 
			
		||||
        video_swfobj = compat_urllib_parse.unquote(video_swfobj)
 | 
			
		||||
 | 
			
		||||
        video_title = self._html_search_regex("<h1(?: class='globalHd')?>(.*?)</h1>",
 | 
			
		||||
            webpage, u'title')
 | 
			
		||||
            webpage, 'title')
 | 
			
		||||
 | 
			
		||||
        return [{
 | 
			
		||||
            'id':                 video_id,
 | 
			
		||||
            'url':                video_url,
 | 
			
		||||
            'tc_url':             video_url,
 | 
			
		||||
            'uploader':           None,
 | 
			
		||||
            'upload_date':        None,
 | 
			
		||||
            'title':              video_title,
 | 
			
		||||
            'ext':                u'flv',
 | 
			
		||||
            'play_path':          video_playpath,
 | 
			
		||||
            'video_file':         video_file,
 | 
			
		||||
            'video_hls_playlist': video_hls_playlist,
 | 
			
		||||
            'player_url':         video_swfobj,
 | 
			
		||||
        }]
 | 
			
		||||
        return {
 | 
			
		||||
            'id': video_id,
 | 
			
		||||
            'url': video_url,
 | 
			
		||||
            'tc_url': video_url,
 | 
			
		||||
            'title': video_title,
 | 
			
		||||
            'ext': 'flv',
 | 
			
		||||
            'play_path': video_playpath,
 | 
			
		||||
            'player_url': video_swfobj,
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
 
 | 
			
		||||
@@ -15,7 +15,9 @@ class NineGagIE(InfoExtractor):
 | 
			
		||||
        "file": "1912.mp4",
 | 
			
		||||
        "info_dict": {
 | 
			
		||||
            "description": "This 3-minute video will make you smile and then make you feel untalented and insignificant. Anyway, you should share this awesomeness. (Thanks, Dino!)",
 | 
			
		||||
            "title": "\"People Are Awesome 2013\" Is Absolutely Awesome"
 | 
			
		||||
            "title": "\"People Are Awesome 2013\" Is Absolutely Awesome",
 | 
			
		||||
            "view_count": int,
 | 
			
		||||
            "thumbnail": "re:^https?://",
 | 
			
		||||
        },
 | 
			
		||||
        'add_ie': ['Youtube']
 | 
			
		||||
    }
 | 
			
		||||
@@ -25,21 +27,27 @@ class NineGagIE(InfoExtractor):
 | 
			
		||||
        video_id = mobj.group('id')
 | 
			
		||||
 | 
			
		||||
        webpage = self._download_webpage(url, video_id)
 | 
			
		||||
        data_json = self._html_search_regex(r'''(?x)
 | 
			
		||||
            <div\s*id="tv-video"\s*data-video-source="youtube"\s*
 | 
			
		||||
                data-video-meta="([^"]+)"''', webpage, 'video metadata')
 | 
			
		||||
 | 
			
		||||
        data = json.loads(data_json)
 | 
			
		||||
        youtube_id = self._html_search_regex(
 | 
			
		||||
            r'(?s)id="jsid-video-post-container".*?data-external-id="([^"]+)"',
 | 
			
		||||
            webpage, 'video ID')
 | 
			
		||||
        description = self._html_search_regex(
 | 
			
		||||
            r'(?s)<div class="video-caption">.*?<p>(.*?)</p>', webpage,
 | 
			
		||||
            'description', fatal=False)
 | 
			
		||||
        view_count_str = self._html_search_regex(
 | 
			
		||||
            r'<p><b>([0-9][0-9,]*)</b> views</p>', webpage, 'view count',
 | 
			
		||||
            fatal=False)
 | 
			
		||||
        view_count = (
 | 
			
		||||
            None if view_count_str is None
 | 
			
		||||
            else int(view_count_str.replace(',', '')))
 | 
			
		||||
 | 
			
		||||
        return {
 | 
			
		||||
            '_type': 'url_transparent',
 | 
			
		||||
            'url': data['youtubeVideoId'],
 | 
			
		||||
            'url': youtube_id,
 | 
			
		||||
            'ie_key': 'Youtube',
 | 
			
		||||
            'id': video_id,
 | 
			
		||||
            'title': data['title'],
 | 
			
		||||
            'description': data['description'],
 | 
			
		||||
            'view_count': int(data['view_count']),
 | 
			
		||||
            'like_count': int(data['statistic']['like']),
 | 
			
		||||
            'dislike_count': int(data['statistic']['dislike']),
 | 
			
		||||
            'thumbnail': data['thumbnail_url'],
 | 
			
		||||
            'title': self._og_search_title(webpage),
 | 
			
		||||
            'description': description,
 | 
			
		||||
            'view_count': view_count,
 | 
			
		||||
            'thumbnail': self._og_search_thumbnail(webpage),
 | 
			
		||||
        }
 | 
			
		||||
 
 | 
			
		||||
@@ -1,76 +1,43 @@
 | 
			
		||||
from __future__ import unicode_literals
 | 
			
		||||
 | 
			
		||||
import datetime
 | 
			
		||||
import json
 | 
			
		||||
import re
 | 
			
		||||
 | 
			
		||||
from .common import InfoExtractor
 | 
			
		||||
 | 
			
		||||
from ..utils import (
 | 
			
		||||
    ExtractorError,
 | 
			
		||||
)
 | 
			
		||||
 | 
			
		||||
class PhotobucketIE(InfoExtractor):
 | 
			
		||||
    """Information extractor for photobucket.com."""
 | 
			
		||||
 | 
			
		||||
    # TODO: the original _VALID_URL was:
 | 
			
		||||
    # r'(?:http://)?(?:[a-z0-9]+\.)?photobucket\.com/.*[\?\&]current=(.*\.flv)'
 | 
			
		||||
    # Check if it's necessary to keep the old extraction process
 | 
			
		||||
    _VALID_URL = r'(?:http://)?(?:[a-z0-9]+\.)?photobucket\.com/.*(([\?\&]current=)|_)(?P<id>.*)\.(?P<ext>(flv)|(mp4))'
 | 
			
		||||
    IE_NAME = u'photobucket'
 | 
			
		||||
    _VALID_URL = r'http://(?:[a-z0-9]+\.)?photobucket\.com/.*(([\?\&]current=)|_)(?P<id>.*)\.(?P<ext>(flv)|(mp4))'
 | 
			
		||||
    _TEST = {
 | 
			
		||||
        u'url': u'http://media.photobucket.com/user/rachaneronas/media/TiredofLinkBuildingTryBacklinkMyDomaincom_zpsc0c3b9fa.mp4.html?filters[term]=search&filters[primary]=videos&filters[secondary]=images&sort=1&o=0',
 | 
			
		||||
        u'file': u'zpsc0c3b9fa.mp4',
 | 
			
		||||
        u'md5': u'7dabfb92b0a31f6c16cebc0f8e60ff99',
 | 
			
		||||
        u'info_dict': {
 | 
			
		||||
            u"upload_date": u"20130504", 
 | 
			
		||||
            u"uploader": u"rachaneronas", 
 | 
			
		||||
            u"title": u"Tired of Link Building? Try BacklinkMyDomain.com!"
 | 
			
		||||
        'url': 'http://media.photobucket.com/user/rachaneronas/media/TiredofLinkBuildingTryBacklinkMyDomaincom_zpsc0c3b9fa.mp4.html?filters[term]=search&filters[primary]=videos&filters[secondary]=images&sort=1&o=0',
 | 
			
		||||
        'file': 'zpsc0c3b9fa.mp4',
 | 
			
		||||
        'md5': '7dabfb92b0a31f6c16cebc0f8e60ff99',
 | 
			
		||||
        'info_dict': {
 | 
			
		||||
            'upload_date': '20130504',
 | 
			
		||||
            'uploader': 'rachaneronas',
 | 
			
		||||
            'title': 'Tired of Link Building? Try BacklinkMyDomain.com!',
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    def _real_extract(self, url):
 | 
			
		||||
        # Extract id from URL
 | 
			
		||||
        mobj = re.match(self._VALID_URL, url)
 | 
			
		||||
        if mobj is None:
 | 
			
		||||
            raise ExtractorError(u'Invalid URL: %s' % url)
 | 
			
		||||
 | 
			
		||||
        video_id = mobj.group('id')
 | 
			
		||||
 | 
			
		||||
        video_extension = mobj.group('ext')
 | 
			
		||||
 | 
			
		||||
        # Retrieve video webpage to extract further information
 | 
			
		||||
        webpage = self._download_webpage(url, video_id)
 | 
			
		||||
 | 
			
		||||
        # Extract URL, uploader, and title from webpage
 | 
			
		||||
        self.report_extraction(video_id)
 | 
			
		||||
        # We try first by looking the javascript code:
 | 
			
		||||
        mobj = re.search(r'Pb\.Data\.Shared\.put\(Pb\.Data\.Shared\.MEDIA, (?P<json>.*?)\);', webpage)
 | 
			
		||||
        if mobj is not None:
 | 
			
		||||
            info = json.loads(mobj.group('json'))
 | 
			
		||||
            return [{
 | 
			
		||||
                'id':       video_id,
 | 
			
		||||
                'url':      info[u'downloadUrl'],
 | 
			
		||||
                'uploader': info[u'username'],
 | 
			
		||||
                'upload_date':  datetime.date.fromtimestamp(info[u'creationDate']).strftime('%Y%m%d'),
 | 
			
		||||
                'title':    info[u'title'],
 | 
			
		||||
                'ext':      video_extension,
 | 
			
		||||
                'thumbnail': info[u'thumbUrl'],
 | 
			
		||||
            }]
 | 
			
		||||
 | 
			
		||||
        # We try looking in other parts of the webpage
 | 
			
		||||
        video_url = self._search_regex(r'<link rel="video_src" href=".*\?file=([^"]+)" />',
 | 
			
		||||
            webpage, u'video URL')
 | 
			
		||||
 | 
			
		||||
        mobj = re.search(r'<title>(.*) video by (.*) - Photobucket</title>', webpage)
 | 
			
		||||
        if mobj is None:
 | 
			
		||||
            raise ExtractorError(u'Unable to extract title')
 | 
			
		||||
        video_title = mobj.group(1).decode('utf-8')
 | 
			
		||||
        video_uploader = mobj.group(2).decode('utf-8')
 | 
			
		||||
 | 
			
		||||
        return [{
 | 
			
		||||
            'id':       video_id.decode('utf-8'),
 | 
			
		||||
            'url':      video_url.decode('utf-8'),
 | 
			
		||||
            'uploader': video_uploader,
 | 
			
		||||
            'upload_date':  None,
 | 
			
		||||
            'title':    video_title,
 | 
			
		||||
            'ext':      video_extension.decode('utf-8'),
 | 
			
		||||
        }]
 | 
			
		||||
        info_json = self._search_regex(r'Pb\.Data\.Shared\.put\(Pb\.Data\.Shared\.MEDIA, (.*?)\);',
 | 
			
		||||
            webpage, 'info json')
 | 
			
		||||
        info = json.loads(info_json)
 | 
			
		||||
        return {
 | 
			
		||||
            'id': video_id,
 | 
			
		||||
            'url': info['downloadUrl'],
 | 
			
		||||
            'uploader': info['username'],
 | 
			
		||||
            'upload_date': datetime.date.fromtimestamp(info['creationDate']).strftime('%Y%m%d'),
 | 
			
		||||
            'title': info['title'],
 | 
			
		||||
            'ext': video_extension,
 | 
			
		||||
            'thumbnail': info['thumbUrl'],
 | 
			
		||||
        }
 | 
			
		||||
 
 | 
			
		||||
							
								
								
									
youtube_dl/extractor/playvid.py (new file, 80 lines added)
@@ -0,0 +1,80 @@
 | 
			
		||||
from __future__ import unicode_literals

import re

from .common import InfoExtractor
from ..utils import (
    compat_urllib_parse,
)


class PlayvidIE(InfoExtractor):
    _VALID_URL = r'^https?://www\.playvid\.com/watch(\?v=|/)(?P<id>.+?)(?:#|$)'
    _TEST = {
        'url': 'http://www.playvid.com/watch/agbDDi7WZTV',
        'md5': '44930f8afa616efdf9482daf4fe53e1e',
        'info_dict': {
            'id': 'agbDDi7WZTV',
            'ext': 'mp4',
            'title': 'Michelle Lewin in Miami Beach',
            'duration': 240,
            'age_limit': 18,
        }
    }

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')

        webpage = self._download_webpage(url, video_id)

        video_title = None
        duration = None
        video_thumbnail = None
        formats = []

        # most of the information is stored in the flashvars
        flashvars = self._html_search_regex(
            r'flashvars="(.+?)"', webpage, 'flashvars')

        infos = compat_urllib_parse.unquote(flashvars).split(r'&')
        for info in infos:
            videovars_match = re.match(r'^video_vars\[(.+?)\]=(.+?)$', info)
            if videovars_match:
                key = videovars_match.group(1)
                val = videovars_match.group(2)

                if key == 'title':
                    video_title = compat_urllib_parse.unquote_plus(val)
                if key == 'duration':
                    try:
                        duration = int(val)
                    except ValueError:
                        pass
                if key == 'big_thumb':
                    video_thumbnail = val

                videourl_match = re.match(
                    r'^video_urls\]\[(?P<resolution>[0-9]+)p', key)
                if videourl_match:
                    height = int(videourl_match.group('resolution'))
                    formats.append({
                        'height': height,
                        'url': val,
                    })
        self._sort_formats(formats)

        # Extract title - should be in the flashvars; if not, look elsewhere
        if video_title is None:
            video_title = self._html_search_regex(
                r'<title>(.*?)</title', webpage, 'title')

        return {
            'id': video_id,
            'formats': formats,
            'title': video_title,
            'thumbnail': video_thumbnail,
            'duration': duration,
            'description': None,
            'age_limit': 18
        }
@@ -44,7 +44,7 @@ class PornHubIE(InfoExtractor):

        video_urls = list(map(compat_urllib_parse.unquote , re.findall(r'"quality_[0-9]{3}p":"([^"]+)', webpage)))
        if webpage.find('"encrypted":true') != -1:
            password = self._html_search_regex(r'"video_title":"([^"]+)', webpage, 'password').replace('+', ' ')
            password = compat_urllib_parse.unquote_plus(self._html_search_regex(r'"video_title":"([^"]+)', webpage, 'password'))
            video_urls = list(map(lambda s: aes_decrypt_text(s, password, 32).decode('utf-8'), video_urls))

        formats = []
@@ -51,14 +51,14 @@ class ProSiebenSat1IE(InfoExtractor):
            'skip': 'Seems to be broken',
        },
        {
            'url': 'http://www.prosiebenmaxx.de/yep/one-piece/video/148-folge-48-gold-rogers-heimat-ganze-folge',
            'url': 'http://www.prosiebenmaxx.de/tv/experience/video/144-countdown-fuer-die-autowerkstatt-ganze-folge',
            'info_dict': {
                'id': '2437108',
                'id': '2429369',
                'ext': 'mp4',
                'title': 'Folge 48: Gold Rogers Heimat',
                'description': 'Ruffy erreicht die Insel, auf der der berühmte Gold Roger lebte und hingerichtet wurde.',
                'upload_date': '20140226',
                'duration': 1401.48,
                'title': 'Countdown für die Autowerkstatt',
                'description': 'md5:809fc051a457b5d8666013bc40698817',
                'upload_date': '20140223',
                'duration': 2595.04,
            },
            'params': {
                # rtmp download

youtube_dl/extractor/rutv.py (new file)
@@ -0,0 +1,183 @@
# encoding: utf-8
from __future__ import unicode_literals

import re

from .common import InfoExtractor
from ..utils import (
    ExtractorError,
    int_or_none
)


class RUTVIE(InfoExtractor):
    IE_DESC = 'RUTV.RU'
    _VALID_URL = r'https?://player\.(?:rutv\.ru|vgtrk\.com)/(?:flash2v/container\.swf\?id=|iframe/(?P<type>swf|video|live)/id/)(?P<id>\d+)'

    _TESTS = [
        {
            'url': 'http://player.rutv.ru/flash2v/container.swf?id=774471&sid=kultura&fbv=true&isPlay=true&ssl=false&i=560&acc_video_id=episode_id/972347/video_id/978186/brand_id/31724',
            'info_dict': {
                'id': '774471',
                'ext': 'mp4',
                'title': 'Монологи на все времена',
                'description': 'md5:18d8b5e6a41fb1faa53819471852d5d5',
                'duration': 2906,
            },
            'params': {
                # m3u8 download
                'skip_download': True,
            },
        },
        {
            'url': 'https://player.vgtrk.com/flash2v/container.swf?id=774016&sid=russiatv&fbv=true&isPlay=true&ssl=false&i=560&acc_video_id=episode_id/972098/video_id/977760/brand_id/57638',
            'info_dict': {
                'id': '774016',
                'ext': 'mp4',
                'title': 'Чужой в семье Сталина',
                'description': '',
                'duration': 2539,
            },
            'params': {
                # m3u8 download
                'skip_download': True,
            },
        },
        {
            'url': 'http://player.rutv.ru/iframe/swf/id/766888/sid/hitech/?acc_video_id=4000',
            'info_dict': {
                'id': '766888',
                'ext': 'mp4',
                'title': 'Вести.net: интернет-гиганты начали перетягивание программных "одеял"',
                'description': 'md5:65ddd47f9830c4f42ed6475f8730c995',
                'duration': 279,
            },
            'params': {
                # m3u8 download
                'skip_download': True,
            },
        },
        {
            'url': 'http://player.rutv.ru/iframe/video/id/771852/start_zoom/true/showZoomBtn/false/sid/russiatv/?acc_video_id=episode_id/970443/video_id/975648/brand_id/5169',
            'info_dict': {
                'id': '771852',
                'ext': 'mp4',
                'title': 'Прямой эфир. Жертвы загадочной болезни: смерть от старости в 17 лет',
                'description': 'md5:b81c8c55247a4bd996b43ce17395b2d8',
                'duration': 3096,
            },
            'params': {
                # m3u8 download
                'skip_download': True,
            },
        },
        {
            'url': 'http://player.rutv.ru/iframe/live/id/51499/showZoomBtn/false/isPlay/true/sid/sochi2014',
            'info_dict': {
                'id': '51499',
                'ext': 'flv',
                'title': 'Сочи-2014. Биатлон. Индивидуальная гонка. Мужчины ',
                'description': 'md5:9e0ed5c9d2fa1efbfdfed90c9a6d179c',
            },
            'params': {
                # rtmp download
                'skip_download': True,
            },
            'skip': 'Translation has finished',
        },
    ]

    @classmethod
    def _extract_url(cls, webpage):
        mobj = re.search(
            r'<iframe[^>]+?src=(["\'])(?P<url>https?://player\.rutv\.ru/iframe/(?:swf|video|live)/id/.+?)\1', webpage)
        if mobj:
            return mobj.group('url')

        mobj = re.search(
            r'<meta[^>]+?property=(["\'])og:video\1[^>]+?content=(["\'])(?P<url>http://player\.(?:rutv\.ru|vgtrk\.com)/flash2v/container\.swf\?id=.+?\2)',
            webpage)
        if mobj:
            return mobj.group('url')

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')
        video_type = mobj.group('type')

        if not video_type or video_type == 'swf':
            video_type = 'video'

        json_data = self._download_json(
            'http://player.rutv.ru/iframe/%splay/id/%s' % ('live-' if video_type == 'live' else '', video_id),
            video_id, 'Downloading JSON')

        if json_data['errors']:
            raise ExtractorError('%s said: %s' % (self.IE_NAME, json_data['errors']), expected=True)

        playlist = json_data['data']['playlist']
        medialist = playlist['medialist']
        media = medialist[0]

        if media['errors']:
            raise ExtractorError('%s said: %s' % (self.IE_NAME, media['errors']), expected=True)

        view_count = playlist.get('count_views')
        priority_transport = playlist['priority_transport']

        thumbnail = media['picture']
        width = int_or_none(media['width'])
        height = int_or_none(media['height'])
        description = media['anons']
        title = media['title']
        duration = int_or_none(media.get('duration'))

        formats = []

        for transport, links in media['sources'].items():
            for quality, url in links.items():
                if transport == 'rtmp':
                    mobj = re.search(r'^(?P<url>rtmp://[^/]+/(?P<app>.+))/(?P<playpath>.+)$', url)
                    if not mobj:
                        continue
                    fmt = {
                        'url': mobj.group('url'),
                        'play_path': mobj.group('playpath'),
                        'app': mobj.group('app'),
                        'page_url': 'http://player.rutv.ru',
                        'player_url': 'http://player.rutv.ru/flash2v/osmf.swf?i=22',
                        'rtmp_live': True,
                        'ext': 'flv',
                        'vbr': int(quality),
                    }
                elif transport == 'm3u8':
                    fmt = {
                        'url': url,
                        'ext': 'mp4',
                    }
                else:
                    fmt = {
                        'url': url
                    }
                fmt.update({
                    'width': width,
                    'height': height,
                    'format_id': '%s-%s' % (transport, quality),
                    'preference': -1 if priority_transport == transport else -2,
                })
                formats.append(fmt)

        if not formats:
            raise ExtractorError('No media links available for %s' % video_id)

        self._sort_formats(formats)

        return {
            'id': video_id,
            'title': title,
            'description': description,
            'thumbnail': thumbnail,
            'view_count': view_count,
            'duration': duration,
            'formats': formats,
        }
@@ -54,6 +54,7 @@ class SoundcloudIE(InfoExtractor):
                'id': '47127627',
                'ext': 'mp3',
                'title': 'Goldrushed',
                'description': 'From Stockholm Sweden\r\nPovel / Magnus / Filip / David\r\nwww.theroyalconcept.com',
                'uploader': 'The Royal Concept',
                'upload_date': '20120521',
            },
@@ -217,7 +218,7 @@ class SoundcloudIE(InfoExtractor):
        return self._extract_info_dict(info, full_title, secret_token=token)

class SoundcloudSetIE(SoundcloudIE):
    _VALID_URL = r'^(?:https?://)?(?:www\.)?soundcloud\.com/([\w\d-]+)/sets/([\w\d-]+)(?:[?].*)?$'
    _VALID_URL = r'https?://(?:www\.)?soundcloud\.com/([\w\d-]+)/sets/([\w\d-]+)'
    IE_NAME = 'soundcloud:set'
    # it's in tests/test_playlists.py
    _TESTS = []
@@ -1,10 +1,15 @@
from __future__ import unicode_literals

import re

from .mtv import MTVServicesInfoExtractor


class SpikeIE(MTVServicesInfoExtractor):
    _VALID_URL = r'https?://www\.spike\.com/(video-clips|episodes)/.+'
    _VALID_URL = r'''(?x)https?://
        (www\.spike\.com/(video-clips|episodes)/.+|
         m\.spike\.com/videos/video.rbml\?id=(?P<mobile_id>[^&]+))
        '''
    _TEST = {
        'url': 'http://www.spike.com/video-clips/lhtu8m/auction-hunters-can-allen-ride-a-hundred-year-old-motorcycle',
        'md5': '1a9265f32b0c375793d6c4ce45255256',
@@ -17,3 +22,11 @@ class SpikeIE(MTVServicesInfoExtractor):
    }

    _FEED_URL = 'http://www.spike.com/feeds/mrss/'
    _MOBILE_TEMPLATE = 'http://m.spike.com/videos/video.rbml?id=%s'

    def _real_extract(self, url):
        mobj = re.search(self._VALID_URL, url)
        mobile_id = mobj.group('mobile_id')
        if mobile_id is not None:
            url = 'http://www.spike.com/video-clips/%s' % mobile_id
        return super(SpikeIE, self)._real_extract(url)
@@ -6,115 +6,114 @@ import re
from .subtitles import SubtitlesInfoExtractor

from ..utils import (
    RegexNotFoundError,
    compat_str,
)


class TEDIE(SubtitlesInfoExtractor):
    _VALID_URL=r'''http://www\.ted\.com/
                   (
                        ((?P<type_playlist>playlists)/(?P<playlist_id>\d+)) # We have a playlist
                        |
                        ((?P<type_talk>talks)) # We have a simple talk
                   )
                   (/lang/(.*?))? # The url may contain the language
                   /(?P<name>\w+) # Here goes the name and then ".html"
                   '''
    _VALID_URL = r'''(?x)http://www\.ted\.com/
        (
            (?P<type_playlist>playlists(?:/\d+)?) # We have a playlist
            |
            ((?P<type_talk>talks)) # We have a simple talk
        )
        (/lang/(.*?))? # The url may contain the language
        /(?P<name>\w+) # Here goes the name and then ".html"
        '''
    _TEST = {
        'url': 'http://www.ted.com/talks/dan_dennett_on_our_consciousness.html',
        'file': '102.mp4',
        'md5': '4ea1dada91e4174b53dac2bb8ace429d',
        'info_dict': {
            "description": "md5:c6fa72e6eedbd938c9caf6b2702f5922",
            "title": "Dan Dennett: The illusion of consciousness"
            'id': '102',
            'ext': 'mp4',
            'title': 'The illusion of consciousness',
            'description': ('Philosopher Dan Dennett makes a compelling '
                'argument that not only don\'t we understand our own '
                'consciousness, but that half the time our brains are '
                'actively fooling us.'),
            'uploader': 'Dan Dennett',
        }
    }

    @classmethod
    def suitable(cls, url):
        """Receives a URL and returns True if suitable for this IE."""
        return re.match(cls._VALID_URL, url, re.VERBOSE) is not None
    _FORMATS_PREFERENCE = {
        'low': 1,
        'medium': 2,
        'high': 3,
    }

    def _extract_info(self, webpage):
        info_json = self._search_regex(r'q\("\w+.init",({.+})\)</script>',
            webpage, 'info json')
        return json.loads(info_json)

    def _real_extract(self, url):
        m=re.match(self._VALID_URL, url, re.VERBOSE)
        m = re.match(self._VALID_URL, url, re.VERBOSE)
        name = m.group('name')
        if m.group('type_talk'):
            return self._talk_info(url)
        else :
            playlist_id=m.group('playlist_id')
            name=m.group('name')
            self.to_screen(u'Getting info of playlist %s: "%s"' % (playlist_id,name))
            return [self._playlist_videos_info(url,name,playlist_id)]
            return self._talk_info(url, name)
        else:
            return self._playlist_videos_info(url, name)


    def _playlist_videos_info(self, url, name, playlist_id):
    def _playlist_videos_info(self, url, name):
        '''Returns the videos of the playlist'''

        webpage = self._download_webpage(
            url, playlist_id, 'Downloading playlist webpage')
        matches = re.finditer(
            r'<p\s+class="talk-title[^"]*"><a\s+href="(?P<talk_url>/talks/[^"]+\.html)">[^<]*</a></p>',
            webpage)

        playlist_title = self._html_search_regex(r'div class="headline">\s*?<h1>\s*?<span>(.*?)</span>',
                                                 webpage, 'playlist title')
        webpage = self._download_webpage(url, name,
            'Downloading playlist webpage')
        info = self._extract_info(webpage)
        playlist_info = info['playlist']

        playlist_entries = [
            self.url_result(u'http://www.ted.com' + m.group('talk_url'), 'TED')
            for m in matches
            self.url_result(u'http://www.ted.com/talks/' + talk['slug'], self.ie_key())
            for talk in info['talks']
        ]
        return self.playlist_result(
            playlist_entries, playlist_id=playlist_id, playlist_title=playlist_title)
            playlist_entries,
            playlist_id=compat_str(playlist_info['id']),
            playlist_title=playlist_info['title'])

    def _talk_info(self, url, video_id=0):
        """Return the video for the talk in the url"""
        m = re.match(self._VALID_URL, url,re.VERBOSE)
        video_name = m.group('name')
        webpage = self._download_webpage(url, video_id, 'Downloading \"%s\" page' % video_name)
    def _talk_info(self, url, video_name):
        webpage = self._download_webpage(url, video_name)
        self.report_extraction(video_name)
        # If the url includes the language we get the title translated
        title = self._html_search_regex(r'<span .*?id="altHeadline".+?>(?P<title>.*)</span>',
                                        webpage, 'title')
        json_data = self._search_regex(r'<script.*?>var talkDetails = ({.*?})</script>',
                                    webpage, 'json data')
        info = json.loads(json_data)
        desc = self._html_search_regex(r'<div class="talk-intro">.*?<p.*?>(.*?)</p>',
                                       webpage, 'description', flags = re.DOTALL)

        thumbnail = self._search_regex(r'</span>[\s.]*</div>[\s.]*<img src="(.*?)"',
                                       webpage, 'thumbnail')

        talk_info = self._extract_info(webpage)['talks'][0]

        formats = [{
            'ext': 'mp4',
            'url': stream['file'],
            'format': stream['id']
        } for stream in info['htmlStreams']]

        video_id = info['id']
            'url': format_url,
            'format_id': format_id,
            'format': format_id,
            'preference': self._FORMATS_PREFERENCE.get(format_id, -1),
        } for (format_id, format_url) in talk_info['nativeDownloads'].items()]
        self._sort_formats(formats)

        video_id = compat_str(talk_info['id'])
        # subtitles
        video_subtitles = self.extract_subtitles(video_id, webpage)
        video_subtitles = self.extract_subtitles(video_id, talk_info)
        if self._downloader.params.get('listsubtitles', False):
            self._list_available_subtitles(video_id, webpage)
            self._list_available_subtitles(video_id, talk_info)
            return

        thumbnail = talk_info['thumb']
        if not thumbnail.startswith('http'):
            thumbnail = 'http://' + thumbnail
        return {
            'id': video_id,
            'title': title,
            'title': talk_info['title'],
            'uploader': talk_info['speaker'],
            'thumbnail': thumbnail,
            'description': desc,
            'description': self._og_search_description(webpage),
            'subtitles': video_subtitles,
            'formats': formats,
        }

    def _get_available_subtitles(self, video_id, webpage):
        try:
            options = self._search_regex(r'(?:<select name="subtitles_language_select" id="subtitles_language_select">)(.*?)(?:</select>)', webpage, 'subtitles_language_select', flags=re.DOTALL)
            languages = re.findall(r'(?:<option value=")(\S+)"', options)
            if languages:
                sub_lang_list = {}
                for l in languages:
                    url = 'http://www.ted.com/talks/subtitles/id/%s/lang/%s/format/srt' % (video_id, l)
                    sub_lang_list[l] = url
                return sub_lang_list
        except RegexNotFoundError:
    def _get_available_subtitles(self, video_id, talk_info):
        languages = [lang['languageCode'] for lang in talk_info.get('languages', [])]
        if languages:
            sub_lang_list = {}
            for l in languages:
                url = 'http://www.ted.com/talks/subtitles/id/%s/lang/%s/format/srt' % (video_id, l)
                sub_lang_list[l] = url
            return sub_lang_list
        else:
            self._downloader.report_warning(u'video doesn\'t have subtitles')
        return {}
            return {}
@@ -14,19 +14,32 @@ from ..utils import (
class TvigleIE(InfoExtractor):
    IE_NAME = 'tvigle'
    IE_DESC = 'Интернет-телевидение Tvigle.ru'
    _VALID_URL = r'http://(?:www\.)?tvigle\.ru/category/.+?video=(?P<id>\d+)'
    _VALID_URL = r'http://(?:www\.)?tvigle\.ru/category/.+?[\?&]v(?:ideo)?=(?P<id>\d+)'

    _TEST = {
        'url': 'http://www.tvigle.ru/category/cinema/1608/?video=503081',
        'md5': '09afba4616666249f087efc6dcf83cb3',
        'info_dict': {
            'id': '503081',
            'ext': 'flv',
            'title': 'Брат 2 ',
            'description': 'md5:f5a42970f50648cee3d7ad740f3ae769',
            'upload_date': '20110919',
        }
    }
    _TESTS = [
        {
            'url': 'http://www.tvigle.ru/category/cinema/1608/?video=503081',
            'md5': '09afba4616666249f087efc6dcf83cb3',
            'info_dict': {
                'id': '503081',
                'ext': 'flv',
                'title': 'Брат 2 ',
                'description': 'md5:f5a42970f50648cee3d7ad740f3ae769',
                'upload_date': '20110919',
            },
        },
        {
            'url': 'http://www.tvigle.ru/category/men/vysotskiy_vospominaniya02/?flt=196&v=676433',
            'md5': 'e7efe5350dd5011d0de6550b53c3ba7b',
            'info_dict': {
                'id': '676433',
                'ext': 'flv',
                'title': 'Ведущий телепрограммы «60 минут» (США) о Владимире Высоцком',
                'description': 'md5:027f7dc872948f14c96d19b4178428a4',
                'upload_date': '20121218',
            },
        },
    ]

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)

youtube_dl/extractor/udemy.py (new file)
@@ -0,0 +1,164 @@
from __future__ import unicode_literals

import re

from .common import InfoExtractor
from ..utils import (
    compat_urllib_parse,
    compat_urllib_request,
    ExtractorError,
)


class UdemyIE(InfoExtractor):
    IE_NAME = 'udemy'
    _VALID_URL = r'https?://www\.udemy\.com/(?:[^#]+#/lecture/|lecture/view/?\?lectureId=)(?P<id>\d+)'
    _LOGIN_URL = 'https://www.udemy.com/join/login-submit/'
    _NETRC_MACHINE = 'udemy'

    _TESTS = [{
        'url': 'https://www.udemy.com/java-tutorial/#/lecture/172757',
        'md5': '98eda5b657e752cf945d8445e261b5c5',
        'info_dict': {
            'id': '160614',
            'ext': 'mp4',
            'title': 'Introduction and Installation',
            'description': 'md5:c0d51f6f21ef4ec65f091055a5eef876',
            'duration': 579.29,
        },
        'skip': 'Requires udemy account credentials',
    }]

    def _handle_error(self, response):
        if not isinstance(response, dict):
            return
        error = response.get('error')
        if error:
            error_str = 'Udemy returned error #%s: %s' % (error.get('code'), error.get('message'))
            error_data = error.get('data')
            if error_data:
                error_str += ' - %s' % error_data.get('formErrors')
            raise ExtractorError(error_str, expected=True)

    def _download_json(self, url, video_id, note='Downloading JSON metadata'):
        response = super(UdemyIE, self)._download_json(url, video_id, note)
        self._handle_error(response)
        return response

    def _real_initialize(self):
        self._login()

    def _login(self):
        (username, password) = self._get_login_info()
        if username is None:
            raise ExtractorError(
                'Udemy account is required, use --username and --password options to provide account credentials.',
                expected=True)

        login_popup = self._download_webpage(
            'https://www.udemy.com/join/login-popup?displayType=ajax&showSkipButton=1', None,
            'Downloading login popup')

        if login_popup == '<div class="run-command close-popup redirect" data-url="https://www.udemy.com/"></div>':
            return

        csrf = self._html_search_regex(r'<input type="hidden" name="csrf" value="(.+?)"', login_popup, 'csrf token')

        login_form = {
            'email': username,
            'password': password,
            'csrf': csrf,
            'displayType': 'json',
            'isSubmitted': '1',
        }
        request = compat_urllib_request.Request(self._LOGIN_URL, compat_urllib_parse.urlencode(login_form))
        response = self._download_json(request, None, 'Logging in as %s' % username)

        if 'returnUrl' not in response:
            raise ExtractorError('Unable to log in')

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        lecture_id = mobj.group('id')

        lecture = self._download_json(
            'https://www.udemy.com/api-1.1/lectures/%s' % lecture_id, lecture_id, 'Downloading lecture JSON')

        if lecture['assetType'] != 'Video':
            raise ExtractorError('Lecture %s is not a video' % lecture_id, expected=True)

        asset = lecture['asset']

        stream_url = asset['streamUrl']
        mobj = re.search(r'(https?://www\.youtube\.com/watch\?v=.*)', stream_url)
        if mobj:
            return self.url_result(mobj.group(1), 'Youtube')

        video_id = asset['id']
        thumbnail = asset['thumbnailUrl']
        duration = asset['data']['duration']

        download_url = asset['downloadUrl']

        formats = [
            {
                'url': download_url['Video480p'][0],
                'format_id': '360p',
            },
            {
                'url': download_url['Video'][0],
                'format_id': '720p',
            },
        ]

        title = lecture['title']
        description = lecture['description']

        return {
            'id': video_id,
            'title': title,
            'description': description,
            'thumbnail': thumbnail,
            'duration': duration,
            'formats': formats
        }


class UdemyCourseIE(UdemyIE):
    IE_NAME = 'udemy:course'
    _VALID_URL = r'https?://www\.udemy\.com/(?P<coursepath>[\da-z-]+)'
    _SUCCESSFULLY_ENROLLED = '>You have enrolled in this course!<'
    _ALREADY_ENROLLED = '>You are already taking this course.<'
    _TESTS = []

    @classmethod
    def suitable(cls, url):
        return False if UdemyIE.suitable(url) else super(UdemyCourseIE, cls).suitable(url)

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        course_path = mobj.group('coursepath')

        response = self._download_json(
            'https://www.udemy.com/api-1.1/courses/%s' % course_path, course_path, 'Downloading course JSON')

        course_id = int(response['id'])
        course_title = response['title']

        webpage = self._download_webpage(
            'https://www.udemy.com/course/subscribe/?courseId=%s' % course_id, course_id, 'Enrolling in the course')

        if self._SUCCESSFULLY_ENROLLED in webpage:
            self.to_screen('%s: Successfully enrolled in' % course_id)
        elif self._ALREADY_ENROLLED in webpage:
            self.to_screen('%s: Already enrolled in' % course_id)

        response = self._download_json('https://www.udemy.com/api-1.1/courses/%s/curriculum' % course_id,
            course_id, 'Downloading course curriculum')

        entries = [
            self.url_result('https://www.udemy.com/%s/#/lecture/%s' % (course_path, asset['id']), 'Udemy')
            for asset in response if asset.get('assetType') == 'Video'
        ]

        return self.playlist_result(entries, course_id, course_title)
@@ -4,14 +4,11 @@ from __future__ import unicode_literals
import re

from .common import InfoExtractor
from ..utils import (
    ExtractorError,
    int_or_none
)
from ..utils import ExtractorError
from .rutv import RUTVIE


class VestiIE(InfoExtractor):
    IE_NAME = 'vesti'
    IE_DESC = 'Вести.Ru'
    _VALID_URL = r'http://(?:.+?\.)?vesti\.ru/(?P<id>.+)'

@@ -30,6 +27,20 @@ class VestiIE(InfoExtractor):
                'skip_download': True,
            },
        },
        {
            'url': 'http://www.vesti.ru/doc.html?id=1349233',
            'info_dict': {
                'id': '773865',
                'ext': 'mp4',
                'title': 'Участники митинга штурмуют Донецкую областную администрацию',
                'description': 'md5:1a160e98b3195379b4c849f2f4958009',
                'duration': 210,
            },
            'params': {
                # m3u8 download
                'skip_download': True,
            },
        },
        {
            'url': 'http://www.vesti.ru/only_video.html?vid=576180',
            'info_dict': {
@@ -44,6 +55,20 @@ class VestiIE(InfoExtractor):
                'skip_download': True,
            },
        },
        {
            'url': 'http://hitech.vesti.ru/news/view/id/4000',
            'info_dict': {
                'id': '766888',
                'ext': 'mp4',
                'title': 'Вести.net: интернет-гиганты начали перетягивание программных "одеял"',
                'description': 'md5:65ddd47f9830c4f42ed6475f8730c995',
                'duration': 279,
            },
            'params': {
                # m3u8 download
                'skip_download': True,
            },
        },
        {
            'url': 'http://sochi2014.vesti.ru/video/index/video_id/766403',
            'info_dict': {
@@ -57,7 +82,7 @@ class VestiIE(InfoExtractor):
                # m3u8 download
                'skip_download': True,
            },
            'skip': 'Blocked outside Russia'
            'skip': 'Blocked outside Russia',
        },
        {
            'url': 'http://sochi2014.vesti.ru/live/play/live_id/301',
@@ -72,7 +97,7 @@ class VestiIE(InfoExtractor):
                'skip_download': True,
            },
            'skip': 'Translation has finished'
        }
        },
    ]

    def _real_extract(self, url):
@@ -81,90 +106,16 @@ class VestiIE(InfoExtractor):

        page = self._download_webpage(url, video_id, 'Downloading page')

        mobj = re.search(r'<meta property="og:video" content=".+?\.swf\?v?id=(?P<id>\d+).*?" />', page)
        mobj = re.search(
            r'<meta[^>]+?property="og:video"[^>]+?content="http://www\.vesti\.ru/i/flvplayer_videoHost\.swf\?vid=(?P<id>\d+)',
            page)
        if mobj:
            video_type = 'video'
            video_id = mobj.group('id')
        else:
            mobj = re.search(
                r'<iframe.+?src="http://player\.rutv\.ru/iframe/(?P<type>[^/]+)/id/(?P<id>\d+)[^"]*".*?></iframe>', page)
            page = self._download_webpage('http://www.vesti.ru/only_video.html?vid=%s' % video_id, video_id,
                'Downloading video page')

            if not mobj:
                raise ExtractorError('No media found')
        rutv_url = RUTVIE._extract_url(page)
        if rutv_url:
            return self.url_result(rutv_url, 'RUTV')

            video_type = mobj.group('type')
            video_id = mobj.group('id')

        json_data = self._download_json(
            'http://player.rutv.ru/iframe/%splay/id/%s' % ('live-' if video_type == 'live' else '', video_id),
            video_id, 'Downloading JSON')

        if json_data['errors']:
            raise ExtractorError('vesti returned error: %s' % json_data['errors'], expected=True)

        playlist = json_data['data']['playlist']
        medialist = playlist['medialist']
        media = medialist[0]

        if media['errors']:
            raise ExtractorError('vesti returned error: %s' % media['errors'], expected=True)

        view_count = playlist.get('count_views')
        priority_transport = playlist['priority_transport']

        thumbnail = media['picture']
        width = media['width']
        height = media['height']
        description = media['anons']
        title = media['title']
        duration = int_or_none(media.get('duration'))

        formats = []

        for transport, links in media['sources'].items():
            for quality, url in links.items():
                if transport == 'rtmp':
                    mobj = re.search(r'^(?P<url>rtmp://[^/]+/(?P<app>.+))/(?P<playpath>.+)$', url)
                    if not mobj:
                        continue
                    fmt = {
                        'url': mobj.group('url'),
                        'play_path': mobj.group('playpath'),
                        'app': mobj.group('app'),
                        'page_url': 'http://player.rutv.ru',
                        'player_url': 'http://player.rutv.ru/flash2v/osmf.swf?i=22',
                        'rtmp_live': True,
                        'ext': 'flv',
                        'vbr': int(quality),
                    }
                elif transport == 'm3u8':
                    fmt = {
                        'url': url,
                        'ext': 'mp4',
                    }
                else:
                    fmt = {
                        'url': url
                    }
                fmt.update({
                    'width': width,
                    'height': height,
                    'format_id': '%s-%s' % (transport, quality),
                    'preference': -1 if priority_transport == transport else -2,
                })
                formats.append(fmt)

        if not formats:
            raise ExtractorError('No media links available for %s' % video_id)

        self._sort_formats(formats)

        return {
            'id': video_id,
            'title': title,
            'description': description,
            'thumbnail': thumbnail,
            'view_count': view_count,
            'duration': duration,
            'formats': formats,
        }
        raise ExtractorError('No video found', expected=True)
@@ -2,7 +2,6 @@ from __future__ import unicode_literals

import re
import xml.etree.ElementTree
import datetime

from .common import InfoExtractor
from ..utils import (
@@ -22,6 +21,7 @@ class VevoIE(InfoExtractor):
           https?://videoplayer\.vevo\.com/embed/embedded\?videoId=|
           vevo:)
        (?P<id>[^&?#]+)'''

    _TESTS = [{
        'url': 'http://www.vevo.com/watch/hurts/somebody-to-die-for/GB1101300280',
        "md5": "06bea460acb744eab74a9d7dcb4bfd61",
@@ -34,6 +34,8 @@ class VevoIE(InfoExtractor):
            "duration": 230.12,
            "width": 1920,
            "height": 1080,
            # timestamp and upload_date are often incorrect; seem to change randomly
            'timestamp': int,
        }
    }, {
        'note': 'v3 SMIL format',
@@ -47,6 +49,7 @@ class VevoIE(InfoExtractor):
            'title': 'I Wish I Could Break Your Heart',
            'duration': 226.101,
            'age_limit': 0,
            'timestamp': int,
        }
    }, {
        'note': 'Age-limited video',
@@ -57,7 +60,8 @@ class VevoIE(InfoExtractor):
            'age_limit': 18,
            'title': 'Tunnel Vision (Explicit)',
            'uploader': 'Justin Timberlake',
            'upload_date': '20130704',
            'upload_date': 're:2013070[34]',
            'timestamp': int,
        },
        'params': {
            'skip_download': 'true',
@@ -169,13 +173,13 @@ class VevoIE(InfoExtractor):

        timestamp_ms = int(self._search_regex(
            r'/Date\((\d+)\)/', video_info['launchDate'], 'launch date'))
        upload_date = datetime.datetime.fromtimestamp(timestamp_ms // 1000)

        return {
            'id': video_id,
            'title': video_info['title'],
            'formats': formats,
            'thumbnail': video_info['imageUrl'],
            'upload_date': upload_date.strftime('%Y%m%d'),
            'timestamp': timestamp_ms // 1000,
            'uploader': video_info['mainArtists'][0]['artistName'],
            'duration': video_info['duration'],
            'age_limit': age_limit,
@@ -29,6 +29,7 @@ class VideoBamIE(InfoExtractor):
            'info_dict': {
                'id': 'pqLvq',
                'ext': 'mp4',
                'title': '_',
            }
        },
    ]
@@ -61,7 +62,7 @@ class VideoBamIE(InfoExtractor):

        self._sort_formats(formats)

        title = self._og_search_title(page, default='VideoBam', fatal=False)
        title = self._og_search_title(page, default='_', fatal=False)
        description = self._og_search_description(page, default=None)
        thumbnail = self._og_search_thumbnail(page)
        uploader = self._html_search_regex(r'Upload by ([^<]+)</a>', page, 'uploader', fatal=False, default=None)
@@ -1,22 +1,23 @@
from __future__ import unicode_literals

import re

from .common import InfoExtractor
from .internetvideoarchive import InternetVideoArchiveIE
from ..utils import (
    compat_urlparse,
)
from ..utils import compat_urlparse


class VideoDetectiveIE(InfoExtractor):
    _VALID_URL = r'https?://www\.videodetective\.com/[^/]+/[^/]+/(?P<id>\d+)'

    _TEST = {
        u'url': u'http://www.videodetective.com/movies/kick-ass-2/194487',
        u'file': u'194487.mp4',
        u'info_dict': {
            u'title': u'KICK-ASS 2',
            u'description': u'md5:65ba37ad619165afac7d432eaded6013',
            u'duration': 135,
        'url': 'http://www.videodetective.com/movies/kick-ass-2/194487',
        'info_dict': {
            'id': '194487',
            'ext': 'mp4',
            'title': 'KICK-ASS 2',
            'description': 'md5:65ba37ad619165afac7d432eaded6013',
            'duration': 135,
        },
    }

@@ -26,5 +27,4 @@ class VideoDetectiveIE(InfoExtractor):
        webpage = self._download_webpage(url, video_id)
        og_video = self._og_search_video_url(webpage)
        query = compat_urlparse.urlparse(og_video).query
        return self.url_result(InternetVideoArchiveIE._build_url(query),
            ie=InternetVideoArchiveIE.ie_key())
        return self.url_result(InternetVideoArchiveIE._build_url(query), ie=InternetVideoArchiveIE.ie_key())
@@ -8,6 +8,7 @@ import itertools
 | 
			
		||||
from .common import InfoExtractor
 | 
			
		||||
from .subtitles import SubtitlesInfoExtractor
 | 
			
		||||
from ..utils import (
 | 
			
		||||
    compat_HTTPError,
 | 
			
		||||
    compat_urllib_parse,
 | 
			
		||||
    compat_urllib_request,
 | 
			
		||||
    clean_html,
 | 
			
		||||
@@ -101,6 +102,15 @@ class VimeoIE(SubtitlesInfoExtractor):
 | 
			
		||||
        },
 | 
			
		||||
    ]
 | 
			
		||||
 | 
			
		||||
    @classmethod
 | 
			
		||||
    def suitable(cls, url):
 | 
			
		||||
        if VimeoChannelIE.suitable(url):
 | 
			
		||||
            # Otherwise channel urls like http://vimeo.com/channels/31259 would
 | 
			
		||||
            # match
 | 
			
		||||
            return False
 | 
			
		||||
        else:
 | 
			
		||||
            return super(VimeoIE, cls).suitable(url)
 | 
			
		||||
 | 
			
		||||
    def _login(self):
 | 
			
		||||
        (username, password) = self._get_login_info()
 | 
			
		||||
        if username is None:
 | 
			
		||||
@@ -172,7 +182,18 @@ class VimeoIE(SubtitlesInfoExtractor):
 | 
			
		||||
 | 
			
		||||
        # Retrieve video webpage to extract further information
 | 
			
		||||
        request = compat_urllib_request.Request(url, None, headers)
 | 
			
		||||
        webpage = self._download_webpage(request, video_id)
 | 
			
		||||
        try:
 | 
			
		||||
            webpage = self._download_webpage(request, video_id)
 | 
			
		||||
        except ExtractorError as ee:
 | 
			
		||||
            if isinstance(ee.cause, compat_HTTPError) and ee.cause.code == 403:
 | 
			
		||||
                errmsg = ee.cause.read()
 | 
			
		||||
                if b'Because of its privacy settings, this video cannot be played here' in errmsg:
 | 
			
		||||
                    raise ExtractorError(
 | 
			
		||||
                        'Cannot download embed-only video without embedding '
 | 
			
		||||
                        'URL. Please call youtube-dl with the URL of the page '
 | 
			
		||||
                        'that embeds this video.',
 | 
			
		||||
                        expected=True)
 | 
			
		||||
            raise
 | 
			
		||||
 | 
			
		||||
        # Now we begin extracting as much information as we can from what we
 | 
			
		||||
        # retrieved. First we extract the information common to all extractors,
 | 
			
		||||
@@ -320,7 +341,7 @@ class VimeoIE(SubtitlesInfoExtractor):
 | 
			
		||||
 | 
			
		||||
class VimeoChannelIE(InfoExtractor):
 | 
			
		||||
    IE_NAME = 'vimeo:channel'
 | 
			
		||||
    _VALID_URL = r'(?:https?://)?vimeo\.com/channels/(?P<id>[^/]+)'
 | 
			
		||||
    _VALID_URL = r'(?:https?://)?vimeo\.com/channels/(?P<id>[^/]+)/?(\?.*)?$'
 | 
			
		||||
    _MORE_PAGES_INDICATOR = r'<a.+?rel="next"'
 | 
			
		||||
    _TITLE_RE = r'<link rel="alternate"[^>]+?title="(.*?)"'
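The `@@ -172,7 +182,18 @@` hunk above wraps the page download in a try/except so that an HTTP 403 whose body mentions the privacy notice becomes an actionable "embed-only" message instead of a bare error. A rough standalone sketch of that pattern with plain urllib (the URL and the message text are placeholders; `ExtractorError` and `compat_HTTPError` are youtube-dl internals and are not used here):

```python
# Sketch of the 403-body inspection shown in the Vimeo hunk above.
try:
    from urllib.request import urlopen
    from urllib.error import HTTPError
except ImportError:  # Python 2
    from urllib2 import urlopen, HTTPError

def fetch_or_explain(url):
    try:
        return urlopen(url).read()
    except HTTPError as err:
        body = err.read()
        if err.code == 403 and b'privacy settings' in body:
            raise RuntimeError(
                'Embed-only video: retry with the URL of the page that embeds it.')
        raise  # anything else is re-raised unchanged

# fetch_or_explain('http://vimeo.com/00000000')  # placeholder URL
```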

@@ -13,7 +13,7 @@ class VubeIE(InfoExtractor):

    _TEST = {
        'url': 'http://vube.com/Chiara+Grispo+Video+Channel/YL2qNPkqon',
        'md5': 'f81dcf6d0448e3291f54380181695821',
        'md5': 'db7aba89d4603dadd627e9d1973946fe',
        'info_dict': {
            'id': 'YL2qNPkqon',
            'ext': 'mp4',
@@ -77,4 +77,4 @@ class VubeIE(InfoExtractor):
            'like_count': like_count,
            'dislike_count': dislike_count,
            'comment_count': comment_count,
        }
        }

youtube_dl/extractor/wdr.py (new file, 114 lines)
@@ -0,0 +1,114 @@
from __future__ import unicode_literals

import re

from .common import InfoExtractor
from ..utils import (
    unified_strdate,
    compat_urlparse,
    determine_ext,
)


class WDRIE(InfoExtractor):
    _PLAYER_REGEX = '-(?:video|audio)player(?:_size-[LMS])?'
    _VALID_URL = r'(?P<url>https?://www\d?\.(?:wdr\d?|funkhauseuropa)\.de/)(?P<id>.+?)(?P<player>%s)?\.html' % _PLAYER_REGEX

    _TESTS = [
        {
            'url': 'http://www1.wdr.de/mediathek/video/sendungen/servicezeit/videoservicezeit560-videoplayer_size-L.html',
            'info_dict': {
                'id': 'mdb-362427',
                'ext': 'flv',
                'title': 'Servicezeit',
                'description': 'md5:c8f43e5e815eeb54d0b96df2fba906cb',
                'upload_date': '20140310',
            },
            'params': {
                'skip_download': True,
            },
        },
        {
            'url': 'http://www1.wdr.de/themen/av/videomargaspiegelisttot101-videoplayer.html',
            'info_dict': {
                'id': 'mdb-363194',
                'ext': 'flv',
                'title': 'Marga Spiegel ist tot',
                'description': 'md5:2309992a6716c347891c045be50992e4',
                'upload_date': '20140311',
            },
            'params': {
                'skip_download': True,
            },
        },
        {
            'url': 'http://www1.wdr.de/themen/kultur/audioerlebtegeschichtenmargaspiegel100-audioplayer.html',
            'md5': '83e9e8fefad36f357278759870805898',
            'info_dict': {
                'id': 'mdb-194332',
                'ext': 'mp3',
                'title': 'Erlebte Geschichten: Marga Spiegel (29.11.2009)',
                'description': 'md5:2309992a6716c347891c045be50992e4',
                'upload_date': '20091129',
            },
        },
        {
            'url': 'http://www.funkhauseuropa.de/av/audiogrenzenlosleckerbaklava101-audioplayer.html',
            'md5': 'cfff440d4ee64114083ac44676df5d15',
            'info_dict': {
                'id': 'mdb-363068',
                'ext': 'mp3',
                'title': 'Grenzenlos lecker - Baklava',
                'description': 'md5:7b29e97e10dfb6e265238b32fa35b23a',
                'upload_date': '20140311',
            },
        },
    ]

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        page_url = mobj.group('url')
        page_id = mobj.group('id')

        webpage = self._download_webpage(url, page_id)

        if mobj.group('player') is None:
            entries = [
                self.url_result(page_url + href, 'WDR')
                for href in re.findall(r'<a href="/?(.+?%s\.html)" rel="nofollow"' % self._PLAYER_REGEX, webpage)
            ]
            return self.playlist_result(entries, page_id)

        flashvars = compat_urlparse.parse_qs(
            self._html_search_regex(r'<param name="flashvars" value="([^"]+)"', webpage, 'flashvars'))

        page_id = flashvars['trackerClipId'][0]
        video_url = flashvars['dslSrc'][0]
        title = flashvars['trackerClipTitle'][0]
        thumbnail = flashvars['startPicture'][0] if 'startPicture' in flashvars else None

        if 'trackerClipAirTime' in flashvars:
            upload_date = flashvars['trackerClipAirTime'][0]
        else:
            upload_date = self._html_search_meta('DC.Date', webpage, 'upload date')

        if upload_date:
            upload_date = unified_strdate(upload_date)

        if video_url.endswith('.f4m'):
            video_url += '?hdcore=3.2.0&plugin=aasp-3.2.0.77.18'
            ext = 'flv'
        else:
            ext = determine_ext(video_url)

        description = self._html_search_meta('Description', webpage, 'description')

        return {
            'id': page_id,
            'url': video_url,
            'ext': ext,
            'title': title,
            'description': description,
            'thumbnail': thumbnail,
            'upload_date': upload_date,
        }
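The new WDR extractor reads everything it needs out of the player's `flashvars` parameter via `compat_urlparse.parse_qs`, which is why every value is indexed with `[0]` (parse_qs returns a dict of lists). A quick illustration with the standard library; the flashvars string below is invented for the example, not captured from wdr.de:

```python
try:
    from urllib.parse import parse_qs  # Python 3
except ImportError:
    from urlparse import parse_qs      # Python 2

# Made-up flashvars string in the shape the extractor above expects.
flashvars = parse_qs(
    'dslSrc=rtmp%3A%2F%2Fexample%2Fclip.mp4&trackerClipId=mdb-000000'
    '&trackerClipTitle=Servicezeit&trackerClipAirTime=10.03.2014%2018.30')

print(flashvars['trackerClipId'][0])       # mdb-000000
print(flashvars['dslSrc'][0])              # rtmp://example/clip.mp4
print(flashvars['trackerClipAirTime'][0])  # 10.03.2014 18.30
```

Air-time strings in that last shape are presumably also why `unified_strdate` gains the `'%d.%m.%Y %H.%M'` pattern later in this diff.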

@@ -1,55 +1,49 @@
# encoding: utf-8
from __future__ import unicode_literals

import re

from .common import InfoExtractor
from ..utils import (
    compat_urllib_parse,

    ExtractorError,
)


class XNXXIE(InfoExtractor):
    _VALID_URL = r'^(?:https?://)?(?:video|www)\.xnxx\.com/video([0-9]+)/(.*)'
    VIDEO_URL_RE = r'flv_url=(.*?)&'
    VIDEO_TITLE_RE = r'<title>(.*?)\s+-\s+XNXX.COM'
    VIDEO_THUMB_RE = r'url_bigthumb=(.*?)&'
    _VALID_URL = r'^https?://(?:video|www)\.xnxx\.com/video(?P<id>[0-9]+)/(.*)'
    _TEST = {
        u'url': u'http://video.xnxx.com/video1135332/lida_naked_funny_actress_5_',
        u'file': u'1135332.flv',
        u'md5': u'0831677e2b4761795f68d417e0b7b445',
        u'info_dict': {
            u"title": u"lida \u00bb Naked Funny Actress  (5)",
            u"age_limit": 18,
        'url': 'http://video.xnxx.com/video1135332/lida_naked_funny_actress_5_',
        'md5': '0831677e2b4761795f68d417e0b7b445',
        'info_dict': {
            'id': '1135332',
            'ext': 'flv',
            'title': 'lida » Naked Funny Actress  (5)',
            'age_limit': 18,
        }
    }

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        if mobj is None:
            raise ExtractorError(u'Invalid URL: %s' % url)
        video_id = mobj.group(1)
        video_id = mobj.group('id')

        # Get webpage content
        webpage = self._download_webpage(url, video_id)

        video_url = self._search_regex(self.VIDEO_URL_RE,
            webpage, u'video URL')
        video_url = self._search_regex(r'flv_url=(.*?)&',
            webpage, 'video URL')
        video_url = compat_urllib_parse.unquote(video_url)

        video_title = self._html_search_regex(self.VIDEO_TITLE_RE,
            webpage, u'title')
        video_title = self._html_search_regex(r'<title>(.*?)\s+-\s+XNXX.COM',
            webpage, 'title')

        video_thumbnail = self._search_regex(self.VIDEO_THUMB_RE,
            webpage, u'thumbnail', fatal=False)
        video_thumbnail = self._search_regex(r'url_bigthumb=(.*?)&',
            webpage, 'thumbnail', fatal=False)

        return [{
        return {
            'id': video_id,
            'url': video_url,
            'uploader': None,
            'upload_date': None,
            'title': video_title,
            'ext': 'flv',
            'thumbnail': video_thumbnail,
            'description': None,
            'age_limit': 18,
        }]
        }

@@ -7,19 +7,24 @@ from .common import InfoExtractor
from ..utils import (
    compat_urllib_parse_urlparse,
    compat_urllib_request,
    parse_duration,
    str_to_int,
)


class XTubeIE(InfoExtractor):
    _VALID_URL = r'^(?:https?://)?(?:www\.)?(?P<url>xtube\.com/watch\.php\?v=(?P<videoid>[^/?&]+))'
    _VALID_URL = r'https?://(?:www\.)?(?P<url>xtube\.com/watch\.php\?v=(?P<videoid>[^/?&]+))'
    _TEST = {
        'url': 'http://www.xtube.com/watch.php?v=kVTUy_G222_',
        'file': 'kVTUy_G222_.mp4',
        'md5': '092fbdd3cbe292c920ef6fc6a8a9cdab',
        'info_dict': {
            "title": "strange erotica",
            "description": "surreal gay themed erotica...almost an ET kind of thing",
            "uploader": "greenshowers",
            "age_limit": 18,
            'id': 'kVTUy_G222_',
            'ext': 'mp4',
            'title': 'strange erotica',
            'description': 'surreal gay themed erotica...almost an ET kind of thing',
            'uploader': 'greenshowers',
            'duration': 450,
            'age_limit': 18,
        }
    }

@@ -32,10 +37,23 @@ class XTubeIE(InfoExtractor):
        req.add_header('Cookie', 'age_verified=1')
        webpage = self._download_webpage(req, video_id)

        video_title = self._html_search_regex(r'<div class="p_5px[^>]*>([^<]+)', webpage, 'title')
        video_uploader = self._html_search_regex(r'so_s\.addVariable\("owner_u", "([^"]+)', webpage, 'uploader', fatal=False)
        video_description = self._html_search_regex(r'<p class="video_description">([^<]+)', webpage, 'description', fatal=False)
        video_url= self._html_search_regex(r'var videoMp4 = "([^"]+)', webpage, 'video_url').replace('\\/', '/')
        video_title = self._html_search_regex(r'<p class="title">([^<]+)', webpage, 'title')
        video_uploader = self._html_search_regex(
            r'so_s\.addVariable\("owner_u", "([^"]+)', webpage, 'uploader', fatal=False)
        video_description = self._html_search_regex(
            r'<p class="fieldsDesc">([^<]+)', webpage, 'description', fatal=False)
        video_url = self._html_search_regex(r'var videoMp4 = "([^"]+)', webpage, 'video_url').replace('\\/', '/')
        duration = parse_duration(self._html_search_regex(
            r'<span class="bold">Runtime:</span> ([^<]+)</p>', webpage, 'duration', fatal=False))
        view_count = self._html_search_regex(
            r'<span class="bold">Views:</span> ([\d,\.]+)</p>', webpage, 'view count', fatal=False)
        if view_count:
            view_count = str_to_int(view_count)
        comment_count = self._html_search_regex(
            r'<div id="commentBar">([\d,\.]+) Comments</div>', webpage, 'comment count', fatal=False)
        if comment_count:
            comment_count = str_to_int(comment_count)

        path = compat_urllib_parse_urlparse(video_url).path
        extension = os.path.splitext(path)[1][1:]
        format = path.split('/')[5].split('_')[:2]
@@ -48,6 +66,9 @@ class XTubeIE(InfoExtractor):
            'title': video_title,
            'uploader': video_uploader,
            'description': video_description,
            'duration': duration,
            'view_count': view_count,
            'comment_count': comment_count,
            'url': video_url,
            'ext': extension,
            'format': format,
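The XTube hunk adds duration, view count and comment count, running the raw page strings through `parse_duration` and `str_to_int`. The snippet below sketches what those conversions amount to with simplified stand-ins (these are not the `youtube_dl.utils` implementations, just illustrations of the normalisation involved):

```python
import re

def str_to_int_sketch(s):
    # '1,249' -> 1249; strips thousands separators before converting.
    return int(re.sub(r'[,\.]', '', s))

def parse_duration_sketch(s):
    # Handles plain 'MM:SS' / 'HH:MM:SS' strings only.
    m = re.match(r'(?:(\d+):)?(\d+):(\d+)$', s.strip())
    if not m:
        return None
    hours, minutes, seconds = m.groups()
    return int(hours or 0) * 3600 + int(minutes) * 60 + int(seconds)

print(str_to_int_sketch('1,249'))     # 1249
print(parse_duration_sketch('7:30'))  # 450, which lines up with the test's 'duration'
```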

@@ -194,14 +194,14 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
        '151': {'ext': 'mp4', 'height': 72, 'resolution': '72p', 'format_note': 'HLS', 'preference': -10},

        # DASH mp4 video
        '133': {'ext': 'mp4', 'height': 240, 'resolution': '240p', 'format_note': 'DASH video', 'preference': -40},
        '134': {'ext': 'mp4', 'height': 360, 'resolution': '360p', 'format_note': 'DASH video', 'preference': -40},
        '135': {'ext': 'mp4', 'height': 480, 'resolution': '480p', 'format_note': 'DASH video', 'preference': -40},
        '136': {'ext': 'mp4', 'height': 720, 'resolution': '720p', 'format_note': 'DASH video', 'preference': -40},
        '137': {'ext': 'mp4', 'height': 1080, 'resolution': '1080p', 'format_note': 'DASH video', 'preference': -40},
        '138': {'ext': 'mp4', 'height': 2160, 'resolution': '2160p', 'format_note': 'DASH video', 'preference': -40},
        '160': {'ext': 'mp4', 'height': 192, 'resolution': '192p', 'format_note': 'DASH video', 'preference': -40},
        '264': {'ext': 'mp4', 'height': 1440, 'resolution': '1440p', 'format_note': 'DASH video', 'preference': -40},
        '133': {'ext': 'mp4', 'height': 240, 'resolution': '240p', 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
        '134': {'ext': 'mp4', 'height': 360, 'resolution': '360p', 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
        '135': {'ext': 'mp4', 'height': 480, 'resolution': '480p', 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
        '136': {'ext': 'mp4', 'height': 720, 'resolution': '720p', 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
        '137': {'ext': 'mp4', 'height': 1080, 'resolution': '1080p', 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
        '138': {'ext': 'mp4', 'height': 2160, 'resolution': '2160p', 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
        '160': {'ext': 'mp4', 'height': 192, 'resolution': '192p', 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
        '264': {'ext': 'mp4', 'height': 1440, 'resolution': '1440p', 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},

        # Dash mp4 audio
        '139': {'ext': 'm4a', 'format_note': 'DASH audio', 'vcodec': 'none', 'abr': 48, 'preference': -50},
@@ -209,12 +209,12 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
        '141': {'ext': 'm4a', 'format_note': 'DASH audio', 'vcodec': 'none', 'abr': 256, 'preference': -50},

        # Dash webm
        '167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'VP8', 'acodec': 'none', 'preference': -40},
        '168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'VP8', 'acodec': 'none', 'preference': -40},
        '169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'VP8', 'acodec': 'none', 'preference': -40},
        '170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'VP8', 'acodec': 'none', 'preference': -40},
        '218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'VP8', 'acodec': 'none', 'preference': -40},
        '219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'VP8', 'acodec': 'none', 'preference': -40},
        '167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'acodec': 'none', 'container': 'webm', 'vcodec': 'VP8', 'acodec': 'none', 'preference': -40},
        '168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'acodec': 'none', 'container': 'webm', 'vcodec': 'VP8', 'acodec': 'none', 'preference': -40},
        '169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'acodec': 'none', 'container': 'webm', 'vcodec': 'VP8', 'acodec': 'none', 'preference': -40},
        '170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'acodec': 'none', 'container': 'webm', 'vcodec': 'VP8', 'acodec': 'none', 'preference': -40},
        '218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'acodec': 'none', 'container': 'webm', 'vcodec': 'VP8', 'acodec': 'none', 'preference': -40},
        '219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'acodec': 'none', 'container': 'webm', 'vcodec': 'VP8', 'acodec': 'none', 'preference': -40},
        '242': {'ext': 'webm', 'height': 240, 'resolution': '240p', 'format_note': 'DASH webm', 'preference': -40},
        '243': {'ext': 'webm', 'height': 360, 'resolution': '360p', 'format_note': 'DASH webm', 'preference': -40},
        '244': {'ext': 'webm', 'height': 480, 'resolution': '480p', 'format_note': 'DASH webm', 'preference': -40},
@@ -1285,10 +1285,11 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):

        # Decide which formats to download
        try:
            mobj = re.search(r';ytplayer.config = ({.*?});', video_webpage)
            mobj = re.search(r';ytplayer\.config\s*=\s*({.*?});', video_webpage)
            if not mobj:
                raise ValueError('Could not find vevo ID')
            ytplayer_config = json.loads(mobj.group(1))
            json_code = uppercase_escape(mobj.group(1))
            ytplayer_config = json.loads(json_code)
            args = ytplayer_config['args']
            # Easy way to know if the 's' value is in url_encoded_fmt_stream_map
            # this signatures are encrypted
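The last hunk above relaxes the `ytplayer.config` regex to allow whitespace around the `=` and, more importantly, runs `mobj.group(1)` through `uppercase_escape()` before `json.loads()`. The apparent reason is that the player config can contain `\UXXXXXXXX` escapes, which strict JSON rejects. A self-contained sketch of that decoding step (an illustrative reimplementation, not the actual `youtube_dl.utils.uppercase_escape`):

```python
import json
import re

def uppercase_escape_sketch(s):
    # Turn \UXXXXXXXX escapes (invalid in strict JSON) into the real character.
    return re.sub(
        r'\\U([0-9a-fA-F]{8})',
        lambda m: chr(int(m.group(1), 16)),  # unichr() on Python 2
        s)

raw = '{"title": "test \\U0001f600"}'  # made-up config snippet
# json.loads(raw) would fail on the \U escape; decode it first.
print(json.loads(uppercase_escape_sketch(raw))['title'])
```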

@@ -22,6 +22,7 @@ import struct
import subprocess
import sys
import traceback
import xml.etree.ElementTree
import zlib

try:
@@ -777,6 +778,7 @@ def unified_strdate(date_str):
        '%Y/%m/%d %H:%M:%S',
        '%Y-%m-%d %H:%M:%S',
        '%d.%m.%Y %H:%M',
        '%d.%m.%Y %H.%M',
        '%Y-%m-%dT%H:%M:%SZ',
        '%Y-%m-%dT%H:%M:%S.%fZ',
        '%Y-%m-%dT%H:%M:%S.%f0Z',
@@ -1263,3 +1265,17 @@ def read_batch_urls(batch_fd):

    with contextlib.closing(batch_fd) as fd:
        return [url for url in map(fixup, fd) if url]


def urlencode_postdata(*args, **kargs):
    return compat_urllib_parse.urlencode(*args, **kargs).encode('ascii')


def parse_xml(s):
    class TreeBuilder(xml.etree.ElementTree.TreeBuilder):
        def doctype(self, name, pubid, system):
            pass  # Ignore doctypes

    parser = xml.etree.ElementTree.XMLParser(target=TreeBuilder())
    kwargs = {'parser': parser} if sys.version_info >= (2, 7) else {}
    return xml.etree.ElementTree.XML(s.encode('utf-8'), **kwargs)
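The new `urlencode_postdata()` is just `compat_urllib_parse.urlencode()` followed by an ASCII `encode()`, so the result can be handed to a request as bytes on Python 3 as well as 2. `parse_xml()` wires an `XMLParser` to a `TreeBuilder` whose `doctype()` override silently ignores DOCTYPE declarations, and skips the custom parser entirely before Python 2.7. A small usage sketch of that parsing pattern (the XML document below is a made-up example):

```python
import sys
import xml.etree.ElementTree

# Same pattern as the parse_xml() helper above; the input is an arbitrary example.
class _QuietTreeBuilder(xml.etree.ElementTree.TreeBuilder):
    def doctype(self, name, pubid, system):
        pass  # ignore doctypes, mirroring the helper above

def parse_xml_sketch(s):
    parser = xml.etree.ElementTree.XMLParser(target=_QuietTreeBuilder())
    kwargs = {'parser': parser} if sys.version_info >= (2, 7) else {}
    return xml.etree.ElementTree.XML(s.encode('utf-8'), **kwargs)

doc = '<!DOCTYPE feed><feed><entry id="1">ok</entry></feed>'
root = parse_xml_sketch(doc)
print(root.find('entry').get('id'))  # 1
```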

@@ -1,2 +1,2 @@

__version__ = '2014.03.04.1'
__version__ = '2014.03.17'