Compare commits
	
		
			165 Commits
		
	
	
		
			2013.11.13
			...
			rtmp_test
		
	
	| Author | SHA1 | Date | |
|---|---|---|---|
|   | 00b350d209 | ||
|   | d8ec4959c8 | ||
|   | d31209a144 | ||
|   | 529a2e2cc3 | ||
|   | 781a7d0546 | ||
|   | fb04e40396 | ||
|   | d9b011f201 | ||
|   | b0b9eaa196 | ||
|   | 8b134b1062 | ||
|   | 0c75c3fa7a | ||
|   | a3927cf7ee | ||
|   | 1a62c18f65 | ||
|   | 2a15e7063b | ||
|   | d46cc192d7 | ||
|   | bb2bebdbe1 | ||
|   | 5db07df634 | ||
|   | ea36cbac5e | ||
|   | d0d2b49ab7 | ||
|   | 31cb6d8fef | ||
|   | daa0dd2973 | ||
|   | de79c46c8f | ||
|   | 94ccb6fa2e | ||
|   | 07e4035879 | ||
|   | d0efb9ec9a | ||
|   | ac05067d3d | ||
|   | 113577e155 | ||
|   | 79d09f47c2 | ||
|   | c059bdd432 | ||
|   | 02dbf93f0e | ||
|   | 1fb2bcbbf7 | ||
|   | 16e055849e | ||
|   | 66cfab4226 | ||
|   | 6d88bc37a3 | ||
|   | b7553b2554 | ||
|   | e03db0a077 | ||
|   | a1ee09e815 | ||
|   | 267ed0c5d3 | ||
|   | f459d17018 | ||
|   | dc65dcbb6d | ||
|   | d214fdb8fe | ||
|   | 138df537ff | ||
|   | 0c7c19d6bc | ||
|   | eaaafc59c2 | ||
|   | 382ed50e0e | ||
|   | 66ec019240 | ||
|   | bd49928f7a | ||
|   | 23e6d50d73 | ||
|   | 2e767313e4 | ||
|   | 38b2db6a66 | ||
|   | 13ebea791f | ||
|   | 4c9c57428f | ||
|   | 8bf9319e9c | ||
|   | 4914120727 | ||
|   | 36de0a0e1a | ||
|   | e5c146d586 | ||
|   | 52ad14aeb0 | ||
|   | 43afe28588 | ||
|   | a87b0615aa | ||
|   | d7386f6276 | ||
|   | 081640940e | ||
|   | 7012b23c94 | ||
|   | d3b30148ed | ||
|   | 9f79463803 | ||
|   | d35dc6d3b5 | ||
|   | 50123be421 | ||
|   | 3f8ced5144 | ||
|   | 00ea0f11eb | ||
|   | dca0872056 | ||
|   | 0b63aed8df | ||
|   | 15c3adbb16 | ||
|   | f143a42fe6 | ||
|   | 241650c7ff | ||
|   | bfe7439a20 | ||
|   | cffa6aa107 | ||
|   | 02e4ebbbad | ||
|   | ab009f59ef | ||
|   | 0980426559 | ||
|   | b1c9c66936 | ||
|   | a6a173c2fd | ||
|   | 2bb683c201 | ||
|   | 64bb5187f5 | ||
|   | 9e4f50a8ae | ||
|   | 0190eecc00 | ||
|   | ca872a4c0b | ||
|   | f2e87ef4fa | ||
|   | 0ad97bbc05 | ||
|   | c4864091a1 | ||
|   | 9a98a466b3 | ||
|   | f99e0f1ed6 | ||
|   | d323bcb152 | ||
|   | da6a795fdb | ||
|   | c5edcde21f | ||
|   | 15ff3c831e | ||
|   | 100959a6d9 | ||
|   | 0a120f74b2 | ||
|   | 8f05351984 | ||
|   | 4eb92208a3 | ||
|   | 71791f414c | ||
|   | f3682997d7 | ||
|   | cc13cc0251 | ||
|   | 86bd5f2ca9 | ||
|   | 8694c60000 | ||
|   | 9d1538182f | ||
|   | 5904088811 | ||
|   | 69545c2aff | ||
|   | 495da337ae | ||
|   | 34b3afc7be | ||
|   | 00373a4c5d | ||
|   | cb7dfeeac4 | ||
|   | efd6c574a2 | ||
|   | 4113e6ab56 | ||
|   | 9a942a4671 | ||
|   | 9906d397a0 | ||
|   | ae8f787141 | ||
|   | a81b4d5c8f | ||
|   | 887c6acdf2 | ||
|   | 83aa529330 | ||
|   | 96b31b6533 | ||
|   | fccd377198 | ||
|   | 2b35c9ef74 | ||
|   | 73c566695f | ||
|   | 63b7b7224a | ||
|   | ce80c8b8ee | ||
|   | 749febf4d1 | ||
|   | bdde425cbe | ||
|   | 746f491f82 | ||
|   | 1672647ade | ||
|   | 90b6bbc38c | ||
|   | ce02ed60f2 | ||
|   | 1e5b9a95fd | ||
|   | 1d699755e0 | ||
|   | ddf49c6344 | ||
|   | ba3881dffd | ||
|   | d1c252048b | ||
|   | eab2724138 | ||
|   | 21ea3e06c9 | ||
|   | 52d703d3d1 | ||
|   | ce152341a1 | ||
|   | f058e34011 | ||
|   | b5349e8721 | ||
|   | 7150858d49 | ||
|   | 91c7271aab | ||
|   | aa13b2dffd | ||
|   | fc2ef392be | ||
|   | 463a908705 | ||
|   | d24ffe1cfa | ||
|   | 78fb87b283 | ||
|   | ab2d524780 | ||
|   | 85d61685f1 | ||
|   | b9643eed7c | ||
|   | feee2ecfa9 | ||
|   | a25a5cfeec | ||
|   | 0e145dd541 | ||
|   | 9f9be844fc | ||
|   | e3b9ab5e18 | ||
|   | c66d2baa9c | ||
|   | 08bc37cdd0 | ||
|   | 9771cceb2c | ||
|   | ca715127a2 | ||
|   | ea7a7af1d4 | ||
|   | 880e1c529d | ||
|   | dcbb45803f | ||
|   | 0bd59f3723 | ||
|   | 4894fe8c5b | ||
|   | d5a9bb4ea9 | 
| @@ -3,6 +3,9 @@ python: | ||||
|   - "2.6" | ||||
|   - "2.7" | ||||
|   - "3.3" | ||||
| before_install: | ||||
|   - sudo apt-get update -qq | ||||
|   - sudo apt-get install -qq rtmpdump | ||||
| script: nosetests test --verbose | ||||
| notifications: | ||||
|   email: | ||||
|   | ||||
| @@ -123,6 +123,7 @@ which means you can modify it, redistribute it or use it however you like. | ||||
|     --get-description          simulate, quiet but print video description | ||||
|     --get-filename             simulate, quiet but print output filename | ||||
|     --get-format               simulate, quiet but print output format | ||||
|     -j, --dump-json            simulate, quiet but print JSON information | ||||
|     --newline                  output progress bar as new lines | ||||
|     --no-progress              do not print progress bar | ||||
|     --console-title            display progress in console titlebar | ||||
|   | ||||
							
								
								
									
										2
									
								
								setup.py
									
									
									
									
									
								
							
							
						
						
									
										2
									
								
								setup.py
									
									
									
									
									
								
							| @@ -48,7 +48,7 @@ else: | ||||
|         'data_files': [  # Installing system-wide would require sudo... | ||||
|             ('etc/bash_completion.d', ['youtube-dl.bash-completion']), | ||||
|             ('share/doc/youtube_dl', ['README.txt']), | ||||
|             ('share/man/man1/', ['youtube-dl.1']) | ||||
|             ('share/man/man1', ['youtube-dl.1']) | ||||
|         ] | ||||
|     } | ||||
|     if setuptools_available: | ||||
|   | ||||
| @@ -12,10 +12,6 @@ from youtube_dl import YoutubeDL | ||||
| from youtube_dl.utils import preferredencoding | ||||
|  | ||||
|  | ||||
| def global_setup(): | ||||
|     youtube_dl._setup_opener(timeout=10) | ||||
|  | ||||
|  | ||||
| def get_params(override=None): | ||||
|     PARAMETERS_FILE = os.path.join(os.path.dirname(os.path.abspath(__file__)), | ||||
|                                    "parameters.json") | ||||
|   | ||||
| @@ -6,8 +6,7 @@ import sys | ||||
| import unittest | ||||
| sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) | ||||
|  | ||||
| from test.helper import global_setup, try_rm | ||||
| global_setup() | ||||
| from test.helper import try_rm | ||||
|  | ||||
|  | ||||
| from youtube_dl import YoutubeDL | ||||
| @@ -24,7 +23,7 @@ def _download_restricted(url, filename, age): | ||||
|     } | ||||
|     ydl = YoutubeDL(params) | ||||
|     ydl.add_default_info_extractors() | ||||
|     json_filename = filename + '.info.json' | ||||
|     json_filename = os.path.splitext(filename)[0] + '.info.json' | ||||
|     try_rm(json_filename) | ||||
|     ydl.download([url]) | ||||
|     res = os.path.exists(json_filename) | ||||
|   | ||||
| @@ -100,10 +100,11 @@ class TestAllURLsMatching(unittest.TestCase): | ||||
|     def test_keywords(self): | ||||
|         self.assertMatch(':ytsubs', ['youtube:subscriptions']) | ||||
|         self.assertMatch(':ytsubscriptions', ['youtube:subscriptions']) | ||||
|         self.assertMatch(':thedailyshow', ['ComedyCentral']) | ||||
|         self.assertMatch(':tds', ['ComedyCentral']) | ||||
|         self.assertMatch(':colbertreport', ['ComedyCentral']) | ||||
|         self.assertMatch(':cr', ['ComedyCentral']) | ||||
|         self.assertMatch(':ythistory', ['youtube:history']) | ||||
|         self.assertMatch(':thedailyshow', ['ComedyCentralShows']) | ||||
|         self.assertMatch(':tds', ['ComedyCentralShows']) | ||||
|         self.assertMatch(':colbertreport', ['ComedyCentralShows']) | ||||
|         self.assertMatch(':cr', ['ComedyCentralShows']) | ||||
|  | ||||
|  | ||||
| if __name__ == '__main__': | ||||
|   | ||||
| @@ -9,12 +9,10 @@ sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) | ||||
| from test.helper import ( | ||||
|     get_params, | ||||
|     get_testcases, | ||||
|     global_setup, | ||||
|     try_rm, | ||||
|     md5, | ||||
|     report_warning | ||||
| ) | ||||
| global_setup() | ||||
|  | ||||
|  | ||||
| import hashlib | ||||
| @@ -103,7 +101,7 @@ def generator(test_case): | ||||
|                 tc_filename = get_tc_filename(tc) | ||||
|                 try_rm(tc_filename) | ||||
|                 try_rm(tc_filename + '.part') | ||||
|                 try_rm(tc_filename + '.info.json') | ||||
|                 try_rm(os.path.splitext(tc_filename)[0] + '.info.json') | ||||
|         try_rm_tcs_files() | ||||
|         try: | ||||
|             try_num = 1 | ||||
| @@ -130,11 +128,12 @@ def generator(test_case): | ||||
|                 if not test_case.get('params', {}).get('skip_download', False): | ||||
|                     self.assertTrue(os.path.exists(tc_filename), msg='Missing file ' + tc_filename) | ||||
|                     self.assertTrue(tc_filename in finished_hook_called) | ||||
|                 self.assertTrue(os.path.exists(tc_filename + '.info.json')) | ||||
|                 info_json_fn = os.path.splitext(tc_filename)[0] + '.info.json' | ||||
|                 self.assertTrue(os.path.exists(info_json_fn)) | ||||
|                 if 'md5' in tc: | ||||
|                     md5_for_file = _file_md5(tc_filename) | ||||
|                     self.assertEqual(md5_for_file, tc['md5']) | ||||
|                 with io.open(tc_filename + '.info.json', encoding='utf-8') as infof: | ||||
|                 with io.open(info_json_fn, encoding='utf-8') as infof: | ||||
|                     info_dict = json.load(infof) | ||||
|                 for (info_field, expected) in tc.get('info_dict', {}).items(): | ||||
|                     if isinstance(expected, compat_str) and expected.startswith('md5:'): | ||||
|   | ||||
| @@ -8,8 +8,7 @@ import sys | ||||
| import unittest | ||||
| sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) | ||||
|  | ||||
| from test.helper import FakeYDL, global_setup | ||||
| global_setup() | ||||
| from test.helper import FakeYDL | ||||
|  | ||||
|  | ||||
| from youtube_dl.extractor import ( | ||||
| @@ -22,6 +21,7 @@ from youtube_dl.extractor import ( | ||||
|     LivestreamIE, | ||||
|     NHLVideocenterIE, | ||||
|     BambuserChannelIE, | ||||
|     BandcampAlbumIE | ||||
| ) | ||||
|  | ||||
|  | ||||
| @@ -101,7 +101,15 @@ class TestPlaylists(unittest.TestCase): | ||||
|         result = ie.extract('http://bambuser.com/channel/pixelversity') | ||||
|         self.assertIsPlaylist(result) | ||||
|         self.assertEqual(result['title'], u'pixelversity') | ||||
|         self.assertTrue(len(result['entries']) >= 66) | ||||
|         self.assertTrue(len(result['entries']) >= 60) | ||||
|  | ||||
|     def test_bandcamp_album(self): | ||||
|         dl = FakeYDL() | ||||
|         ie = BandcampAlbumIE(dl) | ||||
|         result = ie.extract('http://mpallante.bandcamp.com/album/nightmare-night-ep') | ||||
|         self.assertIsPlaylist(result) | ||||
|         self.assertEqual(result['title'], u'Nightmare Night EP') | ||||
|         self.assertTrue(len(result['entries']) >= 4) | ||||
|  | ||||
| if __name__ == '__main__': | ||||
|     unittest.main() | ||||
|   | ||||
| @@ -6,8 +6,7 @@ import sys | ||||
| import unittest | ||||
| sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) | ||||
|  | ||||
| from test.helper import FakeYDL, global_setup, md5 | ||||
| global_setup() | ||||
| from test.helper import FakeYDL, md5 | ||||
|  | ||||
|  | ||||
| from youtube_dl.extractor import ( | ||||
|   | ||||
| @@ -24,6 +24,8 @@ from youtube_dl.utils import ( | ||||
|     xpath_with_ns, | ||||
|     smuggle_url, | ||||
|     unsmuggle_url, | ||||
|     shell_quote, | ||||
|     encodeFilename, | ||||
| ) | ||||
|  | ||||
| if sys.version_info < (3, 0): | ||||
| @@ -170,6 +172,10 @@ class TestUtil(unittest.TestCase): | ||||
|         self.assertEqual(res_url, url) | ||||
|         self.assertEqual(res_data, None) | ||||
|  | ||||
|     def test_shell_quote(self): | ||||
|         args = ['ffmpeg', '-i', encodeFilename(u'ñ€ß\'.mp4')] | ||||
|         self.assertEqual(shell_quote(args), u"""ffmpeg -i 'ñ€ß'"'"'.mp4'""") | ||||
|  | ||||
|  | ||||
| if __name__ == '__main__': | ||||
|     unittest.main() | ||||
|   | ||||
| @@ -7,8 +7,7 @@ import sys | ||||
| import unittest | ||||
| sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) | ||||
|  | ||||
| from test.helper import get_params, global_setup, try_rm | ||||
| global_setup() | ||||
| from test.helper import get_params, try_rm | ||||
|  | ||||
|  | ||||
| import io | ||||
|   | ||||
| @@ -7,8 +7,7 @@ import sys | ||||
| import unittest | ||||
| sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) | ||||
|  | ||||
| from test.helper import get_params, global_setup | ||||
| global_setup() | ||||
| from test.helper import get_params | ||||
|  | ||||
|  | ||||
| import io | ||||
| @@ -31,7 +30,7 @@ params = get_params({ | ||||
|  | ||||
|  | ||||
| TEST_ID = 'BaW_jenozKc' | ||||
| INFO_JSON_FILE = TEST_ID + '.mp4.info.json' | ||||
| INFO_JSON_FILE = TEST_ID + '.info.json' | ||||
| DESCRIPTION_FILE = TEST_ID + '.mp4.description' | ||||
| EXPECTED_DESCRIPTION = u'''test chars:  "'/\ä↭𝕐 | ||||
|  | ||||
|   | ||||
| @@ -6,8 +6,7 @@ import sys | ||||
| import unittest | ||||
| sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) | ||||
|  | ||||
| from test.helper import FakeYDL, global_setup | ||||
| global_setup() | ||||
| from test.helper import FakeYDL | ||||
|  | ||||
|  | ||||
| from youtube_dl.extractor import ( | ||||
| @@ -27,7 +26,7 @@ class TestYoutubeLists(unittest.TestCase): | ||||
|     def test_youtube_playlist(self): | ||||
|         dl = FakeYDL() | ||||
|         ie = YoutubePlaylistIE(dl) | ||||
|         result = ie.extract('https://www.youtube.com/playlist?list=PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re')[0] | ||||
|         result = ie.extract('https://www.youtube.com/playlist?list=PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re') | ||||
|         self.assertIsPlaylist(result) | ||||
|         self.assertEqual(result['title'], 'ytdl test PL') | ||||
|         ytie_results = [YoutubeIE()._extract_id(url['url']) for url in result['entries']] | ||||
| @@ -44,13 +43,13 @@ class TestYoutubeLists(unittest.TestCase): | ||||
|     def test_issue_673(self): | ||||
|         dl = FakeYDL() | ||||
|         ie = YoutubePlaylistIE(dl) | ||||
|         result = ie.extract('PLBB231211A4F62143')[0] | ||||
|         result = ie.extract('PLBB231211A4F62143') | ||||
|         self.assertTrue(len(result['entries']) > 25) | ||||
|  | ||||
|     def test_youtube_playlist_long(self): | ||||
|         dl = FakeYDL() | ||||
|         ie = YoutubePlaylistIE(dl) | ||||
|         result = ie.extract('https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q')[0] | ||||
|         result = ie.extract('https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q') | ||||
|         self.assertIsPlaylist(result) | ||||
|         self.assertTrue(len(result['entries']) >= 799) | ||||
|  | ||||
| @@ -58,7 +57,7 @@ class TestYoutubeLists(unittest.TestCase): | ||||
|         #651 | ||||
|         dl = FakeYDL() | ||||
|         ie = YoutubePlaylistIE(dl) | ||||
|         result = ie.extract('https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC')[0] | ||||
|         result = ie.extract('https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC') | ||||
|         ytie_results = [YoutubeIE()._extract_id(url['url']) for url in result['entries']] | ||||
|         self.assertFalse('pElCt5oNDuI' in ytie_results) | ||||
|         self.assertFalse('KdPEApIVdWM' in ytie_results) | ||||
| @@ -66,7 +65,7 @@ class TestYoutubeLists(unittest.TestCase): | ||||
|     def test_youtube_playlist_empty(self): | ||||
|         dl = FakeYDL() | ||||
|         ie = YoutubePlaylistIE(dl) | ||||
|         result = ie.extract('https://www.youtube.com/playlist?list=PLtPgu7CB4gbZDA7i_euNxn75ISqxwZPYx')[0] | ||||
|         result = ie.extract('https://www.youtube.com/playlist?list=PLtPgu7CB4gbZDA7i_euNxn75ISqxwZPYx') | ||||
|         self.assertIsPlaylist(result) | ||||
|         self.assertEqual(len(result['entries']), 0) | ||||
|  | ||||
| @@ -74,7 +73,7 @@ class TestYoutubeLists(unittest.TestCase): | ||||
|         dl = FakeYDL() | ||||
|         ie = YoutubePlaylistIE(dl) | ||||
|         # TODO find a > 100 (paginating?) videos course | ||||
|         result = ie.extract('https://www.youtube.com/course?list=ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8')[0] | ||||
|         result = ie.extract('https://www.youtube.com/course?list=ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8') | ||||
|         entries = result['entries'] | ||||
|         self.assertEqual(YoutubeIE()._extract_id(entries[0]['url']), 'j9WZyLZCBzs') | ||||
|         self.assertEqual(len(entries), 25) | ||||
| @@ -84,22 +83,22 @@ class TestYoutubeLists(unittest.TestCase): | ||||
|         dl = FakeYDL() | ||||
|         ie = YoutubeChannelIE(dl) | ||||
|         #test paginated channel | ||||
|         result = ie.extract('https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w')[0] | ||||
|         result = ie.extract('https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w') | ||||
|         self.assertTrue(len(result['entries']) > 90) | ||||
|         #test autogenerated channel | ||||
|         result = ie.extract('https://www.youtube.com/channel/HCtnHdj3df7iM/videos')[0] | ||||
|         result = ie.extract('https://www.youtube.com/channel/HCtnHdj3df7iM/videos') | ||||
|         self.assertTrue(len(result['entries']) >= 18) | ||||
|  | ||||
|     def test_youtube_user(self): | ||||
|         dl = FakeYDL() | ||||
|         ie = YoutubeUserIE(dl) | ||||
|         result = ie.extract('https://www.youtube.com/user/TheLinuxFoundation')[0] | ||||
|         result = ie.extract('https://www.youtube.com/user/TheLinuxFoundation') | ||||
|         self.assertTrue(len(result['entries']) >= 320) | ||||
|  | ||||
|     def test_youtube_safe_search(self): | ||||
|         dl = FakeYDL() | ||||
|         ie = YoutubePlaylistIE(dl) | ||||
|         result = ie.extract('PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl')[0] | ||||
|         result = ie.extract('PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl') | ||||
|         self.assertEqual(len(result['entries']), 2) | ||||
|  | ||||
|     def test_youtube_show(self): | ||||
|   | ||||
| @@ -6,9 +6,6 @@ import sys | ||||
| import unittest | ||||
| sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) | ||||
|  | ||||
| from test.helper import global_setup | ||||
| global_setup() | ||||
|  | ||||
|  | ||||
| import io | ||||
| import re | ||||
|   | ||||
| @@ -1,19 +1,16 @@ | ||||
| import math | ||||
| import os | ||||
| import re | ||||
| import subprocess | ||||
| import sys | ||||
| import time | ||||
|  | ||||
| if os.name == 'nt': | ||||
|     import ctypes | ||||
|  | ||||
| from .utils import ( | ||||
|     compat_urllib_error, | ||||
|     compat_urllib_request, | ||||
|     ContentTooShortError, | ||||
|     determine_ext, | ||||
|     encodeFilename, | ||||
|     format_bytes, | ||||
|     sanitize_open, | ||||
|     timeconvert, | ||||
| ) | ||||
| @@ -56,20 +53,6 @@ class FileDownloader(object): | ||||
|         self._progress_hooks = [] | ||||
|         self.params = params | ||||
|  | ||||
|     @staticmethod | ||||
|     def format_bytes(bytes): | ||||
|         if bytes is None: | ||||
|             return 'N/A' | ||||
|         if type(bytes) is str: | ||||
|             bytes = float(bytes) | ||||
|         if bytes == 0.0: | ||||
|             exponent = 0 | ||||
|         else: | ||||
|             exponent = int(math.log(bytes, 1024.0)) | ||||
|         suffix = ['B','KiB','MiB','GiB','TiB','PiB','EiB','ZiB','YiB'][exponent] | ||||
|         converted = float(bytes) / float(1024 ** exponent) | ||||
|         return '%.2f%s' % (converted, suffix) | ||||
|  | ||||
|     @staticmethod | ||||
|     def format_seconds(seconds): | ||||
|         (mins, secs) = divmod(seconds, 60) | ||||
| @@ -120,7 +103,7 @@ class FileDownloader(object): | ||||
|     def format_speed(speed): | ||||
|         if speed is None: | ||||
|             return '%10s' % '---b/s' | ||||
|         return '%10s' % ('%s/s' % FileDownloader.format_bytes(speed)) | ||||
|         return '%10s' % ('%s/s' % format_bytes(speed)) | ||||
|  | ||||
|     @staticmethod | ||||
|     def best_block_size(elapsed_time, bytes): | ||||
| @@ -151,16 +134,8 @@ class FileDownloader(object): | ||||
|     def to_stderr(self, message): | ||||
|         self.ydl.to_screen(message) | ||||
|  | ||||
|     def to_cons_title(self, message): | ||||
|         """Set console/terminal window title to message.""" | ||||
|         if not self.params.get('consoletitle', False): | ||||
|             return | ||||
|         if os.name == 'nt' and ctypes.windll.kernel32.GetConsoleWindow(): | ||||
|             # c_wchar_p() might not be necessary if `message` is | ||||
|             # already of type unicode() | ||||
|             ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message)) | ||||
|         elif 'TERM' in os.environ: | ||||
|             self.to_screen('\033]0;%s\007' % message, skip_eol=True) | ||||
|     def to_console_title(self, message): | ||||
|         self.ydl.to_console_title(message) | ||||
|  | ||||
|     def trouble(self, *args, **kargs): | ||||
|         self.ydl.trouble(*args, **kargs) | ||||
| @@ -249,7 +224,7 @@ class FileDownloader(object): | ||||
|         else: | ||||
|             self.to_screen(u'\r%s[download] %s of %s at %s ETA %s' % | ||||
|                 (clear_line, percent_str, data_len_str, speed_str, eta_str), skip_eol=True) | ||||
|         self.to_cons_title(u'youtube-dl - %s of %s at %s ETA %s' % | ||||
|         self.to_console_title(u'youtube-dl - %s of %s at %s ETA %s' % | ||||
|                 (percent_str.strip(), data_len_str.strip(), speed_str.strip(), eta_str.strip())) | ||||
|  | ||||
|     def report_resuming_byte(self, resume_len): | ||||
| @@ -281,6 +256,61 @@ class FileDownloader(object): | ||||
|                 (clear_line, data_len_str, self.format_seconds(tot_time))) | ||||
|  | ||||
|     def _download_with_rtmpdump(self, filename, url, player_url, page_url, play_path, tc_url, live): | ||||
|         def run_rtmpdump(args): | ||||
|             start = time.time() | ||||
|             resume_percent = None | ||||
|             resume_downloaded_data_len = None | ||||
|             proc = subprocess.Popen(args, stderr=subprocess.PIPE) | ||||
|             cursor_in_new_line = True | ||||
|             proc_stderr_closed = False | ||||
|             while not proc_stderr_closed: | ||||
|                 # read line from stderr | ||||
|                 line = u'' | ||||
|                 while True: | ||||
|                     char = proc.stderr.read(1) | ||||
|                     if not char: | ||||
|                         proc_stderr_closed = True | ||||
|                         break | ||||
|                     if char in [b'\r', b'\n']: | ||||
|                         break | ||||
|                     line += char.decode('ascii', 'replace') | ||||
|                 if not line: | ||||
|                     # proc_stderr_closed is True | ||||
|                     continue | ||||
|                 mobj = re.search(r'([0-9]+\.[0-9]{3}) kB / [0-9]+\.[0-9]{2} sec \(([0-9]{1,2}\.[0-9])%\)', line) | ||||
|                 if mobj: | ||||
|                     downloaded_data_len = int(float(mobj.group(1))*1024) | ||||
|                     percent = float(mobj.group(2)) | ||||
|                     if not resume_percent: | ||||
|                         resume_percent = percent | ||||
|                         resume_downloaded_data_len = downloaded_data_len | ||||
|                     eta = self.calc_eta(start, time.time(), 100-resume_percent, percent-resume_percent) | ||||
|                     speed = self.calc_speed(start, time.time(), downloaded_data_len-resume_downloaded_data_len) | ||||
|                     data_len = None | ||||
|                     if percent > 0: | ||||
|                         data_len = int(downloaded_data_len * 100 / percent) | ||||
|                     data_len_str = u'~' + format_bytes(data_len) | ||||
|                     self.report_progress(percent, data_len_str, speed, eta) | ||||
|                     cursor_in_new_line = False | ||||
|                     self._hook_progress({ | ||||
|                         'downloaded_bytes': downloaded_data_len, | ||||
|                         'total_bytes': data_len, | ||||
|                         'tmpfilename': tmpfilename, | ||||
|                         'filename': filename, | ||||
|                         'status': 'downloading', | ||||
|                         'eta': eta, | ||||
|                         'speed': speed, | ||||
|                     }) | ||||
|                 elif self.params.get('verbose', False): | ||||
|                     if not cursor_in_new_line: | ||||
|                         self.to_screen(u'') | ||||
|                     cursor_in_new_line = True | ||||
|                     self.to_screen(u'[rtmpdump] '+line) | ||||
|             proc.wait() | ||||
|             if not cursor_in_new_line: | ||||
|                 self.to_screen(u'') | ||||
|             return proc.returncode | ||||
|  | ||||
|         self.report_destination(filename) | ||||
|         tmpfilename = self.temp_name(filename) | ||||
|         test = self.params.get('test', False) | ||||
| @@ -291,12 +321,11 @@ class FileDownloader(object): | ||||
|         except (OSError, IOError): | ||||
|             self.report_error(u'RTMP download detected but "rtmpdump" could not be run') | ||||
|             return False | ||||
|         verbosity_option = '--verbose' if self.params.get('verbose', False) else '--quiet' | ||||
|  | ||||
|         # Download using rtmpdump. rtmpdump returns exit code 2 when | ||||
|         # the connection was interrumpted and resuming appears to be | ||||
|         # possible. This is part of rtmpdump's normal usage, AFAIK. | ||||
|         basic_args = ['rtmpdump', verbosity_option, '-r', url, '-o', tmpfilename] | ||||
|         basic_args = ['rtmpdump', '--verbose', '-r', url, '-o', tmpfilename] | ||||
|         if player_url is not None: | ||||
|             basic_args += ['--swfVfy', player_url] | ||||
|         if page_url is not None: | ||||
| @@ -310,30 +339,48 @@ class FileDownloader(object): | ||||
|         if live: | ||||
|             basic_args += ['--live'] | ||||
|         args = basic_args + [[], ['--resume', '--skip', '1']][self.params.get('continuedl', False)] | ||||
|  | ||||
|         if sys.platform == 'win32' and sys.version_info < (3, 0): | ||||
|             # Windows subprocess module does not actually support Unicode | ||||
|             # on Python 2.x | ||||
|             # See http://stackoverflow.com/a/9951851/35070 | ||||
|             subprocess_encoding = sys.getfilesystemencoding() | ||||
|             args = [a.encode(subprocess_encoding, 'ignore') for a in args] | ||||
|         else: | ||||
|             subprocess_encoding = None | ||||
|  | ||||
|         if self.params.get('verbose', False): | ||||
|             if subprocess_encoding: | ||||
|                 str_args = [ | ||||
|                     a.decode(subprocess_encoding) if isinstance(a, bytes) else a | ||||
|                     for a in args] | ||||
|             else: | ||||
|                 str_args = args | ||||
|             try: | ||||
|                 import pipes | ||||
|                 shell_quote = lambda args: ' '.join(map(pipes.quote, args)) | ||||
|                 shell_quote = lambda args: ' '.join(map(pipes.quote, str_args)) | ||||
|             except ImportError: | ||||
|                 shell_quote = repr | ||||
|             self.to_screen(u'[debug] rtmpdump command line: ' + shell_quote(args)) | ||||
|         retval = subprocess.call(args) | ||||
|             self.to_screen(u'[debug] rtmpdump command line: ' + shell_quote(str_args)) | ||||
|  | ||||
|         retval = run_rtmpdump(args) | ||||
|  | ||||
|         while (retval == 2 or retval == 1) and not test: | ||||
|             prevsize = os.path.getsize(encodeFilename(tmpfilename)) | ||||
|             self.to_screen(u'\r[rtmpdump] %s bytes' % prevsize, skip_eol=True) | ||||
|             self.to_screen(u'[rtmpdump] %s bytes' % prevsize) | ||||
|             time.sleep(5.0) # This seems to be needed | ||||
|             retval = subprocess.call(basic_args + ['-e'] + [[], ['-k', '1']][retval == 1]) | ||||
|             retval = run_rtmpdump(basic_args + ['-e'] + [[], ['-k', '1']][retval == 1]) | ||||
|             cursize = os.path.getsize(encodeFilename(tmpfilename)) | ||||
|             if prevsize == cursize and retval == 1: | ||||
|                 break | ||||
|              # Some rtmp streams seem abort after ~ 99.8%. Don't complain for those | ||||
|             if prevsize == cursize and retval == 2 and cursize > 1024: | ||||
|                 self.to_screen(u'\r[rtmpdump] Could not download the whole video. This can happen for some advertisements.') | ||||
|                 self.to_screen(u'[rtmpdump] Could not download the whole video. This can happen for some advertisements.') | ||||
|                 retval = 0 | ||||
|                 break | ||||
|         if retval == 0 or (test and retval == 2): | ||||
|             fsize = os.path.getsize(encodeFilename(tmpfilename)) | ||||
|             self.to_screen(u'\r[rtmpdump] %s bytes' % fsize) | ||||
|             self.to_screen(u'[rtmpdump] %s bytes' % fsize) | ||||
|             self.try_rename(tmpfilename, filename) | ||||
|             self._hook_progress({ | ||||
|                 'downloaded_bytes': fsize, | ||||
| @@ -536,7 +583,7 @@ class FileDownloader(object): | ||||
|                 self.to_screen(u'\r[download] File is larger than max-filesize (%s bytes > %s bytes). Aborting.' % (data_len, max_data_len)) | ||||
|                 return False | ||||
|  | ||||
|         data_len_str = self.format_bytes(data_len) | ||||
|         data_len_str = format_bytes(data_len) | ||||
|         byte_counter = 0 + resume_len | ||||
|         block_size = self.params.get('buffersize', 1024) | ||||
|         start = time.time() | ||||
|   | ||||
| @@ -5,17 +5,53 @@ from __future__ import absolute_import | ||||
|  | ||||
| import errno | ||||
| import io | ||||
| import json | ||||
| import os | ||||
| import platform | ||||
| import re | ||||
| import shutil | ||||
| import subprocess | ||||
| import socket | ||||
| import sys | ||||
| import time | ||||
| import traceback | ||||
|  | ||||
| from .utils import * | ||||
| if os.name == 'nt': | ||||
|     import ctypes | ||||
|  | ||||
| from .utils import ( | ||||
|     compat_cookiejar, | ||||
|     compat_http_client, | ||||
|     compat_print, | ||||
|     compat_str, | ||||
|     compat_urllib_error, | ||||
|     compat_urllib_request, | ||||
|     ContentTooShortError, | ||||
|     date_from_str, | ||||
|     DateRange, | ||||
|     determine_ext, | ||||
|     DownloadError, | ||||
|     encodeFilename, | ||||
|     ExtractorError, | ||||
|     format_bytes, | ||||
|     locked_file, | ||||
|     make_HTTPS_handler, | ||||
|     MaxDownloadsReached, | ||||
|     PostProcessingError, | ||||
|     platform_name, | ||||
|     preferredencoding, | ||||
|     SameFileError, | ||||
|     sanitize_filename, | ||||
|     subtitles_filename, | ||||
|     takewhile_inclusive, | ||||
|     UnavailableVideoError, | ||||
|     write_json_file, | ||||
|     write_string, | ||||
|     YoutubeDLHandler, | ||||
| ) | ||||
| from .extractor import get_info_extractor, gen_extractors | ||||
| from .FileDownloader import FileDownloader | ||||
| from .version import __version__ | ||||
|  | ||||
|  | ||||
| class YoutubeDL(object): | ||||
| @@ -57,6 +93,7 @@ class YoutubeDL(object): | ||||
|     forcethumbnail:    Force printing thumbnail URL. | ||||
|     forcedescription:  Force printing description. | ||||
|     forcefilename:     Force printing final filename. | ||||
|     forcejson:         Force printing info_dict as JSON. | ||||
|     simulate:          Do not download the video files. | ||||
|     format:            Video format code. | ||||
|     format_limit:      Highest quality format to try. | ||||
| @@ -68,6 +105,7 @@ class YoutubeDL(object): | ||||
|     playlistend:       Playlist item to end at. | ||||
|     matchtitle:        Download only matching titles. | ||||
|     rejecttitle:       Reject downloads for matching titles. | ||||
|     logger:            Log messages to a logging.Logger instance. | ||||
|     logtostderr:       Log messages to stderr instead of stdout. | ||||
|     writedescription:  Write the video description to a .description file | ||||
|     writeinfojson:     Write the video description to a .info.json file | ||||
| @@ -88,9 +126,12 @@ class YoutubeDL(object): | ||||
|     noplaylist:        Download single video instead of a playlist if in doubt. | ||||
|     age_limit:         An integer representing the user's age in years. | ||||
|                        Unsuitable videos for the given age are skipped. | ||||
|     downloadarchive:   File name of a file where all downloads are recorded. | ||||
|     download_archive:   File name of a file where all downloads are recorded. | ||||
|                        Videos already present in the file are not downloaded | ||||
|                        again. | ||||
|     cookiefile:        File name where cookies should be read from and dumped to. | ||||
|     nocheckcertificate:Do not verify SSL certificates | ||||
|     proxy:             URL of the proxy server to use | ||||
|  | ||||
|     The following parameters are not used by YoutubeDL itself, they are used by | ||||
|     the FileDownloader: | ||||
| @@ -105,7 +146,7 @@ class YoutubeDL(object): | ||||
|     _num_downloads = None | ||||
|     _screen_file = None | ||||
|  | ||||
|     def __init__(self, params): | ||||
|     def __init__(self, params={}): | ||||
|         """Create a FileDownloader object with the given options.""" | ||||
|         self._ies = [] | ||||
|         self._ies_instances = {} | ||||
| @@ -128,9 +169,11 @@ class YoutubeDL(object): | ||||
|         self.params = params | ||||
|         self.fd = FileDownloader(self, self.params) | ||||
|  | ||||
|         if '%(stitle)s' in self.params['outtmpl']: | ||||
|         if '%(stitle)s' in self.params.get('outtmpl', ''): | ||||
|             self.report_warning(u'%(stitle)s is deprecated. Use the %(title)s and the --restrict-filenames flag(which also secures %(uploader)s et al) instead.') | ||||
|  | ||||
|         self._setup_opener() | ||||
|  | ||||
|     def add_info_extractor(self, ie): | ||||
|         """Add an InfoExtractor object to the end of the list.""" | ||||
|         self._ies.append(ie) | ||||
| @@ -163,7 +206,9 @@ class YoutubeDL(object): | ||||
|  | ||||
|     def to_screen(self, message, skip_eol=False): | ||||
|         """Print message to stdout if not in quiet mode.""" | ||||
|         if not self.params.get('quiet', False): | ||||
|         if self.params.get('logger'): | ||||
|             self.params['logger'].debug(message) | ||||
|         elif not self.params.get('quiet', False): | ||||
|             terminator = [u'\n', u''][skip_eol] | ||||
|             output = message + terminator | ||||
|             write_string(output, self._screen_file) | ||||
| @@ -171,14 +216,47 @@ class YoutubeDL(object): | ||||
|     def to_stderr(self, message): | ||||
|         """Print message to stderr.""" | ||||
|         assert type(message) == type(u'') | ||||
|         output = message + u'\n' | ||||
|         if 'b' in getattr(self._screen_file, 'mode', '') or sys.version_info[0] < 3: # Python 2 lies about the mode of sys.stdout/sys.stderr | ||||
|             output = output.encode(preferredencoding()) | ||||
|         sys.stderr.write(output) | ||||
|         if self.params.get('logger'): | ||||
|             self.params['logger'].error(message) | ||||
|         else: | ||||
|             output = message + u'\n' | ||||
|             if 'b' in getattr(self._screen_file, 'mode', '') or sys.version_info[0] < 3: # Python 2 lies about the mode of sys.stdout/sys.stderr | ||||
|                 output = output.encode(preferredencoding()) | ||||
|             sys.stderr.write(output) | ||||
|  | ||||
|     def fixed_template(self): | ||||
|         """Checks if the output template is fixed.""" | ||||
|         return (re.search(u'(?u)%\\(.+?\\)s', self.params['outtmpl']) is None) | ||||
|     def to_console_title(self, message): | ||||
|         if not self.params.get('consoletitle', False): | ||||
|             return | ||||
|         if os.name == 'nt' and ctypes.windll.kernel32.GetConsoleWindow(): | ||||
|             # c_wchar_p() might not be necessary if `message` is | ||||
|             # already of type unicode() | ||||
|             ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message)) | ||||
|         elif 'TERM' in os.environ: | ||||
|             write_string(u'\033]0;%s\007' % message, self._screen_file) | ||||
|  | ||||
|     def save_console_title(self): | ||||
|         if not self.params.get('consoletitle', False): | ||||
|             return | ||||
|         if 'TERM' in os.environ: | ||||
|             # Save the title on stack | ||||
|             write_string(u'\033[22;0t', self._screen_file) | ||||
|  | ||||
|     def restore_console_title(self): | ||||
|         if not self.params.get('consoletitle', False): | ||||
|             return | ||||
|         if 'TERM' in os.environ: | ||||
|             # Restore the title from stack | ||||
|             write_string(u'\033[23;0t', self._screen_file) | ||||
|  | ||||
|     def __enter__(self): | ||||
|         self.save_console_title() | ||||
|         return self | ||||
|  | ||||
|     def __exit__(self, *args): | ||||
|         self.restore_console_title() | ||||
|      | ||||
|         if self.params.get('cookiefile') is not None: | ||||
|             self.cookiejar.save() | ||||
|  | ||||
|     def trouble(self, message=None, tb=None): | ||||
|         """Determine action to take when a download problem appears. | ||||
| @@ -254,7 +332,7 @@ class YoutubeDL(object): | ||||
|         """Report file has already been fully downloaded.""" | ||||
|         try: | ||||
|             self.to_screen(u'[download] %s has already been downloaded' % file_name) | ||||
|         except (UnicodeEncodeError) as err: | ||||
|         except UnicodeEncodeError: | ||||
|             self.to_screen(u'[download] The file has already been downloaded') | ||||
|  | ||||
|     def increment_downloads(self): | ||||
| @@ -295,15 +373,17 @@ class YoutubeDL(object): | ||||
|     def _match_entry(self, info_dict): | ||||
|         """ Returns None iff the file should be downloaded """ | ||||
|  | ||||
|         title = info_dict['title'] | ||||
|         matchtitle = self.params.get('matchtitle', False) | ||||
|         if matchtitle: | ||||
|             if not re.search(matchtitle, title, re.IGNORECASE): | ||||
|                 return u'[download] "' + title + '" title did not match pattern "' + matchtitle + '"' | ||||
|         rejecttitle = self.params.get('rejecttitle', False) | ||||
|         if rejecttitle: | ||||
|             if re.search(rejecttitle, title, re.IGNORECASE): | ||||
|                 return u'"' + title + '" title matched reject pattern "' + rejecttitle + '"' | ||||
|         if 'title' in info_dict: | ||||
|             # This can happen when we're just evaluating the playlist | ||||
|             title = info_dict['title'] | ||||
|             matchtitle = self.params.get('matchtitle', False) | ||||
|             if matchtitle: | ||||
|                 if not re.search(matchtitle, title, re.IGNORECASE): | ||||
|                     return u'[download] "' + title + '" title did not match pattern "' + matchtitle + '"' | ||||
|             rejecttitle = self.params.get('rejecttitle', False) | ||||
|             if rejecttitle: | ||||
|                 if re.search(rejecttitle, title, re.IGNORECASE): | ||||
|                     return u'"' + title + '" title matched reject pattern "' + rejecttitle + '"' | ||||
|         date = info_dict.get('upload_date', None) | ||||
|         if date is not None: | ||||
|             dateRange = self.params.get('daterange', DateRange()) | ||||
| @@ -314,8 +394,8 @@ class YoutubeDL(object): | ||||
|             if age_limit < info_dict.get('age_limit', 0): | ||||
|                 return u'Skipping "' + title + '" because it is age restricted' | ||||
|         if self.in_download_archive(info_dict): | ||||
|             return (u'%(title)s has already been recorded in archive' | ||||
|                     % info_dict) | ||||
|             return (u'%s has already been recorded in archive' | ||||
|                     % info_dict.get('title', info_dict.get('id', u'video'))) | ||||
|         return None | ||||
|  | ||||
|     @staticmethod | ||||
| @@ -385,7 +465,7 @@ class YoutubeDL(object): | ||||
|         result_type = ie_result.get('_type', 'video') # If not given we suppose it's a video, support the default old system | ||||
|         if result_type == 'video': | ||||
|             self.add_extra_info(ie_result, extra_info) | ||||
|             return self.process_video_result(ie_result) | ||||
|             return self.process_video_result(ie_result, download=download) | ||||
|         elif result_type == 'url': | ||||
|             # We have to add extra_info to the results because it may be | ||||
|             # contained in a playlist | ||||
| @@ -394,7 +474,7 @@ class YoutubeDL(object): | ||||
|                                      ie_key=ie_result.get('ie_key'), | ||||
|                                      extra_info=extra_info) | ||||
|         elif result_type == 'playlist': | ||||
|             self.add_extra_info(ie_result, extra_info) | ||||
|  | ||||
|             # We process each entry in the playlist | ||||
|             playlist = ie_result.get('title', None) or ie_result.get('id', None) | ||||
|             self.to_screen(u'[download] Downloading playlist: %s' % playlist) | ||||
| @@ -424,6 +504,12 @@ class YoutubeDL(object): | ||||
|                     'webpage_url': ie_result['webpage_url'], | ||||
|                     'extractor_key': ie_result['extractor_key'], | ||||
|                 } | ||||
|  | ||||
|                 reason = self._match_entry(entry) | ||||
|                 if reason is not None: | ||||
|                     self.to_screen(u'[download] ' + reason) | ||||
|                     continue | ||||
|  | ||||
|                 entry_result = self.process_ie_result(entry, | ||||
|                                                       download=download, | ||||
|                                                       extra_info=extra) | ||||
| @@ -579,7 +665,7 @@ class YoutubeDL(object): | ||||
|  | ||||
|         # Forced printings | ||||
|         if self.params.get('forcetitle', False): | ||||
|             compat_print(info_dict['title']) | ||||
|             compat_print(info_dict['fulltitle']) | ||||
|         if self.params.get('forceid', False): | ||||
|             compat_print(info_dict['id']) | ||||
|         if self.params.get('forceurl', False): | ||||
| @@ -593,6 +679,8 @@ class YoutubeDL(object): | ||||
|             compat_print(filename) | ||||
|         if self.params.get('forceformat', False): | ||||
|             compat_print(info_dict['format']) | ||||
|         if self.params.get('forcejson', False): | ||||
|             compat_print(json.dumps(info_dict)) | ||||
|  | ||||
|         # Do nothing else if in simulate mode | ||||
|         if self.params.get('simulate', False): | ||||
| @@ -640,7 +728,7 @@ class YoutubeDL(object): | ||||
|             # subtitles download errors are already managed as troubles in relevant IE | ||||
|             # that way it will silently go on when used with unsupporting IE | ||||
|             subtitles = info_dict['subtitles'] | ||||
|             sub_format = self.params.get('subtitlesformat') | ||||
|             sub_format = self.params.get('subtitlesformat', 'srt') | ||||
|             for sub_lang in subtitles.keys(): | ||||
|                 sub = subtitles[sub_lang] | ||||
|                 if sub is None: | ||||
| @@ -655,7 +743,7 @@ class YoutubeDL(object): | ||||
|                     return | ||||
|  | ||||
|         if self.params.get('writeinfojson', False): | ||||
|             infofn = filename + u'.info.json' | ||||
|             infofn = os.path.splitext(filename)[0] + u'.info.json' | ||||
|             self.report_writeinfojson(infofn) | ||||
|             try: | ||||
|                 json_info_dict = dict((k, v) for k, v in info_dict.items() if not k in ['urlhandle']) | ||||
| @@ -706,13 +794,15 @@ class YoutubeDL(object): | ||||
|  | ||||
|     def download(self, url_list): | ||||
|         """Download a given list of URLs.""" | ||||
|         if len(url_list) > 1 and self.fixed_template(): | ||||
|         if (len(url_list) > 1 and | ||||
|                 '%' not in self.params['outtmpl'] | ||||
|                 and self.params.get('max_downloads') != 1): | ||||
|             raise SameFileError(self.params['outtmpl']) | ||||
|  | ||||
|         for url in url_list: | ||||
|             try: | ||||
|                 #It also downloads the videos | ||||
|                 videos = self.extract_info(url) | ||||
|                 self.extract_info(url) | ||||
|             except UnavailableVideoError: | ||||
|                 self.report_error(u'unable to download video') | ||||
|             except MaxDownloadsReached: | ||||
| @@ -744,11 +834,26 @@ class YoutubeDL(object): | ||||
|             except (IOError, OSError): | ||||
|                 self.report_warning(u'Unable to remove downloaded video file') | ||||
|  | ||||
|     def _make_archive_id(self, info_dict): | ||||
|         # Future-proof against any change in case | ||||
|         # and backwards compatibility with prior versions | ||||
|         extractor = info_dict.get('extractor_key') | ||||
|         if extractor is None: | ||||
|             if 'id' in info_dict: | ||||
|                 extractor = info_dict.get('ie_key')  # key in a playlist | ||||
|         if extractor is None: | ||||
|             return None  # Incomplete video information | ||||
|         return extractor.lower() + u' ' + info_dict['id'] | ||||
|  | ||||
|     def in_download_archive(self, info_dict): | ||||
|         fn = self.params.get('download_archive') | ||||
|         if fn is None: | ||||
|             return False | ||||
|         vid_id = info_dict['extractor'] + u' ' + info_dict['id'] | ||||
|  | ||||
|         vid_id = self._make_archive_id(info_dict) | ||||
|         if vid_id is None: | ||||
|             return False  # Incomplete video information | ||||
|  | ||||
|         try: | ||||
|             with locked_file(fn, 'r', encoding='utf-8') as archive_file: | ||||
|                 for line in archive_file: | ||||
| @@ -763,12 +868,15 @@ class YoutubeDL(object): | ||||
|         fn = self.params.get('download_archive') | ||||
|         if fn is None: | ||||
|             return | ||||
|         vid_id = info_dict['extractor'] + u' ' + info_dict['id'] | ||||
|         vid_id = self._make_archive_id(info_dict) | ||||
|         assert vid_id | ||||
|         with locked_file(fn, 'a', encoding='utf-8') as archive_file: | ||||
|             archive_file.write(vid_id + u'\n') | ||||
|  | ||||
|     @staticmethod | ||||
|     def format_resolution(format, default='unknown'): | ||||
|         if format.get('vcodec') == 'none': | ||||
|             return 'audio only' | ||||
|         if format.get('_resolution') is not None: | ||||
|             return format['_resolution'] | ||||
|         if format.get('height') is not None: | ||||
| @@ -781,23 +889,121 @@ class YoutubeDL(object): | ||||
|         return res | ||||
|  | ||||
|     def list_formats(self, info_dict): | ||||
|         def line(format): | ||||
|             return (u'%-20s%-10s%-12s%s' % ( | ||||
|         def format_note(fdict): | ||||
|             res = u'' | ||||
|             if fdict.get('format_note') is not None: | ||||
|                 res += fdict['format_note'] + u' ' | ||||
|             if (fdict.get('vcodec') is not None and | ||||
|                     fdict.get('vcodec') != 'none'): | ||||
|                 res += u'%-5s' % fdict['vcodec'] | ||||
|             elif fdict.get('vbr') is not None: | ||||
|                 res += u'video' | ||||
|             if fdict.get('vbr') is not None: | ||||
|                 res += u'@%4dk' % fdict['vbr'] | ||||
|             if fdict.get('acodec') is not None: | ||||
|                 if res: | ||||
|                     res += u', ' | ||||
|                 res += u'%-5s' % fdict['acodec'] | ||||
|             elif fdict.get('abr') is not None: | ||||
|                 if res: | ||||
|                     res += u', ' | ||||
|                 res += 'audio' | ||||
|             if fdict.get('abr') is not None: | ||||
|                 res += u'@%3dk' % fdict['abr'] | ||||
|             if fdict.get('filesize') is not None: | ||||
|                 if res: | ||||
|                     res += u', ' | ||||
|                 res += format_bytes(fdict['filesize']) | ||||
|             return res | ||||
|  | ||||
|         def line(format, idlen=20): | ||||
|             return ((u'%-' + compat_str(idlen + 1) + u's%-10s%-12s%s') % ( | ||||
|                 format['format_id'], | ||||
|                 format['ext'], | ||||
|                 self.format_resolution(format), | ||||
|                 format.get('format_note', ''), | ||||
|                 ) | ||||
|             ) | ||||
|                 format_note(format), | ||||
|             )) | ||||
|  | ||||
|         formats = info_dict.get('formats', [info_dict]) | ||||
|         formats_s = list(map(line, formats)) | ||||
|         idlen = max(len(u'format code'), | ||||
|                     max(len(f['format_id']) for f in formats)) | ||||
|         formats_s = [line(f, idlen) for f in formats] | ||||
|         if len(formats) > 1: | ||||
|             formats_s[0] += (' ' if formats[0].get('format_note') else '') + '(worst)' | ||||
|             formats_s[-1] += (' ' if formats[-1].get('format_note') else '') + '(best)' | ||||
|             formats_s[0] += (' ' if format_note(formats[0]) else '') + '(worst)' | ||||
|             formats_s[-1] += (' ' if format_note(formats[-1]) else '') + '(best)' | ||||
|  | ||||
|         header_line = line({ | ||||
|             'format_id': u'format code', 'ext': u'extension', | ||||
|             '_resolution': u'resolution', 'format_note': u'note'}) | ||||
|             '_resolution': u'resolution', 'format_note': u'note'}, idlen=idlen) | ||||
|         self.to_screen(u'[info] Available formats for %s:\n%s\n%s' % | ||||
|                        (info_dict['id'], header_line, u"\n".join(formats_s))) | ||||
|  | ||||
|     def urlopen(self, req): | ||||
|         """ Start an HTTP download """ | ||||
|         return self._opener.open(req) | ||||
|  | ||||
|     def print_debug_header(self): | ||||
|         if not self.params.get('verbose'): | ||||
|             return | ||||
|         write_string(u'[debug] youtube-dl version ' + __version__ + u'\n') | ||||
|         try: | ||||
|             sp = subprocess.Popen( | ||||
|                 ['git', 'rev-parse', '--short', 'HEAD'], | ||||
|                 stdout=subprocess.PIPE, stderr=subprocess.PIPE, | ||||
|                 cwd=os.path.dirname(os.path.abspath(__file__))) | ||||
|             out, err = sp.communicate() | ||||
|             out = out.decode().strip() | ||||
|             if re.match('[0-9a-f]+', out): | ||||
|                 write_string(u'[debug] Git HEAD: ' + out + u'\n') | ||||
|         except: | ||||
|             try: | ||||
|                 sys.exc_clear() | ||||
|             except: | ||||
|                 pass | ||||
|         write_string(u'[debug] Python version %s - %s' % | ||||
|                      (platform.python_version(), platform_name()) + u'\n') | ||||
|  | ||||
|         proxy_map = {} | ||||
|         for handler in self._opener.handlers: | ||||
|             if hasattr(handler, 'proxies'): | ||||
|                 proxy_map.update(handler.proxies) | ||||
|         write_string(u'[debug] Proxy map: ' + compat_str(proxy_map) + u'\n') | ||||
|  | ||||
|     def _setup_opener(self, timeout=20): | ||||
|         opts_cookiefile = self.params.get('cookiefile') | ||||
|         opts_proxy = self.params.get('proxy') | ||||
|  | ||||
|         if opts_cookiefile is None: | ||||
|             self.cookiejar = compat_cookiejar.CookieJar() | ||||
|         else: | ||||
|             self.cookiejar = compat_cookiejar.MozillaCookieJar( | ||||
|                 opts_cookiefile) | ||||
|             if os.access(opts_cookiefile, os.R_OK): | ||||
|                 self.cookiejar.load() | ||||
|  | ||||
|         cookie_processor = compat_urllib_request.HTTPCookieProcessor( | ||||
|             self.cookiejar) | ||||
|         if opts_proxy is not None: | ||||
|             if opts_proxy == '': | ||||
|                 proxies = {} | ||||
|             else: | ||||
|                 proxies = {'http': opts_proxy, 'https': opts_proxy} | ||||
|         else: | ||||
|             proxies = compat_urllib_request.getproxies() | ||||
|             # Set HTTPS proxy to HTTP one if given (https://github.com/rg3/youtube-dl/issues/805) | ||||
|             if 'http' in proxies and 'https' not in proxies: | ||||
|                 proxies['https'] = proxies['http'] | ||||
|         proxy_handler = compat_urllib_request.ProxyHandler(proxies) | ||||
|         https_handler = make_HTTPS_handler( | ||||
|             self.params.get('nocheckcertificate', False)) | ||||
|         opener = compat_urllib_request.build_opener( | ||||
|             https_handler, proxy_handler, cookie_processor, YoutubeDLHandler()) | ||||
|         # Delete the default user-agent header, which would otherwise apply in | ||||
|         # cases where our custom HTTP handler doesn't come into play | ||||
|         # (See https://github.com/rg3/youtube-dl/issues/1309 for details) | ||||
|         opener.addheaders = [] | ||||
|         self._opener = opener | ||||
|  | ||||
|         # TODO remove this global modification | ||||
|         compat_urllib_request.install_opener(opener) | ||||
|         socket.setdefaulttimeout(timeout) | ||||
|   | ||||
| @@ -34,50 +34,42 @@ __authors__  = ( | ||||
|     'Andras Elso', | ||||
|     'Jelle van der Waa', | ||||
|     'Marcin Cieślak', | ||||
|     'Anton Larionov', | ||||
|     'Takuya Tsuchida', | ||||
| ) | ||||
|  | ||||
| __license__ = 'Public Domain' | ||||
|  | ||||
| import codecs | ||||
| import collections | ||||
| import getpass | ||||
| import optparse | ||||
| import os | ||||
| import random | ||||
| import re | ||||
| import shlex | ||||
| import socket | ||||
| import subprocess | ||||
| import sys | ||||
| import traceback | ||||
| import platform | ||||
|  | ||||
|  | ||||
| from .utils import ( | ||||
|     compat_cookiejar, | ||||
|     compat_print, | ||||
|     compat_str, | ||||
|     compat_urllib_request, | ||||
|     DateRange, | ||||
|     decodeOption, | ||||
|     determine_ext, | ||||
|     DownloadError, | ||||
|     get_cachedir, | ||||
|     make_HTTPS_handler, | ||||
|     MaxDownloadsReached, | ||||
|     platform_name, | ||||
|     preferredencoding, | ||||
|     SameFileError, | ||||
|     std_headers, | ||||
|     write_string, | ||||
|     YoutubeDLHandler, | ||||
| ) | ||||
| from .update import update_self | ||||
| from .version import __version__ | ||||
| from .FileDownloader import ( | ||||
|     FileDownloader, | ||||
| ) | ||||
| from .extractor import gen_extractors | ||||
| from .version import __version__ | ||||
| from .YoutubeDL import YoutubeDL | ||||
| from .PostProcessor import ( | ||||
|     FFmpegMetadataPP, | ||||
| @@ -214,7 +206,9 @@ def parseOpts(overrideArguments=None): | ||||
|             dest='playlistend', metavar='NUMBER', help='playlist video to end at (default is last)', default=-1) | ||||
|     selection.add_option('--match-title', dest='matchtitle', metavar='REGEX',help='download only matching titles (regex or caseless sub-string)') | ||||
|     selection.add_option('--reject-title', dest='rejecttitle', metavar='REGEX',help='skip download for matching titles (regex or caseless sub-string)') | ||||
|     selection.add_option('--max-downloads', metavar='NUMBER', dest='max_downloads', help='Abort after downloading NUMBER files', default=None) | ||||
|     selection.add_option('--max-downloads', metavar='NUMBER', | ||||
|                          dest='max_downloads', type=int, default=None, | ||||
|                          help='Abort after downloading NUMBER files') | ||||
|     selection.add_option('--min-filesize', metavar='SIZE', dest='min_filesize', help="Do not download any videos smaller than SIZE (e.g. 50k or 44.6m)", default=None) | ||||
|     selection.add_option('--max-filesize', metavar='SIZE', dest='max_filesize', help="Do not download any videos larger than SIZE (e.g. 50k or 44.6m)", default=None) | ||||
|     selection.add_option('--date', metavar='DATE', dest='date', help='download only videos uploaded in this date', default=None) | ||||
| @@ -306,6 +300,9 @@ def parseOpts(overrideArguments=None): | ||||
|     verbosity.add_option('--get-format', | ||||
|             action='store_true', dest='getformat', | ||||
|             help='simulate, quiet but print output format', default=False) | ||||
|     verbosity.add_option('-j', '--dump-json', | ||||
|             action='store_true', dest='dumpjson', | ||||
|             help='simulate, quiet but print JSON information', default=False) | ||||
|     verbosity.add_option('--newline', | ||||
|             action='store_true', dest='progress_with_newline', help='output progress bar as new lines', default=False) | ||||
|     verbosity.add_option('--no-progress', | ||||
| @@ -447,19 +444,6 @@ def _real_main(argv=None): | ||||
|  | ||||
|     parser, opts, args = parseOpts(argv) | ||||
|  | ||||
|     # Open appropriate CookieJar | ||||
|     if opts.cookiefile is None: | ||||
|         jar = compat_cookiejar.CookieJar() | ||||
|     else: | ||||
|         try: | ||||
|             jar = compat_cookiejar.MozillaCookieJar(opts.cookiefile) | ||||
|             if os.access(opts.cookiefile, os.R_OK): | ||||
|                 jar.load() | ||||
|         except (IOError, OSError) as err: | ||||
|             if opts.verbose: | ||||
|                 traceback.print_exc() | ||||
|             write_string(u'ERROR: unable to open cookie file\n') | ||||
|             sys.exit(101) | ||||
|     # Set user agent | ||||
|     if opts.user_agent is not None: | ||||
|         std_headers['User-Agent'] = opts.user_agent | ||||
| @@ -491,8 +475,6 @@ def _real_main(argv=None): | ||||
|     all_urls = batchurls + args | ||||
|     all_urls = [url.strip() for url in all_urls] | ||||
|  | ||||
|     opener = _setup_opener(jar=jar, opts=opts) | ||||
|  | ||||
|     extractors = gen_extractors() | ||||
|  | ||||
|     if opts.list_extractors: | ||||
| @@ -547,7 +529,7 @@ def _real_main(argv=None): | ||||
|     if opts.retries is not None: | ||||
|         try: | ||||
|             opts.retries = int(opts.retries) | ||||
|         except (TypeError, ValueError) as err: | ||||
|         except (TypeError, ValueError): | ||||
|             parser.error(u'invalid retry count specified') | ||||
|     if opts.buffersize is not None: | ||||
|         numeric_buffersize = FileDownloader.parse_bytes(opts.buffersize) | ||||
| @@ -558,13 +540,13 @@ def _real_main(argv=None): | ||||
|         opts.playliststart = int(opts.playliststart) | ||||
|         if opts.playliststart <= 0: | ||||
|             raise ValueError(u'Playlist start must be positive') | ||||
|     except (TypeError, ValueError) as err: | ||||
|     except (TypeError, ValueError): | ||||
|         parser.error(u'invalid playlist start number specified') | ||||
|     try: | ||||
|         opts.playlistend = int(opts.playlistend) | ||||
|         if opts.playlistend != -1 and (opts.playlistend <= 0 or opts.playlistend < opts.playliststart): | ||||
|             raise ValueError(u'Playlist end must be greater than playlist start') | ||||
|     except (TypeError, ValueError) as err: | ||||
|     except (TypeError, ValueError): | ||||
|         parser.error(u'invalid playlist end number specified') | ||||
|     if opts.extractaudio: | ||||
|         if opts.audioformat not in ['best', 'aac', 'mp3', 'm4a', 'opus', 'vorbis', 'wav']: | ||||
| @@ -603,13 +585,12 @@ def _real_main(argv=None): | ||||
|                      u' file! Use "%%(ext)s" instead of %r' % | ||||
|                      determine_ext(outtmpl, u'')) | ||||
|  | ||||
|     # YoutubeDL | ||||
|     ydl = YoutubeDL({ | ||||
|     ydl_opts = { | ||||
|         'usenetrc': opts.usenetrc, | ||||
|         'username': opts.username, | ||||
|         'password': opts.password, | ||||
|         'videopassword': opts.videopassword, | ||||
|         'quiet': (opts.quiet or opts.geturl or opts.gettitle or opts.getid or opts.getthumbnail or opts.getdescription or opts.getfilename or opts.getformat), | ||||
|         'quiet': (opts.quiet or opts.geturl or opts.gettitle or opts.getid or opts.getthumbnail or opts.getdescription or opts.getfilename or opts.getformat or opts.dumpjson), | ||||
|         'forceurl': opts.geturl, | ||||
|         'forcetitle': opts.gettitle, | ||||
|         'forceid': opts.getid, | ||||
| @@ -617,8 +598,9 @@ def _real_main(argv=None): | ||||
|         'forcedescription': opts.getdescription, | ||||
|         'forcefilename': opts.getfilename, | ||||
|         'forceformat': opts.getformat, | ||||
|         'forcejson': opts.dumpjson, | ||||
|         'simulate': opts.simulate, | ||||
|         'skip_download': (opts.skip_download or opts.simulate or opts.geturl or opts.gettitle or opts.getid or opts.getthumbnail or opts.getdescription or opts.getfilename or opts.getformat), | ||||
|         'skip_download': (opts.skip_download or opts.simulate or opts.geturl or opts.gettitle or opts.getid or opts.getthumbnail or opts.getdescription or opts.getfilename or opts.getformat or opts.dumpjson), | ||||
|         'format': opts.format, | ||||
|         'format_limit': opts.format_limit, | ||||
|         'listformats': opts.listformats, | ||||
| @@ -667,102 +649,45 @@ def _real_main(argv=None): | ||||
|         'youtube_print_sig_code': opts.youtube_print_sig_code, | ||||
|         'age_limit': opts.age_limit, | ||||
|         'download_archive': opts.download_archive, | ||||
|         }) | ||||
|         'cookiefile': opts.cookiefile, | ||||
|         'nocheckcertificate': opts.no_check_certificate, | ||||
|     } | ||||
|  | ||||
|     with YoutubeDL(ydl_opts) as ydl: | ||||
|         ydl.print_debug_header() | ||||
|         ydl.add_default_info_extractors() | ||||
|  | ||||
|         # PostProcessors | ||||
|         # Add the metadata pp first, the other pps will copy it | ||||
|         if opts.addmetadata: | ||||
|             ydl.add_post_processor(FFmpegMetadataPP()) | ||||
|         if opts.extractaudio: | ||||
|             ydl.add_post_processor(FFmpegExtractAudioPP(preferredcodec=opts.audioformat, preferredquality=opts.audioquality, nopostoverwrites=opts.nopostoverwrites)) | ||||
|         if opts.recodevideo: | ||||
|             ydl.add_post_processor(FFmpegVideoConvertor(preferedformat=opts.recodevideo)) | ||||
|         if opts.embedsubtitles: | ||||
|             ydl.add_post_processor(FFmpegEmbedSubtitlePP(subtitlesformat=opts.subtitlesformat)) | ||||
|  | ||||
|         # Update version | ||||
|         if opts.update_self: | ||||
|             update_self(ydl.to_screen, opts.verbose) | ||||
|  | ||||
|         # Maybe do nothing | ||||
|         if len(all_urls) < 1: | ||||
|             if not opts.update_self: | ||||
|                 parser.error(u'you must provide at least one URL') | ||||
|             else: | ||||
|                 sys.exit() | ||||
|  | ||||
|     if opts.verbose: | ||||
|         write_string(u'[debug] youtube-dl version ' + __version__ + u'\n') | ||||
|         try: | ||||
|             sp = subprocess.Popen( | ||||
|                 ['git', 'rev-parse', '--short', 'HEAD'], | ||||
|                 stdout=subprocess.PIPE, stderr=subprocess.PIPE, | ||||
|                 cwd=os.path.dirname(os.path.abspath(__file__))) | ||||
|             out, err = sp.communicate() | ||||
|             out = out.decode().strip() | ||||
|             if re.match('[0-9a-f]+', out): | ||||
|                 write_string(u'[debug] Git HEAD: ' + out + u'\n') | ||||
|         except: | ||||
|             try: | ||||
|                 sys.exc_clear() | ||||
|             except: | ||||
|                 pass | ||||
|         write_string(u'[debug] Python version %s - %s' %(platform.python_version(), platform_name()) + u'\n') | ||||
|  | ||||
|         proxy_map = {} | ||||
|         for handler in opener.handlers: | ||||
|             if hasattr(handler, 'proxies'): | ||||
|                 proxy_map.update(handler.proxies) | ||||
|         write_string(u'[debug] Proxy map: ' + compat_str(proxy_map) + u'\n') | ||||
|  | ||||
|     ydl.add_default_info_extractors() | ||||
|  | ||||
|     # PostProcessors | ||||
|     # Add the metadata pp first, the other pps will copy it | ||||
|     if opts.addmetadata: | ||||
|         ydl.add_post_processor(FFmpegMetadataPP()) | ||||
|     if opts.extractaudio: | ||||
|         ydl.add_post_processor(FFmpegExtractAudioPP(preferredcodec=opts.audioformat, preferredquality=opts.audioquality, nopostoverwrites=opts.nopostoverwrites)) | ||||
|     if opts.recodevideo: | ||||
|         ydl.add_post_processor(FFmpegVideoConvertor(preferedformat=opts.recodevideo)) | ||||
|     if opts.embedsubtitles: | ||||
|         ydl.add_post_processor(FFmpegEmbedSubtitlePP(subtitlesformat=opts.subtitlesformat)) | ||||
|  | ||||
|     # Update version | ||||
|     if opts.update_self: | ||||
|         update_self(ydl.to_screen, opts.verbose) | ||||
|  | ||||
|     # Maybe do nothing | ||||
|     if len(all_urls) < 1: | ||||
|         if not opts.update_self: | ||||
|             parser.error(u'you must provide at least one URL') | ||||
|         else: | ||||
|             sys.exit() | ||||
|  | ||||
|     try: | ||||
|         retcode = ydl.download(all_urls) | ||||
|     except MaxDownloadsReached: | ||||
|         ydl.to_screen(u'--max-download limit reached, aborting.') | ||||
|         retcode = 101 | ||||
|  | ||||
|     # Dump cookie jar if requested | ||||
|     if opts.cookiefile is not None: | ||||
|         try: | ||||
|             jar.save() | ||||
|         except (IOError, OSError): | ||||
|             sys.exit(u'ERROR: unable to save cookie jar') | ||||
|             retcode = ydl.download(all_urls) | ||||
|         except MaxDownloadsReached: | ||||
|             ydl.to_screen(u'--max-download limit reached, aborting.') | ||||
|             retcode = 101 | ||||
|  | ||||
|     sys.exit(retcode) | ||||
|  | ||||
|  | ||||
| def _setup_opener(jar=None, opts=None, timeout=300): | ||||
|     if opts is None: | ||||
|         FakeOptions = collections.namedtuple( | ||||
|             'FakeOptions', ['proxy', 'no_check_certificate']) | ||||
|         opts = FakeOptions(proxy=None, no_check_certificate=False) | ||||
|  | ||||
|     cookie_processor = compat_urllib_request.HTTPCookieProcessor(jar) | ||||
|     if opts.proxy is not None: | ||||
|         if opts.proxy == '': | ||||
|             proxies = {} | ||||
|         else: | ||||
|             proxies = {'http': opts.proxy, 'https': opts.proxy} | ||||
|     else: | ||||
|         proxies = compat_urllib_request.getproxies() | ||||
|         # Set HTTPS proxy to HTTP one if given (https://github.com/rg3/youtube-dl/issues/805) | ||||
|         if 'http' in proxies and 'https' not in proxies: | ||||
|             proxies['https'] = proxies['http'] | ||||
|     proxy_handler = compat_urllib_request.ProxyHandler(proxies) | ||||
|     https_handler = make_HTTPS_handler(opts) | ||||
|     opener = compat_urllib_request.build_opener( | ||||
|         https_handler, proxy_handler, cookie_processor, YoutubeDLHandler()) | ||||
|     # Delete the default user-agent header, which would otherwise apply in | ||||
|     # cases where our custom HTTP handler doesn't come into play | ||||
|     # (See https://github.com/rg3/youtube-dl/issues/1309 for details) | ||||
|     opener.addheaders = [] | ||||
|     compat_urllib_request.install_opener(opener) | ||||
|     socket.setdefaulttimeout(timeout) | ||||
|     return opener | ||||
|  | ||||
|  | ||||
| def main(argv=None): | ||||
|     try: | ||||
|         _real_main(argv) | ||||
|   | ||||
| @@ -1,5 +1,6 @@ | ||||
| from .appletrailers import AppleTrailersIE | ||||
| from .addanime import AddAnimeIE | ||||
| from .anitube import AnitubeIE | ||||
| from .archiveorg import ArchiveOrgIE | ||||
| from .ard import ARDIE | ||||
| from .arte import ( | ||||
| @@ -10,7 +11,7 @@ from .arte import ( | ||||
| ) | ||||
| from .auengine import AUEngineIE | ||||
| from .bambuser import BambuserIE, BambuserChannelIE | ||||
| from .bandcamp import BandcampIE | ||||
| from .bandcamp import BandcampIE, BandcampAlbumIE | ||||
| from .bliptv import BlipTVIE, BlipTVUserIE | ||||
| from .bloomberg import BloombergIE | ||||
| from .breakcom import BreakIE | ||||
| @@ -19,12 +20,14 @@ from .c56 import C56IE | ||||
| from .canalplus import CanalplusIE | ||||
| from .canalc2 import Canalc2IE | ||||
| from .cinemassacre import CinemassacreIE | ||||
| from .clipfish import ClipfishIE | ||||
| from .cnn import CNNIE | ||||
| from .collegehumor import CollegeHumorIE | ||||
| from .comedycentral import ComedyCentralIE | ||||
| from .comedycentral import ComedyCentralIE, ComedyCentralShowsIE | ||||
| from .condenast import CondeNastIE | ||||
| from .criterion import CriterionIE | ||||
| from .cspan import CSpanIE | ||||
| from .d8 import D8IE | ||||
| from .dailymotion import ( | ||||
|     DailymotionIE, | ||||
|     DailymotionPlaylistIE, | ||||
| @@ -80,7 +83,7 @@ from .keezmovies import KeezMoviesIE | ||||
| from .kickstarter import KickStarterIE | ||||
| from .keek import KeekIE | ||||
| from .liveleak import LiveLeakIE | ||||
| from .livestream import LivestreamIE | ||||
| from .livestream import LivestreamIE, LivestreamOriginalIE | ||||
| from .metacafe import MetacafeIE | ||||
| from .metacritic import MetacriticIE | ||||
| from .mit import TechTVMITIE, MITIE | ||||
| @@ -96,6 +99,7 @@ from .nba import NBAIE | ||||
| from .nbc import NBCNewsIE | ||||
| from .newgrounds import NewgroundsIE | ||||
| from .nhl import NHLIE, NHLVideocenterIE | ||||
| from .niconico import NiconicoIE | ||||
| from .nowvideo import NowVideoIE | ||||
| from .ooyala import OoyalaIE | ||||
| from .orf import ORFIE | ||||
| @@ -116,19 +120,24 @@ from .slashdot import SlashdotIE | ||||
| from .slideshare import SlideshareIE | ||||
| from .sohu import SohuIE | ||||
| from .soundcloud import SoundcloudIE, SoundcloudSetIE, SoundcloudUserIE | ||||
| from .southparkstudios import SouthParkStudiosIE | ||||
| from .southparkstudios import ( | ||||
|     SouthParkStudiosIE, | ||||
|     SouthparkDeIE, | ||||
| ) | ||||
| from .space import SpaceIE | ||||
| from .spankwire import SpankwireIE | ||||
| from .spiegel import SpiegelIE | ||||
| from .stanfordoc import StanfordOpenClassroomIE | ||||
| from .statigram import StatigramIE | ||||
| from .steam import SteamIE | ||||
| from .streamcloud import StreamcloudIE | ||||
| from .sztvhu import SztvHuIE | ||||
| from .teamcoco import TeamcocoIE | ||||
| from .techtalks import TechTalksIE | ||||
| from .ted import TEDIE | ||||
| from .tf1 import TF1IE | ||||
| from .thisav import ThisAVIE | ||||
| from .toutv import TouTvIE | ||||
| from .traileraddict import TrailerAddictIE | ||||
| from .trilulilu import TriluliluIE | ||||
| from .tube8 import Tube8IE | ||||
| @@ -149,6 +158,7 @@ from .videofyme import VideofyMeIE | ||||
| from .videopremium import VideoPremiumIE | ||||
| from .vimeo import VimeoIE, VimeoChannelIE | ||||
| from .vine import VineIE | ||||
| from .viki import VikiIE | ||||
| from .vk import VKIE | ||||
| from .wat import WatIE | ||||
| from .websurg import WeBSurgIE | ||||
| @@ -176,6 +186,7 @@ from .youtube import ( | ||||
|     YoutubeTruncatedURLIE, | ||||
|     YoutubeWatchLaterIE, | ||||
|     YoutubeFavouritesIE, | ||||
|     YoutubeHistoryIE, | ||||
| ) | ||||
| from .zdf import ZDFIE | ||||
|  | ||||
|   | ||||
							
								
								
									
										55
									
								
								youtube_dl/extractor/anitube.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										55
									
								
								youtube_dl/extractor/anitube.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,55 @@ | ||||
| import re | ||||
| import xml.etree.ElementTree | ||||
|  | ||||
| from .common import InfoExtractor | ||||
|  | ||||
|  | ||||
| class AnitubeIE(InfoExtractor): | ||||
|     IE_NAME = u'anitube.se' | ||||
|     _VALID_URL = r'https?://(?:www\.)?anitube\.se/video/(?P<id>\d+)' | ||||
|  | ||||
|     _TEST = { | ||||
|         u'url': u'http://www.anitube.se/video/36621', | ||||
|         u'md5': u'59d0eeae28ea0bc8c05e7af429998d43', | ||||
|         u'file': u'36621.mp4', | ||||
|         u'info_dict': { | ||||
|             u'id': u'36621', | ||||
|             u'ext': u'mp4', | ||||
|             u'title': u'Recorder to Randoseru 01', | ||||
|         }, | ||||
|         u'skip': u'Blocked in the US', | ||||
|     } | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         video_id = mobj.group('id') | ||||
|  | ||||
|         webpage = self._download_webpage(url, video_id) | ||||
|         key = self._html_search_regex(r'http://www\.anitube\.se/embed/([A-Za-z0-9_-]*)', | ||||
|                                       webpage, u'key') | ||||
|  | ||||
|         webpage_config = self._download_webpage('http://www.anitube.se/nuevo/econfig.php?key=%s' % key, | ||||
|                                                 key) | ||||
|         config_xml = xml.etree.ElementTree.fromstring(webpage_config.encode('utf-8')) | ||||
|  | ||||
|         video_title = config_xml.find('title').text | ||||
|  | ||||
|         formats = [] | ||||
|         video_url = config_xml.find('file') | ||||
|         if video_url is not None: | ||||
|             formats.append({ | ||||
|                 'format_id': 'sd', | ||||
|                 'url': video_url.text, | ||||
|             }) | ||||
|         video_url = config_xml.find('filehd') | ||||
|         if video_url is not None: | ||||
|             formats.append({ | ||||
|                 'format_id': 'hd', | ||||
|                 'url': video_url.text, | ||||
|             }) | ||||
|  | ||||
|         return { | ||||
|             'id': video_id, | ||||
|             'title': video_title, | ||||
|             'formats': formats | ||||
|         } | ||||
| @@ -69,7 +69,7 @@ class ArteTvIE(InfoExtractor): | ||||
|             lang = mobj.group('lang') | ||||
|             return self._extract_liveweb(url, name, lang) | ||||
|  | ||||
|         if re.search(self._LIVE_URL, video_id) is not None: | ||||
|         if re.search(self._LIVE_URL, url) is not None: | ||||
|             raise ExtractorError(u'Arte live streams are not yet supported, sorry') | ||||
|             # self.extractLiveStream(url) | ||||
|             # return | ||||
| @@ -115,7 +115,7 @@ class ArteTvIE(InfoExtractor): | ||||
|         event_doc = config_doc.find('event') | ||||
|         url_node = event_doc.find('video').find('urlHd') | ||||
|         if url_node is None: | ||||
|             url_node = video_doc.find('urlSd') | ||||
|             url_node = event_doc.find('urlSd') | ||||
|  | ||||
|         return {'id': video_id, | ||||
|                 'title': event_doc.find('name%s' % lang.capitalize()).text, | ||||
|   | ||||
| @@ -1,10 +1,10 @@ | ||||
| import os.path | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     compat_urllib_parse, | ||||
|     compat_urllib_parse_urlparse, | ||||
|     determine_ext, | ||||
|     ExtractorError, | ||||
| ) | ||||
|  | ||||
| class AUEngineIE(InfoExtractor): | ||||
| @@ -25,22 +25,25 @@ class AUEngineIE(InfoExtractor): | ||||
|         title = self._html_search_regex(r'<title>(?P<title>.+?)</title>', | ||||
|                 webpage, u'title') | ||||
|         title = title.strip() | ||||
|         links = re.findall(r'[^A-Za-z0-9]?(?:file|url):\s*["\'](http[^\'"&]*)', webpage) | ||||
|         links = [compat_urllib_parse.unquote(l) for l in links] | ||||
|         links = re.findall(r'\s(?:file|url):\s*["\']([^\'"]+)["\']', webpage) | ||||
|         links = map(compat_urllib_parse.unquote, links) | ||||
|  | ||||
|         thumbnail = None | ||||
|         video_url = None | ||||
|         for link in links: | ||||
|             root, pathext = os.path.splitext(compat_urllib_parse_urlparse(link).path) | ||||
|             if pathext == '.png': | ||||
|             if link.endswith('.png'): | ||||
|                 thumbnail = link | ||||
|             elif pathext == '.mp4': | ||||
|                 url = link | ||||
|                 ext = pathext | ||||
|             elif '/videos/' in link: | ||||
|                 video_url = link | ||||
|         if not video_url: | ||||
|             raise ExtractorError(u'Could not find video URL') | ||||
|         ext = u'.' + determine_ext(video_url) | ||||
|         if ext == title[-len(ext):]: | ||||
|             title = title[:-len(ext)] | ||||
|         ext = ext[1:] | ||||
|         return [{ | ||||
|  | ||||
|         return { | ||||
|             'id':        video_id, | ||||
|             'url':       url, | ||||
|             'ext':       ext, | ||||
|             'url':       video_url, | ||||
|             'title':     title, | ||||
|             'thumbnail': thumbnail, | ||||
|         }] | ||||
|         } | ||||
|   | ||||
| @@ -15,7 +15,8 @@ class BambuserIE(InfoExtractor): | ||||
|  | ||||
|     _TEST = { | ||||
|         u'url': u'http://bambuser.com/v/4050584', | ||||
|         u'md5': u'fba8f7693e48fd4e8641b3fd5539a641', | ||||
|         # MD5 seems to be flaky, see https://travis-ci.org/rg3/youtube-dl/jobs/14051016#L388 | ||||
|         #u'md5': u'fba8f7693e48fd4e8641b3fd5539a641', | ||||
|         u'info_dict': { | ||||
|             u'id': u'4050584', | ||||
|             u'ext': u'flv', | ||||
| @@ -24,6 +25,11 @@ class BambuserIE(InfoExtractor): | ||||
|             u'uploader': u'pixelversity', | ||||
|             u'uploader_id': u'344706', | ||||
|         }, | ||||
|         u'params': { | ||||
|             # It doesn't respect the 'Range' header, it would download the whole video | ||||
|             # caused the travis builds to fail: https://travis-ci.org/rg3/youtube-dl/jobs/14493845#L59 | ||||
|             u'skip_download': True, | ||||
|         }, | ||||
|     } | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|   | ||||
| @@ -3,13 +3,16 @@ import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     compat_str, | ||||
|     compat_urlparse, | ||||
|     ExtractorError, | ||||
| ) | ||||
|  | ||||
|  | ||||
| class BandcampIE(InfoExtractor): | ||||
|     IE_NAME = u'Bandcamp' | ||||
|     _VALID_URL = r'http://.*?\.bandcamp\.com/track/(?P<title>.*)' | ||||
|     _TEST = { | ||||
|     _TESTS = [{ | ||||
|         u'url': u'http://youtube-dl.bandcamp.com/track/youtube-dl-test-song', | ||||
|         u'file': u'1812978515.mp3', | ||||
|         u'md5': u'cdeb30cdae1921719a3cbcab696ef53c', | ||||
| @@ -17,7 +20,7 @@ class BandcampIE(InfoExtractor): | ||||
|             u"title": u"youtube-dl test song \"'/\\\u00e4\u21ad" | ||||
|         }, | ||||
|         u'skip': u'There is a limit of 200 free downloads / month for the test song' | ||||
|     } | ||||
|     }] | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
| @@ -26,6 +29,23 @@ class BandcampIE(InfoExtractor): | ||||
|         # We get the link to the free download page | ||||
|         m_download = re.search(r'freeDownloadPage: "(.*?)"', webpage) | ||||
|         if m_download is None: | ||||
|             m_trackinfo = re.search(r'trackinfo: (.+),\s*?\n', webpage) | ||||
|         if m_trackinfo: | ||||
|             json_code = m_trackinfo.group(1) | ||||
|             data = json.loads(json_code) | ||||
|  | ||||
|             for d in data: | ||||
|                 formats = [{ | ||||
|                     'format_id': 'format_id', | ||||
|                     'url': format_url, | ||||
|                     'ext': format_id.partition('-')[0] | ||||
|                 } for format_id, format_url in sorted(d['file'].items())] | ||||
|                 return { | ||||
|                     'id': compat_str(d['id']), | ||||
|                     'title': d['title'], | ||||
|                     'formats': formats, | ||||
|                 } | ||||
|         else: | ||||
|             raise ExtractorError(u'No free songs found') | ||||
|  | ||||
|         download_link = m_download.group(1) | ||||
| @@ -61,3 +81,49 @@ class BandcampIE(InfoExtractor): | ||||
|                       } | ||||
|  | ||||
|         return [track_info] | ||||
|  | ||||
|  | ||||
| class BandcampAlbumIE(InfoExtractor): | ||||
|     IE_NAME = u'Bandcamp:album' | ||||
|     _VALID_URL = r'http://.*?\.bandcamp\.com/album/(?P<title>.*)' | ||||
|  | ||||
|     _TEST = { | ||||
|         u'url': u'http://blazo.bandcamp.com/album/jazz-format-mixtape-vol-1', | ||||
|         u'playlist': [ | ||||
|             { | ||||
|                 u'file': u'1353101989.mp3', | ||||
|                 u'md5': u'39bc1eded3476e927c724321ddf116cf', | ||||
|                 u'info_dict': { | ||||
|                     u'title': u'Intro', | ||||
|                 } | ||||
|             }, | ||||
|             { | ||||
|                 u'file': u'38097443.mp3', | ||||
|                 u'md5': u'1a2c32e2691474643e912cc6cd4bffaa', | ||||
|                 u'info_dict': { | ||||
|                     u'title': u'Kero One - Keep It Alive (Blazo remix)', | ||||
|                 } | ||||
|             }, | ||||
|         ], | ||||
|         u'params': { | ||||
|             u'playlistend': 2 | ||||
|         }, | ||||
|         u'skip': u'Bancamp imposes download limits. See test_playlists:test_bandcamp_album for the playlist test' | ||||
|     } | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         title = mobj.group('title') | ||||
|         webpage = self._download_webpage(url, title) | ||||
|         tracks_paths = re.findall(r'<a href="(.*?)" itemprop="url">', webpage) | ||||
|         if not tracks_paths: | ||||
|             raise ExtractorError(u'The page doesn\'t contain any track') | ||||
|         entries = [ | ||||
|             self.url_result(compat_urlparse.urljoin(url, t_path), ie=BandcampIE.ie_key()) | ||||
|             for t_path in tracks_paths] | ||||
|         title = self._search_regex(r'album_title : "(.*?)"', webpage, u'title') | ||||
|         return { | ||||
|             '_type': 'playlist', | ||||
|             'title': title, | ||||
|             'entries': entries, | ||||
|         } | ||||
|   | ||||
| @@ -75,16 +75,22 @@ class BrightcoveIE(InfoExtractor): | ||||
|         params = {'flashID': object_doc.attrib['id'], | ||||
|                   'playerID': find_xpath_attr(object_doc, './param', 'name', 'playerID').attrib['value'], | ||||
|                   } | ||||
|         playerKey = find_xpath_attr(object_doc, './param', 'name', 'playerKey') | ||||
|         def find_param(name): | ||||
|             node = find_xpath_attr(object_doc, './param', 'name', name) | ||||
|             if node is not None: | ||||
|                 return node.attrib['value'] | ||||
|             return None | ||||
|         playerKey = find_param('playerKey') | ||||
|         # Not all pages define this value | ||||
|         if playerKey is not None: | ||||
|             params['playerKey'] = playerKey.attrib['value'] | ||||
|         videoPlayer = find_xpath_attr(object_doc, './param', 'name', '@videoPlayer') | ||||
|             params['playerKey'] = playerKey | ||||
|         # The three fields hold the id of the video | ||||
|         videoPlayer = find_param('@videoPlayer') or find_param('videoId') or find_param('videoID') | ||||
|         if videoPlayer is not None: | ||||
|             params['@videoPlayer'] = videoPlayer.attrib['value'] | ||||
|         linkBase = find_xpath_attr(object_doc, './param', 'name', 'linkBaseURL') | ||||
|             params['@videoPlayer'] = videoPlayer | ||||
|         linkBase = find_param('linkBaseURL') | ||||
|         if linkBase is not None: | ||||
|             params['linkBaseURL'] = linkBase.attrib['value'] | ||||
|             params['linkBaseURL'] = linkBase | ||||
|         data = compat_urllib_parse.urlencode(params) | ||||
|         return cls._FEDERATED_URL_TEMPLATE % data | ||||
|  | ||||
|   | ||||
| @@ -5,6 +5,7 @@ import xml.etree.ElementTree | ||||
| from .common import InfoExtractor | ||||
| from ..utils import unified_strdate | ||||
|  | ||||
|  | ||||
| class CanalplusIE(InfoExtractor): | ||||
|     _VALID_URL = r'https?://(www\.canalplus\.fr/.*?/(?P<path>.*)|player\.canalplus\.fr/#/(?P<id>\d+))' | ||||
|     _VIDEO_INFO_TEMPLATE = 'http://service.canal-plus.com/video/rest/getVideosLiees/cplus/%s' | ||||
| @@ -25,7 +26,7 @@ class CanalplusIE(InfoExtractor): | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         video_id = mobj.group('id') | ||||
|         video_id = mobj.groupdict().get('id') | ||||
|         if video_id is None: | ||||
|             webpage = self._download_webpage(url, mobj.group('path')) | ||||
|             video_id = self._search_regex(r'videoId = "(\d+)";', webpage, u'video id') | ||||
|   | ||||
| @@ -12,27 +12,21 @@ class CinemassacreIE(InfoExtractor): | ||||
|     _TESTS = [{ | ||||
|         u'url': u'http://cinemassacre.com/2012/11/10/avgn-the-movie-trailer/', | ||||
|         u'file': u'19911.flv', | ||||
|         u'md5': u'f9bb7ede54d1229c9846e197b4737e06', | ||||
|         u'info_dict': { | ||||
|             u'upload_date': u'20121110', | ||||
|             u'title': u'“Angry Video Game Nerd: The Movie” – Trailer', | ||||
|             u'description': u'md5:fb87405fcb42a331742a0dce2708560b', | ||||
|         }, | ||||
|         u'params': { | ||||
|             # rtmp download | ||||
|             u'skip_download': True, | ||||
|         }, | ||||
|         } | ||||
|     }, | ||||
|     { | ||||
|         u'url': u'http://cinemassacre.com/2013/10/02/the-mummys-hand-1940', | ||||
|         u'file': u'521be8ef82b16.flv', | ||||
|         u'md5': u'9509ee44dcaa7c1068604817c19a9e50', | ||||
|         u'info_dict': { | ||||
|             u'upload_date': u'20131002', | ||||
|             u'title': u'The Mummy’s Hand (1940)', | ||||
|         }, | ||||
|         u'params': { | ||||
|             # rtmp download | ||||
|             u'skip_download': True, | ||||
|         }, | ||||
|         } | ||||
|     }] | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|   | ||||
							
								
								
									
										53
									
								
								youtube_dl/extractor/clipfish.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										53
									
								
								youtube_dl/extractor/clipfish.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,53 @@ | ||||
| import re | ||||
| import time | ||||
| import xml.etree.ElementTree | ||||
|  | ||||
| from .common import InfoExtractor | ||||
|  | ||||
|  | ||||
| class ClipfishIE(InfoExtractor): | ||||
|     IE_NAME = u'clipfish' | ||||
|  | ||||
|     _VALID_URL = r'^https?://(?:www\.)?clipfish\.de/.*?/video/(?P<id>[0-9]+)/' | ||||
|     _TEST = { | ||||
|         u'url': u'http://www.clipfish.de/special/supertalent/video/4028320/supertalent-2013-ivana-opacak-singt-nobodys-perfect/', | ||||
|         u'file': u'4028320.f4v', | ||||
|         u'md5': u'5e38bda8c329fbfb42be0386a3f5a382', | ||||
|         u'info_dict': { | ||||
|             u'title': u'Supertalent 2013: Ivana Opacak singt Nobody\'s Perfect', | ||||
|             u'duration': 399, | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         video_id = mobj.group(1) | ||||
|  | ||||
|         info_url = ('http://www.clipfish.de/devxml/videoinfo/%s?ts=%d' % | ||||
|                     (video_id, int(time.time()))) | ||||
|         info_xml = self._download_webpage( | ||||
|             info_url, video_id, note=u'Downloading info page') | ||||
|         doc = xml.etree.ElementTree.fromstring(info_xml) | ||||
|         title = doc.find('title').text | ||||
|         video_url = doc.find('filename').text | ||||
|         thumbnail = doc.find('imageurl').text | ||||
|         duration_str = doc.find('duration').text | ||||
|         m = re.match( | ||||
|             r'^(?P<hours>[0-9]+):(?P<minutes>[0-9]{2}):(?P<seconds>[0-9]{2}):(?P<ms>[0-9]*)$', | ||||
|             duration_str) | ||||
|         if m: | ||||
|             duration = ( | ||||
|                 (int(m.group('hours')) * 60 * 60) + | ||||
|                 (int(m.group('minutes')) * 60) + | ||||
|                 (int(m.group('seconds'))) | ||||
|             ) | ||||
|         else: | ||||
|             duration = None | ||||
|  | ||||
|         return { | ||||
|             'id': video_id, | ||||
|             'title': title, | ||||
|             'url': video_url, | ||||
|             'thumbnail': thumbnail, | ||||
|             'duration': duration, | ||||
|         } | ||||
| @@ -1,5 +1,4 @@ | ||||
| import re | ||||
| import xml.etree.ElementTree | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
| @@ -46,11 +45,10 @@ class CollegeHumorIE(InfoExtractor): | ||||
|  | ||||
|         self.report_extraction(video_id) | ||||
|         xmlUrl = 'http://www.collegehumor.com/moogaloop/video/' + video_id | ||||
|         metaXml = self._download_webpage(xmlUrl, video_id, | ||||
|         mdoc = self._download_xml(xmlUrl, video_id, | ||||
|                                          u'Downloading info XML', | ||||
|                                          u'Unable to download video info XML') | ||||
|  | ||||
|         mdoc = xml.etree.ElementTree.fromstring(metaXml) | ||||
|         try: | ||||
|             videoNode = mdoc.findall('./video')[0] | ||||
|             youtubeIdNode = videoNode.find('./youtubeID') | ||||
| @@ -65,16 +63,13 @@ class CollegeHumorIE(InfoExtractor): | ||||
|  | ||||
|         if next_url.endswith(u'manifest.f4m'): | ||||
|             manifest_url = next_url + '?hdcore=2.10.3' | ||||
|             manifestXml = self._download_webpage(manifest_url, video_id, | ||||
|             adoc = self._download_xml(manifest_url, video_id, | ||||
|                                          u'Downloading XML manifest', | ||||
|                                          u'Unable to download video info XML') | ||||
|  | ||||
|             adoc = xml.etree.ElementTree.fromstring(manifestXml) | ||||
|             try: | ||||
|                 media_node = adoc.findall('./{http://ns.adobe.com/f4m/1.0}media')[0] | ||||
|                 node_id = media_node.attrib['url'] | ||||
|                 video_id = adoc.findall('./{http://ns.adobe.com/f4m/1.0}id')[0].text | ||||
|             except IndexError as err: | ||||
|             except IndexError: | ||||
|                 raise ExtractorError(u'Invalid manifest file') | ||||
|             url_pr = compat_urllib_parse_urlparse(info['thumbnail']) | ||||
|             info['url'] = url_pr.scheme + '://' + url_pr.netloc + video_id[:-2].replace('.csmil','').replace(',','') | ||||
|   | ||||
| @@ -2,6 +2,7 @@ import re | ||||
| import xml.etree.ElementTree | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from .mtv import MTVIE, _media_xml_tag | ||||
| from ..utils import ( | ||||
|     compat_str, | ||||
|     compat_urllib_parse, | ||||
| @@ -11,7 +12,37 @@ from ..utils import ( | ||||
| ) | ||||
|  | ||||
|  | ||||
| class ComedyCentralIE(InfoExtractor): | ||||
| class ComedyCentralIE(MTVIE): | ||||
|     _VALID_URL = r'http://www.comedycentral.com/(video-clips|episodes|cc-studios)/(?P<title>.*)' | ||||
|     _FEED_URL = u'http://comedycentral.com/feeds/mrss/' | ||||
|  | ||||
|     _TEST = { | ||||
|         u'url': u'http://www.comedycentral.com/video-clips/kllhuv/stand-up-greg-fitzsimmons--uncensored---too-good-of-a-mother', | ||||
|         u'md5': u'4167875aae411f903b751a21f357f1ee', | ||||
|         u'info_dict': { | ||||
|             u'id': u'cef0cbb3-e776-4bc9-b62e-8016deccb354', | ||||
|             u'ext': u'mp4', | ||||
|             u'title': u'Uncensored - Greg Fitzsimmons - Too Good of a Mother', | ||||
|             u'description': u'After a certain point, breastfeeding becomes c**kblocking.', | ||||
|         }, | ||||
|     } | ||||
|     # Overwrite MTVIE properties we don't want | ||||
|     _TESTS = [] | ||||
|  | ||||
|     def _get_thumbnail_url(self, uri, itemdoc): | ||||
|         search_path = '%s/%s' % (_media_xml_tag('group'), _media_xml_tag('thumbnail')) | ||||
|         return itemdoc.find(search_path).attrib['url'] | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         title = mobj.group('title') | ||||
|         webpage = self._download_webpage(url, title) | ||||
|         mgid = self._search_regex(r'data-mgid="(?P<mgid>mgid:.*?)"', | ||||
|                                   webpage, u'mgid') | ||||
|         return self._get_videos_info(mgid) | ||||
|  | ||||
|  | ||||
| class ComedyCentralShowsIE(InfoExtractor): | ||||
|     IE_DESC = u'The Daily Show / Colbert Report' | ||||
|     # urls can be abbreviations like :thedailyshow or :colbert | ||||
|     # urls for episodes like: | ||||
|   | ||||
| @@ -4,11 +4,11 @@ import re | ||||
| import socket | ||||
| import sys | ||||
| import netrc | ||||
| import xml.etree.ElementTree | ||||
|  | ||||
| from ..utils import ( | ||||
|     compat_http_client, | ||||
|     compat_urllib_error, | ||||
|     compat_urllib_request, | ||||
|     compat_str, | ||||
|  | ||||
|     clean_html, | ||||
| @@ -19,6 +19,7 @@ from ..utils import ( | ||||
|     unescapeHTML, | ||||
| ) | ||||
|  | ||||
|  | ||||
| class InfoExtractor(object): | ||||
|     """Information Extractor class. | ||||
|  | ||||
| @@ -71,6 +72,11 @@ class InfoExtractor(object): | ||||
|                                 ("3D" or "DASH video") | ||||
|                     * width     Width of the video, if known | ||||
|                     * height    Height of the video, if known | ||||
|                     * abr       Average audio bitrate in KBit/s | ||||
|                     * acodec    Name of the audio codec in use | ||||
|                     * vbr       Average video bitrate in KBit/s | ||||
|                     * vcodec    Name of the video codec in use | ||||
|                     * filesize  The number of bytes, if known in advance | ||||
|     webpage_url:    The url to the video webpage, if given to youtube-dl it | ||||
|                     should allow to get the same result again. (It will be set | ||||
|                     by YoutubeDL if it's missing) | ||||
| @@ -152,7 +158,7 @@ class InfoExtractor(object): | ||||
|         elif note is not False: | ||||
|             self.to_screen(u'%s: %s' % (video_id, note)) | ||||
|         try: | ||||
|             return compat_urllib_request.urlopen(url_or_request) | ||||
|             return self._downloader.urlopen(url_or_request) | ||||
|         except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err: | ||||
|             if errnote is None: | ||||
|                 errnote = u'Unable to download webpage' | ||||
| @@ -204,6 +210,11 @@ class InfoExtractor(object): | ||||
|         """ Returns the data of the page as a string """ | ||||
|         return self._download_webpage_handle(url_or_request, video_id, note, errnote)[0] | ||||
|  | ||||
|     def _download_xml(self, url_or_request, video_id, note=u'Downloading XML', errnote=u'Unable to downloand XML'): | ||||
|         """Return the xml as an xml.etree.ElementTree.Element""" | ||||
|         xml_string = self._download_webpage(url_or_request, video_id, note, errnote) | ||||
|         return xml.etree.ElementTree.fromstring(xml_string.encode('utf-8')) | ||||
|  | ||||
|     def to_screen(self, msg): | ||||
|         """Print msg to screen, prefixing it with '[ie_name]'""" | ||||
|         self._downloader.to_screen(u'[%s] %s' % (self.IE_NAME, msg)) | ||||
| @@ -225,12 +236,14 @@ class InfoExtractor(object): | ||||
|         self.to_screen(u'Logging in') | ||||
|  | ||||
|     #Methods for following #608 | ||||
|     def url_result(self, url, ie=None): | ||||
|     def url_result(self, url, ie=None, video_id=None): | ||||
|         """Returns a url that points to a page that should be processed""" | ||||
|         #TODO: ie should be the class used for getting the info | ||||
|         video_info = {'_type': 'url', | ||||
|                       'url': url, | ||||
|                       'ie_key': ie} | ||||
|         if video_id is not None: | ||||
|             video_info['id'] = video_id | ||||
|         return video_info | ||||
|     def playlist_result(self, entries, playlist_id=None, playlist_title=None): | ||||
|         """Returns a playlist""" | ||||
| @@ -315,13 +328,19 @@ class InfoExtractor(object): | ||||
|  | ||||
|     # Helper functions for extracting OpenGraph info | ||||
|     @staticmethod | ||||
|     def _og_regex(prop): | ||||
|         return r'<meta.+?property=[\'"]og:%s[\'"].+?content=(?:"(.+?)"|\'(.+?)\')' % re.escape(prop) | ||||
|     def _og_regexes(prop): | ||||
|         content_re = r'content=(?:"([^>]+?)"|\'(.+?)\')' | ||||
|         property_re = r'property=[\'"]og:%s[\'"]' % re.escape(prop) | ||||
|         template = r'<meta[^>]+?%s[^>]+?%s' | ||||
|         return [ | ||||
|             template % (property_re, content_re), | ||||
|             template % (content_re, property_re), | ||||
|         ] | ||||
|  | ||||
|     def _og_search_property(self, prop, html, name=None, **kargs): | ||||
|         if name is None: | ||||
|             name = 'OpenGraph %s' % prop | ||||
|         escaped = self._search_regex(self._og_regex(prop), html, name, flags=re.DOTALL, **kargs) | ||||
|         escaped = self._search_regex(self._og_regexes(prop), html, name, flags=re.DOTALL, **kargs) | ||||
|         if escaped is None: | ||||
|             return None | ||||
|         return unescapeHTML(escaped) | ||||
| @@ -336,10 +355,21 @@ class InfoExtractor(object): | ||||
|         return self._og_search_property('title', html, **kargs) | ||||
|  | ||||
|     def _og_search_video_url(self, html, name='video url', secure=True, **kargs): | ||||
|         regexes = [self._og_regex('video')] | ||||
|         if secure: regexes.insert(0, self._og_regex('video:secure_url')) | ||||
|         regexes = self._og_regexes('video') | ||||
|         if secure: regexes = self._og_regexes('video:secure_url') + regexes | ||||
|         return self._html_search_regex(regexes, html, name, **kargs) | ||||
|  | ||||
|     def _html_search_meta(self, name, html, display_name=None): | ||||
|         if display_name is None: | ||||
|             display_name = name | ||||
|         return self._html_search_regex( | ||||
|             r'''(?ix)<meta(?=[^>]+(?:name|property)=["\']%s["\']) | ||||
|                     [^>]+content=["\']([^"\']+)["\']''' % re.escape(name), | ||||
|             html, display_name, fatal=False) | ||||
|  | ||||
|     def _dc_search_uploader(self, html): | ||||
|         return self._html_search_meta('dc.creator', html, 'uploader') | ||||
|  | ||||
|     def _rta_search(self, html): | ||||
|         # See http://www.rtalabel.org/index.php?content=howtofaq#single | ||||
|         if re.search(r'(?ix)<meta\s+name="rating"\s+' | ||||
| @@ -348,6 +378,23 @@ class InfoExtractor(object): | ||||
|             return 18 | ||||
|         return 0 | ||||
|  | ||||
|     def _media_rating_search(self, html): | ||||
|         # See http://www.tjg-designs.com/WP/metadata-code-examples-adding-metadata-to-your-web-pages/ | ||||
|         rating = self._html_search_meta('rating', html) | ||||
|  | ||||
|         if not rating: | ||||
|             return None | ||||
|  | ||||
|         RATING_TABLE = { | ||||
|             'safe for kids': 0, | ||||
|             'general': 8, | ||||
|             '14 years': 14, | ||||
|             'mature': 17, | ||||
|             'restricted': 19, | ||||
|         } | ||||
|         return RATING_TABLE.get(rating.lower(), None) | ||||
|  | ||||
|  | ||||
|  | ||||
| class SearchInfoExtractor(InfoExtractor): | ||||
|     """ | ||||
|   | ||||
							
								
								
									
										22
									
								
								youtube_dl/extractor/d8.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										22
									
								
								youtube_dl/extractor/d8.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,22 @@ | ||||
| # encoding: utf-8 | ||||
| from .canalplus import CanalplusIE | ||||
|  | ||||
|  | ||||
| class D8IE(CanalplusIE): | ||||
|     _VALID_URL = r'https?://www\.d8\.tv/.*?/(?P<path>.*)' | ||||
|     _VIDEO_INFO_TEMPLATE = 'http://service.canal-plus.com/video/rest/getVideosLiees/d8/%s' | ||||
|     IE_NAME = u'd8.tv' | ||||
|  | ||||
|     _TEST = { | ||||
|         u'url': u'http://www.d8.tv/d8-docs-mags/pid6589-d8-campagne-intime.html', | ||||
|         u'file': u'966289.flv', | ||||
|         u'info_dict': { | ||||
|             u'title': u'Campagne intime - Documentaire exceptionnel', | ||||
|             u'description': u'md5:d2643b799fb190846ae09c61e59a859f', | ||||
|             u'upload_date': u'20131108', | ||||
|         }, | ||||
|         u'params': { | ||||
|             # rtmp | ||||
|             u'skip_download': True, | ||||
|         }, | ||||
|     } | ||||
| @@ -186,7 +186,7 @@ class DailymotionPlaylistIE(DailymotionBaseInfoExtractor): | ||||
|             webpage = self._download_webpage(request, | ||||
|                                              id, u'Downloading page %s' % pagenum) | ||||
|  | ||||
|             playlist_el = get_element_by_attribute(u'class', u'video_list', webpage) | ||||
|             playlist_el = get_element_by_attribute(u'class', u'row video_list', webpage) | ||||
|             video_ids.extend(re.findall(r'data-id="(.+?)"', playlist_el)) | ||||
|  | ||||
|             if re.search(self._MORE_PAGES_INDICATOR, webpage, re.DOTALL) is None: | ||||
|   | ||||
| @@ -1,4 +1,3 @@ | ||||
| import itertools | ||||
| import json | ||||
| import random | ||||
| import re | ||||
|   | ||||
| @@ -11,11 +11,11 @@ from ..utils import ( | ||||
|  | ||||
|  | ||||
| class EscapistIE(InfoExtractor): | ||||
|     _VALID_URL = r'^(https?://)?(www\.)?escapistmagazine\.com/videos/view/(?P<showname>[^/]+)/(?P<episode>[^/?]+)[/?]?.*$' | ||||
|     _VALID_URL = r'^https?://?(www\.)?escapistmagazine\.com/videos/view/(?P<showname>[^/]+)/(?P<episode>[^/?]+)[/?]?.*$' | ||||
|     _TEST = { | ||||
|         u'url': u'http://www.escapistmagazine.com/videos/view/the-escapist-presents/6618-Breaking-Down-Baldurs-Gate', | ||||
|         u'file': u'6618-Breaking-Down-Baldurs-Gate.mp4', | ||||
|         u'md5': u'c6793dbda81388f4264c1ba18684a74d', | ||||
|         u'md5': u'ab3a706c681efca53f0a35f1415cf0d1', | ||||
|         u'info_dict': { | ||||
|             u"description": u"Baldur's Gate: Original, Modded or Enhanced Edition? I'll break down what you can expect from the new Baldur's Gate: Enhanced Edition.",  | ||||
|             u"uploader": u"the-escapist-presents",  | ||||
| @@ -25,50 +25,60 @@ class EscapistIE(InfoExtractor): | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         if mobj is None: | ||||
|             raise ExtractorError(u'Invalid URL: %s' % url) | ||||
|         showName = mobj.group('showname') | ||||
|         videoId = mobj.group('episode') | ||||
|  | ||||
|         self.report_extraction(videoId) | ||||
|         webpage = self._download_webpage(url, videoId) | ||||
|  | ||||
|         videoDesc = self._html_search_regex('<meta name="description" content="([^"]*)"', | ||||
|         videoDesc = self._html_search_regex( | ||||
|             r'<meta name="description" content="([^"]*)"', | ||||
|             webpage, u'description', fatal=False) | ||||
|  | ||||
|         playerUrl = self._og_search_video_url(webpage, name='player url') | ||||
|         playerUrl = self._og_search_video_url(webpage, name=u'player URL') | ||||
|  | ||||
|         title = self._html_search_regex('<meta name="title" content="([^"]*)"', | ||||
|             webpage, u'player url').split(' : ')[-1] | ||||
|         title = self._html_search_regex( | ||||
|             r'<meta name="title" content="([^"]*)"', | ||||
|             webpage, u'title').split(' : ')[-1] | ||||
|  | ||||
|         configUrl = self._search_regex('config=(.*)$', playerUrl, u'config url') | ||||
|         configUrl = self._search_regex('config=(.*)$', playerUrl, u'config URL') | ||||
|         configUrl = compat_urllib_parse.unquote(configUrl) | ||||
|  | ||||
|         configJSON = self._download_webpage(configUrl, videoId, | ||||
|                                             u'Downloading configuration', | ||||
|                                             u'unable to download configuration') | ||||
|         formats = [] | ||||
|  | ||||
|         # Technically, it's JavaScript, not JSON | ||||
|         configJSON = configJSON.replace("'", '"') | ||||
|         def _add_format(name, cfgurl): | ||||
|             configJSON = self._download_webpage( | ||||
|                 cfgurl, videoId, | ||||
|                 u'Downloading ' + name + ' configuration', | ||||
|                 u'Unable to download ' + name + ' configuration') | ||||
|  | ||||
|             # Technically, it's JavaScript, not JSON | ||||
|             configJSON = configJSON.replace("'", '"') | ||||
|  | ||||
|             try: | ||||
|                 config = json.loads(configJSON) | ||||
|             except (ValueError,) as err: | ||||
|                 raise ExtractorError(u'Invalid JSON in configuration file: ' + compat_str(err)) | ||||
|             playlist = config['playlist'] | ||||
|             formats.append({ | ||||
|                 'url': playlist[1]['url'], | ||||
|                 'format_id': name, | ||||
|             }) | ||||
|  | ||||
|         _add_format(u'normal', configUrl) | ||||
|         hq_url = (configUrl + | ||||
|                   ('&hq=1' if '?' in configUrl else configUrl + '?hq=1')) | ||||
|         try: | ||||
|             config = json.loads(configJSON) | ||||
|         except (ValueError,) as err: | ||||
|             raise ExtractorError(u'Invalid JSON in configuration file: ' + compat_str(err)) | ||||
|             _add_format(u'hq', hq_url) | ||||
|         except ExtractorError: | ||||
|             pass  # That's fine, we'll just use normal quality | ||||
|  | ||||
|         playlist = config['playlist'] | ||||
|         videoUrl = playlist[1]['url'] | ||||
|  | ||||
|         info = { | ||||
|         return { | ||||
|             'id': videoId, | ||||
|             'url': videoUrl, | ||||
|             'formats': formats, | ||||
|             'uploader': showName, | ||||
|             'upload_date': None, | ||||
|             'title': title, | ||||
|             'ext': 'mp4', | ||||
|             'thumbnail': self._og_search_thumbnail(webpage), | ||||
|             'description': videoDesc, | ||||
|             'player_url': playerUrl, | ||||
|         } | ||||
|  | ||||
|         return [info] | ||||
|   | ||||
| @@ -1,5 +1,4 @@ | ||||
| import json | ||||
| import netrc | ||||
| import re | ||||
| import socket | ||||
|  | ||||
|   | ||||
| @@ -39,7 +39,6 @@ class FKTVIE(InfoExtractor): | ||||
|         for i, _ in enumerate(files, 1): | ||||
|             video_id = '%04d%d' % (episode, i) | ||||
|             video_url = 'http://dl%d.fernsehkritik.tv/fernsehkritik%d%s.flv' % (server, episode, '' if i == 1 else '-%d' % i) | ||||
|             video_title = 'Fernsehkritik %d.%d' % (episode, i) | ||||
|             videos.append({ | ||||
|                 'id': video_id, | ||||
|                 'url': video_url, | ||||
|   | ||||
| @@ -1,9 +1,6 @@ | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     determine_ext, | ||||
| ) | ||||
|  | ||||
|  | ||||
| class GamekingsIE(InfoExtractor): | ||||
| @@ -11,7 +8,8 @@ class GamekingsIE(InfoExtractor): | ||||
|     _TEST = { | ||||
|         u"url": u"http://www.gamekings.tv/videos/phoenix-wright-ace-attorney-dual-destinies-review/", | ||||
|         u'file': u'20130811.mp4', | ||||
|         u'md5': u'17f6088f7d0149ff2b46f2714bdb1954', | ||||
|         # MD5 is flaky, seems to change regularly | ||||
|         #u'md5': u'2f32b1f7b80fdc5cb616efb4f387f8a3', | ||||
|         u'info_dict': { | ||||
|             u"title": u"Phoenix Wright: Ace Attorney \u2013 Dual Destinies Review", | ||||
|             u"description": u"Melle en Steven hebben voor de review een week in de rechtbank doorbracht met Phoenix Wright: Ace Attorney - Dual Destinies.", | ||||
|   | ||||
| @@ -24,7 +24,7 @@ class GameSpotIE(InfoExtractor): | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         page_id = video_id = mobj.group('page_id') | ||||
|         page_id = mobj.group('page_id') | ||||
|         webpage = self._download_webpage(url, page_id) | ||||
|         data_video_json = self._search_regex(r'data-video=\'(.*?)\'', webpage, u'data video') | ||||
|         data_video = json.loads(unescapeHTML(data_video_json)) | ||||
|   | ||||
| @@ -162,6 +162,16 @@ class GenericIE(InfoExtractor): | ||||
|             raise ExtractorError(u'Failed to download URL: %s' % url) | ||||
|  | ||||
|         self.report_extraction(video_id) | ||||
|  | ||||
|         # it's tempting to parse this further, but you would | ||||
|         # have to take into account all the variations like | ||||
|         #   Video Title - Site Name | ||||
|         #   Site Name | Video Title | ||||
|         #   Video Title - Tagline | Site Name | ||||
|         # and so on and so forth; it's just not practical | ||||
|         video_title = self._html_search_regex(r'<title>(.*)</title>', | ||||
|             webpage, u'video title', default=u'video', flags=re.DOTALL) | ||||
|  | ||||
|         # Look for BrightCove: | ||||
|         bc_url = BrightcoveIE._extract_brightcove_url(webpage) | ||||
|         if bc_url is not None: | ||||
| @@ -177,17 +187,20 @@ class GenericIE(InfoExtractor): | ||||
|             return self.url_result(surl, 'Vimeo') | ||||
|  | ||||
|         # Look for embedded YouTube player | ||||
|         mobj = re.search( | ||||
|             r'<iframe[^>]+?src=(["\'])(?P<url>https?://(?:www\.)?youtube.com/embed/.+?)\1', webpage) | ||||
|         if mobj: | ||||
|             surl = unescapeHTML(mobj.group(u'url')) | ||||
|             return self.url_result(surl, 'Youtube') | ||||
|         matches = re.findall( | ||||
|             r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?youtube.com/embed/.+?)\1', webpage) | ||||
|         if matches: | ||||
|             urlrs = [self.url_result(unescapeHTML(tuppl[1]), 'Youtube') | ||||
|                      for tuppl in matches] | ||||
|             return self.playlist_result( | ||||
|                 urlrs, playlist_id=video_id, playlist_title=video_title) | ||||
|  | ||||
|         # Look for Bandcamp pages with custom domain | ||||
|         mobj = re.search(r'<meta property="og:url"[^>]*?content="(.*?bandcamp\.com.*?)"', webpage) | ||||
|         if mobj is not None: | ||||
|             burl = unescapeHTML(mobj.group(1)) | ||||
|             return self.url_result(burl, 'Bandcamp') | ||||
|             # Don't set the extractor because it can be a track url or an album | ||||
|             return self.url_result(burl) | ||||
|  | ||||
|         # Start with something easy: JW Player in SWFObject | ||||
|         mobj = re.search(r'flashvars: [\'"](?:.*&)?file=(http[^\'"&]*)', webpage) | ||||
| @@ -196,7 +209,7 @@ class GenericIE(InfoExtractor): | ||||
|             mobj = re.search(r'[^A-Za-z0-9]?(?:file|source)=(http[^\'"&]*)', webpage) | ||||
|         if mobj is None: | ||||
|             # Broaden the search a little bit: JWPlayer JS loader | ||||
|             mobj = re.search(r'[^A-Za-z0-9]?file["\']?:\s*["\'](http[^\'"&]*)', webpage) | ||||
|             mobj = re.search(r'[^A-Za-z0-9]?file["\']?:\s*["\'](http[^\'"]*)', webpage) | ||||
|         if mobj is None: | ||||
|             # Try to find twitter cards info | ||||
|             mobj = re.search(r'<meta (?:property|name)="twitter:player:stream" (?:content|value)="(.+?)"', webpage) | ||||
| @@ -223,27 +236,16 @@ class GenericIE(InfoExtractor): | ||||
|         video_id = compat_urllib_parse.unquote(os.path.basename(video_url)) | ||||
|  | ||||
|         # here's a fun little line of code for you: | ||||
|         video_extension = os.path.splitext(video_id)[1][1:] | ||||
|         video_id = os.path.splitext(video_id)[0] | ||||
|  | ||||
|         # it's tempting to parse this further, but you would | ||||
|         # have to take into account all the variations like | ||||
|         #   Video Title - Site Name | ||||
|         #   Site Name | Video Title | ||||
|         #   Video Title - Tagline | Site Name | ||||
|         # and so on and so forth; it's just not practical | ||||
|         video_title = self._html_search_regex(r'<title>(.*)</title>', | ||||
|             webpage, u'video title', default=u'video', flags=re.DOTALL) | ||||
|  | ||||
|         # video uploader is domain name | ||||
|         video_uploader = self._search_regex(r'(?:https?://)?([^/]*)/.*', | ||||
|             url, u'video uploader') | ||||
|  | ||||
|         return [{ | ||||
|         return { | ||||
|             'id':       video_id, | ||||
|             'url':      video_url, | ||||
|             'uploader': video_uploader, | ||||
|             'upload_date':  None, | ||||
|             'title':    video_title, | ||||
|             'ext':      video_extension, | ||||
|         }] | ||||
|         } | ||||
|   | ||||
| @@ -8,7 +8,7 @@ class HowcastIE(InfoExtractor): | ||||
|     _TEST = { | ||||
|         u'url': u'http://www.howcast.com/videos/390161-How-to-Tie-a-Square-Knot-Properly', | ||||
|         u'file': u'390161.mp4', | ||||
|         u'md5': u'1d7ba54e2c9d7dc6935ef39e00529138', | ||||
|         u'md5': u'8b743df908c42f60cf6496586c7f12c3', | ||||
|         u'info_dict': { | ||||
|             u"description": u"The square knot, also known as the reef knot, is one of the oldest, most basic knots to tie, and can be used in many different ways. Here's the proper way to tie a square knot.",  | ||||
|             u"title": u"How to Tie a Square Knot Properly" | ||||
|   | ||||
| @@ -22,7 +22,7 @@ class JeuxVideoIE(InfoExtractor): | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         title = re.match(self._VALID_URL, url).group(1) | ||||
|         title = mobj.group(1) | ||||
|         webpage = self._download_webpage(url, title) | ||||
|         xml_link = self._html_search_regex( | ||||
|             r'<param name="flashvars" value="config=(.*?)" />', | ||||
|   | ||||
| @@ -1,16 +1,17 @@ | ||||
| import re | ||||
| import json | ||||
| import xml.etree.ElementTree | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     compat_urllib_parse_urlparse, | ||||
|     compat_urlparse, | ||||
|     get_meta_content, | ||||
|     ExtractorError, | ||||
|     xpath_with_ns, | ||||
| ) | ||||
|  | ||||
|  | ||||
| class LivestreamIE(InfoExtractor): | ||||
|     IE_NAME = u'livestream' | ||||
|     _VALID_URL = r'http://new.livestream.com/.*?/(?P<event_name>.*?)(/videos/(?P<id>\d+))?/?$' | ||||
|     _TEST = { | ||||
|         u'url': u'http://new.livestream.com/CoheedandCambria/WebsterHall/videos/4719370', | ||||
| @@ -54,3 +55,44 @@ class LivestreamIE(InfoExtractor): | ||||
|             info = json.loads(self._download_webpage(api_url, video_id, | ||||
|                                                      u'Downloading video info')) | ||||
|             return self._extract_video_info(info) | ||||
|  | ||||
|  | ||||
| # The original version of Livestream uses a different system | ||||
| class LivestreamOriginalIE(InfoExtractor): | ||||
|     IE_NAME = u'livestream:original' | ||||
|     _VALID_URL = r'https?://www\.livestream\.com/(?P<user>[^/]+)/video\?.*?clipId=(?P<id>.*?)(&|$)' | ||||
|     _TEST = { | ||||
|         u'url': u'http://www.livestream.com/dealbook/video?clipId=pla_8aa4a3f1-ba15-46a4-893b-902210e138fb', | ||||
|         u'info_dict': { | ||||
|             u'id': u'pla_8aa4a3f1-ba15-46a4-893b-902210e138fb', | ||||
|             u'ext': u'flv', | ||||
|             u'title': u'Spark 1 (BitCoin) with Cameron Winklevoss & Tyler Winklevoss of Winklevoss Capital', | ||||
|         }, | ||||
|         u'params': { | ||||
|             # rtmp | ||||
|             u'skip_download': True, | ||||
|         }, | ||||
|     } | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         video_id = mobj.group('id') | ||||
|         user = mobj.group('user') | ||||
|         api_url = 'http://x{0}x.api.channel.livestream.com/2.0/clipdetails?extendedInfo=true&id={1}'.format(user, video_id) | ||||
|  | ||||
|         api_response = self._download_webpage(api_url, video_id) | ||||
|         info = xml.etree.ElementTree.fromstring(api_response.encode('utf-8')) | ||||
|         item = info.find('channel').find('item') | ||||
|         ns = {'media': 'http://search.yahoo.com/mrss'} | ||||
|         thumbnail_url = item.find(xpath_with_ns('media:thumbnail', ns)).attrib['url'] | ||||
|         # Remove the extension and number from the path (like 1.jpg) | ||||
|         path = self._search_regex(r'(user-files/.+)_.*?\.jpg$', thumbnail_url, u'path') | ||||
|  | ||||
|         return { | ||||
|             'id': video_id, | ||||
|             'title': item.find('title').text, | ||||
|             'url': 'rtmp://extondemand.livestream.com/ondemand', | ||||
|             'play_path': 'mp4:trans/dv15/mogulus-{0}.mp4'.format(path), | ||||
|             'ext': 'flv', | ||||
|             'thumbnail': thumbnail_url, | ||||
|         } | ||||
|   | ||||
| @@ -60,7 +60,7 @@ class MixcloudIE(InfoExtractor): | ||||
|             'title': info['name'], | ||||
|             'url': final_song_url, | ||||
|             'ext': 'mp3', | ||||
|             'description': info['description'], | ||||
|             'description': info.get('description'), | ||||
|             'thumbnail': info['pictures'].get('extra_large'), | ||||
|             'uploader': info['user']['name'], | ||||
|             'uploader_id': info['user']['username'], | ||||
|   | ||||
| @@ -48,7 +48,7 @@ class MTVIE(InfoExtractor): | ||||
|     def _transform_rtmp_url(rtmp_video_url): | ||||
|         m = re.match(r'^rtmpe?://.*?/(?P<finalid>gsp\..+?/.*)$', rtmp_video_url) | ||||
|         if not m: | ||||
|             raise ExtractorError(u'Cannot transform RTMP url') | ||||
|             return rtmp_video_url | ||||
|         base = 'http://mtvnmobile.vo.llnwd.net/kip0/_pxn=1+_pxI0=Ripod-h264+_pxL0=undefined+_pxM0=+_pxK=18639+_pxE=mp4/44620/mtvnorigin/' | ||||
|         return base + m.group('finalid') | ||||
|  | ||||
| @@ -59,7 +59,6 @@ class MTVIE(InfoExtractor): | ||||
|         if '/error_country_block.swf' in metadataXml: | ||||
|             raise ExtractorError(u'This video is not available from your country.', expected=True) | ||||
|         mdoc = xml.etree.ElementTree.fromstring(metadataXml.encode('utf-8')) | ||||
|         renditions = mdoc.findall('.//rendition') | ||||
|  | ||||
|         formats = [] | ||||
|         for rendition in mdoc.findall('.//rendition'): | ||||
|   | ||||
| @@ -72,7 +72,7 @@ class NHLIE(NHLBaseInfoExtractor): | ||||
|  | ||||
| class NHLVideocenterIE(NHLBaseInfoExtractor): | ||||
|     IE_NAME = u'nhl.com:videocenter' | ||||
|     IE_DESC = u'Download the first 12 videos from a videocenter category' | ||||
|     IE_DESC = u'NHL videocenter category' | ||||
|     _VALID_URL = r'https?://video\.(?P<team>[^.]*)\.nhl\.com/videocenter/(console\?.*?catid=(?P<catid>[^&]+))?' | ||||
|  | ||||
|     @classmethod | ||||
|   | ||||
							
								
								
									
										131
									
								
								youtube_dl/extractor/niconico.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										131
									
								
								youtube_dl/extractor/niconico.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,131 @@ | ||||
| # encoding: utf-8 | ||||
|  | ||||
| import re | ||||
| import socket | ||||
| import xml.etree.ElementTree | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     compat_http_client, | ||||
|     compat_urllib_error, | ||||
|     compat_urllib_parse, | ||||
|     compat_urllib_request, | ||||
|     compat_urlparse, | ||||
|     compat_str, | ||||
|  | ||||
|     ExtractorError, | ||||
|     unified_strdate, | ||||
| ) | ||||
|  | ||||
|  | ||||
| class NiconicoIE(InfoExtractor): | ||||
|     IE_NAME = u'niconico' | ||||
|     IE_DESC = u'ニコニコ動画' | ||||
|  | ||||
|     _TEST = { | ||||
|         u'url': u'http://www.nicovideo.jp/watch/sm22312215', | ||||
|         u'file': u'sm22312215.mp4', | ||||
|         u'md5': u'd1a75c0823e2f629128c43e1212760f9', | ||||
|         u'info_dict': { | ||||
|             u'title': u'Big Buck Bunny', | ||||
|             u'uploader': u'takuya0301', | ||||
|             u'uploader_id': u'2698420', | ||||
|             u'upload_date': u'20131123', | ||||
|             u'description': u'(c) copyright 2008, Blender Foundation / www.bigbuckbunny.org', | ||||
|         }, | ||||
|         u'params': { | ||||
|             u'username': u'ydl.niconico@gmail.com', | ||||
|             u'password': u'youtube-dl', | ||||
|         }, | ||||
|     } | ||||
|  | ||||
|     _VALID_URL = r'^https?://(?:www\.|secure\.)?nicovideo\.jp/watch/([a-z][a-z][0-9]+)(?:.*)$' | ||||
|     _NETRC_MACHINE = 'niconico' | ||||
|     # If True it will raise an error if no login info is provided | ||||
|     _LOGIN_REQUIRED = True | ||||
|  | ||||
|     def _real_initialize(self): | ||||
|         self._login() | ||||
|  | ||||
|     def _login(self): | ||||
|         (username, password) = self._get_login_info() | ||||
|         # No authentication to be performed | ||||
|         if username is None: | ||||
|             if self._LOGIN_REQUIRED: | ||||
|                 raise ExtractorError(u'No login info available, needed for using %s.' % self.IE_NAME, expected=True) | ||||
|             return False | ||||
|  | ||||
|         # Log in | ||||
|         login_form_strs = { | ||||
|             u'mail': username, | ||||
|             u'password': password, | ||||
|         } | ||||
|         # Convert to UTF-8 *before* urlencode because Python 2.x's urlencode | ||||
|         # chokes on unicode | ||||
|         login_form = dict((k.encode('utf-8'), v.encode('utf-8')) for k,v in login_form_strs.items()) | ||||
|         login_data = compat_urllib_parse.urlencode(login_form).encode('utf-8') | ||||
|         request = compat_urllib_request.Request( | ||||
|             u'https://secure.nicovideo.jp/secure/login', login_data) | ||||
|         login_results = self._download_webpage( | ||||
|             request, u'', note=u'Logging in', errnote=u'Unable to log in') | ||||
|         if re.search(r'(?i)<h1 class="mb8p4">Log in error</h1>', login_results) is not None: | ||||
|             self._downloader.report_warning(u'unable to log in: bad username or password') | ||||
|             return False | ||||
|         return True | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         video_id = mobj.group(1) | ||||
|  | ||||
|         # Get video webpage. We are not actually interested in it, but need | ||||
|         # the cookies in order to be able to download the info webpage | ||||
|         self._download_webpage('http://www.nicovideo.jp/watch/' + video_id, video_id) | ||||
|  | ||||
|         video_info_webpage = self._download_webpage( | ||||
|             'http://ext.nicovideo.jp/api/getthumbinfo/' + video_id, video_id, | ||||
|             note=u'Downloading video info page') | ||||
|  | ||||
|         # Get flv info | ||||
|         flv_info_webpage = self._download_webpage( | ||||
|             u'http://flapi.nicovideo.jp/api/getflv?v=' + video_id, | ||||
|             video_id, u'Downloading flv info') | ||||
|         video_real_url = compat_urlparse.parse_qs(flv_info_webpage)['url'][0] | ||||
|  | ||||
|         # Start extracting information | ||||
|         video_info = xml.etree.ElementTree.fromstring(video_info_webpage) | ||||
|         video_title = video_info.find('.//title').text | ||||
|         video_extension = video_info.find('.//movie_type').text | ||||
|         video_format = video_extension.upper() | ||||
|         video_thumbnail = video_info.find('.//thumbnail_url').text | ||||
|         video_description = video_info.find('.//description').text | ||||
|         video_uploader_id = video_info.find('.//user_id').text | ||||
|         video_upload_date = unified_strdate(video_info.find('.//first_retrieve').text.split('+')[0]) | ||||
|         video_view_count = video_info.find('.//view_counter').text | ||||
|         video_webpage_url = video_info.find('.//watch_url').text | ||||
|  | ||||
|         # uploader | ||||
|         video_uploader = video_uploader_id | ||||
|         url = 'http://seiga.nicovideo.jp/api/user/info?id=' + video_uploader_id | ||||
|         try: | ||||
|             user_info_webpage = self._download_webpage( | ||||
|                 url, video_id, note=u'Downloading user information') | ||||
|         except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err: | ||||
|             self._downloader.report_warning(u'Unable to download user info webpage: %s' % compat_str(err)) | ||||
|         else: | ||||
|             user_info = xml.etree.ElementTree.fromstring(user_info_webpage) | ||||
|             video_uploader = user_info.find('.//nickname').text | ||||
|  | ||||
|         return { | ||||
|             'id':          video_id, | ||||
|             'url':         video_real_url, | ||||
|             'title':       video_title, | ||||
|             'ext':         video_extension, | ||||
|             'format':      video_format, | ||||
|             'thumbnail':   video_thumbnail, | ||||
|             'description': video_description, | ||||
|             'uploader':    video_uploader, | ||||
|             'upload_date': video_upload_date, | ||||
|             'uploader_id': video_uploader_id, | ||||
|             'view_count':  video_view_count, | ||||
|             'webpage_url': video_webpage_url, | ||||
|         } | ||||
| @@ -6,7 +6,6 @@ from ..utils import ( | ||||
|     compat_urllib_parse_urlparse, | ||||
|     compat_urllib_request, | ||||
|     compat_urllib_parse, | ||||
|     unescapeHTML, | ||||
| ) | ||||
| from ..aes import ( | ||||
|     aes_decrypt_text | ||||
|   | ||||
| @@ -8,7 +8,9 @@ class RedTubeIE(InfoExtractor): | ||||
|     _TEST = { | ||||
|         u'url': u'http://www.redtube.com/66418', | ||||
|         u'file': u'66418.mp4', | ||||
|         u'md5': u'7b8c22b5e7098a3e1c09709df1126d2d', | ||||
|         # md5 varies from time to time, as in | ||||
|         # https://travis-ci.org/rg3/youtube-dl/jobs/14052463#L295 | ||||
|         #u'md5': u'7b8c22b5e7098a3e1c09709df1126d2d', | ||||
|         u'info_dict': { | ||||
|             u"title": u"Sucked on a toilet", | ||||
|             u"age_limit": 18, | ||||
|   | ||||
| @@ -62,18 +62,6 @@ class RTLnowIE(InfoExtractor): | ||||
|             u'skip_download': True, | ||||
|         }, | ||||
|     }, | ||||
|     { | ||||
|         u'url': u'http://www.rtlnitronow.de/recht-ordnung/stadtpolizei-frankfurt-gerichtsvollzieher-leipzig.php?film_id=129679&player=1&season=1', | ||||
|         u'file': u'129679.flv', | ||||
|         u'info_dict': { | ||||
|             u'upload_date': u'20131016',  | ||||
|             u'title': u'Recht & Ordnung - Stadtpolizei Frankfurt/ Gerichtsvollzieher...', | ||||
|             u'description': u'Stadtpolizei Frankfurt/ Gerichtsvollzieher Leipzig', | ||||
|         }, | ||||
|         u'params': { | ||||
|             u'skip_download': True, | ||||
|         }, | ||||
|     }, | ||||
|     { | ||||
|         u'url': u'http://www.n-tvnow.de/top-gear/episode-1-2013-01-01-00-00-00.php?film_id=124903&player=1&season=10', | ||||
|         u'file': u'124903.flv', | ||||
|   | ||||
| @@ -59,6 +59,7 @@ class SoundcloudIE(InfoExtractor): | ||||
|     ] | ||||
|  | ||||
|     _CLIENT_ID = 'b45b1aa10f1ac2941910a7f0d10f8e28' | ||||
|     _IPHONE_CLIENT_ID = '376f225bf427445fc4bfb6b99b72e0bf' | ||||
|  | ||||
|     @classmethod | ||||
|     def suitable(cls, url): | ||||
| @@ -75,36 +76,79 @@ class SoundcloudIE(InfoExtractor): | ||||
|     def _extract_info_dict(self, info, full_title=None, quiet=False): | ||||
|         track_id = compat_str(info['id']) | ||||
|         name = full_title or track_id | ||||
|         if quiet == False: | ||||
|         if quiet: | ||||
|             self.report_extraction(name) | ||||
|  | ||||
|         thumbnail = info['artwork_url'] | ||||
|         if thumbnail is not None: | ||||
|             thumbnail = thumbnail.replace('-large', '-t500x500') | ||||
|         ext = info.get('original_format', u'mp3') | ||||
|         result = { | ||||
|             'id':       track_id, | ||||
|             'url':      info['stream_url'] + '?client_id=' + self._CLIENT_ID, | ||||
|             'id': track_id, | ||||
|             'uploader': info['user']['username'], | ||||
|             'upload_date': unified_strdate(info['created_at']), | ||||
|             'title':    info['title'], | ||||
|             'ext':      u'mp3', | ||||
|             'title': info['title'], | ||||
|             'description': info['description'], | ||||
|             'thumbnail': thumbnail, | ||||
|         } | ||||
|         if info.get('downloadable', False): | ||||
|             result['url'] = 'https://api.soundcloud.com/tracks/{0}/download?client_id={1}'.format(track_id, self._CLIENT_ID) | ||||
|         if not info.get('streamable', False): | ||||
|             # We have to get the rtmp url | ||||
|             # We can build a direct link to the song | ||||
|             format_url = ( | ||||
|                 u'https://api.soundcloud.com/tracks/{0}/download?client_id={1}'.format( | ||||
|                     track_id, self._CLIENT_ID)) | ||||
|             result['formats'] = [{ | ||||
|                 'format_id': 'download', | ||||
|                 'ext': ext, | ||||
|                 'url': format_url, | ||||
|                 'vcodec': 'none', | ||||
|             }] | ||||
|         else: | ||||
|             # We have to retrieve the url | ||||
|             stream_json = self._download_webpage( | ||||
|                 'http://api.soundcloud.com/i1/tracks/{0}/streams?client_id={1}'.format(track_id, self._CLIENT_ID), | ||||
|                 'http://api.soundcloud.com/i1/tracks/{0}/streams?client_id={1}'.format(track_id, self._IPHONE_CLIENT_ID), | ||||
|                 track_id, u'Downloading track url') | ||||
|             rtmp_url = json.loads(stream_json)['rtmp_mp3_128_url'] | ||||
|             # The url doesn't have an rtmp app, we have to extract the playpath | ||||
|             url, path = rtmp_url.split('mp3:', 1) | ||||
|             result.update({ | ||||
|                 'url': url, | ||||
|                 'play_path': 'mp3:' + path, | ||||
|             }) | ||||
|  | ||||
|             formats = [] | ||||
|             format_dict = json.loads(stream_json) | ||||
|             for key, stream_url in format_dict.items(): | ||||
|                 if key.startswith(u'http'): | ||||
|                     formats.append({ | ||||
|                         'format_id': key, | ||||
|                         'ext': ext, | ||||
|                         'url': stream_url, | ||||
|                         'vcodec': 'none', | ||||
|                     }) | ||||
|                 elif key.startswith(u'rtmp'): | ||||
|                     # The url doesn't have an rtmp app, we have to extract the playpath | ||||
|                     url, path = stream_url.split('mp3:', 1) | ||||
|                     formats.append({ | ||||
|                         'format_id': key, | ||||
|                         'url': url, | ||||
|                         'play_path': 'mp3:' + path, | ||||
|                         'ext': ext, | ||||
|                         'vcodec': 'none', | ||||
|                     }) | ||||
|  | ||||
|             if not formats: | ||||
|                 # We fallback to the stream_url in the original info, this | ||||
|                 # cannot be always used, sometimes it can give an HTTP 404 error | ||||
|                 formats.append({ | ||||
|                     'format_id': u'fallback', | ||||
|                     'url': info['stream_url'] + '?client_id=' + self._CLIENT_ID, | ||||
|                     'ext': ext, | ||||
|                     'vcodec': 'none', | ||||
|                 }) | ||||
|  | ||||
|             def format_pref(f): | ||||
|                 if f['format_id'].startswith('http'): | ||||
|                     return 2 | ||||
|                 if f['format_id'].startswith('rtmp'): | ||||
|                     return 1 | ||||
|                 return 0 | ||||
|  | ||||
|             formats.sort(key=format_pref) | ||||
|             result['formats'] = formats | ||||
|  | ||||
|         return result | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
| @@ -158,7 +202,6 @@ class SoundcloudSetIE(SoundcloudIE): | ||||
|         resolv_url = self._resolv_url(url) | ||||
|         info_json = self._download_webpage(resolv_url, full_title) | ||||
|  | ||||
|         videos = [] | ||||
|         info = json.loads(info_json) | ||||
|         if 'errors' in info: | ||||
|             for err in info['errors']: | ||||
|   | ||||
| @@ -5,21 +5,19 @@ from .mtv import MTVIE, _media_xml_tag | ||||
|  | ||||
| class SouthParkStudiosIE(MTVIE): | ||||
|     IE_NAME = u'southparkstudios.com' | ||||
|     _VALID_URL = r'https?://www\.southparkstudios\.com/(clips|full-episodes)/(?P<id>.+?)(\?|#|$)' | ||||
|     _VALID_URL = r'(https?://)?(www\.)?(?P<url>southparkstudios\.com/(clips|full-episodes)/(?P<id>.+?)(\?|#|$))' | ||||
|  | ||||
|     _FEED_URL = 'http://www.southparkstudios.com/feeds/video-player/mrss' | ||||
|  | ||||
|     _TEST = { | ||||
|     # Overwrite MTVIE properties we don't want | ||||
|     _TESTS = [{ | ||||
|         u'url': u'http://www.southparkstudios.com/clips/104437/bat-daded#tab=featured', | ||||
|         u'file': u'a7bff6c2-ed00-11e0-aca6-0026b9414f30.mp4', | ||||
|         u'info_dict': { | ||||
|             u'title': u'Bat Daded', | ||||
|             u'description': u'Randy disqualifies South Park by getting into a fight with Bat Dad.', | ||||
|         }, | ||||
|     } | ||||
|  | ||||
|     # Overwrite MTVIE properties we don't want | ||||
|     _TESTS = [] | ||||
|     }] | ||||
|  | ||||
|     def _get_thumbnail_url(self, uri, itemdoc): | ||||
|         search_path = '%s/%s' % (_media_xml_tag('group'), _media_xml_tag('thumbnail')) | ||||
| @@ -31,8 +29,23 @@ class SouthParkStudiosIE(MTVIE): | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         url = u'http://www.' + mobj.group(u'url') | ||||
|         video_id = mobj.group('id') | ||||
|         webpage = self._download_webpage(url, video_id) | ||||
|         mgid = self._search_regex(r'swfobject.embedSWF\(".*?(mgid:.*?)"', | ||||
|                                   webpage, u'mgid') | ||||
|         return self._get_videos_info(mgid) | ||||
|  | ||||
| class SouthparkDeIE(SouthParkStudiosIE): | ||||
|     IE_NAME = u'southpark.de' | ||||
|     _VALID_URL = r'(https?://)?(www\.)?(?P<url>southpark\.de/(clips|alle-episoden)/(?P<id>.+?)(\?|#|$))' | ||||
|     _FEED_URL = 'http://www.southpark.de/feeds/video-player/mrss/' | ||||
|  | ||||
|     _TESTS = [{ | ||||
|         u'url': u'http://www.southpark.de/clips/uygssh/the-government-wont-respect-my-privacy#tab=featured', | ||||
|         u'file': u'85487c96-b3b9-4e39-9127-ad88583d9bf2.mp4', | ||||
|         u'info_dict': { | ||||
|             u'title': u'The Government Won\'t Respect My Privacy', | ||||
|             u'description': u'Cartman explains the benefits of "Shitter" to Stan, Kyle and Craig.', | ||||
|         }, | ||||
|     }] | ||||
|   | ||||
| @@ -6,7 +6,6 @@ from ..utils import ( | ||||
|     compat_urllib_parse_urlparse, | ||||
|     compat_urllib_request, | ||||
|     compat_urllib_parse, | ||||
|     unescapeHTML, | ||||
| ) | ||||
| from ..aes import ( | ||||
|     aes_decrypt_text | ||||
| @@ -36,11 +35,12 @@ class SpankwireIE(InfoExtractor): | ||||
|         webpage = self._download_webpage(req, video_id) | ||||
|  | ||||
|         video_title = self._html_search_regex(r'<h1>([^<]+)', webpage, u'title') | ||||
|         video_uploader = self._html_search_regex(r'by:\s*<a [^>]*>(.+?)</a>', webpage, u'uploader', fatal=False) | ||||
|         thumbnail = self._html_search_regex(r'flashvars\.image_url = "([^"]+)', webpage, u'thumbnail', fatal=False) | ||||
|         description = self._html_search_regex(r'>\s*Description:</div>\s*<[^>]*>([^<]+)', webpage, u'description', fatal=False) | ||||
|         if len(description) == 0: | ||||
|             description = None | ||||
|         video_uploader = self._html_search_regex( | ||||
|             r'by:\s*<a [^>]*>(.+?)</a>', webpage, u'uploader', fatal=False) | ||||
|         thumbnail = self._html_search_regex( | ||||
|             r'flashvars\.image_url = "([^"]+)', webpage, u'thumbnail', fatal=False) | ||||
|         description = self._html_search_regex( | ||||
|             r'<div\s+id="descriptionContent">([^<]+)<', webpage, u'description', fatal=False) | ||||
|  | ||||
|         video_urls = list(map(compat_urllib_parse.unquote , re.findall(r'flashvars\.quality_[0-9]{3}p = "([^"]+)', webpage))) | ||||
|         if webpage.find('flashvars\.encrypted = "true"') != -1: | ||||
|   | ||||
| @@ -6,14 +6,22 @@ from .common import InfoExtractor | ||||
|  | ||||
| class SpiegelIE(InfoExtractor): | ||||
|     _VALID_URL = r'https?://(?:www\.)?spiegel\.de/video/[^/]*-(?P<videoID>[0-9]+)(?:\.html)?(?:#.*)?$' | ||||
|     _TEST = { | ||||
|     _TESTS = [{ | ||||
|         u'url': u'http://www.spiegel.de/video/vulkan-tungurahua-in-ecuador-ist-wieder-aktiv-video-1259285.html', | ||||
|         u'file': u'1259285.mp4', | ||||
|         u'md5': u'2c2754212136f35fb4b19767d242f66e', | ||||
|         u'info_dict': { | ||||
|             u"title": u"Vulkanausbruch in Ecuador: Der \"Feuerschlund\" ist wieder aktiv" | ||||
|         } | ||||
|     } | ||||
|     }, | ||||
|     { | ||||
|         u'url': u'http://www.spiegel.de/video/schach-wm-videoanalyse-des-fuenften-spiels-video-1309159.html', | ||||
|         u'file': u'1309159.mp4', | ||||
|         u'md5': u'f2cdf638d7aa47654e251e1aee360af1', | ||||
|         u'info_dict': { | ||||
|             u'title': u'Schach-WM in der Videoanalyse: Carlsen nutzt die Fehlgriffe des Titelverteidigers' | ||||
|         } | ||||
|     }] | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         m = re.match(self._VALID_URL, url) | ||||
| @@ -21,25 +29,38 @@ class SpiegelIE(InfoExtractor): | ||||
|  | ||||
|         webpage = self._download_webpage(url, video_id) | ||||
|  | ||||
|         video_title = self._html_search_regex(r'<div class="module-title">(.*?)</div>', | ||||
|             webpage, u'title') | ||||
|         video_title = self._html_search_regex( | ||||
|             r'<div class="module-title">(.*?)</div>', webpage, u'title') | ||||
|  | ||||
|         xml_url = u'http://video2.spiegel.de/flash/' + video_id + u'.xml' | ||||
|         xml_code = self._download_webpage(xml_url, video_id, | ||||
|                     note=u'Downloading XML', errnote=u'Failed to download XML') | ||||
|         xml_code = self._download_webpage( | ||||
|             xml_url, video_id, | ||||
|             note=u'Downloading XML', errnote=u'Failed to download XML') | ||||
|  | ||||
|         idoc = xml.etree.ElementTree.fromstring(xml_code) | ||||
|         last_type = idoc[-1] | ||||
|         filename = last_type.findall('./filename')[0].text | ||||
|         duration = float(last_type.findall('./duration')[0].text) | ||||
|  | ||||
|         video_url = 'http://video2.spiegel.de/flash/' + filename | ||||
|         video_ext = filename.rpartition('.')[2] | ||||
|         formats = [ | ||||
|             { | ||||
|                 'format_id': n.tag.rpartition('type')[2], | ||||
|                 'url': u'http://video2.spiegel.de/flash/' + n.find('./filename').text, | ||||
|                 'width': int(n.find('./width').text), | ||||
|                 'height': int(n.find('./height').text), | ||||
|                 'abr': int(n.find('./audiobitrate').text), | ||||
|                 'vbr': int(n.find('./videobitrate').text), | ||||
|                 'vcodec': n.find('./codec').text, | ||||
|                 'acodec': 'MP4A', | ||||
|             } | ||||
|             for n in list(idoc) | ||||
|             # Blacklist type 6, it's extremely LQ and not available on the same server | ||||
|             if n.tag.startswith('type') and n.tag != 'type6' | ||||
|         ] | ||||
|         formats.sort(key=lambda f: f['vbr']) | ||||
|         duration = float(idoc[0].findall('./duration')[0].text) | ||||
|  | ||||
|         info = { | ||||
|             'id': video_id, | ||||
|             'url': video_url, | ||||
|             'ext': video_ext, | ||||
|             'title': video_title, | ||||
|             'duration': duration, | ||||
|             'formats': formats, | ||||
|         } | ||||
|         return [info] | ||||
|         return info | ||||
|   | ||||
							
								
								
									
										66
									
								
								youtube_dl/extractor/streamcloud.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										66
									
								
								youtube_dl/extractor/streamcloud.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,66 @@ | ||||
| # coding: utf-8 | ||||
| import re | ||||
| import time | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     compat_urllib_parse, | ||||
|     compat_urllib_request, | ||||
| ) | ||||
|  | ||||
|  | ||||
| class StreamcloudIE(InfoExtractor): | ||||
|     IE_NAME = u'streamcloud.eu' | ||||
|     _VALID_URL = r'https?://streamcloud\.eu/(?P<id>[a-zA-Z0-9_-]+)/(?P<fname>[^#?]*)\.html' | ||||
|  | ||||
|     _TEST = { | ||||
|         u'url': u'http://streamcloud.eu/skp9j99s4bpz/youtube-dl_test_video_____________-BaW_jenozKc.mp4.html', | ||||
|         u'file': u'skp9j99s4bpz.mp4', | ||||
|         u'md5': u'6bea4c7fa5daaacc2a946b7146286686', | ||||
|         u'info_dict': { | ||||
|             u'title': u'youtube-dl test video  \'/\\ ä ↭', | ||||
|             u'duration': 9, | ||||
|         }, | ||||
|         u'skip': u'Only available from the EU' | ||||
|     } | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         video_id = mobj.group('id') | ||||
|  | ||||
|         orig_webpage = self._download_webpage(url, video_id) | ||||
|  | ||||
|         fields = re.findall(r'''(?x)<input\s+ | ||||
|             type="(?:hidden|submit)"\s+ | ||||
|             name="([^"]+)"\s+ | ||||
|             (?:id="[^"]+"\s+)? | ||||
|             value="([^"]*)" | ||||
|             ''', orig_webpage) | ||||
|         post = compat_urllib_parse.urlencode(fields) | ||||
|  | ||||
|         self.to_screen('%s: Waiting for timeout' % video_id) | ||||
|         time.sleep(12) | ||||
|         headers = { | ||||
|             b'Content-Type': b'application/x-www-form-urlencoded', | ||||
|         } | ||||
|         req = compat_urllib_request.Request(url, post, headers) | ||||
|  | ||||
|         webpage = self._download_webpage( | ||||
|             req, video_id, note=u'Downloading video page ...') | ||||
|         title = self._html_search_regex( | ||||
|             r'<h1[^>]*>([^<]+)<', webpage, u'title') | ||||
|         video_url = self._search_regex( | ||||
|             r'file:\s*"([^"]+)"', webpage, u'video URL') | ||||
|         duration_str = self._search_regex( | ||||
|             r'duration:\s*"?([0-9]+)"?', webpage, u'duration', fatal=False) | ||||
|         duration = None if duration_str is None else int(duration_str) | ||||
|         thumbnail = self._search_regex( | ||||
|             r'image:\s*"([^"]+)"', webpage, u'thumbnail URL', fatal=False) | ||||
|  | ||||
|         return { | ||||
|             'id': video_id, | ||||
|             'title': title, | ||||
|             'url': video_url, | ||||
|             'duration': duration, | ||||
|             'thumbnail': thumbnail, | ||||
|         } | ||||
| @@ -15,7 +15,8 @@ class SztvHuIE(InfoExtractor): | ||||
|         u'info_dict': { | ||||
|             u"title": u"Cserkészek népszerűsítették a környezettudatos életmódot a Savaria téren", | ||||
|             u"description": u'A zöld nap játékos ismeretterjesztő programjait a Magyar Cserkész Szövetség szervezte, akik az ország nyolc városában adják át tudásukat az érdeklődőknek. A PET...', | ||||
|         } | ||||
|         }, | ||||
|         u'skip': u'Service temporarily disabled as of 2013-11-20' | ||||
|     } | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|   | ||||
| @@ -60,7 +60,7 @@ class TeamcocoIE(InfoExtractor): | ||||
|                 return -1 | ||||
|         formats.sort(key=sort_key) | ||||
|         if not formats: | ||||
|             raise RegexNotFoundError(u'Unable to extract video URL') | ||||
|             raise ExtractorError(u'Unable to extract video URL') | ||||
|  | ||||
|         return { | ||||
|             'id':          video_id, | ||||
|   | ||||
| @@ -4,7 +4,6 @@ import re | ||||
| from .subtitles import SubtitlesInfoExtractor | ||||
|  | ||||
| from ..utils import ( | ||||
|     compat_str, | ||||
|     RegexNotFoundError, | ||||
| ) | ||||
|  | ||||
| @@ -43,26 +42,25 @@ class TEDIE(SubtitlesInfoExtractor): | ||||
|             self.to_screen(u'Getting info of playlist %s: "%s"' % (playlist_id,name)) | ||||
|             return [self._playlist_videos_info(url,name,playlist_id)] | ||||
|  | ||||
|     def _playlist_videos_info(self,url,name,playlist_id=0): | ||||
|  | ||||
|     def _playlist_videos_info(self, url, name, playlist_id): | ||||
|         '''Returns the videos of the playlist''' | ||||
|         video_RE=r''' | ||||
|                      <li\ id="talk_(\d+)"([.\s]*?)data-id="(?P<video_id>\d+)" | ||||
|                      ([.\s]*?)data-playlist_item_id="(\d+)" | ||||
|                      ([.\s]*?)data-mediaslug="(?P<mediaSlug>.+?)" | ||||
|                      ''' | ||||
|         video_name_RE=r'<p\ class="talk-title"><a href="(?P<talk_url>/talks/(.+).html)">(?P<fullname>.+?)</a></p>' | ||||
|         webpage=self._download_webpage(url, playlist_id, 'Downloading playlist webpage') | ||||
|         m_videos=re.finditer(video_RE,webpage,re.VERBOSE) | ||||
|         m_names=re.finditer(video_name_RE,webpage) | ||||
|  | ||||
|         webpage = self._download_webpage( | ||||
|             url, playlist_id, u'Downloading playlist webpage') | ||||
|         matches = re.finditer( | ||||
|             r'<p\s+class="talk-title[^"]*"><a\s+href="(?P<talk_url>/talks/[^"]+\.html)">[^<]*</a></p>', | ||||
|             webpage) | ||||
|  | ||||
|         playlist_title = self._html_search_regex(r'div class="headline">\s*?<h1>\s*?<span>(.*?)</span>', | ||||
|                                                  webpage, 'playlist title') | ||||
|  | ||||
|         playlist_entries = [] | ||||
|         for m_video, m_name in zip(m_videos,m_names): | ||||
|             talk_url='http://www.ted.com%s' % m_name.group('talk_url') | ||||
|             playlist_entries.append(self.url_result(talk_url, 'TED')) | ||||
|         return self.playlist_result(playlist_entries, playlist_id = playlist_id, playlist_title = playlist_title) | ||||
|         playlist_entries = [ | ||||
|             self.url_result(u'http://www.ted.com' + m.group('talk_url'), 'TED') | ||||
|             for m in matches | ||||
|         ] | ||||
|         return self.playlist_result( | ||||
|             playlist_entries, playlist_id=playlist_id, playlist_title=playlist_title) | ||||
|  | ||||
|     def _talk_info(self, url, video_id=0): | ||||
|         """Return the video for the talk in the url""" | ||||
| @@ -85,7 +83,7 @@ class TEDIE(SubtitlesInfoExtractor): | ||||
|             'ext': 'mp4', | ||||
|             'url': stream['file'], | ||||
|             'format': stream['id'] | ||||
|             } for stream in info['htmlStreams']] | ||||
|         } for stream in info['htmlStreams']] | ||||
|  | ||||
|         video_id = info['id'] | ||||
|  | ||||
| @@ -95,7 +93,7 @@ class TEDIE(SubtitlesInfoExtractor): | ||||
|             self._list_available_subtitles(video_id, webpage) | ||||
|             return | ||||
|  | ||||
|         info = { | ||||
|         return { | ||||
|             'id': video_id, | ||||
|             'title': title, | ||||
|             'thumbnail': thumbnail, | ||||
| @@ -104,11 +102,6 @@ class TEDIE(SubtitlesInfoExtractor): | ||||
|             'formats': formats, | ||||
|         } | ||||
|  | ||||
|         # TODO: Remove when #980 has been merged | ||||
|         info.update(info['formats'][-1]) | ||||
|  | ||||
|         return info | ||||
|  | ||||
|     def _get_available_subtitles(self, video_id, webpage): | ||||
|         try: | ||||
|             options = self._search_regex(r'(?:<select name="subtitles_language_select" id="subtitles_language_select">)(.*?)(?:</select>)', webpage, 'subtitles_language_select', flags=re.DOTALL) | ||||
| @@ -119,6 +112,6 @@ class TEDIE(SubtitlesInfoExtractor): | ||||
|                     url = 'http://www.ted.com/talks/subtitles/id/%s/lang/%s/format/srt' % (video_id, l) | ||||
|                     sub_lang_list[l] = url | ||||
|                 return sub_lang_list | ||||
|         except RegexNotFoundError as err: | ||||
|         except RegexNotFoundError: | ||||
|             self._downloader.report_warning(u'video doesn\'t have subtitles') | ||||
|         return {} | ||||
|   | ||||
							
								
								
									
										74
									
								
								youtube_dl/extractor/toutv.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										74
									
								
								youtube_dl/extractor/toutv.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,74 @@ | ||||
| # coding: utf-8 | ||||
| import re | ||||
| import xml.etree.ElementTree | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     ExtractorError, | ||||
|     unified_strdate, | ||||
| ) | ||||
|  | ||||
|  | ||||
| class TouTvIE(InfoExtractor): | ||||
|     IE_NAME = u'tou.tv' | ||||
|     _VALID_URL = r'https?://www\.tou\.tv/(?P<id>[a-zA-Z0-9_-]+(?:/(?P<episode>S[0-9]+E[0-9]+)))' | ||||
|  | ||||
|     _TEST = { | ||||
|         u'url': u'http://www.tou.tv/30-vies/S04E41', | ||||
|         u'file': u'30-vies_S04E41.mp4', | ||||
|         u'info_dict': { | ||||
|             u'title': u'30 vies Saison 4 / Épisode 41', | ||||
|             u'description': u'md5:da363002db82ccbe4dafeb9cab039b09', | ||||
|             u'age_limit': 8, | ||||
|             u'uploader': u'Groupe des Nouveaux Médias', | ||||
|             u'duration': 1296, | ||||
|             u'upload_date': u'20131118', | ||||
|             u'thumbnail': u'http://static.tou.tv/medias/images/2013-11-18_19_00_00_30VIES_0341_01_L.jpeg', | ||||
|         }, | ||||
|         u'params': { | ||||
|             u'skip_download': True,  # Requires rtmpdump | ||||
|         }, | ||||
|         u'skip': 'Only available in Canada' | ||||
|     } | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         video_id = mobj.group('id') | ||||
|         webpage = self._download_webpage(url, video_id) | ||||
|  | ||||
|         mediaId = self._search_regex( | ||||
|             r'"idMedia":\s*"([^"]+)"', webpage, u'media ID') | ||||
|  | ||||
|         streams_url = u'http://release.theplatform.com/content.select?pid=' + mediaId | ||||
|         streams_webpage = self._download_webpage( | ||||
|             streams_url, video_id, note=u'Downloading stream list') | ||||
|  | ||||
|         streams_doc = xml.etree.ElementTree.fromstring( | ||||
|             streams_webpage.encode('utf-8')) | ||||
|         video_url = next(n.text | ||||
|                          for n in streams_doc.findall('.//choice/url') | ||||
|                          if u'//ad.doubleclick' not in n.text) | ||||
|         if video_url.endswith('/Unavailable.flv'): | ||||
|             raise ExtractorError( | ||||
|                 u'Access to this video is blocked from outside of Canada', | ||||
|                 expected=True) | ||||
|  | ||||
|         duration_str = self._html_search_meta( | ||||
|             'video:duration', webpage, u'duration') | ||||
|         duration = int(duration_str) if duration_str else None | ||||
|         upload_date_str = self._html_search_meta( | ||||
|             'video:release_date', webpage, u'upload date') | ||||
|         upload_date = unified_strdate(upload_date_str) if upload_date_str else None | ||||
|  | ||||
|         return { | ||||
|             'id': video_id, | ||||
|             'title': self._og_search_title(webpage), | ||||
|             'url': video_url, | ||||
|             'description': self._og_search_description(webpage), | ||||
|             'uploader': self._dc_search_uploader(webpage), | ||||
|             'thumbnail': self._og_search_thumbnail(webpage), | ||||
|             'age_limit': self._media_rating_search(webpage), | ||||
|             'duration': duration, | ||||
|             'upload_date': upload_date, | ||||
|             'ext': 'mp4', | ||||
|         } | ||||
| @@ -5,8 +5,6 @@ from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     compat_urllib_parse_urlparse, | ||||
|     compat_urllib_request, | ||||
|     compat_urllib_parse, | ||||
|     unescapeHTML, | ||||
| ) | ||||
| from ..aes import ( | ||||
|     aes_decrypt_text | ||||
|   | ||||
| @@ -13,9 +13,10 @@ class TvpIE(InfoExtractor): | ||||
|         u'md5': u'148408967a6a468953c0a75cbdaf0d7a', | ||||
|         u'file': u'12878238.wmv', | ||||
|         u'info_dict': { | ||||
|             u'title': u'31.10.2013', | ||||
|             u'description': u'31.10.2013', | ||||
|             u'title': u'31.10.2013 - Odcinek 2', | ||||
|             u'description': u'31.10.2013 - Odcinek 2', | ||||
|         }, | ||||
|         u'skip': u'Download has to use same server IP as extraction. Therefore, a good (load-balancing) DNS resolver will make the download fail.' | ||||
|     } | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|   | ||||
| @@ -78,12 +78,13 @@ class VevoIE(InfoExtractor): | ||||
|                 continue | ||||
|  | ||||
|             format_url = self._SMIL_BASE_URL + m.group('path') | ||||
|             format_note = ('%(vcodec)s@%(vbr)4sk, %(acodec)s@%(abr)3sk' % | ||||
|                            m.groupdict()) | ||||
|             formats.append({ | ||||
|                 'url': format_url, | ||||
|                 'format_id': u'SMIL_' + m.group('cbr'), | ||||
|                 'format_note': format_note, | ||||
|                 'vcodec': m.group('vcodec'), | ||||
|                 'acodec': m.group('acodec'), | ||||
|                 'vbr': int(m.group('vbr')), | ||||
|                 'abr': int(m.group('abr')), | ||||
|                 'ext': m.group('ext'), | ||||
|                 'width': int(m.group('width')), | ||||
|                 'height': int(m.group('height')), | ||||
|   | ||||
| @@ -9,12 +9,10 @@ class VideoPremiumIE(InfoExtractor): | ||||
|     _TEST = { | ||||
|         u'url': u'http://videopremium.tv/4w7oadjsf156', | ||||
|         u'file': u'4w7oadjsf156.f4v', | ||||
|         u'md5': u'e51e4a266aab7531c6ac06f4ffee3b0d', | ||||
|         u'info_dict': { | ||||
|             u"title": u"youtube-dl_test_video____a_________-BaW_jenozKc.mp4.mp4" | ||||
|         }, | ||||
|         u'params': { | ||||
|             u'skip_download': True, | ||||
|         }, | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
| @@ -24,12 +22,16 @@ class VideoPremiumIE(InfoExtractor): | ||||
|         webpage_url = 'http://videopremium.tv/' + video_id | ||||
|         webpage = self._download_webpage(webpage_url, video_id) | ||||
|  | ||||
|         self.report_extraction(video_id) | ||||
|         if re.match(r"^<html><head><script[^>]*>window.location\s*=", webpage): | ||||
|             # Download again, we need a cookie | ||||
|             webpage = self._download_webpage( | ||||
|                 webpage_url, video_id, | ||||
|                 note=u'Downloading webpage again (with cookie)') | ||||
|  | ||||
|         video_title = self._html_search_regex(r'<h2(?:.*?)>\s*(.+?)\s*<', | ||||
|             webpage, u'video title') | ||||
|         video_title = self._html_search_regex( | ||||
|             r'<h2(?:.*?)>\s*(.+?)\s*<', webpage, u'video title') | ||||
|  | ||||
|         return [{ | ||||
|         return { | ||||
|             'id':          video_id, | ||||
|             'url':         "rtmp://e%d.md.iplay.md/play" % random.randint(1, 16), | ||||
|             'play_path':   "mp4:%s.f4v" % video_id, | ||||
| @@ -37,4 +39,4 @@ class VideoPremiumIE(InfoExtractor): | ||||
|             'player_url':  "http://videopremium.tv/uplayer/uppod.swf", | ||||
|             'ext':         'f4v', | ||||
|             'title':       video_title, | ||||
|         }] | ||||
|         } | ||||
							
								
								
									
										101
									
								
								youtube_dl/extractor/viki.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										101
									
								
								youtube_dl/extractor/viki.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,101 @@ | ||||
| import re | ||||
|  | ||||
| from ..utils import ( | ||||
|     ExtractorError, | ||||
|     unescapeHTML, | ||||
|     unified_strdate, | ||||
| ) | ||||
| from .subtitles import SubtitlesInfoExtractor | ||||
|  | ||||
|  | ||||
| class VikiIE(SubtitlesInfoExtractor): | ||||
|     IE_NAME = u'viki' | ||||
|  | ||||
|     _VALID_URL = r'^https?://(?:www\.)?viki\.com/videos/(?P<id>[0-9]+v)' | ||||
|     _TEST = { | ||||
|         u'url': u'http://www.viki.com/videos/1023585v-heirs-episode-14', | ||||
|         u'file': u'1023585v.mp4', | ||||
|         u'md5': u'a21454021c2646f5433514177e2caa5f', | ||||
|         u'info_dict': { | ||||
|             u'title': u'Heirs Episode 14', | ||||
|             u'uploader': u'SBS', | ||||
|             u'description': u'md5:c4b17b9626dd4b143dcc4d855ba3474e', | ||||
|             u'upload_date': u'20131121', | ||||
|             u'age_limit': 13, | ||||
|         }, | ||||
|         u'skip': u'Blocked in the US', | ||||
|     } | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         video_id = mobj.group(1) | ||||
|  | ||||
|         webpage = self._download_webpage(url, video_id) | ||||
|         title = self._og_search_title(webpage) | ||||
|         description = self._og_search_description(webpage) | ||||
|         thumbnail = self._og_search_thumbnail(webpage) | ||||
|  | ||||
|         uploader_m = re.search( | ||||
|             r'<strong>Broadcast Network: </strong>\s*([^<]*)<', webpage) | ||||
|         if uploader_m is None: | ||||
|             uploader = None | ||||
|         else: | ||||
|             uploader = uploader_m.group(1).strip() | ||||
|  | ||||
|         rating_str = self._html_search_regex( | ||||
|             r'<strong>Rating: </strong>\s*([^<]*)<', webpage, | ||||
|             u'rating information', default='').strip() | ||||
|         RATINGS = { | ||||
|             'G': 0, | ||||
|             'PG': 10, | ||||
|             'PG-13': 13, | ||||
|             'R': 16, | ||||
|             'NC': 18, | ||||
|         } | ||||
|         age_limit = RATINGS.get(rating_str) | ||||
|  | ||||
|         info_url = 'http://www.viki.com/player5_fragment/%s?action=show&controller=videos' % video_id | ||||
|         info_webpage = self._download_webpage( | ||||
|             info_url, video_id, note=u'Downloading info page') | ||||
|         if re.match(r'\s*<div\s+class="video-error', info_webpage): | ||||
|             raise ExtractorError( | ||||
|                 u'Video %s is blocked from your location.' % video_id, | ||||
|                 expected=True) | ||||
|         video_url = self._html_search_regex( | ||||
|             r'<source[^>]+src="([^"]+)"', info_webpage, u'video URL') | ||||
|  | ||||
|         upload_date_str = self._html_search_regex( | ||||
|             r'"created_at":"([^"]+)"', info_webpage, u'upload date') | ||||
|         upload_date = ( | ||||
|             unified_strdate(upload_date_str) | ||||
|             if upload_date_str is not None | ||||
|             else None | ||||
|         ) | ||||
|  | ||||
|         # subtitles | ||||
|         video_subtitles = self.extract_subtitles(video_id, info_webpage) | ||||
|         if self._downloader.params.get('listsubtitles', False): | ||||
|             self._list_available_subtitles(video_id, info_webpage) | ||||
|             return | ||||
|  | ||||
|         return { | ||||
|             'id': video_id, | ||||
|             'title': title, | ||||
|             'url': video_url, | ||||
|             'description': description, | ||||
|             'thumbnail': thumbnail, | ||||
|             'age_limit': age_limit, | ||||
|             'uploader': uploader, | ||||
|             'subtitles': video_subtitles, | ||||
|             'upload_date': upload_date, | ||||
|         } | ||||
|  | ||||
|     def _get_available_subtitles(self, video_id, info_webpage): | ||||
|         res = {} | ||||
|         for sturl_html in re.findall(r'<track src="([^"]+)"/>', info_webpage): | ||||
|             sturl = unescapeHTML(sturl_html) | ||||
|             m = re.search(r'/(?P<lang>[a-z]+)\.vtt', sturl) | ||||
|             if not m: | ||||
|                 continue | ||||
|             res[m.group('lang')] = sturl | ||||
|         return res | ||||
| @@ -151,7 +151,7 @@ class VimeoIE(InfoExtractor): | ||||
|                 config = json.loads(config_json) | ||||
|             except RegexNotFoundError: | ||||
|                 # For pro videos or player.vimeo.com urls | ||||
|                 config = self._search_regex([r' = {config:({.+?}),assets:', r'c=({.+?);'], | ||||
|                 config = self._search_regex([r' = {config:({.+?}),assets:', r'(?:c|b)=({.+?});'], | ||||
|                     webpage, u'info section', flags=re.DOTALL) | ||||
|                 config = json.loads(config) | ||||
|         except Exception as e: | ||||
|   | ||||
| @@ -5,7 +5,6 @@ from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     compat_urllib_parse_urlparse, | ||||
|     compat_urllib_request, | ||||
|     compat_urllib_parse, | ||||
| ) | ||||
|  | ||||
| class XTubeIE(InfoExtractor): | ||||
|   | ||||
| @@ -139,10 +139,10 @@ class YoutubeBaseInfoExtractor(InfoExtractor): | ||||
|  | ||||
| class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): | ||||
|     IE_DESC = u'YouTube.com' | ||||
|     _VALID_URL = r"""^ | ||||
|     _VALID_URL = r"""(?x)^ | ||||
|                      ( | ||||
|                          (?:https?://)?                                       # http(s):// (optional) | ||||
|                          (?:(?:(?:(?:\w+\.)?youtube(?:-nocookie)?\.com/| | ||||
|                          (?:https?://|//)?                                    # http(s):// or protocol-independent URL (optional) | ||||
|                          (?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/| | ||||
|                             tube\.majestyc\.net/| | ||||
|                             youtube\.googleapis\.com/)                        # the various hostnames, with wildcard subdomains | ||||
|                          (?:.*?\#/)?                                          # handle anchor (#/) redirect urls | ||||
| @@ -248,21 +248,21 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): | ||||
|         '248': 'webm', | ||||
|     } | ||||
|     _video_dimensions = { | ||||
|         '5': '240x400', | ||||
|         '5': '400x240', | ||||
|         '6': '???', | ||||
|         '13': '???', | ||||
|         '17': '144x176', | ||||
|         '18': '360x640', | ||||
|         '22': '720x1280', | ||||
|         '34': '360x640', | ||||
|         '35': '480x854', | ||||
|         '36': '240x320', | ||||
|         '37': '1080x1920', | ||||
|         '38': '3072x4096', | ||||
|         '43': '360x640', | ||||
|         '44': '480x854', | ||||
|         '45': '720x1280', | ||||
|         '46': '1080x1920', | ||||
|         '17': '176x144', | ||||
|         '18': '640x360', | ||||
|         '22': '1280x720', | ||||
|         '34': '640x360', | ||||
|         '35': '854x480', | ||||
|         '36': '320x240', | ||||
|         '37': '1920x1080', | ||||
|         '38': '4096x3072', | ||||
|         '43': '640x360', | ||||
|         '44': '854x480', | ||||
|         '45': '1280x720', | ||||
|         '46': '1920x1080', | ||||
|         '82': '360p', | ||||
|         '83': '480p', | ||||
|         '84': '720p', | ||||
| @@ -363,6 +363,18 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): | ||||
|                 u"uploader_id": u"justintimberlakeVEVO" | ||||
|             } | ||||
|         }, | ||||
|         { | ||||
|             u"url":  u"//www.YouTube.com/watch?v=yZIXLfi8CZQ", | ||||
|             u"file":  u"yZIXLfi8CZQ.mp4", | ||||
|             u"note": u"Embed-only video (#1746)", | ||||
|             u"info_dict": { | ||||
|                 u"upload_date": u"20120608", | ||||
|                 u"title": u"Principal Sexually Assaults A Teacher - Episode 117 - 8th June 2012", | ||||
|                 u"description": u"md5:09b78bd971f1e3e289601dfba15ca4f7", | ||||
|                 u"uploader": u"SET India", | ||||
|                 u"uploader_id": u"setindia" | ||||
|             } | ||||
|         }, | ||||
|     ] | ||||
|  | ||||
|  | ||||
| @@ -370,7 +382,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): | ||||
|     def suitable(cls, url): | ||||
|         """Receives a URL and returns True if suitable for this IE.""" | ||||
|         if YoutubePlaylistIE.suitable(url): return False | ||||
|         return re.match(cls._VALID_URL, url, re.VERBOSE) is not None | ||||
|         return re.match(cls._VALID_URL, url) is not None | ||||
|  | ||||
|     def __init__(self, *args, **kwargs): | ||||
|         super(YoutubeIE, self).__init__(*args, **kwargs) | ||||
| @@ -1019,6 +1031,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): | ||||
|         """Turn the encrypted s field into a working signature""" | ||||
|  | ||||
|         if player_url is not None: | ||||
|             if player_url.startswith(u'//'): | ||||
|                 player_url = u'https:' + player_url | ||||
|             try: | ||||
|                 player_id = (player_url, len(s)) | ||||
|                 if player_id not in self._player_cache: | ||||
| @@ -1098,7 +1112,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): | ||||
|             params = compat_urllib_parse.urlencode({ | ||||
|                 'lang': lang, | ||||
|                 'v': video_id, | ||||
|                 'fmt': self._downloader.params.get('subtitlesformat'), | ||||
|                 'fmt': self._downloader.params.get('subtitlesformat', 'srt'), | ||||
|                 'name': l[0].encode('utf-8'), | ||||
|             }) | ||||
|             url = u'http://www.youtube.com/api/timedtext?' + params | ||||
| @@ -1111,7 +1125,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): | ||||
|     def _get_available_automatic_caption(self, video_id, webpage): | ||||
|         """We need the webpage for getting the captions url, pass it as an | ||||
|            argument to speed up the process.""" | ||||
|         sub_format = self._downloader.params.get('subtitlesformat') | ||||
|         sub_format = self._downloader.params.get('subtitlesformat', 'srt') | ||||
|         self.to_screen(u'%s: Looking for automatic captions' % video_id) | ||||
|         mobj = re.search(r';ytplayer.config = ({.*?});', webpage) | ||||
|         err_msg = u'Couldn\'t find automatic captions for %s' % video_id | ||||
| @@ -1270,7 +1284,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): | ||||
|             # We simulate the access to the video from www.youtube.com/v/{video_id} | ||||
|             # this can be viewed without login into Youtube | ||||
|             data = compat_urllib_parse.urlencode({'video_id': video_id, | ||||
|                                                   'el': 'embedded', | ||||
|                                                   'el': 'player_embedded', | ||||
|                                                   'gl': 'US', | ||||
|                                                   'hl': 'en', | ||||
|                                                   'eurl': 'https://youtube.googleapis.com/v/' + video_id, | ||||
| @@ -1299,6 +1313,11 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): | ||||
|             else: | ||||
|                 raise ExtractorError(u'"token" parameter not in video info for unknown reason') | ||||
|  | ||||
|         if 'view_count' in video_info: | ||||
|             view_count = int(video_info['view_count'][0]) | ||||
|         else: | ||||
|             view_count = None | ||||
|  | ||||
|         # Check for "rental" videos | ||||
|         if 'ypc_video_rental_bar_text' in video_info and 'author' not in video_info: | ||||
|             raise ExtractorError(u'"rental" videos not supported') | ||||
| @@ -1487,10 +1506,11 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): | ||||
|                 'age_limit':    18 if age_gate else 0, | ||||
|                 'annotations':  video_annotations, | ||||
|                 'webpage_url': 'https://www.youtube.com/watch?v=%s' % video_id, | ||||
|                 'view_count': view_count, | ||||
|             }) | ||||
|         return results | ||||
|  | ||||
| class YoutubePlaylistIE(InfoExtractor): | ||||
| class YoutubePlaylistIE(YoutubeBaseInfoExtractor): | ||||
|     IE_DESC = u'YouTube.com playlists' | ||||
|     _VALID_URL = r"""(?: | ||||
|                         (?:https?://)? | ||||
| @@ -1506,8 +1526,9 @@ class YoutubePlaylistIE(InfoExtractor): | ||||
|                      | | ||||
|                         ((?:PL|EC|UU|FL)[0-9A-Za-z-_]{10,}) | ||||
|                      )""" | ||||
|     _TEMPLATE_URL = 'https://gdata.youtube.com/feeds/api/playlists/%s?max-results=%i&start-index=%i&v=2&alt=json&safeSearch=none' | ||||
|     _MAX_RESULTS = 50 | ||||
|     _TEMPLATE_URL = 'https://www.youtube.com/playlist?list=%s&page=%s' | ||||
|     _MORE_PAGES_INDICATOR = r'data-link-type="next"' | ||||
|     _VIDEO_RE = r'href="/watch\?v=([0-9A-Za-z_-]{11})&' | ||||
|     IE_NAME = u'youtube:playlist' | ||||
|  | ||||
|     @classmethod | ||||
| @@ -1515,6 +1536,9 @@ class YoutubePlaylistIE(InfoExtractor): | ||||
|         """Receives a URL and returns True if suitable for this IE.""" | ||||
|         return re.match(cls._VALID_URL, url, re.VERBOSE) is not None | ||||
|  | ||||
|     def _real_initialize(self): | ||||
|         self._login() | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         # Extract playlist id | ||||
|         mobj = re.match(self._VALID_URL, url, re.VERBOSE) | ||||
| @@ -1528,45 +1552,28 @@ class YoutubePlaylistIE(InfoExtractor): | ||||
|             video_id = query_dict['v'][0] | ||||
|             if self._downloader.params.get('noplaylist'): | ||||
|                 self.to_screen(u'Downloading just video %s because of --no-playlist' % video_id) | ||||
|                 return self.url_result('https://www.youtube.com/watch?v=' + video_id, 'Youtube') | ||||
|                 return self.url_result(video_id, 'Youtube', video_id=video_id) | ||||
|             else: | ||||
|                 self.to_screen(u'Downloading playlist PL%s - add --no-playlist to just download video %s' % (playlist_id, video_id)) | ||||
|  | ||||
|         # Download playlist videos from API | ||||
|         videos = [] | ||||
|         # Extract the video ids from the playlist pages | ||||
|         ids = [] | ||||
|  | ||||
|         for page_num in itertools.count(1): | ||||
|             start_index = self._MAX_RESULTS * (page_num - 1) + 1 | ||||
|             if start_index >= 1000: | ||||
|                 self._downloader.report_warning(u'Max number of results reached') | ||||
|                 break | ||||
|             url = self._TEMPLATE_URL % (playlist_id, self._MAX_RESULTS, start_index) | ||||
|             url = self._TEMPLATE_URL % (playlist_id, page_num) | ||||
|             page = self._download_webpage(url, playlist_id, u'Downloading page #%s' % page_num) | ||||
|             # The ids are duplicated | ||||
|             new_ids = orderedSet(re.findall(self._VIDEO_RE, page)) | ||||
|             ids.extend(new_ids) | ||||
|  | ||||
|             try: | ||||
|                 response = json.loads(page) | ||||
|             except ValueError as err: | ||||
|                 raise ExtractorError(u'Invalid JSON in API response: ' + compat_str(err)) | ||||
|  | ||||
|             if 'feed' not in response: | ||||
|                 raise ExtractorError(u'Got a malformed response from YouTube API') | ||||
|             playlist_title = response['feed']['title']['$t'] | ||||
|             if 'entry' not in response['feed']: | ||||
|                 # Number of videos is a multiple of self._MAX_RESULTS | ||||
|             if re.search(self._MORE_PAGES_INDICATOR, page) is None: | ||||
|                 break | ||||
|  | ||||
|             for entry in response['feed']['entry']: | ||||
|                 index = entry['yt$position']['$t'] | ||||
|                 if 'media$group' in entry and 'yt$videoid' in entry['media$group']: | ||||
|                     videos.append(( | ||||
|                         index, | ||||
|                         'https://www.youtube.com/watch?v=' + entry['media$group']['yt$videoid']['$t'] | ||||
|                     )) | ||||
|         playlist_title = self._og_search_title(page) | ||||
|  | ||||
|         videos = [v[1] for v in sorted(videos)] | ||||
|  | ||||
|         url_results = [self.url_result(vurl, 'Youtube') for vurl in videos] | ||||
|         return [self.playlist_result(url_results, playlist_id, playlist_title)] | ||||
|         url_results = [self.url_result(vid_id, 'Youtube', video_id=vid_id) | ||||
|                        for vid_id in ids] | ||||
|         return self.playlist_result(url_results, playlist_id, playlist_title) | ||||
|  | ||||
|  | ||||
| class YoutubeChannelIE(InfoExtractor): | ||||
| @@ -1592,26 +1599,37 @@ class YoutubeChannelIE(InfoExtractor): | ||||
|         # Download channel page | ||||
|         channel_id = mobj.group(1) | ||||
|         video_ids = [] | ||||
|         url = 'https://www.youtube.com/channel/%s/videos' % channel_id | ||||
|         channel_page = self._download_webpage(url, channel_id) | ||||
|         if re.search(r'channel-header-autogenerated-label', channel_page) is not None: | ||||
|             autogenerated = True | ||||
|         else: | ||||
|             autogenerated = False | ||||
|  | ||||
|         # Download all channel pages using the json-based channel_ajax query | ||||
|         for pagenum in itertools.count(1): | ||||
|             url = self._MORE_PAGES_URL % (pagenum, channel_id) | ||||
|             page = self._download_webpage(url, channel_id, | ||||
|                                           u'Downloading page #%s' % pagenum) | ||||
|  | ||||
|             page = json.loads(page) | ||||
|  | ||||
|             ids_in_page = self.extract_videos_from_page(page['content_html']) | ||||
|             video_ids.extend(ids_in_page) | ||||
|  | ||||
|             if self._MORE_PAGES_INDICATOR not in page['load_more_widget_html']: | ||||
|                 break | ||||
|         if autogenerated: | ||||
|             # The videos are contained in a single page | ||||
|             # the ajax pages can't be used, they are empty | ||||
|             video_ids = self.extract_videos_from_page(channel_page) | ||||
|         else: | ||||
|             # Download all channel pages using the json-based channel_ajax query | ||||
|             for pagenum in itertools.count(1): | ||||
|                 url = self._MORE_PAGES_URL % (pagenum, channel_id) | ||||
|                 page = self._download_webpage(url, channel_id, | ||||
|                                               u'Downloading page #%s' % pagenum) | ||||
|      | ||||
|                 page = json.loads(page) | ||||
|      | ||||
|                 ids_in_page = self.extract_videos_from_page(page['content_html']) | ||||
|                 video_ids.extend(ids_in_page) | ||||
|      | ||||
|                 if self._MORE_PAGES_INDICATOR not in page['load_more_widget_html']: | ||||
|                     break | ||||
|  | ||||
|         self._downloader.to_screen(u'[youtube] Channel %s: Found %i videos' % (channel_id, len(video_ids))) | ||||
|  | ||||
|         urls = ['http://www.youtube.com/watch?v=%s' % id for id in video_ids] | ||||
|         url_entries = [self.url_result(eurl, 'Youtube') for eurl in urls] | ||||
|         return [self.playlist_result(url_entries, channel_id)] | ||||
|         url_entries = [self.url_result(video_id, 'Youtube', video_id=video_id) | ||||
|                        for video_id in video_ids] | ||||
|         return self.playlist_result(url_entries, channel_id) | ||||
|  | ||||
|  | ||||
| class YoutubeUserIE(InfoExtractor): | ||||
| @@ -1675,9 +1693,11 @@ class YoutubeUserIE(InfoExtractor): | ||||
|             if len(ids_in_page) < self._GDATA_PAGE_SIZE: | ||||
|                 break | ||||
|  | ||||
|         urls = ['http://www.youtube.com/watch?v=%s' % video_id for video_id in video_ids] | ||||
|         url_results = [self.url_result(rurl, 'Youtube') for rurl in urls] | ||||
|         return [self.playlist_result(url_results, playlist_title = username)] | ||||
|         url_results = [ | ||||
|             self.url_result(video_id, 'Youtube', video_id=video_id) | ||||
|             for video_id in video_ids] | ||||
|         return self.playlist_result(url_results, playlist_title=username) | ||||
|  | ||||
|  | ||||
| class YoutubeSearchIE(SearchInfoExtractor): | ||||
|     IE_DESC = u'YouTube.com searches' | ||||
| @@ -1718,7 +1738,8 @@ class YoutubeSearchIE(SearchInfoExtractor): | ||||
|  | ||||
|         if len(video_ids) > n: | ||||
|             video_ids = video_ids[:n] | ||||
|         videos = [self.url_result('http://www.youtube.com/watch?v=%s' % id, 'Youtube') for id in video_ids] | ||||
|         videos = [self.url_result(video_id, 'Youtube', video_id=video_id) | ||||
|                   for video_id in video_ids] | ||||
|         return self.playlist_result(videos, query) | ||||
|  | ||||
| class YoutubeSearchDateIE(YoutubeSearchIE): | ||||
| @@ -1778,7 +1799,9 @@ class YoutubeFeedsInfoExtractor(YoutubeBaseInfoExtractor): | ||||
|             feed_html = info['feed_html'] | ||||
|             m_ids = re.finditer(r'"/watch\?v=(.*?)["&]', feed_html) | ||||
|             ids = orderedSet(m.group(1) for m in m_ids) | ||||
|             feed_entries.extend(self.url_result(id, 'Youtube') for id in ids) | ||||
|             feed_entries.extend( | ||||
|                 self.url_result(video_id, 'Youtube', video_id=video_id) | ||||
|                 for video_id in ids) | ||||
|             if info['paging'] is None: | ||||
|                 break | ||||
|         return self.playlist_result(feed_entries, playlist_title=self._PLAYLIST_TITLE) | ||||
| @@ -1803,6 +1826,20 @@ class YoutubeWatchLaterIE(YoutubeFeedsInfoExtractor): | ||||
|     _PAGING_STEP = 100 | ||||
|     _PERSONAL_FEED = True | ||||
|  | ||||
| class YoutubeHistoryIE(YoutubeFeedsInfoExtractor): | ||||
|     IE_DESC = u'Youtube watch history, "ythistory" keyword (requires authentication)' | ||||
|     _VALID_URL = u'https?://www\.youtube\.com/feed/history|:ythistory' | ||||
|     _FEED_NAME = 'history' | ||||
|     _PERSONAL_FEED = True | ||||
|     _PLAYLIST_TITLE = u'Youtube Watch History' | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         webpage = self._download_webpage('https://www.youtube.com/feed/history', u'History') | ||||
|         data_paging = self._search_regex(r'data-paging="(\d+)"', webpage, u'data-paging') | ||||
|         # The step is actually a ridiculously big number (like 1374343569725646) | ||||
|         self._PAGING_STEP = int(data_paging) | ||||
|         return super(YoutubeHistoryIE, self)._real_extract(url) | ||||
|  | ||||
| class YoutubeFavouritesIE(YoutubeBaseInfoExtractor): | ||||
|     IE_NAME = u'youtube:favorites' | ||||
|     IE_DESC = u'YouTube.com favourite videos, "ytfav" keyword (requires authentication)' | ||||
|   | ||||
| @@ -1,75 +1,111 @@ | ||||
| import operator | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     determine_ext, | ||||
|     ExtractorError, | ||||
|     parse_xml_doc, | ||||
|     unified_strdate, | ||||
| ) | ||||
|  | ||||
|  | ||||
| class ZDFIE(InfoExtractor): | ||||
|     _VALID_URL = r'^http://www\.zdf\.de\/ZDFmediathek(?P<hash>#)?\/(.*beitrag\/video\/)(?P<video_id>[^/\?]+)(?:\?.*)?' | ||||
|     _MEDIA_STREAM = r'<a href="(?P<video_url>.+(?P<media_type>.streaming).+/zdf/(?P<quality>[^\/]+)/[^"]*)".+class="play".+>' | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         if mobj is None: | ||||
|             raise ExtractorError(u'Invalid URL: %s' % url) | ||||
|         video_id = mobj.group('video_id') | ||||
|  | ||||
|         if mobj.group('hash'): | ||||
|             url = url.replace(u'#', u'', 1) | ||||
|         xml_url = u'http://www.zdf.de/ZDFmediathek/xmlservice/web/beitragsDetails?ak=web&id=%s' % video_id | ||||
|         info_xml = self._download_webpage( | ||||
|             xml_url, video_id, note=u'Downloading video info') | ||||
|         doc = parse_xml_doc(info_xml) | ||||
|  | ||||
|         html = self._download_webpage(url, video_id) | ||||
|         streams = [m.groupdict() for m in re.finditer(self._MEDIA_STREAM, html)] | ||||
|         if streams is None: | ||||
|             raise ExtractorError(u'No media url found.') | ||||
|         title = doc.find('.//information/title').text | ||||
|         description = doc.find('.//information/detail').text | ||||
|         uploader_node = doc.find('.//details/originChannelTitle') | ||||
|         uploader = None if uploader_node is None else uploader_node.text | ||||
|         duration_str = doc.find('.//details/length').text | ||||
|         duration_m = re.match(r'''(?x)^ | ||||
|             (?P<hours>[0-9]{2}) | ||||
|             :(?P<minutes>[0-9]{2}) | ||||
|             :(?P<seconds>[0-9]{2}) | ||||
|             (?:\.(?P<ms>[0-9]+)?) | ||||
|             ''', duration_str) | ||||
|         duration = ( | ||||
|             ( | ||||
|                 (int(duration_m.group('hours')) * 60 * 60) + | ||||
|                 (int(duration_m.group('minutes')) * 60) + | ||||
|                 int(duration_m.group('seconds')) | ||||
|             ) | ||||
|             if duration_m | ||||
|             else None | ||||
|         ) | ||||
|         upload_date = unified_strdate(doc.find('.//details/airtime').text) | ||||
|  | ||||
|         # s['media_type'] == 'wstreaming' -> use 'Windows Media Player' and mms url | ||||
|         # s['media_type'] == 'hstreaming' -> use 'Quicktime' and rtsp url | ||||
|         # choose first/default media type and highest quality for now | ||||
|         def stream_pref(s): | ||||
|             TYPE_ORDER = ['ostreaming', 'hstreaming', 'wstreaming'] | ||||
|         def xml_to_format(fnode): | ||||
|             video_url = fnode.find('url').text | ||||
|             is_available = u'http://www.metafilegenerator' not in video_url | ||||
|  | ||||
|             format_id = fnode.attrib['basetype'] | ||||
|             format_m = re.match(r'''(?x) | ||||
|                 (?P<vcodec>[^_]+)_(?P<acodec>[^_]+)_(?P<container>[^_]+)_ | ||||
|                 (?P<proto>[^_]+)_(?P<index>[^_]+)_(?P<indexproto>[^_]+) | ||||
|             ''', format_id) | ||||
|  | ||||
|             ext = format_m.group('container') | ||||
|             is_supported = ext != 'f4f' | ||||
|  | ||||
|             PROTO_ORDER = ['http', 'rtmp', 'rtsp'] | ||||
|             try: | ||||
|                 type_pref = TYPE_ORDER.index(s['media_type']) | ||||
|                 proto_pref = -PROTO_ORDER.index(format_m.group('proto')) | ||||
|             except ValueError: | ||||
|                 type_pref = 999 | ||||
|                 proto_pref = 999 | ||||
|  | ||||
|             QUALITY_ORDER = ['veryhigh', '300'] | ||||
|             quality = fnode.find('./quality').text | ||||
|             QUALITY_ORDER = ['veryhigh', '300', 'high', 'med', 'low'] | ||||
|             try: | ||||
|                 quality_pref = QUALITY_ORDER.index(s['quality']) | ||||
|                 quality_pref = -QUALITY_ORDER.index(quality) | ||||
|             except ValueError: | ||||
|                 quality_pref = 999 | ||||
|  | ||||
|             return (type_pref, quality_pref) | ||||
|             abr = int(fnode.find('./audioBitrate').text) // 1000 | ||||
|             vbr = int(fnode.find('./videoBitrate').text) // 1000 | ||||
|             pref = (is_available, is_supported, | ||||
|                     proto_pref, quality_pref, vbr, abr) | ||||
|  | ||||
|         sorted_streams = sorted(streams, key=stream_pref) | ||||
|         if not sorted_streams: | ||||
|             raise ExtractorError(u'No stream found.') | ||||
|         stream = sorted_streams[0] | ||||
|             format_note = u'' | ||||
|             if not is_supported: | ||||
|                 format_note += u'(unsupported)' | ||||
|             if not format_note: | ||||
|                 format_note = None | ||||
|  | ||||
|         media_link = self._download_webpage( | ||||
|             stream['video_url'], | ||||
|             video_id, | ||||
|             u'Get stream URL') | ||||
|             return { | ||||
|                 'format_id': format_id + u'-' + quality, | ||||
|                 'url': video_url, | ||||
|                 'ext': ext, | ||||
|                 'acodec': format_m.group('acodec'), | ||||
|                 'vcodec': format_m.group('vcodec'), | ||||
|                 'abr': abr, | ||||
|                 'vbr': vbr, | ||||
|                 'width': int(fnode.find('./width').text), | ||||
|                 'height': int(fnode.find('./height').text), | ||||
|                 'filesize': int(fnode.find('./filesize').text), | ||||
|                 'format_note': format_note, | ||||
|                 '_pref': pref, | ||||
|                 '_available': is_available, | ||||
|             } | ||||
|  | ||||
|         MMS_STREAM = r'href="(?P<video_url>mms://[^"]*)"' | ||||
|         RTSP_STREAM = r'(?P<video_url>rtsp://[^"]*.mp4)' | ||||
|  | ||||
|         mobj = re.search(self._MEDIA_STREAM, media_link) | ||||
|         if mobj is None: | ||||
|             mobj = re.search(RTSP_STREAM, media_link) | ||||
|             if mobj is None: | ||||
|                 raise ExtractorError(u'Cannot extract mms:// or rtsp:// URL') | ||||
|         video_url = mobj.group('video_url') | ||||
|  | ||||
|         title = self._html_search_regex( | ||||
|             r'<h1(?: class="beitragHeadline")?>(.*?)</h1>', | ||||
|             html, u'title') | ||||
|         format_nodes = doc.findall('.//formitaeten/formitaet') | ||||
|         formats = sorted(filter(lambda f: f['_available'], | ||||
|                                 map(xml_to_format, format_nodes)), | ||||
|                          key=operator.itemgetter('_pref')) | ||||
|  | ||||
|         return { | ||||
|             'id': video_id, | ||||
|             'url': video_url, | ||||
|             'title': title, | ||||
|             'ext': determine_ext(video_url) | ||||
|             'formats': formats, | ||||
|             'description': description, | ||||
|             'uploader': uploader, | ||||
|             'duration': duration, | ||||
|             'upload_date': upload_date, | ||||
|         } | ||||
|   | ||||
| @@ -2,11 +2,15 @@ import io | ||||
| import json | ||||
| import traceback | ||||
| import hashlib | ||||
| import os | ||||
| import subprocess | ||||
| import sys | ||||
| from zipimport import zipimporter | ||||
|  | ||||
| from .utils import * | ||||
| from .utils import ( | ||||
|     compat_str, | ||||
|     compat_urllib_request, | ||||
| ) | ||||
| from .version import __version__ | ||||
|  | ||||
| def rsa_verify(message, signature, key): | ||||
| @@ -37,6 +41,7 @@ def rsa_verify(message, signature, key): | ||||
|     if signature != sha256(message).digest(): return False | ||||
|     return True | ||||
|  | ||||
|  | ||||
| def update_self(to_screen, verbose): | ||||
|     """Update the program file with the latest version from the repository""" | ||||
|  | ||||
| @@ -78,6 +83,13 @@ def update_self(to_screen, verbose): | ||||
|         return | ||||
|  | ||||
|     version_id = versions_info['latest'] | ||||
|  | ||||
|     def version_tuple(version_str): | ||||
|         return tuple(map(int, version_str.split('.'))) | ||||
|     if version_tuple(__version__) >= version_tuple(version_id): | ||||
|         to_screen(u'youtube-dl is up to date (%s)' % __version__) | ||||
|         return | ||||
|  | ||||
|     to_screen(u'Updating to version ' + version_id + '...') | ||||
|     version = versions_info['versions'][version_id] | ||||
|  | ||||
| @@ -105,7 +117,7 @@ def update_self(to_screen, verbose): | ||||
|             urlh = compat_urllib_request.urlopen(version['exe'][0]) | ||||
|             newcontent = urlh.read() | ||||
|             urlh.close() | ||||
|         except (IOError, OSError) as err: | ||||
|         except (IOError, OSError): | ||||
|             if verbose: to_screen(compat_str(traceback.format_exc())) | ||||
|             to_screen(u'ERROR: unable to download latest version') | ||||
|             return | ||||
| @@ -118,7 +130,7 @@ def update_self(to_screen, verbose): | ||||
|         try: | ||||
|             with open(exe + '.new', 'wb') as outf: | ||||
|                 outf.write(newcontent) | ||||
|         except (IOError, OSError) as err: | ||||
|         except (IOError, OSError): | ||||
|             if verbose: to_screen(compat_str(traceback.format_exc())) | ||||
|             to_screen(u'ERROR: unable to write the new version') | ||||
|             return | ||||
| @@ -137,7 +149,7 @@ start /b "" cmd /c del "%%~f0"&exit /b" | ||||
|  | ||||
|             subprocess.Popen([bat])  # Continues to run in the background | ||||
|             return  # Do not show premature success messages | ||||
|         except (IOError, OSError) as err: | ||||
|         except (IOError, OSError): | ||||
|             if verbose: to_screen(compat_str(traceback.format_exc())) | ||||
|             to_screen(u'ERROR: unable to overwrite current version') | ||||
|             return | ||||
| @@ -148,7 +160,7 @@ start /b "" cmd /c del "%%~f0"&exit /b" | ||||
|             urlh = compat_urllib_request.urlopen(version['bin'][0]) | ||||
|             newcontent = urlh.read() | ||||
|             urlh.close() | ||||
|         except (IOError, OSError) as err: | ||||
|         except (IOError, OSError): | ||||
|             if verbose: to_screen(compat_str(traceback.format_exc())) | ||||
|             to_screen(u'ERROR: unable to download latest version') | ||||
|             return | ||||
| @@ -161,7 +173,7 @@ start /b "" cmd /c del "%%~f0"&exit /b" | ||||
|         try: | ||||
|             with open(filename, 'wb') as outf: | ||||
|                 outf.write(newcontent) | ||||
|         except (IOError, OSError) as err: | ||||
|         except (IOError, OSError): | ||||
|             if verbose: to_screen(compat_str(traceback.format_exc())) | ||||
|             to_screen(u'ERROR: unable to overwrite current version') | ||||
|             return | ||||
|   | ||||
| @@ -8,13 +8,16 @@ import gzip | ||||
| import io | ||||
| import json | ||||
| import locale | ||||
| import math | ||||
| import os | ||||
| import pipes | ||||
| import platform | ||||
| import re | ||||
| import ssl | ||||
| import socket | ||||
| import sys | ||||
| import traceback | ||||
| import xml.etree.ElementTree | ||||
| import zlib | ||||
|  | ||||
| try: | ||||
| @@ -535,17 +538,34 @@ def formatSeconds(secs): | ||||
|     else: | ||||
|         return '%d' % secs | ||||
|  | ||||
| def make_HTTPS_handler(opts): | ||||
|     if sys.version_info < (3,2): | ||||
|         # Python's 2.x handler is very simplistic | ||||
|         return compat_urllib_request.HTTPSHandler() | ||||
| def make_HTTPS_handler(opts_no_check_certificate): | ||||
|     if sys.version_info < (3, 2): | ||||
|         import httplib | ||||
|  | ||||
|         class HTTPSConnectionV3(httplib.HTTPSConnection): | ||||
|             def __init__(self, *args, **kwargs): | ||||
|                 httplib.HTTPSConnection.__init__(self, *args, **kwargs) | ||||
|  | ||||
|             def connect(self): | ||||
|                 sock = socket.create_connection((self.host, self.port), self.timeout) | ||||
|                 if self._tunnel_host: | ||||
|                     self.sock = sock | ||||
|                     self._tunnel() | ||||
|                 try: | ||||
|                     self.sock = ssl.wrap_socket(sock, self.key_file, self.cert_file, ssl_version=ssl.PROTOCOL_SSLv3) | ||||
|                 except ssl.SSLError: | ||||
|                     self.sock = ssl.wrap_socket(sock, self.key_file, self.cert_file, ssl_version=ssl.PROTOCOL_SSLv23) | ||||
|  | ||||
|         class HTTPSHandlerV3(compat_urllib_request.HTTPSHandler): | ||||
|             def https_open(self, req): | ||||
|                 return self.do_open(HTTPSConnectionV3, req) | ||||
|         return HTTPSHandlerV3() | ||||
|     else: | ||||
|         import ssl | ||||
|         context = ssl.SSLContext(ssl.PROTOCOL_SSLv23) | ||||
|         context = ssl.SSLContext(ssl.PROTOCOL_SSLv3) | ||||
|         context.set_default_verify_paths() | ||||
|          | ||||
|         context.verify_mode = (ssl.CERT_NONE | ||||
|                                if opts.no_check_certificate | ||||
|                                if opts_no_check_certificate | ||||
|                                else ssl.CERT_REQUIRED) | ||||
|         return compat_urllib_request.HTTPSHandler(context=context) | ||||
|  | ||||
| @@ -734,6 +754,8 @@ def unified_strdate(date_str): | ||||
|         '%Y/%m/%d %H:%M:%S', | ||||
|         '%d.%m.%Y %H:%M', | ||||
|         '%Y-%m-%dT%H:%M:%SZ', | ||||
|         '%Y-%m-%dT%H:%M:%S.%fZ', | ||||
|         '%Y-%m-%dT%H:%M:%S.%f0Z', | ||||
|         '%Y-%m-%dT%H:%M:%S', | ||||
|     ] | ||||
|     for expression in format_expressions: | ||||
| @@ -949,7 +971,16 @@ class locked_file(object): | ||||
|  | ||||
|  | ||||
| def shell_quote(args): | ||||
|     return ' '.join(map(pipes.quote, args)) | ||||
|     quoted_args = [] | ||||
|     encoding = sys.getfilesystemencoding() | ||||
|     if encoding is None: | ||||
|         encoding = 'utf-8' | ||||
|     for a in args: | ||||
|         if isinstance(a, bytes): | ||||
|             # We may get a filename encoded with 'encodeFilename' | ||||
|             a = a.decode(encoding) | ||||
|         quoted_args.append(pipes.quote(a)) | ||||
|     return u' '.join(quoted_args) | ||||
|  | ||||
|  | ||||
| def takewhile_inclusive(pred, seq): | ||||
| @@ -976,3 +1007,22 @@ def unsmuggle_url(smug_url): | ||||
|     jsond = compat_parse_qs(sdata)[u'__youtubedl_smuggle'][0] | ||||
|     data = json.loads(jsond) | ||||
|     return url, data | ||||
|  | ||||
|  | ||||
| def parse_xml_doc(s): | ||||
|     assert isinstance(s, type(u'')) | ||||
|     return xml.etree.ElementTree.fromstring(s.encode('utf-8')) | ||||
|  | ||||
|  | ||||
| def format_bytes(bytes): | ||||
|     if bytes is None: | ||||
|         return u'N/A' | ||||
|     if type(bytes) is str: | ||||
|         bytes = float(bytes) | ||||
|     if bytes == 0.0: | ||||
|         exponent = 0 | ||||
|     else: | ||||
|         exponent = int(math.log(bytes, 1024.0)) | ||||
|     suffix = [u'B', u'KiB', u'MiB', u'GiB', u'TiB', u'PiB', u'EiB', u'ZiB', u'YiB'][exponent] | ||||
|     converted = float(bytes) / float(1024 ** exponent) | ||||
|     return u'%.2f%s' % (converted, suffix) | ||||
|   | ||||
| @@ -1,2 +1,2 @@ | ||||
|  | ||||
| __version__ = '2013.11.13' | ||||
| __version__ = '2013.11.25.3' | ||||
|   | ||||
		Reference in New Issue
	
	Block a user