Compare commits
	
		
			114 Commits
		
	
	
		
			2013.07.18
			...
			2013.08.14
		
	
	| Author | SHA1 | Date | |
|---|---|---|---|
| 
						 | 
					d1ba998274 | ||
| 
						 | 
					718ced8d8c | ||
| 
						 | 
					e1842025d0 | ||
| 
						 | 
					0577177e3e | ||
| 
						 | 
					298f833b16 | ||
| 
						 | 
					0f399e6e5e | ||
| 
						 | 
					5b075e27cb | ||
| 
						 | 
					8a9d86a2a7 | ||
| 
						 | 
					d468a09789 | ||
| 
						 | 
					9f4ab73d7f | ||
| 
						 | 
					02cf62e240 | ||
| 
						 | 
					67fb0c5495 | ||
| 
						 | 
					4efba05c56 | ||
| 
						 | 
					0f90943e45 | ||
| 
						 | 
					526e638c8a | ||
| 
						 | 
					356e067390 | ||
| 
						 | 
					e2f48f9643 | ||
| 
						 | 
					b513a251f8 | ||
| 
						 | 
					36cb11f068 | ||
| 
						 | 
					7a4c6cc92f | ||
| 
						 | 
					7edcb8f39c | ||
| 
						 | 
					39b782b390 | ||
| 
						 | 
					577664c8e8 | ||
| 
						 | 
					bba12cec89 | ||
| 
						 | 
					70c4c03cb8 | ||
| 
						 | 
					f5791ed136 | ||
| 
						 | 
					fbf189a6ee | ||
| 
						 | 
					09825cb5c0 | ||
| 
						 | 
					ed27d35674 | ||
| 
						 | 
					fd5539eb41 | ||
| 
						 | 
					04bca64bde | ||
| 
						 | 
					03cc7c20c1 | ||
| 
						 | 
					4075311d94 | ||
| 
						 | 
					86fe61c8f9 | ||
| 
						 | 
					9bb6d2f21d | ||
| 
						 | 
					e3f4593e76 | ||
| 
						 | 
					1d043b93cf | ||
| 
						 | 
					b15d4f624f | ||
| 
						 | 
					4aa16a50f5 | ||
| 
						 | 
					bbcbf4d459 | ||
| 
						 | 
					930ad9eecc | ||
| 
						 | 
					b072a9defd | ||
| 
						 | 
					75952c6e3d | ||
| 
						 | 
					05afc96b73 | ||
| 
						 | 
					fa80026915 | ||
| 
						 | 
					2bc3de0f28 | ||
| 
						 | 
					99c7bc94af | ||
| 
						 | 
					152c8f349d | ||
| 
						 | 
					d75654c15e | ||
| 
						 | 
					0725f584e1 | ||
| 
						 | 
					8cda9241d1 | ||
| 
						 | 
					a3124ba49f | ||
| 
						 | 
					579e2691fe | ||
| 
						 | 
					63f05de10b | ||
| 
						 | 
					caeefc29eb | ||
| 
						 | 
					a3c736def2 | ||
| 
						 | 
					58261235f0 | ||
| 
						 | 
					da70877a1b | ||
| 
						 | 
					5c468ca8a8 | ||
| 
						 | 
					aedd6bb97d | ||
| 
						 | 
					733d9cacb8 | ||
| 
						 | 
					42f2805e48 | ||
| 
						 | 
					0ffcb7c6fc | ||
| 
						 | 
					27669bd11d | ||
| 
						 | 
					6625f82940 | ||
| 
						 | 
					d0866f0bb4 | ||
| 
						 | 
					09eeb75130 | ||
| 
						 | 
					0a99956f71 | ||
| 
						 | 
					12ef6aefa8 | ||
| 
						 | 
					e93aa81aa6 | ||
| 
						 | 
					755eb0320e | ||
| 
						 | 
					43ba5456b1 | ||
| 
						 | 
					156d5ad6da | ||
| 
						 | 
					c626a3d9fa | ||
| 
						 | 
					b2e8bc1b20 | ||
| 
						 | 
					771822ebb8 | ||
| 
						 | 
					eb6a41ba0f | ||
| 
						 | 
					7d2392691c | ||
| 
						 | 
					c216c1894d | ||
| 
						 | 
					3e1ad508eb | ||
| 
						 | 
					a052c1d785 | ||
| 
						 | 
					16484d4923 | ||
| 
						 | 
					32a09b4382 | ||
| 
						 | 
					870a7e6156 | ||
| 
						 | 
					239e3e0cca | ||
| 
						 | 
					b1ca5e3ffa | ||
| 
						 | 
					b9a1252c96 | ||
| 
						 | 
					fc492de31d | ||
| 
						 | 
					a9c0f9bc63 | ||
| 
						 | 
					b7cc9f5026 | ||
| 
						 | 
					252580c561 | ||
| 
						 | 
					acc47c1a3f | ||
| 
						 | 
					70fa830e4d | ||
| 
						 | 
					a7af0ebaf5 | ||
| 
						 | 
					67ae7b4760 | ||
| 
						 | 
					de48addae2 | ||
| 
						 | 
					ddbfd0f0c5 | ||
| 
						 | 
					d7ae0639b4 | ||
| 
						 | 
					0382435990 | ||
| 
						 | 
					b390d85d95 | ||
| 
						 | 
					be925dc64c | ||
| 
						 | 
					de7a91bfe3 | ||
| 
						 | 
					a4358cbabd | ||
| 
						 | 
					177ed935a9 | ||
| 
						 | 
					c364f15ff1 | ||
| 
						 | 
					e1f6e61e6a | ||
| 
						 | 
					0932300e3a | ||
| 
						 | 
					3f40217704 | ||
| 
						 | 
					f631c3311a | ||
| 
						 | 
					8e5e059d7d | ||
| 
						 | 
					2b1b511f6b | ||
| 
						 | 
					233ad24ecf | ||
| 
						 | 
					c4949c50f9 | ||
| 
						 | 
					b6ef402905 | 
@@ -9,6 +9,7 @@ notifications:
 | 
			
		||||
    - filippo.valsorda@gmail.com
 | 
			
		||||
    - phihag@phihag.de
 | 
			
		||||
    - jaime.marquinez.ferrandiz+travis@gmail.com
 | 
			
		||||
    - yasoob.khld@gmail.com
 | 
			
		||||
#  irc:
 | 
			
		||||
#    channels:
 | 
			
		||||
#      - "irc.freenode.org#youtube-dl"
 | 
			
		||||
 
 | 
			
		||||
@@ -16,7 +16,9 @@ which means you can modify it, redistribute it or use it however you like.
 | 
			
		||||
# OPTIONS
 | 
			
		||||
    -h, --help                 print this help text and exit
 | 
			
		||||
    --version                  print program version and exit
 | 
			
		||||
    -U, --update               update this program to latest version
 | 
			
		||||
    -U, --update               update this program to latest version. Make sure
 | 
			
		||||
                               that you have sufficient permissions (run with
 | 
			
		||||
                               sudo if needed)
 | 
			
		||||
    -i, --ignore-errors        continue on download errors
 | 
			
		||||
    --dump-user-agent          display the current browser identification
 | 
			
		||||
    --user-agent UA            specify a custom user agent
 | 
			
		||||
 
 | 
			
		||||
@@ -14,27 +14,36 @@ tests = [
 | 
			
		||||
    # 88
 | 
			
		||||
    ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[]}|:;?/>.<",
 | 
			
		||||
     "J:|}][{=+-_)(*&;%$#@>MNBVCXZASDFGH^KLPOIUYTREWQ0987654321mnbvcxzasdfghrklpoiuytej"),
 | 
			
		||||
    # 87
 | 
			
		||||
    # 87 - vflART1Nf 2013/07/24
 | 
			
		||||
    ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$^&*()_-+={[]}|:;?/>.<",
 | 
			
		||||
     "!?;:|}][{=+-_)(*&^$#@/MNBVCXZASqFGHJKLPOIUYTREWQ0987654321mnbvcxzasdfghjklpoiuytr"),
 | 
			
		||||
    # 86 - vfl_ymO4Z 2013/06/27
 | 
			
		||||
     "tyuioplkjhgfdsazxcv<nm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$^&*()_-+={[]}|:;?/>"),
 | 
			
		||||
    # 86 - vflm_D8eE 2013/07/31
 | 
			
		||||
    ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[|};?/>.<",
 | 
			
		||||
     "ertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!/#$%^&*()_-+={[|};?@"),
 | 
			
		||||
    # 85
 | 
			
		||||
     ">.1}|[{=+-_)(*&^%$#@!MNBVCXZASDFGHJK<POIUYTREW509876L432/mnbvcxzasdfghjklpoiuytre"),
 | 
			
		||||
    # 85 - vflSAFCP9 2013/07/19
 | 
			
		||||
    ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[};?/>.<",
 | 
			
		||||
     "{>/?;}[.=+-_)(*&^%$#@!MqBVCXZASDFwHJKLPOIUYTREWQ0987654321mnbvcxzasdfghjklpoiuytr"),
 | 
			
		||||
     "ertyuiqplkjhgfdsazx$vbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#<%^&*()_-+={[};?/c"),
 | 
			
		||||
    # 84
 | 
			
		||||
    ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[};?>.<",
 | 
			
		||||
     "<.>?;}[{=+-_)(*&^%$#@!MNBVCXZASDFGHJKLPOIUYTREWe098765432rmnbvcxzasdfghjklpoiuyt1"),
 | 
			
		||||
    # 83 - vflcaqGO8 2013/07/11
 | 
			
		||||
    # 83 - vflTWC9KW 2013/08/01
 | 
			
		||||
    ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!#$%^&*()_+={[};?/>.<",
 | 
			
		||||
     "urty8ioplkjhgfdsazxcvbqm1234567S90QWERTYUIOPLKJHGFDnAZXCVBNM!#$%^&*()_+={[};?/>.<"),
 | 
			
		||||
     "qwertyuioplkjhg>dsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!#$%^&*()_+={[};?/f"),
 | 
			
		||||
    # 82
 | 
			
		||||
    ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKHGFDSAZXCVBNM!@#$%^&*(-+={[};?/>.<",
 | 
			
		||||
     "Q>/?;}[{=+-(*<^%$#@!MNBVCXZASDFGHKLPOIUY8REWT0q&7654321mnbvcxzasdfghjklpoiuytrew9"),
 | 
			
		||||
    # 81
 | 
			
		||||
    # 81 - vflLC8JvQ 2013/07/25
 | 
			
		||||
    ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKHGFDSAZXCVBNM!@#$%^&*(-+={[};?/>.",
 | 
			
		||||
     "urty8ioplkjhgfdsazxcvbqm1234567e90QWERTYUIOPLKHGFDSnZXCVBNM!@#$%^&*(-+={[};?/>."),
 | 
			
		||||
     "C>/?;}[{=+-(*&^%$#@!MNBVYXZASDFGHKLPOIU.TREWQ0q87659321mnbvcxzasdfghjkl4oiuytrewp"),
 | 
			
		||||
    # 79 - vflLC8JvQ 2013/07/25 (sporadic)
 | 
			
		||||
    ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKHGFDSAZXCVBNM!@#$%^&*(-+={[};?/",
 | 
			
		||||
     "Z?;}[{=+-(*&^%$#@!MNBVCXRASDFGHKLPOIUYT/EWQ0q87659321mnbvcxzasdfghjkl4oiuytrewp"),
 | 
			
		||||
]
 | 
			
		||||
 | 
			
		||||
tests_age_gate = [
 | 
			
		||||
    # 86 - vflqinMWD
 | 
			
		||||
    ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[|};?/>.<",
 | 
			
		||||
     "ertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!/#$%^&*()_-+={[|};?@"),
 | 
			
		||||
]
 | 
			
		||||
 | 
			
		||||
def find_matching(wrong, right):
 | 
			
		||||
@@ -87,6 +96,8 @@ def genall(tests):
 | 
			
		||||
 | 
			
		||||
def main():
 | 
			
		||||
    print(genall(tests))
 | 
			
		||||
    print(u'    Age gate:')
 | 
			
		||||
    print(genall(tests_age_gate))
 | 
			
		||||
 | 
			
		||||
if __name__ == '__main__':
 | 
			
		||||
    main()
 | 
			
		||||
 
 | 
			
		||||
							
								
								
									
										38
									
								
								test/test_playlists.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										38
									
								
								test/test_playlists.py
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,38 @@
 | 
			
		||||
#!/usr/bin/env python
 | 
			
		||||
 | 
			
		||||
import sys
 | 
			
		||||
import unittest
 | 
			
		||||
import json
 | 
			
		||||
 | 
			
		||||
# Allow direct execution
 | 
			
		||||
import os
 | 
			
		||||
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
 | 
			
		||||
 | 
			
		||||
from youtube_dl.extractor import DailymotionPlaylistIE, VimeoChannelIE
 | 
			
		||||
from youtube_dl.utils import *
 | 
			
		||||
 | 
			
		||||
from helper import FakeYDL
 | 
			
		||||
 | 
			
		||||
class TestPlaylists(unittest.TestCase):
 | 
			
		||||
    def assertIsPlaylist(self, info):
 | 
			
		||||
        """Make sure the info has '_type' set to 'playlist'"""
 | 
			
		||||
        self.assertEqual(info['_type'], 'playlist')
 | 
			
		||||
 | 
			
		||||
    def test_dailymotion_playlist(self):
 | 
			
		||||
        dl = FakeYDL()
 | 
			
		||||
        ie = DailymotionPlaylistIE(dl)
 | 
			
		||||
        result = ie.extract('http://www.dailymotion.com/playlist/xv4bw_nqtv_sport/1#video=xl8v3q')
 | 
			
		||||
        self.assertIsPlaylist(result)
 | 
			
		||||
        self.assertEqual(result['title'], u'SPORT')
 | 
			
		||||
        self.assertTrue(len(result['entries']) > 20)
 | 
			
		||||
 | 
			
		||||
    def test_vimeo_channel(self):
 | 
			
		||||
        dl = FakeYDL()
 | 
			
		||||
        ie = VimeoChannelIE(dl)
 | 
			
		||||
        result = ie.extract('http://vimeo.com/channels/tributes')
 | 
			
		||||
        self.assertIsPlaylist(result)
 | 
			
		||||
        self.assertEqual(result['title'], u'Vimeo Tributes')
 | 
			
		||||
        self.assertTrue(len(result['entries']) > 24)
 | 
			
		||||
 | 
			
		||||
if __name__ == '__main__':
 | 
			
		||||
    unittest.main()
 | 
			
		||||
@@ -1,67 +0,0 @@
 | 
			
		||||
#!/usr/bin/env python
 | 
			
		||||
 | 
			
		||||
import unittest
 | 
			
		||||
import sys
 | 
			
		||||
 | 
			
		||||
# Allow direct execution
 | 
			
		||||
import os
 | 
			
		||||
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
 | 
			
		||||
 | 
			
		||||
from youtube_dl.extractor.youtube import YoutubeIE
 | 
			
		||||
from helper import FakeYDL
 | 
			
		||||
 | 
			
		||||
sig = YoutubeIE(FakeYDL())._decrypt_signature
 | 
			
		||||
 | 
			
		||||
class TestYoutubeSig(unittest.TestCase):
 | 
			
		||||
    def test_92(self):
 | 
			
		||||
        wrong = "F9F9B6E6FD47029957AB911A964CC20D95A181A5D37A2DBEFD67D403DB0E8BE4F4910053E4E8A79.0B70B.0B80B8"
 | 
			
		||||
        right = "69B6E6FD47029957AB911A9F4CC20D95A181A5D3.A2DBEFD67D403DB0E8BE4F4910053E4E8A7980B7"
 | 
			
		||||
        self.assertEqual(sig(wrong), right)
 | 
			
		||||
 | 
			
		||||
    def test_90(self):
 | 
			
		||||
        wrong = "qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[]}|:;?/>.<'`"
 | 
			
		||||
        right = "mrtyuioplkjhgfdsazxcvbne1234567890QWER[YUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={`]}|"
 | 
			
		||||
        self.assertEqual(sig(wrong), right)
 | 
			
		||||
 | 
			
		||||
    def test_88(self):
 | 
			
		||||
        wrong = "qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[]}|:;?/>.<"
 | 
			
		||||
        right = "J:|}][{=+-_)(*&;%$#@>MNBVCXZASDFGH^KLPOIUYTREWQ0987654321mnbvcxzasdfghrklpoiuytej"
 | 
			
		||||
        self.assertEqual(sig(wrong), right)
 | 
			
		||||
 | 
			
		||||
    def test_87(self):
 | 
			
		||||
        wrong = "qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$^&*()_-+={[]}|:;?/>.<"
 | 
			
		||||
        right = "!?;:|}][{=+-_)(*&^$#@/MNBVCXZASqFGHJKLPOIUYTREWQ0987654321mnbvcxzasdfghjklpoiuytr"
 | 
			
		||||
        self.assertEqual(sig(wrong), right)
 | 
			
		||||
 | 
			
		||||
    def test_86(self):
 | 
			
		||||
        wrong = "qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[|};?/>.<"
 | 
			
		||||
        right = "ertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!/#$%^&*()_-+={[|};?@"
 | 
			
		||||
        self.assertEqual(sig(wrong), right)
 | 
			
		||||
 | 
			
		||||
    def test_85(self):
 | 
			
		||||
        wrong = "qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[};?/>.<"
 | 
			
		||||
        right = "{>/?;}[.=+-_)(*&^%$#@!MqBVCXZASDFwHJKLPOIUYTREWQ0987654321mnbvcxzasdfghjklpoiuytr"
 | 
			
		||||
        self.assertEqual(sig(wrong), right)
 | 
			
		||||
 | 
			
		||||
    def test_84(self):
 | 
			
		||||
        wrong = "qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[};?>.<"
 | 
			
		||||
        right = "<.>?;}[{=+-_)(*&^%$#@!MNBVCXZASDFGHJKLPOIUYTREWe098765432rmnbvcxzasdfghjklpoiuyt1"
 | 
			
		||||
        self.assertEqual(sig(wrong), right)
 | 
			
		||||
 | 
			
		||||
    def test_83(self):
 | 
			
		||||
        wrong = "qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!#$%^&*()_+={[};?/>.<"
 | 
			
		||||
        right = "urty8ioplkjhgfdsazxcvbqm1234567S90QWERTYUIOPLKJHGFDnAZXCVBNM!#$%^&*()_+={[};?/>.<"
 | 
			
		||||
        self.assertEqual(sig(wrong), right)
 | 
			
		||||
 | 
			
		||||
    def test_82(self):
 | 
			
		||||
        wrong = "qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKHGFDSAZXCVBNM!@#$%^&*(-+={[};?/>.<"
 | 
			
		||||
        right = "Q>/?;}[{=+-(*<^%$#@!MNBVCXZASDFGHKLPOIUY8REWT0q&7654321mnbvcxzasdfghjklpoiuytrew9"
 | 
			
		||||
        self.assertEqual(sig(wrong), right)
 | 
			
		||||
 | 
			
		||||
    def test_81(self):
 | 
			
		||||
        wrong = "qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKHGFDSAZXCVBNM!@#$%^&*(-+={[};?/>."
 | 
			
		||||
        right = "urty8ioplkjhgfdsazxcvbqm1234567e90QWERTYUIOPLKHGFDSnZXCVBNM!@#$%^&*(-+={[};?/>."
 | 
			
		||||
        self.assertEqual(sig(wrong), right)
 | 
			
		||||
 | 
			
		||||
if __name__ == '__main__':
 | 
			
		||||
    unittest.main()
 | 
			
		||||
@@ -329,6 +329,35 @@ class FileDownloader(object):
 | 
			
		||||
            self.report_error(u'mplayer exited with code %d' % retval)
 | 
			
		||||
            return False
 | 
			
		||||
 | 
			
		||||
    def _download_m3u8_with_ffmpeg(self, filename, url):
 | 
			
		||||
        self.report_destination(filename)
 | 
			
		||||
        tmpfilename = self.temp_name(filename)
 | 
			
		||||
 | 
			
		||||
        args = ['ffmpeg', '-y', '-i', url, '-f', 'mp4', tmpfilename]
 | 
			
		||||
        # Check for ffmpeg first
 | 
			
		||||
        try:
 | 
			
		||||
            subprocess.call(['ffmpeg', '-h'], stdout=(open(os.path.devnull, 'w')), stderr=subprocess.STDOUT)
 | 
			
		||||
        except (OSError, IOError):
 | 
			
		||||
            self.report_error(u'm3u8 download detected but "%s" could not be run' % args[0] )
 | 
			
		||||
            return False
 | 
			
		||||
 | 
			
		||||
        retval = subprocess.call(args)
 | 
			
		||||
        if retval == 0:
 | 
			
		||||
            fsize = os.path.getsize(encodeFilename(tmpfilename))
 | 
			
		||||
            self.to_screen(u'\r[%s] %s bytes' % (args[0], fsize))
 | 
			
		||||
            self.try_rename(tmpfilename, filename)
 | 
			
		||||
            self._hook_progress({
 | 
			
		||||
                'downloaded_bytes': fsize,
 | 
			
		||||
                'total_bytes': fsize,
 | 
			
		||||
                'filename': filename,
 | 
			
		||||
                'status': 'finished',
 | 
			
		||||
            })
 | 
			
		||||
            return True
 | 
			
		||||
        else:
 | 
			
		||||
            self.to_stderr(u"\n")
 | 
			
		||||
            self.report_error(u'ffmpeg exited with code %d' % retval)
 | 
			
		||||
            return False
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
    def _do_download(self, filename, info_dict):
 | 
			
		||||
        url = info_dict['url']
 | 
			
		||||
@@ -354,6 +383,10 @@ class FileDownloader(object):
 | 
			
		||||
        if url.startswith('mms') or url.startswith('rtsp'):
 | 
			
		||||
            return self._download_with_mplayer(filename, url)
 | 
			
		||||
 | 
			
		||||
        # m3u8 manifest are downloaded with ffmpeg
 | 
			
		||||
        if determine_ext(url) == u'm3u8':
 | 
			
		||||
            return self._download_m3u8_with_ffmpeg(filename, url)
 | 
			
		||||
 | 
			
		||||
        tmpfilename = self.temp_name(filename)
 | 
			
		||||
        stream = None
 | 
			
		||||
 | 
			
		||||
 
 | 
			
		||||
@@ -100,7 +100,8 @@ class FFmpegExtractAudioPP(FFmpegPostProcessor):
 | 
			
		||||
        self._nopostoverwrites = nopostoverwrites
 | 
			
		||||
 | 
			
		||||
    def get_audio_codec(self, path):
 | 
			
		||||
        if not self._exes['ffprobe'] and not self._exes['avprobe']: return None
 | 
			
		||||
        if not self._exes['ffprobe'] and not self._exes['avprobe']:
 | 
			
		||||
            raise PostProcessingError(u'ffprobe or avprobe not found. Please install one.')
 | 
			
		||||
        try:
 | 
			
		||||
            cmd = [self._exes['avprobe'] or self._exes['ffprobe'], '-show_streams', encodeFilename(self._ffmpeg_filename_argument(path))]
 | 
			
		||||
            handle = subprocess.Popen(cmd, stderr=compat_subprocess_get_DEVNULL(), stdout=subprocess.PIPE)
 | 
			
		||||
@@ -208,7 +209,7 @@ class FFmpegExtractAudioPP(FFmpegPostProcessor):
 | 
			
		||||
            try:
 | 
			
		||||
                os.utime(encodeFilename(new_path), (time.time(), information['filetime']))
 | 
			
		||||
            except:
 | 
			
		||||
                self._downloader.to_stderr(u'WARNING: Cannot update utime of audio file')
 | 
			
		||||
                self._downloader.report_warning(u'Cannot update utime of audio file')
 | 
			
		||||
 | 
			
		||||
        information['filepath'] = new_path
 | 
			
		||||
        return self._nopostoverwrites,information
 | 
			
		||||
 
 | 
			
		||||
@@ -264,7 +264,7 @@ class YoutubeDL(object):
 | 
			
		||||
            self.report_error(u'Erroneous output template')
 | 
			
		||||
            return None
 | 
			
		||||
        except ValueError as err:
 | 
			
		||||
            self.report_error(u'Insufficient system charset ' + repr(preferredencoding()))
 | 
			
		||||
            self.report_error(u'Error in output template: ' + str(err) + u' (encoding: ' + repr(preferredencoding()) + ')')
 | 
			
		||||
            return None
 | 
			
		||||
 | 
			
		||||
    def _match_entry(self, info_dict):
 | 
			
		||||
@@ -547,7 +547,7 @@ class YoutubeDL(object):
 | 
			
		||||
                try:
 | 
			
		||||
                    success = self.fd._do_download(filename, info_dict)
 | 
			
		||||
                except (OSError, IOError) as err:
 | 
			
		||||
                    raise UnavailableVideoError()
 | 
			
		||||
                    raise UnavailableVideoError(err)
 | 
			
		||||
                except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
 | 
			
		||||
                    self.report_error(u'unable to download video data: %s' % str(err))
 | 
			
		||||
                    return
 | 
			
		||||
@@ -594,7 +594,7 @@ class YoutubeDL(object):
 | 
			
		||||
                        # No clear decision yet, let IE decide
 | 
			
		||||
                        keep_video = keep_video_wish
 | 
			
		||||
            except PostProcessingError as e:
 | 
			
		||||
                self.to_stderr(u'ERROR: ' + e.msg)
 | 
			
		||||
                self.report_error(e.msg)
 | 
			
		||||
        if keep_video is False and not self.params.get('keepvideo', False):
 | 
			
		||||
            try:
 | 
			
		||||
                self.to_screen(u'Deleting original file %s (pass -k to keep)' % filename)
 | 
			
		||||
 
 | 
			
		||||
@@ -129,7 +129,7 @@ def parseOpts(overrideArguments=None):
 | 
			
		||||
    general.add_option('-v', '--version',
 | 
			
		||||
            action='version', help='print program version and exit')
 | 
			
		||||
    general.add_option('-U', '--update',
 | 
			
		||||
            action='store_true', dest='update_self', help='update this program to latest version')
 | 
			
		||||
            action='store_true', dest='update_self', help='update this program to latest version. Make sure that you have sufficient permissions (run with sudo if needed)')
 | 
			
		||||
    general.add_option('-i', '--ignore-errors',
 | 
			
		||||
            action='store_true', dest='ignoreerrors', help='continue on download errors', default=False)
 | 
			
		||||
    general.add_option('--dump-user-agent',
 | 
			
		||||
@@ -398,6 +398,8 @@ def _real_main(argv=None):
 | 
			
		||||
            batchurls = batchfd.readlines()
 | 
			
		||||
            batchurls = [x.strip() for x in batchurls]
 | 
			
		||||
            batchurls = [x for x in batchurls if len(x) > 0 and not re.search(r'^[#/;]', x)]
 | 
			
		||||
            if opts.verbose:
 | 
			
		||||
                sys.stderr.write(u'[debug] Batch file urls: ' + repr(batchurls) + u'\n')
 | 
			
		||||
        except IOError:
 | 
			
		||||
            sys.exit(u'ERROR: batch file could not be read')
 | 
			
		||||
    all_urls = batchurls + args
 | 
			
		||||
 
 | 
			
		||||
@@ -12,13 +12,14 @@ from .comedycentral import ComedyCentralIE
 | 
			
		||||
from .condenast import CondeNastIE
 | 
			
		||||
from .criterion import CriterionIE
 | 
			
		||||
from .cspan import CSpanIE
 | 
			
		||||
from .dailymotion import DailymotionIE
 | 
			
		||||
from .dailymotion import DailymotionIE, DailymotionPlaylistIE
 | 
			
		||||
from .depositfiles import DepositFilesIE
 | 
			
		||||
from .dotsub import DotsubIE
 | 
			
		||||
from .dreisat import DreiSatIE
 | 
			
		||||
from .ehow import EHowIE
 | 
			
		||||
from .eighttracks import EightTracksIE
 | 
			
		||||
from .escapist import EscapistIE
 | 
			
		||||
from .exfm import ExfmIE
 | 
			
		||||
from .facebook import FacebookIE
 | 
			
		||||
from .flickr import FlickrIE
 | 
			
		||||
from .freesound import FreesoundIE
 | 
			
		||||
@@ -37,20 +38,25 @@ from .infoq import InfoQIE
 | 
			
		||||
from .instagram import InstagramIE
 | 
			
		||||
from .jukebox import JukeboxIE
 | 
			
		||||
from .justintv import JustinTVIE
 | 
			
		||||
from .kankan import KankanIE
 | 
			
		||||
from .keek import KeekIE
 | 
			
		||||
from .liveleak import LiveLeakIE
 | 
			
		||||
from .livestream import LivestreamIE
 | 
			
		||||
from .metacafe import MetacafeIE
 | 
			
		||||
from .mixcloud import MixcloudIE
 | 
			
		||||
from .mtv import MTVIE
 | 
			
		||||
from .muzu import MuzuTVIE
 | 
			
		||||
from .myspass import MySpassIE
 | 
			
		||||
from .myvideo import MyVideoIE
 | 
			
		||||
from .nba import NBAIE
 | 
			
		||||
from .ooyala import OoyalaIE
 | 
			
		||||
from .photobucket import PhotobucketIE
 | 
			
		||||
from .pornotube import PornotubeIE
 | 
			
		||||
from .rbmaradio import RBMARadioIE
 | 
			
		||||
from .redtube import RedTubeIE
 | 
			
		||||
from .ringtv import RingTVIE
 | 
			
		||||
from .roxwel import RoxwelIE
 | 
			
		||||
from .sina import SinaIE
 | 
			
		||||
from .soundcloud import SoundcloudIE, SoundcloudSetIE
 | 
			
		||||
from .spiegel import SpiegelIE
 | 
			
		||||
from .stanfordoc import StanfordOpenClassroomIE
 | 
			
		||||
@@ -68,9 +74,12 @@ from .ustream import UstreamIE
 | 
			
		||||
from .vbox7 import Vbox7IE
 | 
			
		||||
from .veoh import VeohIE
 | 
			
		||||
from .vevo import VevoIE
 | 
			
		||||
from .vimeo import VimeoIE
 | 
			
		||||
from .videofyme import VideofyMeIE
 | 
			
		||||
from .vimeo import VimeoIE, VimeoChannelIE
 | 
			
		||||
from .vine import VineIE
 | 
			
		||||
from .c56 import C56IE
 | 
			
		||||
from .wat import WatIE
 | 
			
		||||
from .weibo import WeiboIE
 | 
			
		||||
from .wimp import WimpIE
 | 
			
		||||
from .worldstarhiphop import WorldStarHipHopIE
 | 
			
		||||
from .xhamster import XHamsterIE
 | 
			
		||||
@@ -88,6 +97,9 @@ from .youtube import (
 | 
			
		||||
    YoutubeChannelIE,
 | 
			
		||||
    YoutubeShowIE,
 | 
			
		||||
    YoutubeSubscriptionsIE,
 | 
			
		||||
    YoutubeRecommendedIE,
 | 
			
		||||
    YoutubeWatchLaterIE,
 | 
			
		||||
    YoutubeFavouritesIE,
 | 
			
		||||
)
 | 
			
		||||
from .zdf import ZDFIE
 | 
			
		||||
 | 
			
		||||
 
 | 
			
		||||
@@ -17,13 +17,14 @@ class ArteTvIE(InfoExtractor):
 | 
			
		||||
    """
 | 
			
		||||
    _EMISSION_URL = r'(?:http://)?www\.arte.tv/guide/(?P<lang>fr|de)/(?:(?:sendungen|emissions)/)?(?P<id>.*?)/(?P<name>.*?)(\?.*)?'
 | 
			
		||||
    _VIDEOS_URL = r'(?:http://)?videos.arte.tv/(?P<lang>fr|de)/.*-(?P<id>.*?).html'
 | 
			
		||||
    _LIVEWEB_URL = r'(?:http://)?liveweb.arte.tv/(?P<lang>fr|de)/(?P<subpage>.+?)/(?P<name>.+)'
 | 
			
		||||
    _LIVE_URL = r'index-[0-9]+\.html$'
 | 
			
		||||
 | 
			
		||||
    IE_NAME = u'arte.tv'
 | 
			
		||||
 | 
			
		||||
    @classmethod
 | 
			
		||||
    def suitable(cls, url):
 | 
			
		||||
        return any(re.match(regex, url) for regex in (cls._EMISSION_URL, cls._VIDEOS_URL))
 | 
			
		||||
        return any(re.match(regex, url) for regex in (cls._EMISSION_URL, cls._VIDEOS_URL, cls._LIVEWEB_URL))
 | 
			
		||||
 | 
			
		||||
    # TODO implement Live Stream
 | 
			
		||||
    # from ..utils import compat_urllib_parse
 | 
			
		||||
@@ -68,6 +69,12 @@ class ArteTvIE(InfoExtractor):
 | 
			
		||||
            lang = mobj.group('lang')
 | 
			
		||||
            return self._extract_video(url, id, lang)
 | 
			
		||||
 | 
			
		||||
        mobj = re.match(self._LIVEWEB_URL, url)
 | 
			
		||||
        if mobj is not None:
 | 
			
		||||
            name = mobj.group('name')
 | 
			
		||||
            lang = mobj.group('lang')
 | 
			
		||||
            return self._extract_liveweb(url, name, lang)
 | 
			
		||||
 | 
			
		||||
        if re.search(self._LIVE_URL, video_id) is not None:
 | 
			
		||||
            raise ExtractorError(u'Arte live streams are not yet supported, sorry')
 | 
			
		||||
            # self.extractLiveStream(url)
 | 
			
		||||
@@ -85,7 +92,7 @@ class ArteTvIE(InfoExtractor):
 | 
			
		||||
 | 
			
		||||
        info_dict = {'id': player_info['VID'],
 | 
			
		||||
                     'title': player_info['VTI'],
 | 
			
		||||
                     'description': player_info['VDE'],
 | 
			
		||||
                     'description': player_info.get('VDE'),
 | 
			
		||||
                     'upload_date': unified_strdate(player_info['VDA'].split(' ')[0]),
 | 
			
		||||
                     'thumbnail': player_info['programImage'],
 | 
			
		||||
                     'ext': 'flv',
 | 
			
		||||
@@ -98,12 +105,14 @@ class ArteTvIE(InfoExtractor):
 | 
			
		||||
                l = 'F'
 | 
			
		||||
            elif lang == 'de':
 | 
			
		||||
                l = 'A'
 | 
			
		||||
            regexes = [r'VO?%s' % l, r'V%s-ST.' % l]
 | 
			
		||||
            regexes = [r'VO?%s' % l, r'VO?.-ST%s' % l]
 | 
			
		||||
            return any(re.match(r, f['versionCode']) for r in regexes)
 | 
			
		||||
        # Some formats may not be in the same language as the url
 | 
			
		||||
        formats = filter(_match_lang, formats)
 | 
			
		||||
        # We order the formats by quality
 | 
			
		||||
        formats = sorted(formats, key=lambda f: int(f['height']))
 | 
			
		||||
        # Prefer videos without subtitles in the same language
 | 
			
		||||
        formats = sorted(formats, key=lambda f: re.match(r'VO(F|A)-STM\1', f['versionCode']) is None)
 | 
			
		||||
        # Pick the best quality
 | 
			
		||||
        format_info = formats[-1]
 | 
			
		||||
        if format_info['mediaType'] == u'rtmp':
 | 
			
		||||
@@ -144,3 +153,22 @@ class ArteTvIE(InfoExtractor):
 | 
			
		||||
                'url': video_url,
 | 
			
		||||
                'ext': 'flv',
 | 
			
		||||
                }
 | 
			
		||||
 | 
			
		||||
    def _extract_liveweb(self, url, name, lang):
 | 
			
		||||
        """Extract form http://liveweb.arte.tv/"""
 | 
			
		||||
        webpage = self._download_webpage(url, name)
 | 
			
		||||
        video_id = self._search_regex(r'eventId=(\d+?)("|&)', webpage, u'event id')
 | 
			
		||||
        config_xml = self._download_webpage('http://download.liveweb.arte.tv/o21/liveweb/events/event-%s.xml' % video_id,
 | 
			
		||||
                                            video_id, u'Downloading information')
 | 
			
		||||
        config_doc = xml.etree.ElementTree.fromstring(config_xml.encode('utf-8'))
 | 
			
		||||
        event_doc = config_doc.find('event')
 | 
			
		||||
        url_node = event_doc.find('video').find('urlHd')
 | 
			
		||||
        if url_node is None:
 | 
			
		||||
            url_node = video_doc.find('urlSd')
 | 
			
		||||
 | 
			
		||||
        return {'id': video_id,
 | 
			
		||||
                'title': event_doc.find('name%s' % lang.capitalize()).text,
 | 
			
		||||
                'url': url_node.text.replace('MP4', 'mp4'),
 | 
			
		||||
                'ext': 'flv',
 | 
			
		||||
                'thumbnail': self._og_search_thumbnail(webpage),
 | 
			
		||||
                }
 | 
			
		||||
 
 | 
			
		||||
@@ -1,6 +1,8 @@
 | 
			
		||||
import re
 | 
			
		||||
import json
 | 
			
		||||
 | 
			
		||||
from .common import InfoExtractor
 | 
			
		||||
from ..utils import determine_ext
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class BreakIE(InfoExtractor):
 | 
			
		||||
@@ -17,17 +19,20 @@ class BreakIE(InfoExtractor):
 | 
			
		||||
    def _real_extract(self, url):
 | 
			
		||||
        mobj = re.match(self._VALID_URL, url)
 | 
			
		||||
        video_id = mobj.group(1).split("-")[-1]
 | 
			
		||||
        webpage = self._download_webpage(url, video_id)
 | 
			
		||||
        video_url = re.search(r"videoPath: '(.+?)',",webpage).group(1)
 | 
			
		||||
        key = re.search(r"icon: '(.+?)',",webpage).group(1)
 | 
			
		||||
        final_url = str(video_url)+"?"+str(key)
 | 
			
		||||
        thumbnail_url = re.search(r"thumbnailURL: '(.+?)'",webpage).group(1)
 | 
			
		||||
        title = re.search(r"sVidTitle: '(.+)',",webpage).group(1)
 | 
			
		||||
        ext = video_url.split('.')[-1]
 | 
			
		||||
        embed_url = 'http://www.break.com/embed/%s' % video_id
 | 
			
		||||
        webpage = self._download_webpage(embed_url, video_id)
 | 
			
		||||
        info_json = self._search_regex(r'var embedVars = ({.*?});', webpage,
 | 
			
		||||
                                       u'info json', flags=re.DOTALL)
 | 
			
		||||
        info = json.loads(info_json)
 | 
			
		||||
        video_url = info['videoUri']
 | 
			
		||||
        m_youtube = re.search(r'(https?://www\.youtube\.com/watch\?v=.*)', video_url)
 | 
			
		||||
        if m_youtube is not None:
 | 
			
		||||
            return self.url_result(m_youtube.group(1), 'Youtube')
 | 
			
		||||
        final_url = video_url + '?' + info['AuthToken']
 | 
			
		||||
        return [{
 | 
			
		||||
            'id':        video_id,
 | 
			
		||||
            'url':       final_url,
 | 
			
		||||
            'ext':       ext,
 | 
			
		||||
            'title':     title,
 | 
			
		||||
            'thumbnail': thumbnail_url,
 | 
			
		||||
            'ext':       determine_ext(final_url),
 | 
			
		||||
            'title':     info['contentName'],
 | 
			
		||||
            'thumbnail': info['thumbUri'],
 | 
			
		||||
        }]
 | 
			
		||||
 
 | 
			
		||||
							
								
								
									
										36
									
								
								youtube_dl/extractor/c56.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										36
									
								
								youtube_dl/extractor/c56.py
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,36 @@
 | 
			
		||||
# coding: utf-8
 | 
			
		||||
 | 
			
		||||
import re
 | 
			
		||||
import json
 | 
			
		||||
 | 
			
		||||
from .common import InfoExtractor
 | 
			
		||||
from ..utils import determine_ext
 | 
			
		||||
 | 
			
		||||
class C56IE(InfoExtractor):
    """Information extractor for videos hosted on 56.com."""

    IE_NAME = u'56.com'
    _VALID_URL = r'https?://((www|player)\.)?56\.com/(.+?/)?(v_|(play_album.+-))(?P<textid>.+?)\.(html|swf)'

    _TEST ={
        u'url': u'http://www.56.com/u39/v_OTM0NDA3MTY.html',
        u'file': u'93440716.mp4',
        u'md5': u'9dc07b5c8e978112a6441f9e75d2b59e',
        u'info_dict': {
            u'title': u'网事知多少 第32期:车怒',
        },
    }

    def _real_extract(self, url):
        """Return the info dictionary for the 56.com video at *url*.

        The textual id taken from the URL is used to download a JSON
        document; among the files listed under 'rfiles' the one with the
        biggest 'filesize' is selected.
        """
        text_id = re.match(self._VALID_URL, url, flags=re.VERBOSE).group('textid')
        info_url = 'http://vxml.56.com/json/%s/' % text_id
        raw_info = self._download_webpage(info_url, text_id,
                                          u'Downloading video info')
        info = json.loads(raw_info)['info']

        # Ascending sort by size: the last entry is the largest file
        files_by_size = sorted(info['rfiles'],
                               key=lambda rfile: int(rfile['filesize']))
        video_url = files_by_size[-1]['url']

        result = {
            'id': info['vid'],
            'title': info['Subject'],
            'url': video_url,
            'ext': determine_ext(video_url),
            # Use 'bimg' when present and non-empty, otherwise 'img'
            'thumbnail': info.get('bimg') or info.get('img'),
        }
        return result
 | 
			
		||||
@@ -1,26 +1,26 @@
 | 
			
		||||
import re
 | 
			
		||||
import socket
 | 
			
		||||
import xml.etree.ElementTree
 | 
			
		||||
 | 
			
		||||
from .common import InfoExtractor
 | 
			
		||||
from ..utils import (
 | 
			
		||||
    compat_http_client,
 | 
			
		||||
    compat_str,
 | 
			
		||||
    compat_urllib_error,
 | 
			
		||||
    compat_urllib_parse_urlparse,
 | 
			
		||||
    compat_urllib_request,
 | 
			
		||||
 | 
			
		||||
    ExtractorError,
 | 
			
		||||
)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class CollegeHumorIE(InfoExtractor):
 | 
			
		||||
    _WORKING = False
 | 
			
		||||
    _VALID_URL = r'^(?:https?://)?(?:www\.)?collegehumor\.com/video/(?P<videoid>[0-9]+)/(?P<shorttitle>.*)$'
 | 
			
		||||
    _VALID_URL = r'^(?:https?://)?(?:www\.)?collegehumor\.com/(video|embed|e)/(?P<videoid>[0-9]+)/?(?P<shorttitle>.*)$'
 | 
			
		||||
 | 
			
		||||
    def report_manifest(self, video_id):
 | 
			
		||||
        """Report information extraction."""
 | 
			
		||||
        self.to_screen(u'%s: Downloading XML manifest' % video_id)
 | 
			
		||||
    _TEST = {
 | 
			
		||||
        u'url': u'http://www.collegehumor.com/video/6902724/comic-con-cosplay-catastrophe',
 | 
			
		||||
        u'file': u'6902724.mp4',
 | 
			
		||||
        u'md5': u'1264c12ad95dca142a9f0bf7968105a0',
 | 
			
		||||
        u'info_dict': {
 | 
			
		||||
            u'title': u'Comic-Con Cosplay Catastrophe',
 | 
			
		||||
            u'description': u'Fans get creative this year at San Diego.  Too creative.  And yes, that\'s really Joss Whedon.',
 | 
			
		||||
        },
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    def _real_extract(self, url):
 | 
			
		||||
        mobj = re.match(self._VALID_URL, url)
 | 
			
		||||
@@ -36,14 +36,16 @@ class CollegeHumorIE(InfoExtractor):
 | 
			
		||||
 | 
			
		||||
        self.report_extraction(video_id)
 | 
			
		||||
        xmlUrl = 'http://www.collegehumor.com/moogaloop/video/' + video_id
 | 
			
		||||
        try:
 | 
			
		||||
            metaXml = compat_urllib_request.urlopen(xmlUrl).read()
 | 
			
		||||
        except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
 | 
			
		||||
            raise ExtractorError(u'Unable to download video info XML: %s' % compat_str(err))
 | 
			
		||||
        metaXml = self._download_webpage(xmlUrl, video_id,
 | 
			
		||||
                                         u'Downloading info XML',
 | 
			
		||||
                                         u'Unable to download video info XML')
 | 
			
		||||
 | 
			
		||||
        mdoc = xml.etree.ElementTree.fromstring(metaXml)
 | 
			
		||||
        try:
 | 
			
		||||
            videoNode = mdoc.findall('./video')[0]
 | 
			
		||||
            youtubeIdNode = videoNode.find('./youtubeID')
 | 
			
		||||
            if youtubeIdNode is not None:
 | 
			
		||||
                return self.url_result(youtubeIdNode.text, 'Youtube')
 | 
			
		||||
            info['description'] = videoNode.findall('./description')[0].text
 | 
			
		||||
            info['title'] = videoNode.findall('./caption')[0].text
 | 
			
		||||
            info['thumbnail'] = videoNode.findall('./thumbnail')[0].text
 | 
			
		||||
@@ -52,11 +54,9 @@ class CollegeHumorIE(InfoExtractor):
 | 
			
		||||
            raise ExtractorError(u'Invalid metadata XML file')
 | 
			
		||||
 | 
			
		||||
        manifest_url += '?hdcore=2.10.3'
 | 
			
		||||
        self.report_manifest(video_id)
 | 
			
		||||
        try:
 | 
			
		||||
            manifestXml = compat_urllib_request.urlopen(manifest_url).read()
 | 
			
		||||
        except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
 | 
			
		||||
            raise ExtractorError(u'Unable to download video info XML: %s' % compat_str(err))
 | 
			
		||||
        manifestXml = self._download_webpage(manifest_url, video_id,
 | 
			
		||||
                                             u'Downloading XML manifest',
 | 
			
		||||
                                             u'Unable to download video info XML')
 | 
			
		||||
 | 
			
		||||
        adoc = xml.etree.ElementTree.fromstring(manifestXml)
 | 
			
		||||
        try:
 | 
			
		||||
@@ -66,9 +66,8 @@ class CollegeHumorIE(InfoExtractor):
 | 
			
		||||
        except IndexError as err:
 | 
			
		||||
            raise ExtractorError(u'Invalid manifest file')
 | 
			
		||||
 | 
			
		||||
        url_pr = compat_urllib_parse_urlparse(manifest_url)
 | 
			
		||||
        url = url_pr.scheme + '://' + url_pr.netloc + '/z' + video_id[:-2] + '/' + node_id + 'Seg1-Frag1'
 | 
			
		||||
        url_pr = compat_urllib_parse_urlparse(info['thumbnail'])
 | 
			
		||||
 | 
			
		||||
        info['url'] = url
 | 
			
		||||
        info['ext'] = 'f4f'
 | 
			
		||||
        info['url'] = url_pr.scheme + '://' + url_pr.netloc + video_id[:-2].replace('.csmil','').replace(',','')
 | 
			
		||||
        info['ext'] = 'mp4'
 | 
			
		||||
        return [info]
 | 
			
		||||
 
 | 
			
		||||
@@ -24,7 +24,9 @@ class ComedyCentralIE(InfoExtractor):
 | 
			
		||||
                         (full-episodes/(?P<episode>.*)|
 | 
			
		||||
                          (?P<clip>
 | 
			
		||||
                              (the-colbert-report-(videos|collections)/(?P<clipID>[0-9]+)/[^/]*/(?P<cntitle>.*?))
 | 
			
		||||
                              |(watch/(?P<date>[^/]*)/(?P<tdstitle>.*)))))
 | 
			
		||||
                              |(watch/(?P<date>[^/]*)/(?P<tdstitle>.*)))|
 | 
			
		||||
                          (?P<interview>
 | 
			
		||||
                              extended-interviews/(?P<interID>[0-9]+)/playlist_tds_extended_(?P<interview_title>.*?)/.*?)))
 | 
			
		||||
                     $"""
 | 
			
		||||
    _TEST = {
 | 
			
		||||
        u'url': u'http://www.thedailyshow.com/watch/thu-december-13-2012/kristen-stewart',
 | 
			
		||||
@@ -87,6 +89,9 @@ class ComedyCentralIE(InfoExtractor):
 | 
			
		||||
            else:
 | 
			
		||||
                epTitle = mobj.group('cntitle')
 | 
			
		||||
            dlNewest = False
 | 
			
		||||
        elif mobj.group('interview'):
 | 
			
		||||
            epTitle = mobj.group('interview_title')
 | 
			
		||||
            dlNewest = False
 | 
			
		||||
        else:
 | 
			
		||||
            dlNewest = not mobj.group('episode')
 | 
			
		||||
            if dlNewest:
 | 
			
		||||
 
 | 
			
		||||
@@ -1,9 +1,12 @@
 | 
			
		||||
import re
 | 
			
		||||
import json
 | 
			
		||||
import itertools
 | 
			
		||||
 | 
			
		||||
from .common import InfoExtractor
 | 
			
		||||
from ..utils import (
 | 
			
		||||
    compat_urllib_request,
 | 
			
		||||
    get_element_by_attribute,
 | 
			
		||||
    get_element_by_id,
 | 
			
		||||
 | 
			
		||||
    ExtractorError,
 | 
			
		||||
)
 | 
			
		||||
@@ -77,3 +80,31 @@ class DailymotionIE(InfoExtractor):
 | 
			
		||||
            'ext':      video_extension,
 | 
			
		||||
            'thumbnail': info['thumbnail_url']
 | 
			
		||||
        }]
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class DailymotionPlaylistIE(InfoExtractor):
    """Information extractor for Dailymotion playlists."""

    _VALID_URL = r'(?:https?://)?(?:www\.)?dailymotion\.[a-z]{2,3}/playlist/(?P<id>.+?)/'
    # Markup that is searched for to decide whether another page follows
    _MORE_PAGES_INDICATOR = r'<div class="next">.*?<a.*?href="/playlist/.+?".*?>.*?</a>.*?</div>'

    def _real_extract(self, url):
        """Return a playlist result with one url_result entry per video.

        Pages are downloaded starting from number 1 until a page does not
        contain the "next" link; the title is read from the last page
        that was downloaded.
        """
        playlist_id = re.match(self._VALID_URL, url).group('id')
        collected_ids = []

        page_number = 1
        while True:
            page_url = 'https://www.dailymotion.com/playlist/%s/%s' % (playlist_id, page_number)
            webpage = self._download_webpage(page_url, playlist_id,
                                             u'Downloading page %s' % page_number)

            # Only look for ids inside the element with class "video_list"
            video_list_html = get_element_by_attribute(u'class', u'video_list', webpage)
            collected_ids += re.findall(r'data-id="(.+?)" data-ext-id', video_list_html)

            if re.search(self._MORE_PAGES_INDICATOR, webpage, re.DOTALL) is None:
                break
            page_number += 1

        entries = []
        for video_id in collected_ids:
            video_page = 'http://www.dailymotion.com/video/%s' % video_id
            entries.append(self.url_result(video_page, 'Dailymotion'))

        playlist = {
            '_type': 'playlist',
            'id': playlist_id,
            'title': get_element_by_id(u'playlist_name', webpage),
            'entries': entries,
        }
        return playlist
 | 
			
		||||
 
 | 
			
		||||
							
								
								
									
										54
									
								
								youtube_dl/extractor/exfm.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										54
									
								
								youtube_dl/extractor/exfm.py
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,54 @@
 | 
			
		||||
import re
 | 
			
		||||
import json
 | 
			
		||||
 | 
			
		||||
from .common import InfoExtractor
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class ExfmIE(InfoExtractor):
    """Information extractor for songs on ex.fm."""

    IE_NAME = u'exfm'
    IE_DESC = u'ex.fm'
    _VALID_URL = r'(?:http://)?(?:www\.)?ex\.fm/song/([^/]+)'
    # Matches song urls that point to the Soundcloud stream API.  Every dot
    # of the host is escaped so that only a literal '.' is accepted.
    _SOUNDCLOUD_URL = r'(?:http://)?(?:www\.)?api\.soundcloud\.com/tracks/([^/]+)/stream'
    _TESTS = [
        {
            u'url': u'http://ex.fm/song/1bgtzg',
            u'file': u'95223130.mp3',
            u'md5': u'8a7967a3fef10e59a1d6f86240fd41cf',
            u'info_dict': {
                u"title": u"We Can't Stop - Miley Cyrus",
                u"uploader": u"Miley Cyrus",
                u'upload_date': u'20130603',
                u'description': u'Download "We Can\'t Stop" \r\niTunes: http://smarturl.it/WeCantStop?IQid=SC\r\nAmazon: http://smarturl.it/WeCantStopAMZ?IQid=SC',
            },
            u'note': u'Soundcloud song',
        },
        {
            u'url': u'http://ex.fm/song/wddt8',
            u'file': u'wddt8.mp3',
            u'md5': u'966bd70741ac5b8570d8e45bfaed3643',
            u'info_dict': {
                u'title': u'Safe and Sound',
                u'uploader': u'Capital Cities',
            },
        },
    ]

    def _real_extract(self, url):
        """Return the information for the ex.fm song at *url*.

        The song metadata is read from the JSON returned by the ex.fm API.
        When the song url matches _SOUNDCLOUD_URL the extraction is handed
        over to the 'Soundcloud' extractor through url_result; otherwise a
        list with a single info dictionary is returned.
        """
        mobj = re.match(self._VALID_URL, url)
        song_id = mobj.group(1)
        info_url = "http://ex.fm/api/v3/song/%s" %(song_id)
        webpage = self._download_webpage(info_url, song_id)
        info = json.loads(webpage)
        song_url = info['song']['url']
        if re.match(self._SOUNDCLOUD_URL, song_url) is not None:
            self.to_screen('Soundcloud song detected')
            # Drop the '/stream' part before passing the url on
            return self.url_result(song_url.replace('/stream',''), 'Soundcloud')
        return [{
            'id':          song_id,
            'url':         song_url,
            'ext':         'mp3',
            'title':       info['song']['title'],
            'thumbnail':   info['song']['image']['large'],
            'uploader':    info['song']['artist'],
            # The API's 'loved_count' value is reported as the view count
            'view_count':  info['song']['loved_count'],
        }]
 | 
			
		||||
@@ -5,7 +5,7 @@ from .common import InfoExtractor
 | 
			
		||||
 | 
			
		||||
class InaIE(InfoExtractor):
 | 
			
		||||
    """Information Extractor for Ina.fr"""
 | 
			
		||||
    _VALID_URL = r'(?:http://)?(?:www\.)?ina\.fr/video/(?P<id>I[0-9]+)/.*'
 | 
			
		||||
    _VALID_URL = r'(?:http://)?(?:www\.)?ina\.fr/video/(?P<id>I?[A-F0-9]+)/.*'
 | 
			
		||||
    _TEST = {
 | 
			
		||||
        u'url': u'www.ina.fr/video/I12055569/francois-hollande-je-crois-que-c-est-clair-video.html',
 | 
			
		||||
        u'file': u'I12055569.mp4',
 | 
			
		||||
 
 | 
			
		||||
@@ -10,7 +10,8 @@ class InstagramIE(InfoExtractor):
 | 
			
		||||
        u'md5': u'0d2da106a9d2631273e192b372806516',
 | 
			
		||||
        u'info_dict': {
 | 
			
		||||
            u"uploader_id": u"naomipq", 
 | 
			
		||||
            u"title": u"Video by naomipq"
 | 
			
		||||
            u"title": u"Video by naomipq",
 | 
			
		||||
            u'description': u'md5:1f17f0ab29bd6fe2bfad705f58de3cb8',
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
@@ -18,20 +19,17 @@ class InstagramIE(InfoExtractor):
 | 
			
		||||
        mobj = re.match(self._VALID_URL, url)
 | 
			
		||||
        video_id = mobj.group(1)
 | 
			
		||||
        webpage = self._download_webpage(url, video_id)
 | 
			
		||||
        html_title = self._html_search_regex(
 | 
			
		||||
            r'<title>(.+?)</title>',
 | 
			
		||||
            webpage, u'title', flags=re.DOTALL)
 | 
			
		||||
        title = re.sub(u'(?: *\(Videos?\))? \u2022 Instagram$', '', html_title).strip()
 | 
			
		||||
        uploader_id = self._html_search_regex(
 | 
			
		||||
            r'<div class="media-user" id="media_user">.*?<h2><a href="[^"]*">([^<]*)</a></h2>',
 | 
			
		||||
            webpage, u'uploader id', fatal=False, flags=re.DOTALL)
 | 
			
		||||
        ext = 'mp4'
 | 
			
		||||
        uploader_id = self._search_regex(r'"owner":{"username":"(.+?)"',
 | 
			
		||||
            webpage, u'uploader id', fatal=False)
 | 
			
		||||
        desc = self._search_regex(r'"caption":"(.*?)"', webpage, u'description',
 | 
			
		||||
            fatal=False)
 | 
			
		||||
 | 
			
		||||
        return [{
 | 
			
		||||
            'id':        video_id,
 | 
			
		||||
            'url':       self._og_search_video_url(webpage),
 | 
			
		||||
            'ext':       ext,
 | 
			
		||||
            'title':     title,
 | 
			
		||||
            'ext':       'mp4',
 | 
			
		||||
            'title':     u'Video by %s' % uploader_id,
 | 
			
		||||
            'thumbnail': self._og_search_thumbnail(webpage),
 | 
			
		||||
            'uploader_id' : uploader_id
 | 
			
		||||
            'uploader_id' : uploader_id,
 | 
			
		||||
            'description': desc,
 | 
			
		||||
        }]
 | 
			
		||||
 
 | 
			
		||||
							
								
								
									
										37
									
								
								youtube_dl/extractor/kankan.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										37
									
								
								youtube_dl/extractor/kankan.py
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,37 @@
 | 
			
		||||
import re
 | 
			
		||||
 | 
			
		||||
from .common import InfoExtractor
 | 
			
		||||
from ..utils import determine_ext
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class KankanIE(InfoExtractor):
    """Information extractor for kankan.com videos."""

    _VALID_URL = r'https?://(?:.*?\.)?kankan\.com/.+?/(?P<id>\d+)\.shtml'

    _TEST = {
        u'url': u'http://yinyue.kankan.com/vod/48/48863.shtml',
        u'file': u'48863.flv',
        u'md5': u'29aca1e47ae68fc28804aca89f29507e',
        u'info_dict': {
            u'title': u'Ready To Go',
        },
    }

    def _real_extract(self, url):
        """Return the info dictionary for the kankan.com video at *url*.

        The title and the gcid are read from the video page; the gcid is
        then used to request the page that contains the ip and path from
        which the final video url is built.
        """
        video_id = re.match(self._VALID_URL, url).group('id')
        webpage = self._download_webpage(url, video_id)

        title = self._search_regex(
            r'G_TITLE=[\'"](.+?)[\'"]', webpage, u'video title')
        gcid = self._search_regex(
            r'lurl:[\'"]http://.+?/.+?/(.+?)/', webpage, u'gcid')

        cdn_info_url = 'http://p2s.cl.kankan.com/getCdnresource_flv?gcid=%s' % gcid
        cdn_info = self._download_webpage(cdn_info_url, video_id,
                                          u'Downloading video url info')
        host = self._search_regex(r'ip:"(.+?)"', cdn_info, u'video url ip')
        location = self._search_regex(r'path:"(.+?)"', cdn_info, u'video url path')
        video_url = 'http://%s%s' % (host, location)

        result = {
            'id': video_id,
            'title': title,
            'url': video_url,
            'ext': determine_ext(video_url),
        }
        return result
 | 
			
		||||
@@ -4,10 +4,10 @@ from .common import InfoExtractor
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class KeekIE(InfoExtractor):
 | 
			
		||||
    _VALID_URL = r'http://(?:www\.)?keek\.com/(?:!|\w+/keeks/)(?P<videoID>\w+)'
 | 
			
		||||
    _VALID_URL = r'https?://(?:www\.)?keek\.com/(?:!|\w+/keeks/)(?P<videoID>\w+)'
 | 
			
		||||
    IE_NAME = u'keek'
 | 
			
		||||
    _TEST = {
 | 
			
		||||
        u'url': u'http://www.keek.com/ytdl/keeks/NODfbab',
 | 
			
		||||
        u'url': u'https://www.keek.com/ytdl/keeks/NODfbab',
 | 
			
		||||
        u'file': u'NODfbab.mp4',
 | 
			
		||||
        u'md5': u'9b0636f8c0f7614afa4ea5e4c6e57e83',
 | 
			
		||||
        u'info_dict': {
 | 
			
		||||
 
 | 
			
		||||
							
								
								
									
										64
									
								
								youtube_dl/extractor/muzu.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										64
									
								
								youtube_dl/extractor/muzu.py
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,64 @@
 | 
			
		||||
import re
 | 
			
		||||
import json
 | 
			
		||||
 | 
			
		||||
from .common import InfoExtractor
 | 
			
		||||
from ..utils import (
 | 
			
		||||
    compat_urllib_parse,
 | 
			
		||||
    determine_ext,
 | 
			
		||||
)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class MuzuTVIE(InfoExtractor):
    """Information extractor for muzu.tv videos."""

    # The dots of the host name are escaped so that they only match a
    # literal '.' instead of any character.
    _VALID_URL = r'https?://www\.muzu\.tv/(.+?)/(.+?)/(?P<id>\d+)'
    IE_NAME = u'muzu.tv'

    _TEST = {
        u'url': u'http://www.muzu.tv/defected/marcashken-featuring-sos-cat-walk-original-mix-music-video/1981454/',
        u'file': u'1981454.mp4',
        u'md5': u'98f8b2c7bc50578d6a0364fff2bfb000',
        u'info_dict': {
            u'title': u'Cat Walk (Original Mix)',
            u'description': u'md5:90e868994de201b2570e4e5854e19420',
            u'uploader': u'MarcAshken featuring SOS',
        },
    }

    def _real_extract(self, url):
        """Return the info dictionary for the muzu.tv video at *url*.

        Three requests are made: the oembed API (title, thumbnail,
        description, author), the player initialisation (available
        qualities) and the video request that yields the final url.
        """
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')

        info_data = compat_urllib_parse.urlencode({'format': 'json',
                                                   'url': url,
                                                   })
        video_info_page = self._download_webpage('http://www.muzu.tv/api/oembed/?%s' % info_data,
                                                 video_id, u'Downloading video info')
        info = json.loads(video_info_page)

        player_info_page = self._download_webpage('http://player.muzu.tv/player/playerInit?ai=%s' % video_id,
                                                  video_id, u'Downloading player info')
        video_info = json.loads(player_info_page)['videos'][0]
        # Stop at the first (highest) quality with a truthy 'v<quality>'
        # entry; if there is none the loop ends with '360' still bound.
        for quality in ['1080' , '720', '480', '360']:
            if video_info.get('v%s' % quality):
                break

        data = compat_urllib_parse.urlencode({'ai': video_id,
                                              # Even if each time you watch a video the hash changes,
                                              # it seems to work for different videos, and it will work
                                              # even if you use any non empty string as a hash
                                              'viewhash': 'VBNff6djeV4HV5TRPW5kOHub2k',
                                              'device': 'web',
                                              'qv': quality,
                                              })
        video_url_page = self._download_webpage('http://player.muzu.tv/player/requestVideo?%s' % data,
                                                video_id, u'Downloading video url')
        video_url_info = json.loads(video_url_page)
        video_url = video_url_info['url']

        return {'id': video_id,
                'title': info['title'],
                'url': video_url,
                'ext': determine_ext(video_url),
                'thumbnail': info['thumbnail_url'],
                'description': info['description'],
                'uploader': info['author_name'],
                }
 | 
			
		||||
@@ -2,11 +2,13 @@ import binascii
 | 
			
		||||
import base64
 | 
			
		||||
import hashlib
 | 
			
		||||
import re
 | 
			
		||||
import json
 | 
			
		||||
 | 
			
		||||
from .common import InfoExtractor
 | 
			
		||||
from ..utils import (
 | 
			
		||||
    compat_ord,
 | 
			
		||||
    compat_urllib_parse,
 | 
			
		||||
    compat_urllib_request,
 | 
			
		||||
 | 
			
		||||
    ExtractorError,
 | 
			
		||||
)
 | 
			
		||||
@@ -16,7 +18,7 @@ from ..utils import (
 | 
			
		||||
class MyVideoIE(InfoExtractor):
 | 
			
		||||
    """Information Extractor for myvideo.de."""
 | 
			
		||||
 | 
			
		||||
    _VALID_URL = r'(?:http://)?(?:www\.)?myvideo\.de/watch/([0-9]+)/([^?/]+).*'
 | 
			
		||||
    _VALID_URL = r'(?:http://)?(?:www\.)?myvideo\.de/(?:[^/]+/)?watch/([0-9]+)/([^?/]+).*'
 | 
			
		||||
    IE_NAME = u'myvideo'
 | 
			
		||||
    _TEST = {
 | 
			
		||||
        u'url': u'http://www.myvideo.de/watch/8229274/bowling_fail_or_win',
 | 
			
		||||
@@ -85,6 +87,20 @@ class MyVideoIE(InfoExtractor):
 | 
			
		||||
                'ext':      video_ext,
 | 
			
		||||
            }]
 | 
			
		||||
 | 
			
		||||
        mobj = re.search(r'data-video-service="/service/data/video/%s/config' % video_id, webpage)
 | 
			
		||||
        if mobj is not None:
 | 
			
		||||
            request = compat_urllib_request.Request('http://www.myvideo.de/service/data/video/%s/config' % video_id, '')
 | 
			
		||||
            response = self._download_webpage(request, video_id,
 | 
			
		||||
                                              u'Downloading video info')
 | 
			
		||||
            info = json.loads(base64.b64decode(response).decode('utf-8'))
 | 
			
		||||
            return {'id': video_id,
 | 
			
		||||
                    'title': info['title'],
 | 
			
		||||
                    'url': info['streaming_url'].replace('rtmpe', 'rtmpt'),
 | 
			
		||||
                    'play_path': info['filename'],
 | 
			
		||||
                    'ext': 'flv',
 | 
			
		||||
                    'thumbnail': info['thumbnail'][0]['url'],
 | 
			
		||||
                    }
 | 
			
		||||
 | 
			
		||||
        # try encxml
 | 
			
		||||
        mobj = re.search('var flashvars={(.+?)}', webpage)
 | 
			
		||||
        if mobj is None:
 | 
			
		||||
 
 | 
			
		||||
							
								
								
									
										52
									
								
								youtube_dl/extractor/ooyala.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										52
									
								
								youtube_dl/extractor/ooyala.py
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,52 @@
 | 
			
		||||
import re
 | 
			
		||||
import json
 | 
			
		||||
 | 
			
		||||
from .common import InfoExtractor
 | 
			
		||||
from ..utils import unescapeHTML
 | 
			
		||||
 | 
			
		||||
class OoyalaIE(InfoExtractor):
    """Information extractor for videos served by the Ooyala player."""

    _VALID_URL = r'https?://.+?\.ooyala\.com/.*?embedCode=(?P<id>.+?)(&|$)'

    _TEST = {
        # From http://it.slashdot.org/story/13/04/25/178216/recovering-data-from-broken-hard-drives-and-ssds-video
        u'url': u'http://player.ooyala.com/player.js?embedCode=pxczE2YjpfHfn1f3M-ykG_AmJRRn0PD8',
        u'file': u'pxczE2YjpfHfn1f3M-ykG_AmJRRn0PD8.mp4',
        u'md5': u'3f5cceb3a7bf461d6c29dc466cf8033c',
        u'info_dict': {
            u'title': u'Explaining Data Recovery from Hard Drives and SSDs',
            u'description': u'How badly damaged does a drive have to be to defeat Russell and his crew? Apparently, smashed to bits.',
        },
    }

    def _extract_result(self, info, more_info):
        """Build the info dictionary of one video.

        info      -- dictionary providing 'embedCode', 'title' and 'url'.
        more_info -- dictionary providing 'description' and 'promo'.
        """
        result = {
            'id': info['embedCode'],
            'ext': 'mp4',
            'title': unescapeHTML(info['title']),
            'url': info['url'],
            'description': unescapeHTML(more_info['description']),
            'thumbnail': more_info['promo'],
        }
        return result

    def _real_extract(self, url):
        """Return a single video or a playlist for the embed code in *url*.

        The player script gives the url of the mobile player, whose source
        embeds two escaped JSON documents: the list of streams and the
        additional metadata.  A playlist is returned when the metadata has
        a non-empty 'lineup'.
        """
        embedCode = re.match(self._VALID_URL, url).group('id')
        player_url = 'http://player.ooyala.com/player.js?embedCode=%s' % embedCode
        player_js = self._download_webpage(player_url, embedCode)
        mobile_url = self._search_regex(r'mobile_player_url="(.+?)&device="',
                                        player_js, u'mobile player url')
        mobile_player = self._download_webpage(mobile_url, embedCode)

        raw_streams = self._search_regex(
            r'eval\("\((\[{.*?stream_redirect.*?}\])\)"\);', mobile_player, u'info')
        raw_metadata = self._search_regex(
            r'eval\("\(({.*?\\"promo\\".*?})\)"', mobile_player, u'more info')
        # The JSON is embedded in a string literal, so its quotes are escaped
        streams = json.loads(raw_streams.replace('\\"','"'))
        metadata = json.loads(raw_metadata.replace('\\"','"'))

        lineup = metadata.get('lineup')
        if not lineup:
            return self._extract_result(streams[0], metadata)

        # Streams and lineup items are paired by position
        entries = [self._extract_result(stream, item)
                   for stream, item in zip(streams, lineup)]
        return {
            '_type': 'playlist',
            'id': embedCode,
            'title': unescapeHTML(metadata['title']),
            'entries': entries,
        }
 | 
			
		||||
        
 | 
			
		||||
							
								
								
									
										49
									
								
								youtube_dl/extractor/roxwel.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										49
									
								
								youtube_dl/extractor/roxwel.py
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,49 @@
 | 
			
		||||
import re
 | 
			
		||||
import json
 | 
			
		||||
 | 
			
		||||
from .common import InfoExtractor
 | 
			
		||||
from ..utils import unified_strdate, determine_ext
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class RoxwelIE(InfoExtractor):
    """Information extractor for roxwel.com player pages."""

    _VALID_URL = r'https?://www\.roxwel\.com/player/(?P<filename>.+?)(\.|\?|$)'

    _TEST = {
        u'url': u'http://www.roxwel.com/player/passionpittakeawalklive.html',
        u'file': u'passionpittakeawalklive.flv',
        u'md5': u'd9dea8360a1e7d485d2206db7fe13035',
        u'info_dict': {
            u'title': u'Take A Walk (live)',
            u'uploader': u'Passion Pit',
            u'description': u'Passion Pit performs "Take A Walk\" live at The Backyard in Austin, Texas. ',
        },
        u'skip': u'Requires rtmpdump',
    }

    def _real_extract(self, url):
        """Return the info dict for the video named in ``url``.

        The ``filename`` group of ``_VALID_URL`` doubles as the video id.
        Metadata is read from the JSON served by the site's videos API; the
        media URL is fetched from ``pl_one_time.php`` for the highest
        ``flv_<rate>`` entry listed under ``media_rates``.
        """
        filename = re.match(self._VALID_URL, url).group('filename')

        api_url = 'http://www.roxwel.com/api/videos/%s' % filename
        api_page = self._download_webpage(api_url, filename,
                                          u'Downloading video info')

        self.report_extraction(filename)
        info = json.loads(api_page)

        # Collect the numeric part of every 'flv_<rate>' entry and keep the
        # largest one.
        flv_rates = []
        for rate_name in info['media_rates']:
            if rate_name.startswith('flv_'):
                flv_rates.append(int(rate_name.replace('flv_', '')))
        flv_rates.sort()
        best_rate = flv_rates[-1]

        one_time_url = 'http://roxwel.com/pl_one_time.php?filename=%s&quality=%s' % (filename, best_rate)
        # The body of this page is used verbatim as the media URL.
        rtmp_url = self._download_webpage(one_time_url, filename,
                                          u'Downloading video url')
        if determine_ext(rtmp_url) == 'f4v':
            # f4v streams get the filename prefixed with 'mp4:' inside the URL.
            rtmp_url = rtmp_url.replace(filename, 'mp4:%s' % filename)

        result = {
            'id': filename,
            'title': info['title'],
            'url': rtmp_url,
            'ext': 'flv',
            'description': info['description'],
            # Prefer the player image, fall back to the large image.
            'thumbnail': info.get('player_image_url') or info.get('image_url_large'),
            'uploader': info['artist'],
            'uploader_id': info['artistname'],
            'upload_date': unified_strdate(info['dbdate']),
        }
        return result
 | 
			
		||||
							
								
								
									
										67
									
								
								youtube_dl/extractor/sina.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										67
									
								
								youtube_dl/extractor/sina.py
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,67 @@
 | 
			
		||||
# coding: utf-8
 | 
			
		||||
 | 
			
		||||
import re
 | 
			
		||||
import xml.etree.ElementTree
 | 
			
		||||
 | 
			
		||||
from .common import InfoExtractor
 | 
			
		||||
from ..utils import (
 | 
			
		||||
    compat_urllib_request,
 | 
			
		||||
    compat_urllib_parse,
 | 
			
		||||
)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class SinaIE(InfoExtractor):
    """Information extractor for video.sina.com.cn.

    Three URL shapes are accepted by ``_VALID_URL``:

    * URLs carrying the numeric video id after ``#`` or ``vid=`` (group ``id``);
    * ``<digits>.html`` pages (group ``pseudo_id``), whose real video id is
      scraped from the page;
    * ``outplay.php/<token>.swf`` embeds (group ``token``), which are resolved
      by following the redirect.
    """

    _VALID_URL = r'''https?://(.*?\.)?video\.sina\.com\.cn/
                        (
                            (.+?/(((?P<pseudo_id>\d+).html)|(.*?(\#|(vid=))(?P<id>\d+?)($|&))))
                            |
                            # This is used by external sites like Weibo
                            (api/sinawebApi/outplay.php/(?P<token>.+?)\.swf)
                        )
                  '''

    _TEST = {
        u'url': u'http://video.sina.com.cn/news/vlist/zt/chczlj2013/?opsubject_id=top12#110028898',
        u'file': u'110028898.flv',
        u'md5': u'd65dd22ddcf44e38ce2bf58a10c3e71f',
        u'info_dict': {
            u'title': u'《中国新闻》 朝鲜要求巴拿马立即释放被扣船员',
        }
    }

    @classmethod
    def suitable(cls, url):
        """Return True if ``url`` matches ``_VALID_URL``.

        Overridden because the pattern is written in verbose syntax and must
        be matched with ``re.VERBOSE``.
        """
        return re.match(cls._VALID_URL, url, flags=re.VERBOSE) is not None

    def _extract_video(self, video_id):
        """Build the info dict for the numeric ``video_id``.

        Downloads the XML play info (media URL and title) and the thumbnail
        lookup page. The thumbnail is optional: if the lookup response does
        not contain ``=``, ``thumbnail`` is ``None`` instead of raising.
        """
        data = compat_urllib_parse.urlencode({'vid': video_id})
        url_page = self._download_webpage('http://v.iask.com/v_play.php?%s' % data,
            video_id, u'Downloading video url')
        image_page = self._download_webpage(
            'http://interface.video.sina.com.cn/interface/common/getVideoImage.php?%s' % data,
            video_id, u'Downloading thumbnail info')
        url_doc = xml.etree.ElementTree.fromstring(url_page.encode('utf-8'))

        # Take everything after the FIRST '=' so that a thumbnail URL which
        # itself contains '=' (e.g. in a query string) is not truncated.
        # NOTE(review): the response is presumably '<name>=<image url>' --
        # confirm against the live endpoint.
        _, separator, thumbnail = image_page.partition('=')
        if not separator:
            thumbnail = None

        return {'id': video_id,
                'url': url_doc.find('./durl/url').text,
                'ext': 'flv',
                'title': url_doc.find('./vname').text,
                'thumbnail': thumbnail,
                }

    def _real_extract(self, url):
        """Resolve ``url`` to a numeric video id and extract that video."""
        mobj = re.match(self._VALID_URL, url, flags=re.VERBOSE)
        video_id = mobj.group('id')
        if mobj.group('token') is not None:
            # The video id is in the redirected url: issue a HEAD request and
            # re-run the extraction on the final URL.
            self.to_screen(u'Getting video id')
            request = compat_urllib_request.Request(url)
            request.get_method = lambda: 'HEAD'
            (_, urlh) = self._download_webpage_handle(request, 'NA', False)
            return self._real_extract(urlh.geturl())
        elif video_id is None:
            # Only a pseudo id is present in the URL; the real id is embedded
            # in the page as vid:'<digits>'.
            pseudo_id = mobj.group('pseudo_id')
            webpage = self._download_webpage(url, pseudo_id)
            video_id = self._search_regex(r'vid:\'(\d+?)\'', webpage, u'video id')

        return self._extract_video(video_id)
 | 
			
		||||
@@ -19,7 +19,11 @@ class SoundcloudIE(InfoExtractor):
 | 
			
		||||
       of the stream token and uid
 | 
			
		||||
     """
 | 
			
		||||
 | 
			
		||||
    _VALID_URL = r'^(?:https?://)?(?:www\.)?soundcloud\.com/([\w\d-]+)/([\w\d-]+)(?:[?].*)?$'
 | 
			
		||||
    _VALID_URL = r'''^(?:https?://)?
 | 
			
		||||
                    (?:(?:(?:www\.)?soundcloud\.com/([\w\d-]+)/([\w\d-]+)/?(?:[?].*)?$)
 | 
			
		||||
                       |(?:api\.soundcloud\.com/tracks/(?P<track_id>\d+))
 | 
			
		||||
                    )
 | 
			
		||||
                    '''
 | 
			
		||||
    IE_NAME = u'soundcloud'
 | 
			
		||||
    _TEST = {
 | 
			
		||||
        u'url': u'http://soundcloud.com/ethmusic/lostin-powers-she-so-heavy',
 | 
			
		||||
@@ -33,59 +37,65 @@ class SoundcloudIE(InfoExtractor):
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    _CLIENT_ID = 'b45b1aa10f1ac2941910a7f0d10f8e28'
 | 
			
		||||
 | 
			
		||||
    @classmethod
 | 
			
		||||
    def suitable(cls, url):
 | 
			
		||||
        return re.match(cls._VALID_URL, url, flags=re.VERBOSE) is not None
 | 
			
		||||
 | 
			
		||||
    def report_resolve(self, video_id):
 | 
			
		||||
        """Report information extraction."""
 | 
			
		||||
        self.to_screen(u'%s: Resolving id' % video_id)
 | 
			
		||||
 | 
			
		||||
    def _real_extract(self, url):
 | 
			
		||||
        mobj = re.match(self._VALID_URL, url)
 | 
			
		||||
        if mobj is None:
 | 
			
		||||
            raise ExtractorError(u'Invalid URL: %s' % url)
 | 
			
		||||
    @classmethod
 | 
			
		||||
    def _resolv_url(cls, url):
 | 
			
		||||
        return 'http://api.soundcloud.com/resolve.json?url=' + url + '&client_id=' + cls._CLIENT_ID
 | 
			
		||||
 | 
			
		||||
        # extract uploader (which is in the url)
 | 
			
		||||
        uploader = mobj.group(1)
 | 
			
		||||
        # extract simple title (uploader + slug of song title)
 | 
			
		||||
        slug_title =  mobj.group(2)
 | 
			
		||||
        full_title = '%s/%s' % (uploader, slug_title)
 | 
			
		||||
 | 
			
		||||
        self.report_resolve(full_title)
 | 
			
		||||
 | 
			
		||||
        url = 'http://soundcloud.com/%s/%s' % (uploader, slug_title)
 | 
			
		||||
        resolv_url = 'http://api.soundcloud.com/resolve.json?url=' + url + '&client_id=b45b1aa10f1ac2941910a7f0d10f8e28'
 | 
			
		||||
        info_json = self._download_webpage(resolv_url, full_title, u'Downloading info JSON')
 | 
			
		||||
 | 
			
		||||
        info = json.loads(info_json)
 | 
			
		||||
    def _extract_info_dict(self, info, full_title=None):
 | 
			
		||||
        video_id = info['id']
 | 
			
		||||
        self.report_extraction(full_title)
 | 
			
		||||
        name = full_title or video_id
 | 
			
		||||
        self.report_extraction(name)
 | 
			
		||||
 | 
			
		||||
        streams_url = 'https://api.sndcdn.com/i1/tracks/' + str(video_id) + '/streams?client_id=b45b1aa10f1ac2941910a7f0d10f8e28'
 | 
			
		||||
        stream_json = self._download_webpage(streams_url, full_title,
 | 
			
		||||
                                             u'Downloading stream definitions',
 | 
			
		||||
                                             u'unable to download stream definitions')
 | 
			
		||||
 | 
			
		||||
        streams = json.loads(stream_json)
 | 
			
		||||
        mediaURL = streams['http_mp3_128_url']
 | 
			
		||||
        upload_date = unified_strdate(info['created_at'])
 | 
			
		||||
 | 
			
		||||
        return [{
 | 
			
		||||
        thumbnail = info['artwork_url']
 | 
			
		||||
        if thumbnail is not None:
 | 
			
		||||
            thumbnail = thumbnail.replace('-large', '-t500x500')
 | 
			
		||||
        return {
 | 
			
		||||
            'id':       info['id'],
 | 
			
		||||
            'url':      mediaURL,
 | 
			
		||||
            'url':      info['stream_url'] + '?client_id=' + self._CLIENT_ID,
 | 
			
		||||
            'uploader': info['user']['username'],
 | 
			
		||||
            'upload_date': upload_date,
 | 
			
		||||
            'upload_date': unified_strdate(info['created_at']),
 | 
			
		||||
            'title':    info['title'],
 | 
			
		||||
            'ext':      u'mp3',
 | 
			
		||||
            'description': info['description'],
 | 
			
		||||
        }]
 | 
			
		||||
            'thumbnail': thumbnail,
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
class SoundcloudSetIE(InfoExtractor):
 | 
			
		||||
    """Information extractor for soundcloud.com sets
 | 
			
		||||
       To access the media, the uid of the song and a stream token
 | 
			
		||||
       must be extracted from the page source and the script must make
 | 
			
		||||
       a request to media.soundcloud.com/crossdomain.xml. Then
 | 
			
		||||
       the media can be grabbed by requesting from an url composed
 | 
			
		||||
       of the stream token and uid
 | 
			
		||||
     """
 | 
			
		||||
    def _real_extract(self, url):
 | 
			
		||||
        mobj = re.match(self._VALID_URL, url, flags=re.VERBOSE)
 | 
			
		||||
        if mobj is None:
 | 
			
		||||
            raise ExtractorError(u'Invalid URL: %s' % url)
 | 
			
		||||
 | 
			
		||||
        track_id = mobj.group('track_id')
 | 
			
		||||
        if track_id is not None:
 | 
			
		||||
            info_json_url = 'http://api.soundcloud.com/tracks/' + track_id + '.json?client_id=' + self._CLIENT_ID
 | 
			
		||||
            full_title = track_id
 | 
			
		||||
        else:
 | 
			
		||||
            # extract uploader (which is in the url)
 | 
			
		||||
            uploader = mobj.group(1)
 | 
			
		||||
            # extract simple title (uploader + slug of song title)
 | 
			
		||||
            slug_title =  mobj.group(2)
 | 
			
		||||
            full_title = '%s/%s' % (uploader, slug_title)
 | 
			
		||||
    
 | 
			
		||||
            self.report_resolve(full_title)
 | 
			
		||||
    
 | 
			
		||||
            url = 'http://soundcloud.com/%s/%s' % (uploader, slug_title)
 | 
			
		||||
            info_json_url = self._resolv_url(url)
 | 
			
		||||
        info_json = self._download_webpage(info_json_url, full_title, u'Downloading info JSON')
 | 
			
		||||
 | 
			
		||||
        info = json.loads(info_json)
 | 
			
		||||
        return self._extract_info_dict(info, full_title)
 | 
			
		||||
 | 
			
		||||
class SoundcloudSetIE(SoundcloudIE):
 | 
			
		||||
    _VALID_URL = r'^(?:https?://)?(?:www\.)?soundcloud\.com/([\w\d-]+)/sets/([\w\d-]+)(?:[?].*)?$'
 | 
			
		||||
    IE_NAME = u'soundcloud:set'
 | 
			
		||||
    _TEST = {
 | 
			
		||||
@@ -153,10 +163,6 @@ class SoundcloudSetIE(InfoExtractor):
 | 
			
		||||
        ]
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    def report_resolve(self, video_id):
 | 
			
		||||
        """Report information extraction."""
 | 
			
		||||
        self.to_screen(u'%s: Resolving id' % video_id)
 | 
			
		||||
 | 
			
		||||
    def _real_extract(self, url):
 | 
			
		||||
        mobj = re.match(self._VALID_URL, url)
 | 
			
		||||
        if mobj is None:
 | 
			
		||||
@@ -171,7 +177,7 @@ class SoundcloudSetIE(InfoExtractor):
 | 
			
		||||
        self.report_resolve(full_title)
 | 
			
		||||
 | 
			
		||||
        url = 'http://soundcloud.com/%s/sets/%s' % (uploader, slug_title)
 | 
			
		||||
        resolv_url = 'http://api.soundcloud.com/resolve.json?url=' + url + '&client_id=b45b1aa10f1ac2941910a7f0d10f8e28'
 | 
			
		||||
        resolv_url = self._resolv_url(url)
 | 
			
		||||
        info_json = self._download_webpage(resolv_url, full_title)
 | 
			
		||||
 | 
			
		||||
        videos = []
 | 
			
		||||
@@ -182,23 +188,8 @@ class SoundcloudSetIE(InfoExtractor):
 | 
			
		||||
            return
 | 
			
		||||
 | 
			
		||||
        self.report_extraction(full_title)
 | 
			
		||||
        for track in info['tracks']:
 | 
			
		||||
            video_id = track['id']
 | 
			
		||||
 | 
			
		||||
            streams_url = 'https://api.sndcdn.com/i1/tracks/' + str(video_id) + '/streams?client_id=b45b1aa10f1ac2941910a7f0d10f8e28'
 | 
			
		||||
            stream_json = self._download_webpage(streams_url, video_id, u'Downloading track info JSON')
 | 
			
		||||
 | 
			
		||||
            self.report_extraction(video_id)
 | 
			
		||||
            streams = json.loads(stream_json)
 | 
			
		||||
            mediaURL = streams['http_mp3_128_url']
 | 
			
		||||
 | 
			
		||||
            videos.append({
 | 
			
		||||
                'id':       video_id,
 | 
			
		||||
                'url':      mediaURL,
 | 
			
		||||
                'uploader': track['user']['username'],
 | 
			
		||||
                'upload_date':  unified_strdate(track['created_at']),
 | 
			
		||||
                'title':    track['title'],
 | 
			
		||||
                'ext':      u'mp3',
 | 
			
		||||
                'description': track['description'],
 | 
			
		||||
            })
 | 
			
		||||
        return videos
 | 
			
		||||
        return {'_type': 'playlist',
 | 
			
		||||
                'entries': [self._extract_info_dict(track) for track in info['tracks']],
 | 
			
		||||
                'id': info['id'],
 | 
			
		||||
                'title': info['title'],
 | 
			
		||||
                }
 | 
			
		||||
 
 | 
			
		||||
@@ -33,7 +33,7 @@ class TeamcocoIE(InfoExtractor):
 | 
			
		||||
        data_url = 'http://teamcoco.com/cvp/2.0/%s.xml' % video_id
 | 
			
		||||
        data = self._download_webpage(data_url, video_id, 'Downloading data webpage')
 | 
			
		||||
 | 
			
		||||
        video_url = self._html_search_regex(r'<file type="high".*?>(.*?)</file>',
 | 
			
		||||
        video_url = self._html_search_regex(r'<file [^>]*type="high".*?>(.*?)</file>',
 | 
			
		||||
            data, u'video URL')
 | 
			
		||||
 | 
			
		||||
        return [{
 | 
			
		||||
 
 | 
			
		||||
@@ -67,7 +67,7 @@ class TEDIE(InfoExtractor):
 | 
			
		||||
        webpage = self._download_webpage(url, video_id, 'Downloading \"%s\" page' % video_name)
 | 
			
		||||
        self.report_extraction(video_name)
 | 
			
		||||
        # If the url includes the language we get the title translated
 | 
			
		||||
        title = self._html_search_regex(r'<span id="altHeadline" >(?P<title>.*)</span>',
 | 
			
		||||
        title = self._html_search_regex(r'<span .*?id="altHeadline".+?>(?P<title>.*)</span>',
 | 
			
		||||
                                        webpage, 'title')
 | 
			
		||||
        json_data = self._search_regex(r'<script.*?>var talkDetails = ({.*?})</script>',
 | 
			
		||||
                                    webpage, 'json data')
 | 
			
		||||
 
 | 
			
		||||
@@ -6,19 +6,17 @@ import re
 | 
			
		||||
from .common import InfoExtractor
 | 
			
		||||
 | 
			
		||||
class TF1IE(InfoExtractor):
 | 
			
		||||
    """
 | 
			
		||||
    TF1 uses the wat.tv player, currently it can only download videos with the
 | 
			
		||||
    html5 player enabled, it cannot download HD videos.
 | 
			
		||||
    """
 | 
			
		||||
    """TF1 uses the wat.tv player."""
 | 
			
		||||
    _VALID_URL = r'http://videos.tf1.fr/.*-(.*?).html'
 | 
			
		||||
    _TEST = {
 | 
			
		||||
        u'url': u'http://videos.tf1.fr/auto-moto/citroen-grand-c4-picasso-2013-presentation-officielle-8062060.html',
 | 
			
		||||
        u'file': u'10635995.mp4',
 | 
			
		||||
        u'md5': u'66789d3e91278d332f75e1feb7aea327',
 | 
			
		||||
        u'md5': u'2e378cc28b9957607d5e88f274e637d8',
 | 
			
		||||
        u'info_dict': {
 | 
			
		||||
            u'title': u'Citroën Grand C4 Picasso 2013 : présentation officielle',
 | 
			
		||||
            u'description': u'Vidéo officielle du nouveau Citroën Grand C4 Picasso, lancé à l\'automne 2013.',
 | 
			
		||||
        }
 | 
			
		||||
        },
 | 
			
		||||
        u'skip': u'Sometimes wat serves the whole file with the --test option',
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    def _real_extract(self, url):
 | 
			
		||||
 
 | 
			
		||||
@@ -4,11 +4,11 @@ from .common import InfoExtractor
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class TrailerAddictIE(InfoExtractor):
 | 
			
		||||
    _VALID_URL = r'(?:http://)?(?:www\.)?traileraddict\.com/trailer/([^/]+)/(?:trailer|feature-trailer)'
 | 
			
		||||
    _VALID_URL = r'(?:http://)?(?:www\.)?traileraddict\.com/(?:trailer|clip)/(?P<movie>.+?)/(?P<trailer_name>.+)'
 | 
			
		||||
    _TEST = {
 | 
			
		||||
        u'url': u'http://www.traileraddict.com/trailer/prince-avalanche/trailer',
 | 
			
		||||
        u'file': u'76184.mp4',
 | 
			
		||||
        u'md5': u'41365557f3c8c397d091da510e73ceb4',
 | 
			
		||||
        u'md5': u'57e39dbcf4142ceb8e1f242ff423fd71',
 | 
			
		||||
        u'info_dict': {
 | 
			
		||||
            u"title": u"Prince Avalanche Trailer",
 | 
			
		||||
            u"description": u"Trailer for Prince Avalanche.Two highway road workers spend the summer of 1988 away from their city lives. The isolated landscape becomes a place of misadventure as the men find themselves at odds with each other and the women they left behind."
 | 
			
		||||
@@ -17,24 +17,30 @@ class TrailerAddictIE(InfoExtractor):
 | 
			
		||||
 | 
			
		||||
    def _real_extract(self, url):
 | 
			
		||||
        mobj = re.match(self._VALID_URL, url)
 | 
			
		||||
        video_id = mobj.group(1)
 | 
			
		||||
        webpage = self._download_webpage(url, video_id)
 | 
			
		||||
        
 | 
			
		||||
        name = mobj.group('movie') + '/' + mobj.group('trailer_name')
 | 
			
		||||
        webpage = self._download_webpage(url, name)
 | 
			
		||||
 | 
			
		||||
        title = self._search_regex(r'<title>(.+?)</title>',
 | 
			
		||||
                webpage, 'video title').replace(' - Trailer Addict','')
 | 
			
		||||
        view_count = self._search_regex(r'Views: (.+?)<br />',
 | 
			
		||||
                webpage, 'Views Count')
 | 
			
		||||
        video_id = self._og_search_property('video', webpage, 'Video id').split('=')[1]
 | 
			
		||||
 | 
			
		||||
        info_url = "http://www.traileraddict.com/fvar.php?tid=%s" %(str(video_id))
 | 
			
		||||
        # Presence of (no)watchplus function indicates HD quality is available
 | 
			
		||||
        if re.search(r'function (no)?watchplus()', webpage):
 | 
			
		||||
            fvar = "fvarhd"
 | 
			
		||||
        else:
 | 
			
		||||
            fvar = "fvar"
 | 
			
		||||
 | 
			
		||||
        info_url = "http://www.traileraddict.com/%s.php?tid=%s" % (fvar, str(video_id))
 | 
			
		||||
        info_webpage = self._download_webpage(info_url, video_id , "Downloading the info webpage")
 | 
			
		||||
        
 | 
			
		||||
 | 
			
		||||
        final_url = self._search_regex(r'&fileurl=(.+)',
 | 
			
		||||
                info_webpage, 'Download url').replace('%3F','?')
 | 
			
		||||
        thumbnail_url = self._search_regex(r'&image=(.+?)&',
 | 
			
		||||
                info_webpage, 'thumbnail url')
 | 
			
		||||
        ext = final_url.split('.')[-1].split('?')[0]
 | 
			
		||||
        
 | 
			
		||||
 | 
			
		||||
        return [{
 | 
			
		||||
            'id'          : video_id,
 | 
			
		||||
            'url'         : final_url,
 | 
			
		||||
 
 | 
			
		||||
@@ -8,7 +8,7 @@ from ..utils import (
 | 
			
		||||
 | 
			
		||||
class VevoIE(InfoExtractor):
 | 
			
		||||
    """
 | 
			
		||||
    Accecps urls from vevo.com or in the format 'vevo:{id}'
 | 
			
		||||
    Accepts urls from vevo.com or in the format 'vevo:{id}'
 | 
			
		||||
    (currently used by MTVIE)
 | 
			
		||||
    """
 | 
			
		||||
    _VALID_URL = r'((http://www.vevo.com/watch/.*?/.*?/)|(vevo:))(?P<id>.*)$'
 | 
			
		||||
@@ -19,7 +19,7 @@ class VevoIE(InfoExtractor):
 | 
			
		||||
        u'info_dict': {
 | 
			
		||||
            u"upload_date": u"20130624", 
 | 
			
		||||
            u"uploader": u"Hurts", 
 | 
			
		||||
            u"title": u"Somebody To Die For"
 | 
			
		||||
            u"title": u"Somebody to Die For"
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
@@ -35,12 +35,12 @@ class VevoIE(InfoExtractor):
 | 
			
		||||
 | 
			
		||||
        self.report_extraction(video_id)
 | 
			
		||||
        video_info = json.loads(info_json)
 | 
			
		||||
        m_urls = list(re.finditer(r'<video src="(?P<ext>.*?):(?P<url>.*?)"', links_webpage))
 | 
			
		||||
        m_urls = list(re.finditer(r'<video src="(?P<ext>.*?):/?(?P<url>.*?)"', links_webpage))
 | 
			
		||||
        if m_urls is None or len(m_urls) == 0:
 | 
			
		||||
            raise ExtractorError(u'Unable to extract video url')
 | 
			
		||||
        # They are sorted from worst to best quality
 | 
			
		||||
        m_url = m_urls[-1]
 | 
			
		||||
        video_url = base_url + m_url.group('url')
 | 
			
		||||
        video_url = base_url + '/' + m_url.group('url')
 | 
			
		||||
        ext = m_url.group('ext')
 | 
			
		||||
 | 
			
		||||
        return {'url': video_url,
 | 
			
		||||
 
 | 
			
		||||
							
								
								
									
										49
									
								
								youtube_dl/extractor/videofyme.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										49
									
								
								youtube_dl/extractor/videofyme.py
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,49 @@
 | 
			
		||||
import re
 | 
			
		||||
import xml.etree.ElementTree
 | 
			
		||||
 | 
			
		||||
from .common import InfoExtractor
 | 
			
		||||
from ..utils import (
 | 
			
		||||
    find_xpath_attr,
 | 
			
		||||
    determine_ext,
 | 
			
		||||
)
 | 
			
		||||
 | 
			
		||||
class VideofyMeIE(InfoExtractor):
    """Information extractor for videofy.me videos."""

    # Dots in the host names are escaped so that they only match a literal '.'.
    _VALID_URL = r'https?://(www\.videofy\.me/.+?|p\.videofy\.me/v)/(?P<id>\d+)(&|#|$)'
    IE_NAME = u'videofy.me'

    _TEST = {
        u'url': u'http://www.videofy.me/thisisvideofyme/1100701',
        u'file':  u'1100701.mp4',
        u'md5': u'2046dd5758541d630bfa93e741e2fd79',
        u'info_dict': {
            u'title': u'This is VideofyMe',
            u'description': None,
            u'uploader': u'VideofyMe',
            u'uploader_id': u'thisisvideofyme',
        },
    }

    def _real_extract(self, url):
        """Return the info dict for the video whose numeric id is in ``url``.

        All data comes from the XML config served by sunshine.videofy.me.
        The 'HQ on' source is preferred, with 'HQ off' as the fallback.
        """
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')
        config_xml = self._download_webpage('http://sunshine.videofy.me/?videoId=%s' % video_id,
                                            video_id)
        config = xml.etree.ElementTree.fromstring(config_xml.encode('utf-8'))
        video = config.find('video')
        sources = video.find('sources')
        url_node = find_xpath_attr(sources, 'source', 'id', 'HQ on')
        if url_node is None:
            url_node = find_xpath_attr(sources, 'source', 'id', 'HQ off')
        video_url = url_node.find('url').text

        # The view count is optional metadata: a missing or empty <views>
        # element, or one without digits, yields None instead of aborting the
        # extraction. The value stays the matched digit string, as before.
        views_match = re.search(r'\d+', video.findtext('views') or u'')
        view_count = views_match.group() if views_match is not None else None

        return {'id': video_id,
                'title': video.find('title').text,
                'url': video_url,
                'ext': determine_ext(video_url),
                'thumbnail': video.find('thumb').text,
                'description': video.find('description').text,
                'uploader': config.find('blog/name').text,
                'uploader_id': video.find('identifier').text,
                'view_count': view_count,
                }
 | 
			
		||||
@@ -1,5 +1,6 @@
 | 
			
		||||
import json
 | 
			
		||||
import re
 | 
			
		||||
import itertools
 | 
			
		||||
 | 
			
		||||
from .common import InfoExtractor
 | 
			
		||||
from ..utils import (
 | 
			
		||||
@@ -171,3 +172,31 @@ class VimeoIE(InfoExtractor):
 | 
			
		||||
            'thumbnail':    video_thumbnail,
 | 
			
		||||
            'description':  video_description,
 | 
			
		||||
        }]
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class VimeoChannelIE(InfoExtractor):
    """Information extractor that turns a vimeo.com channel into a playlist."""

    IE_NAME = u'vimeo:channel'
    # The original pattern read 'vimeo.\com': the dot matched any character
    # and '\c' is an unknown regex escape, which newer Python 3 releases of
    # the re module reject with an error.
    _VALID_URL = r'(?:https?://)?vimeo\.com/channels/(?P<id>[^/]+)'
    # A link with rel="next" on a listing page means another page follows.
    _MORE_PAGES_INDICATOR = r'<a.+?rel="next"'

    def _real_extract(self, url):
        """Return a playlist dict with one url_result entry per channel video.

        Listing pages are downloaded starting at page 1 until a page no longer
        matches ``_MORE_PAGES_INDICATOR``. The channel title is read from the
        last page downloaded.
        """
        mobj = re.match(self._VALID_URL, url)
        channel_id = mobj.group('id')
        video_ids = []

        for pagenum in itertools.count(1):
            webpage = self._download_webpage('http://vimeo.com/channels/%s/videos/page:%d' % (channel_id, pagenum),
                                             channel_id, u'Downloading page %s' % pagenum)
            video_ids.extend(re.findall(r'id="clip_(\d+?)"', webpage))
            if re.search(self._MORE_PAGES_INDICATOR, webpage, re.DOTALL) is None:
                break

        entries = [self.url_result('http://vimeo.com/%s' % video_id, 'Vimeo')
                   for video_id in video_ids]
        # channel_id comes straight from the URL, so escape it before using
        # it inside a regular expression.
        channel_title = self._html_search_regex(r'<a href="/channels/%s">(.*?)</a>' % re.escape(channel_id),
                                                webpage, u'channel title')
        return {'_type': 'playlist',
                'id': channel_id,
                'title': channel_title,
                'entries': entries,
                }
 | 
			
		||||
 
 | 
			
		||||
@@ -17,11 +17,12 @@ class WatIE(InfoExtractor):
 | 
			
		||||
    _TEST = {
 | 
			
		||||
        u'url': u'http://www.wat.tv/video/world-war-philadelphia-vost-6bv55_2fjr7_.html',
 | 
			
		||||
        u'file': u'10631273.mp4',
 | 
			
		||||
        u'md5': u'0a4fe7870f31eaeabb5e25fd8da8414a',
 | 
			
		||||
        u'md5': u'd8b2231e1e333acd12aad94b80937e19',
 | 
			
		||||
        u'info_dict': {
 | 
			
		||||
            u'title': u'World War Z - Philadelphia VOST',
 | 
			
		||||
            u'description': u'La menace est partout. Que se passe-t-il à Philadelphia ?\r\nWORLD WAR Z, avec Brad Pitt, au cinéma le 3 juillet.\r\nhttp://www.worldwarz.fr',
 | 
			
		||||
        }
 | 
			
		||||
        },
 | 
			
		||||
        u'skip': u'Sometimes wat serves the whole file with the --test option',
 | 
			
		||||
    }
 | 
			
		||||
    
 | 
			
		||||
    def download_video_info(self, real_id):
 | 
			
		||||
@@ -58,20 +59,8 @@ class WatIE(InfoExtractor):
 | 
			
		||||
 | 
			
		||||
        # Otherwise we can continue and extract just one part, we have to use
 | 
			
		||||
        # the short id for getting the video url
 | 
			
		||||
        player_data = compat_urllib_parse.urlencode({'shortVideoId': short_id,
 | 
			
		||||
                                                     'html5': '1'})
 | 
			
		||||
        player_info = self._download_webpage('http://www.wat.tv/player?' + player_data,
 | 
			
		||||
                                             real_id, u'Downloading player info')
 | 
			
		||||
        player = json.loads(player_info)['player']
 | 
			
		||||
        html5_player = self._html_search_regex(r'iframe src="(.*?)"', player,
 | 
			
		||||
                                               'html5 player')
 | 
			
		||||
        player_webpage = self._download_webpage(html5_player, real_id,
 | 
			
		||||
                                                u'Downloading player webpage')
 | 
			
		||||
 | 
			
		||||
        video_url = self._search_regex(r'urlhtml5 : "(.*?)"', player_webpage,
 | 
			
		||||
                                       'video url')
 | 
			
		||||
        info = {'id': real_id,
 | 
			
		||||
                'url': video_url,
 | 
			
		||||
                'url': 'http://wat.tv/get/android5/%s.mp4' % real_id,
 | 
			
		||||
                'ext': 'mp4',
 | 
			
		||||
                'title': first_chapter['title'],
 | 
			
		||||
                'thumbnail': first_chapter['preview'],
 | 
			
		||||
 
 | 
			
		||||
							
								
								
									
										48
									
								
								youtube_dl/extractor/weibo.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										48
									
								
								youtube_dl/extractor/weibo.py
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,48 @@
 | 
			
		||||
# coding: utf-8
 | 
			
		||||
 | 
			
		||||
import re
 | 
			
		||||
import json
 | 
			
		||||
 | 
			
		||||
from .common import InfoExtractor
 | 
			
		||||
 | 
			
		||||
class WeiboIE(InfoExtractor):
    """
    The videos in Weibo come from different sites, this IE just finds the link
    to the external video and returns it.
    """
    _VALID_URL = r'https?://video\.weibo\.com/v/weishipin/t_(?P<id>.+?)\.htm'

    _TEST = {
        u'url': u'http://video.weibo.com/v/weishipin/t_zjUw2kZ.htm',
        u'file': u'98322879.flv',
        u'info_dict': {
            u'title': u'魔声耳机最新广告“All Eyes On Us”',
        },
        u'note': u'Sina video',
        u'params': {
            u'skip_download': True,
        },
    }

    # Additional example videos from different sites
    # Youku: http://video.weibo.com/v/weishipin/t_zQGDWQ8.htm
    # 56.com: http://video.weibo.com/v/weishipin/t_zQ44HxN.htm

    def _real_extract(self, url):
        """
        Look up the external player page behind a Weibo video URL and hand it
        over to whichever extractor handles that page (via url_result).
        """
        # _VALID_URL is a plain single-line pattern, no verbose flag needed
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')
        info_url = 'http://video.weibo.com/?s=v&a=play_list&format=json&mix_video_id=t_%s' % video_id
        info_page = self._download_webpage(info_url, video_id)
        info = json.loads(info_page)

        videos_urls = [v['play_page_url'] for v in info['result']['data']]
        # Prefer sina video since they have thumbnails: the sort key is a
        # boolean, so sina URLs end up last and [-1] picks one if present
        videos_urls = sorted(videos_urls, key=lambda u: u'video.sina.com' in u)
        player_url = videos_urls[-1]
        # Dots are escaped so that only the literal sina host/path matches
        m_sina = re.match(r'https?://video\.sina\.com\.cn/v/b/(\d+)-\d+\.html', player_url)
        if m_sina is not None:
            self.to_screen('Sina video detected')
            sina_id = m_sina.group(1)
            # Rewrite to the swf player URL carrying the numeric sina id
            player_url = 'http://you.video.sina.com.cn/swf/quotePlayer.swf?vid=%s' % sina_id
        return self.url_result(player_url)
 | 
			
		||||
 | 
			
		||||
@@ -21,6 +21,13 @@ class WorldStarHipHopIE(InfoExtractor):
 | 
			
		||||
 | 
			
		||||
        webpage_src = self._download_webpage(url, video_id)
 | 
			
		||||
 | 
			
		||||
        m_vevo_id = re.search(r'videoId=(.*?)&?',
 | 
			
		||||
            webpage_src)
 | 
			
		||||
        
 | 
			
		||||
        if m_vevo_id is not None:
 | 
			
		||||
            self.to_screen(u'Vevo video detected:')
 | 
			
		||||
            return self.url_result('vevo:%s' % m_vevo_id.group(1), ie='Vevo')
 | 
			
		||||
 | 
			
		||||
        video_url = self._search_regex(r'so\.addVariable\("file","(.*?)"\)',
 | 
			
		||||
            webpage_src, u'video URL')
 | 
			
		||||
 | 
			
		||||
 
 | 
			
		||||
@@ -13,7 +13,7 @@ from ..utils import (
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class YoukuIE(InfoExtractor):
 | 
			
		||||
    _VALID_URL =  r'(?:http://)?v\.youku\.com/v_show/id_(?P<ID>[A-Za-z0-9]+)\.html'
 | 
			
		||||
    _VALID_URL =  r'(?:http://)?(v|player)\.youku\.com/(v_show/id_|player\.php/sid/)(?P<ID>[A-Za-z0-9]+)(\.html|/v.swf)'
 | 
			
		||||
    _TEST =   {
 | 
			
		||||
        u"url": u"http://v.youku.com/v_show/id_XNDgyMDQ2NTQw.html",
 | 
			
		||||
        u"file": u"XNDgyMDQ2NTQw_part00.flv",
 | 
			
		||||
 
 | 
			
		||||
@@ -23,8 +23,114 @@ from ..utils import (
 | 
			
		||||
    orderedSet,
 | 
			
		||||
)
 | 
			
		||||
 | 
			
		||||
class YoutubeBaseInfoExtractor(InfoExtractor):
 | 
			
		||||
    """Provide base functions for Youtube extractors"""
 | 
			
		||||
    _LOGIN_URL = 'https://accounts.google.com/ServiceLogin'
 | 
			
		||||
    _LANG_URL = r'https://www.youtube.com/?hl=en&persist_hl=1&gl=US&persist_gl=1&opt_out_ackd=1'
 | 
			
		||||
    _AGE_URL = 'http://www.youtube.com/verify_age?next_url=/&gl=US&hl=en'
 | 
			
		||||
    _NETRC_MACHINE = 'youtube'
 | 
			
		||||
    # If True it will raise an error if no login info is provided
 | 
			
		||||
    _LOGIN_REQUIRED = False
 | 
			
		||||
 | 
			
		||||
class YoutubeIE(InfoExtractor):
 | 
			
		||||
    def report_lang(self):
        """Announce on screen that the language is about to be set."""
        message = u'Setting language'
        self.to_screen(message)
 | 
			
		||||
 | 
			
		||||
    def _set_language(self):
        """
        Fetch _LANG_URL and discard the response body.

        Returns True when the request succeeded; on a network error a warning
        is reported through the downloader and False is returned.
        """
        lang_request = compat_urllib_request.Request(self._LANG_URL)
        try:
            self.report_lang()
            compat_urllib_request.urlopen(lang_request).read()
        except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
            warning = u'unable to set language: %s' % compat_str(err)
            self._downloader.report_warning(warning)
            return False
        else:
            return True
 | 
			
		||||
 | 
			
		||||
    def _login(self):
        """
        Log in with the credentials returned by self._get_login_info().

        Fetches _LOGIN_URL, picks up the hidden GALX and dsh form values when
        the page carries them, and posts the login form back to _LOGIN_URL.

        Returns True when the response no longer contains the login form.
        Returns False when no username is configured, or (after reporting a
        warning) when a request fails or the login form is shown again.
        Raises ExtractorError when _LOGIN_REQUIRED is set and no login info
        is available.
        """
        (username, password) = self._get_login_info()
        # No authentication to be performed
        if username is None:
            if self._LOGIN_REQUIRED:
                raise ExtractorError(u'No login info available, needed for using %s.' % self.IE_NAME, expected=True)
            return False

        request = compat_urllib_request.Request(self._LOGIN_URL)
        try:
            login_page = compat_urllib_request.urlopen(request).read().decode('utf-8')
        except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
            self._downloader.report_warning(u'unable to fetch login page: %s' % compat_str(err))
            return False

        # Hidden form values; each stays None if the page does not carry it
        galx = None
        dsh = None
        match = re.search(r'<input.+?name="GALX".+?value="(.+?)"', login_page, re.DOTALL)
        if match:
            galx = match.group(1)
        match = re.search(r'<input.+?name="dsh".+?value="(.+?)"', login_page, re.DOTALL)
        if match:
            dsh = match.group(1)

        # Log in
        login_form_strs = {
                u'continue': u'https://www.youtube.com/signin?action_handle_signin=true&feature=sign_in_button&hl=en_US&nomobiletemp=1',
                u'Email': username,
                u'GALX': galx,
                u'Passwd': password,
                u'PersistentCookie': u'yes',
                u'_utf8': u'霱',
                u'bgresponse': u'js_disabled',
                u'checkConnection': u'',
                u'checkedDomains': u'youtube',
                u'dnConn': u'',
                u'dsh': dsh,
                u'pstMsg': u'0',
                u'rmShown': u'1',
                u'secTok': u'',
                u'signIn': u'Sign in',
                u'timeStmp': u'',
                u'service': u'youtube',
                u'uilel': u'3',
                u'hl': u'en_US',
        }
        # Convert to UTF-8 *before* urlencode because Python 2.x's urlencode
        # chokes on unicode.  Fields whose value is None (e.g. a hidden input
        # that was not found on the login page) are left out instead of
        # crashing on None.encode().
        login_form = dict((k.encode('utf-8'), v.encode('utf-8'))
                          for k, v in login_form_strs.items() if v is not None)
        login_data = compat_urllib_parse.urlencode(login_form).encode('ascii')
        request = compat_urllib_request.Request(self._LOGIN_URL, login_data)
        try:
            self.report_login()
            login_results = compat_urllib_request.urlopen(request).read().decode('utf-8')
            # Being served the login form again means the credentials were rejected
            if re.search(r'(?i)<form[^>]* id="gaia_loginform"', login_results) is not None:
                self._downloader.report_warning(u'unable to log in: bad username or password')
                return False
        except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
            self._downloader.report_warning(u'unable to log in: %s' % compat_str(err))
            return False
        return True
 | 
			
		||||
 | 
			
		||||
    def _confirm_age(self):
        """
        Post the age confirmation form to _AGE_URL.

        Returns True on success; raises ExtractorError when the request fails.
        """
        age_form = {
                'next_url':     '/',
                'action_confirm':   'Confirm',
                }
        # POST data has to be bytes on Python 3; encode it the same way
        # _login encodes its form data
        age_data = compat_urllib_parse.urlencode(age_form).encode('ascii')
        request = compat_urllib_request.Request(self._AGE_URL, age_data)
        try:
            self.report_age_confirmation()
            compat_urllib_request.urlopen(request).read().decode('utf-8')
        except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
            raise ExtractorError(u'Unable to confirm age: %s' % compat_str(err))
        return True
 | 
			
		||||
 | 
			
		||||
    def _real_initialize(self):
 | 
			
		||||
        if self._downloader is None:
 | 
			
		||||
            return
 | 
			
		||||
        if not self._set_language():
 | 
			
		||||
            return
 | 
			
		||||
        if not self._login():
 | 
			
		||||
            return
 | 
			
		||||
        self._confirm_age()
 | 
			
		||||
 | 
			
		||||
class YoutubeIE(YoutubeBaseInfoExtractor):
 | 
			
		||||
    IE_DESC = u'YouTube.com'
 | 
			
		||||
    _VALID_URL = r"""^
 | 
			
		||||
                     (
 | 
			
		||||
@@ -45,14 +151,16 @@ class YoutubeIE(InfoExtractor):
 | 
			
		||||
                     ([0-9A-Za-z_-]+)                                         # here is it! the YouTube video ID
 | 
			
		||||
                     (?(1).+)?                                                # if we found the ID, everything can follow
 | 
			
		||||
                     $"""
 | 
			
		||||
    _LANG_URL = r'https://www.youtube.com/?hl=en&persist_hl=1&gl=US&persist_gl=1&opt_out_ackd=1'
 | 
			
		||||
    _LOGIN_URL = 'https://accounts.google.com/ServiceLogin'
 | 
			
		||||
    _AGE_URL = 'http://www.youtube.com/verify_age?next_url=/&gl=US&hl=en'
 | 
			
		||||
    _NEXT_URL_RE = r'[\?&]next_url=([^&]+)'
 | 
			
		||||
    _NETRC_MACHINE = 'youtube'
 | 
			
		||||
    # Listed in order of quality
 | 
			
		||||
    _available_formats = ['38', '37', '46', '22', '45', '35', '44', '34', '18', '43', '6', '5', '17', '13']
 | 
			
		||||
    _available_formats_prefer_free = ['38', '46', '37', '45', '22', '44', '35', '43', '34', '18', '6', '5', '17', '13']
 | 
			
		||||
    _available_formats = ['38', '37', '46', '22', '45', '35', '44', '34', '18', '43', '6', '5', '17', '13',
 | 
			
		||||
                          '95', '94', '93', '92', '132', '151',
 | 
			
		||||
                          '85', '84', '102', '83', '101', '82', '100',
 | 
			
		||||
                          ]
 | 
			
		||||
    _available_formats_prefer_free = ['38', '46', '37', '45', '22', '44', '35', '43', '34', '18', '6', '5', '17', '13',
 | 
			
		||||
                                      '95', '94', '93', '92', '132', '151',
 | 
			
		||||
                                      '85', '102', '84', '101', '83', '100', '82',
 | 
			
		||||
                                      ]
 | 
			
		||||
    _video_extensions = {
 | 
			
		||||
        '13': '3gp',
 | 
			
		||||
        '17': 'mp4',
 | 
			
		||||
@@ -64,6 +172,24 @@ class YoutubeIE(InfoExtractor):
 | 
			
		||||
        '44': 'webm',
 | 
			
		||||
        '45': 'webm',
 | 
			
		||||
        '46': 'webm',
 | 
			
		||||
 | 
			
		||||
        # 3d videos
 | 
			
		||||
        '82': 'mp4',
 | 
			
		||||
        '83': 'mp4',
 | 
			
		||||
        '84': 'mp4',
 | 
			
		||||
        '85': 'mp4',
 | 
			
		||||
        '100': 'webm',
 | 
			
		||||
        '101': 'webm',
 | 
			
		||||
        '102': 'webm',
 | 
			
		||||
        
 | 
			
		||||
        # videos that use m3u8
 | 
			
		||||
        '92': 'mp4',
 | 
			
		||||
        '93': 'mp4',
 | 
			
		||||
        '94': 'mp4',
 | 
			
		||||
        '95': 'mp4',
 | 
			
		||||
        '96': 'mp4',
 | 
			
		||||
        '132': 'mp4',
 | 
			
		||||
        '151': 'mp4',
 | 
			
		||||
    }
 | 
			
		||||
    _video_dimensions = {
 | 
			
		||||
        '5': '240x400',
 | 
			
		||||
@@ -80,7 +206,22 @@ class YoutubeIE(InfoExtractor):
 | 
			
		||||
        '44': '480x854',
 | 
			
		||||
        '45': '720x1280',
 | 
			
		||||
        '46': '1080x1920',
 | 
			
		||||
        '82': '360p',
 | 
			
		||||
        '83': '480p',
 | 
			
		||||
        '84': '720p',
 | 
			
		||||
        '85': '1080p',
 | 
			
		||||
        '92': '240p',
 | 
			
		||||
        '93': '360p',
 | 
			
		||||
        '94': '480p',
 | 
			
		||||
        '95': '720p',
 | 
			
		||||
        '96': '1080p',
 | 
			
		||||
        '100': '360p',
 | 
			
		||||
        '101': '480p',
 | 
			
		||||
        '102': '720p',        
 | 
			
		||||
        '132': '240p',
 | 
			
		||||
        '151': '72p',
 | 
			
		||||
    }
 | 
			
		||||
    _3d_itags = ['85', '84', '102', '83', '101', '82', '100']
 | 
			
		||||
    IE_NAME = u'youtube'
 | 
			
		||||
    _TESTS = [
 | 
			
		||||
        {
 | 
			
		||||
@@ -130,6 +271,21 @@ class YoutubeIE(InfoExtractor):
 | 
			
		||||
                u"uploader_id": u"justintimberlakeVEVO"
 | 
			
		||||
            }
 | 
			
		||||
        },
 | 
			
		||||
        {
 | 
			
		||||
            u'url': u'https://www.youtube.com/watch?v=TGi3HqYrWHE',
 | 
			
		||||
            u'file': u'TGi3HqYrWHE.mp4',
 | 
			
		||||
            u'note': u'm3u8 video',
 | 
			
		||||
            u'info_dict': {
 | 
			
		||||
                u'title': u'Triathlon - Men - London 2012 Olympic Games',
 | 
			
		||||
                u'description': u'- Men -  TR02 - Triathlon - 07 August 2012 - London 2012 Olympic Games',
 | 
			
		||||
                u'uploader': u'olympic',
 | 
			
		||||
                u'upload_date': u'20120807',
 | 
			
		||||
                u'uploader_id': u'olympic',
 | 
			
		||||
            },
 | 
			
		||||
            u'params': {
 | 
			
		||||
                u'skip_download': True,
 | 
			
		||||
            },
 | 
			
		||||
        },
 | 
			
		||||
    ]
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@@ -139,10 +295,6 @@ class YoutubeIE(InfoExtractor):
 | 
			
		||||
        if YoutubePlaylistIE.suitable(url) or YoutubeSubscriptionsIE.suitable(url): return False
 | 
			
		||||
        return re.match(cls._VALID_URL, url, re.VERBOSE) is not None
 | 
			
		||||
 | 
			
		||||
    def report_lang(self):
 | 
			
		||||
        """Report attempt to set language."""
 | 
			
		||||
        self.to_screen(u'Setting language')
 | 
			
		||||
 | 
			
		||||
    def report_video_webpage_download(self, video_id):
 | 
			
		||||
        """Report attempt to download video webpage."""
 | 
			
		||||
        self.to_screen(u'%s: Downloading video webpage' % video_id)
 | 
			
		||||
@@ -183,26 +335,40 @@ class YoutubeIE(InfoExtractor):
 | 
			
		||||
            return s[25] + s[3:25] + s[0] + s[26:42] + s[79] + s[43:79] + s[91] + s[80:83]
 | 
			
		||||
        elif len(s) == 90:
 | 
			
		||||
            return s[25] + s[3:25] + s[2] + s[26:40] + s[77] + s[41:77] + s[89] + s[78:81]
 | 
			
		||||
        elif len(s) == 89:
 | 
			
		||||
            return s[84:78:-1] + s[87] + s[77:60:-1] + s[0] + s[59:3:-1]
 | 
			
		||||
        elif len(s) == 88:
 | 
			
		||||
            return s[48] + s[81:67:-1] + s[82] + s[66:62:-1] + s[85] + s[61:48:-1] + s[67] + s[47:12:-1] + s[3] + s[11:3:-1] + s[2] + s[12]
 | 
			
		||||
        elif len(s) == 87:
 | 
			
		||||
            return s[62] + s[82:62:-1] + s[83] + s[61:52:-1] + s[0] + s[51:2:-1]
 | 
			
		||||
            return s[83:53:-1] + s[3] + s[52:40:-1] + s[86] + s[39:10:-1] + s[0] + s[9:3:-1] + s[53]
 | 
			
		||||
        elif len(s) == 86:
 | 
			
		||||
            return s[2:63] + s[82] + s[64:82] + s[63]
 | 
			
		||||
            return s[5:20] + s[2] + s[21:]
 | 
			
		||||
        elif len(s) == 85:
 | 
			
		||||
            return s[76] + s[82:76:-1] + s[83] + s[75:60:-1] + s[0] + s[59:50:-1] + s[1] + s[49:2:-1]
 | 
			
		||||
            return s[2:8] + s[0] + s[9:21] + s[65] + s[22:65] + s[84] + s[66:82] + s[21]
 | 
			
		||||
        elif len(s) == 84:
 | 
			
		||||
            return s[83:36:-1] + s[2] + s[35:26:-1] + s[3] + s[25:3:-1] + s[26]
 | 
			
		||||
            return s[83:27:-1] + s[0] + s[26:5:-1] + s[2:0:-1] + s[27]
 | 
			
		||||
        elif len(s) == 83:
 | 
			
		||||
            return s[6] + s[3:6] + s[33] + s[7:24] + s[0] + s[25:33] + s[53] + s[34:53] + s[24] + s[54:]
 | 
			
		||||
            return s[81:64:-1] + s[82] + s[63:52:-1] + s[45] + s[51:45:-1] + s[1] + s[44:1:-1] + s[0]
 | 
			
		||||
        elif len(s) == 82:
 | 
			
		||||
            return s[36] + s[79:67:-1] + s[81] + s[66:40:-1] + s[33] + s[39:36:-1] + s[40] + s[35] + s[0] + s[67] + s[32:0:-1] + s[34]
 | 
			
		||||
        elif len(s) == 81:
 | 
			
		||||
            return s[6] + s[3:6] + s[33] + s[7:24] + s[0] + s[25:33] + s[2] + s[34:53] + s[24] + s[54:81]
 | 
			
		||||
            return s[56] + s[79:56:-1] + s[41] + s[55:41:-1] + s[80] + s[40:34:-1] + s[0] + s[33:29:-1] + s[34] + s[28:9:-1] + s[29] + s[8:0:-1] + s[9]
 | 
			
		||||
        elif len(s) == 79:
 | 
			
		||||
            return s[54] + s[77:54:-1] + s[39] + s[53:39:-1] + s[78] + s[38:34:-1] + s[0] + s[33:29:-1] + s[34] + s[28:9:-1] + s[29] + s[8:0:-1] + s[9]
 | 
			
		||||
 | 
			
		||||
        else:
 | 
			
		||||
            raise ExtractorError(u'Unable to decrypt signature, key length %d not supported; retrying might work' % (len(s)))
 | 
			
		||||
 | 
			
		||||
    def _decrypt_signature_age_gate(self, s):
 | 
			
		||||
        # The videos with age protection use another player, so the algorithms
 | 
			
		||||
        # can be different.
 | 
			
		||||
        if len(s) == 86:
 | 
			
		||||
            return s[2:63] + s[82] + s[64:82] + s[63]
 | 
			
		||||
        else:
 | 
			
		||||
            # Fallback to the other algortihms
 | 
			
		||||
            return self._decrypt_signature(s)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
    def _get_available_subtitles(self, video_id):
 | 
			
		||||
        self.report_video_subtitles_download(video_id)
 | 
			
		||||
        request = compat_urllib_request.Request('http://video.google.com/timedtext?hl=en&type=list&v=%s' % video_id)
 | 
			
		||||
@@ -304,92 +470,9 @@ class YoutubeIE(InfoExtractor):
 | 
			
		||||
    def _print_formats(self, formats):
 | 
			
		||||
        print('Available formats:')
 | 
			
		||||
        for x in formats:
 | 
			
		||||
            print('%s\t:\t%s\t[%s]' %(x, self._video_extensions.get(x, 'flv'), self._video_dimensions.get(x, '???')))
 | 
			
		||||
 | 
			
		||||
    def _real_initialize(self):
 | 
			
		||||
        if self._downloader is None:
 | 
			
		||||
            return
 | 
			
		||||
 | 
			
		||||
        # Set language
 | 
			
		||||
        request = compat_urllib_request.Request(self._LANG_URL)
 | 
			
		||||
        try:
 | 
			
		||||
            self.report_lang()
 | 
			
		||||
            compat_urllib_request.urlopen(request).read()
 | 
			
		||||
        except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
 | 
			
		||||
            self._downloader.report_warning(u'unable to set language: %s' % compat_str(err))
 | 
			
		||||
            return
 | 
			
		||||
 | 
			
		||||
        (username, password) = self._get_login_info()
 | 
			
		||||
 | 
			
		||||
        # No authentication to be performed
 | 
			
		||||
        if username is None:
 | 
			
		||||
            return
 | 
			
		||||
 | 
			
		||||
        request = compat_urllib_request.Request(self._LOGIN_URL)
 | 
			
		||||
        try:
 | 
			
		||||
            login_page = compat_urllib_request.urlopen(request).read().decode('utf-8')
 | 
			
		||||
        except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
 | 
			
		||||
            self._downloader.report_warning(u'unable to fetch login page: %s' % compat_str(err))
 | 
			
		||||
            return
 | 
			
		||||
 | 
			
		||||
        galx = None
 | 
			
		||||
        dsh = None
 | 
			
		||||
        match = re.search(re.compile(r'<input.+?name="GALX".+?value="(.+?)"', re.DOTALL), login_page)
 | 
			
		||||
        if match:
 | 
			
		||||
          galx = match.group(1)
 | 
			
		||||
 | 
			
		||||
        match = re.search(re.compile(r'<input.+?name="dsh".+?value="(.+?)"', re.DOTALL), login_page)
 | 
			
		||||
        if match:
 | 
			
		||||
          dsh = match.group(1)
 | 
			
		||||
 | 
			
		||||
        # Log in
 | 
			
		||||
        login_form_strs = {
 | 
			
		||||
                u'continue': u'https://www.youtube.com/signin?action_handle_signin=true&feature=sign_in_button&hl=en_US&nomobiletemp=1',
 | 
			
		||||
                u'Email': username,
 | 
			
		||||
                u'GALX': galx,
 | 
			
		||||
                u'Passwd': password,
 | 
			
		||||
                u'PersistentCookie': u'yes',
 | 
			
		||||
                u'_utf8': u'霱',
 | 
			
		||||
                u'bgresponse': u'js_disabled',
 | 
			
		||||
                u'checkConnection': u'',
 | 
			
		||||
                u'checkedDomains': u'youtube',
 | 
			
		||||
                u'dnConn': u'',
 | 
			
		||||
                u'dsh': dsh,
 | 
			
		||||
                u'pstMsg': u'0',
 | 
			
		||||
                u'rmShown': u'1',
 | 
			
		||||
                u'secTok': u'',
 | 
			
		||||
                u'signIn': u'Sign in',
 | 
			
		||||
                u'timeStmp': u'',
 | 
			
		||||
                u'service': u'youtube',
 | 
			
		||||
                u'uilel': u'3',
 | 
			
		||||
                u'hl': u'en_US',
 | 
			
		||||
        }
 | 
			
		||||
        # Convert to UTF-8 *before* urlencode because Python 2.x's urlencode
 | 
			
		||||
        # chokes on unicode
 | 
			
		||||
        login_form = dict((k.encode('utf-8'), v.encode('utf-8')) for k,v in login_form_strs.items())
 | 
			
		||||
        login_data = compat_urllib_parse.urlencode(login_form).encode('ascii')
 | 
			
		||||
        request = compat_urllib_request.Request(self._LOGIN_URL, login_data)
 | 
			
		||||
        try:
 | 
			
		||||
            self.report_login()
 | 
			
		||||
            login_results = compat_urllib_request.urlopen(request).read().decode('utf-8')
 | 
			
		||||
            if re.search(r'(?i)<form[^>]* id="gaia_loginform"', login_results) is not None:
 | 
			
		||||
                self._downloader.report_warning(u'unable to log in: bad username or password')
 | 
			
		||||
                return
 | 
			
		||||
        except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
 | 
			
		||||
            self._downloader.report_warning(u'unable to log in: %s' % compat_str(err))
 | 
			
		||||
            return
 | 
			
		||||
 | 
			
		||||
        # Confirm age
 | 
			
		||||
        age_form = {
 | 
			
		||||
                'next_url':     '/',
 | 
			
		||||
                'action_confirm':   'Confirm',
 | 
			
		||||
                }
 | 
			
		||||
        request = compat_urllib_request.Request(self._AGE_URL, compat_urllib_parse.urlencode(age_form))
 | 
			
		||||
        try:
 | 
			
		||||
            self.report_age_confirmation()
 | 
			
		||||
            compat_urllib_request.urlopen(request).read().decode('utf-8')
 | 
			
		||||
        except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
 | 
			
		||||
            raise ExtractorError(u'Unable to confirm age: %s' % compat_str(err))
 | 
			
		||||
            print('%s\t:\t%s\t[%s]%s' %(x, self._video_extensions.get(x, 'flv'),
 | 
			
		||||
                                        self._video_dimensions.get(x, '???'),
 | 
			
		||||
                                        ' (3D)' if x in self._3d_itags else ''))
 | 
			
		||||
 | 
			
		||||
    def _extract_id(self, url):
 | 
			
		||||
        mobj = re.match(self._VALID_URL, url, re.VERBOSE)
 | 
			
		||||
@@ -398,6 +481,57 @@ class YoutubeIE(InfoExtractor):
 | 
			
		||||
        video_id = mobj.group(2)
 | 
			
		||||
        return video_id
 | 
			
		||||
 | 
			
		||||
    def _get_video_url_list(self, url_map):
 | 
			
		||||
        """
 | 
			
		||||
        Transform a dictionary in the format {itag:url} to a list of (itag, url)
 | 
			
		||||
        with the requested formats.
 | 
			
		||||
        """
 | 
			
		||||
        req_format = self._downloader.params.get('format', None)
 | 
			
		||||
        format_limit = self._downloader.params.get('format_limit', None)
 | 
			
		||||
        available_formats = self._available_formats_prefer_free if self._downloader.params.get('prefer_free_formats', False) else self._available_formats
 | 
			
		||||
        if format_limit is not None and format_limit in available_formats:
 | 
			
		||||
            format_list = available_formats[available_formats.index(format_limit):]
 | 
			
		||||
        else:
 | 
			
		||||
            format_list = available_formats
 | 
			
		||||
        existing_formats = [x for x in format_list if x in url_map]
 | 
			
		||||
        if len(existing_formats) == 0:
 | 
			
		||||
            raise ExtractorError(u'no known formats available for video')
 | 
			
		||||
        if self._downloader.params.get('listformats', None):
 | 
			
		||||
            self._print_formats(existing_formats)
 | 
			
		||||
            return
 | 
			
		||||
        if req_format is None or req_format == 'best':
 | 
			
		||||
            video_url_list = [(existing_formats[0], url_map[existing_formats[0]])] # Best quality
 | 
			
		||||
        elif req_format == 'worst':
 | 
			
		||||
            video_url_list = [(existing_formats[-1], url_map[existing_formats[-1]])] # worst quality
 | 
			
		||||
        elif req_format in ('-1', 'all'):
 | 
			
		||||
            video_url_list = [(f, url_map[f]) for f in existing_formats] # All formats
 | 
			
		||||
        else:
 | 
			
		||||
            # Specific formats. We pick the first in a slash-delimeted sequence.
 | 
			
		||||
            # For example, if '1/2/3/4' is requested and '2' and '4' are available, we pick '2'.
 | 
			
		||||
            req_formats = req_format.split('/')
 | 
			
		||||
            video_url_list = None
 | 
			
		||||
            for rf in req_formats:
 | 
			
		||||
                if rf in url_map:
 | 
			
		||||
                    video_url_list = [(rf, url_map[rf])]
 | 
			
		||||
                    break
 | 
			
		||||
            if video_url_list is None:
 | 
			
		||||
                raise ExtractorError(u'requested format not available')
 | 
			
		||||
        return video_url_list
 | 
			
		||||
 | 
			
		||||
    def _extract_from_m3u8(self, manifest_url, video_id):
 | 
			
		||||
        url_map = {}
 | 
			
		||||
        def _get_urls(_manifest):
 | 
			
		||||
            lines = _manifest.split('\n')
 | 
			
		||||
            urls = filter(lambda l: l and not l.startswith('#'),
 | 
			
		||||
                            lines)
 | 
			
		||||
            return urls
 | 
			
		||||
        manifest = self._download_webpage(manifest_url, video_id, u'Downloading formats manifest')
 | 
			
		||||
        formats_urls = _get_urls(manifest)
 | 
			
		||||
        for format_url in formats_urls:
 | 
			
		||||
            itag = self._search_regex(r'itag/(\d+?)/', format_url, 'itag')
 | 
			
		||||
            url_map[itag] = format_url
 | 
			
		||||
        return url_map
 | 
			
		||||
 | 
			
		||||
    def _real_extract(self, url):
 | 
			
		||||
        if re.match(r'(?:https?://)?[^/]+/watch\?feature=[a-z_]+$', url):
 | 
			
		||||
            self._downloader.report_warning(u'Did you forget to quote the URL? Remember that & is a meta-character in most shells, so you want to put the URL in quotes, like  youtube-dl \'http://www.youtube.com/watch?feature=foo&v=BaW_jenozKc\' (or simply  youtube-dl BaW_jenozKc  ).')
 | 
			
		||||
@@ -552,7 +686,6 @@ class YoutubeIE(InfoExtractor):
 | 
			
		||||
            video_duration = compat_urllib_parse.unquote_plus(video_info['length_seconds'][0])
 | 
			
		||||
 | 
			
		||||
        # Decide which formats to download
 | 
			
		||||
        req_format = self._downloader.params.get('format', None)
 | 
			
		||||
 | 
			
		||||
        try:
 | 
			
		||||
            mobj = re.search(r';ytplayer.config = ({.*?});', video_webpage)
 | 
			
		||||
@@ -587,8 +720,8 @@ class YoutubeIE(InfoExtractor):
 | 
			
		||||
                            s = url_data['s'][0]
 | 
			
		||||
                            if age_gate:
 | 
			
		||||
                                player_version = self._search_regex(r'ad3-(.+?)\.swf',
 | 
			
		||||
                                    video_info['ad3_module'][0], 'flash player',
 | 
			
		||||
                                    fatal=False)
 | 
			
		||||
                                    video_info['ad3_module'][0] if 'ad3_module' in video_info else 'NOT FOUND',
 | 
			
		||||
                                    'flash player', fatal=False)
 | 
			
		||||
                                player = 'flash player %s' % player_version
 | 
			
		||||
                            else:
 | 
			
		||||
                                player = u'html5 player %s' % self._search_regex(r'html5player-(.+?)\.js', video_webpage,
 | 
			
		||||
@@ -596,41 +729,25 @@ class YoutubeIE(InfoExtractor):
 | 
			
		||||
                            parts_sizes = u'.'.join(compat_str(len(part)) for part in s.split('.'))
 | 
			
		||||
                            self.to_screen(u'encrypted signature length %d (%s), itag %s, %s' %
 | 
			
		||||
                                (len(s), parts_sizes, url_data['itag'][0], player))
 | 
			
		||||
                        signature = self._decrypt_signature(url_data['s'][0])
 | 
			
		||||
                        encrypted_sig = url_data['s'][0]
 | 
			
		||||
                        if age_gate:
 | 
			
		||||
                            signature = self._decrypt_signature_age_gate(encrypted_sig)
 | 
			
		||||
                        else:
 | 
			
		||||
                            signature = self._decrypt_signature(encrypted_sig)
 | 
			
		||||
                        url += '&signature=' + signature
 | 
			
		||||
                    if 'ratebypass' not in url:
 | 
			
		||||
                        url += '&ratebypass=yes'
 | 
			
		||||
                    url_map[url_data['itag'][0]] = url
 | 
			
		||||
 | 
			
		||||
            format_limit = self._downloader.params.get('format_limit', None)
 | 
			
		||||
            available_formats = self._available_formats_prefer_free if self._downloader.params.get('prefer_free_formats', False) else self._available_formats
 | 
			
		||||
            if format_limit is not None and format_limit in available_formats:
 | 
			
		||||
                format_list = available_formats[available_formats.index(format_limit):]
 | 
			
		||||
            else:
 | 
			
		||||
                format_list = available_formats
 | 
			
		||||
            existing_formats = [x for x in format_list if x in url_map]
 | 
			
		||||
            if len(existing_formats) == 0:
 | 
			
		||||
                raise ExtractorError(u'no known formats available for video')
 | 
			
		||||
            if self._downloader.params.get('listformats', None):
 | 
			
		||||
                self._print_formats(existing_formats)
 | 
			
		||||
            video_url_list = self._get_video_url_list(url_map)
 | 
			
		||||
            if not video_url_list:
 | 
			
		||||
                return
 | 
			
		||||
            if req_format is None or req_format == 'best':
 | 
			
		||||
                video_url_list = [(existing_formats[0], url_map[existing_formats[0]])] # Best quality
 | 
			
		||||
            elif req_format == 'worst':
 | 
			
		||||
                video_url_list = [(existing_formats[-1], url_map[existing_formats[-1]])] # worst quality
 | 
			
		||||
            elif req_format in ('-1', 'all'):
 | 
			
		||||
                video_url_list = [(f, url_map[f]) for f in existing_formats] # All formats
 | 
			
		||||
            else:
 | 
			
		||||
                # Specific formats. We pick the first in a slash-delimeted sequence.
 | 
			
		||||
                # For example, if '1/2/3/4' is requested and '2' and '4' are available, we pick '2'.
 | 
			
		||||
                req_formats = req_format.split('/')
 | 
			
		||||
                video_url_list = None
 | 
			
		||||
                for rf in req_formats:
 | 
			
		||||
                    if rf in url_map:
 | 
			
		||||
                        video_url_list = [(rf, url_map[rf])]
 | 
			
		||||
                        break
 | 
			
		||||
                if video_url_list is None:
 | 
			
		||||
                    raise ExtractorError(u'requested format not available')
 | 
			
		||||
        elif video_info.get('hlsvp'):
 | 
			
		||||
            manifest_url = video_info['hlsvp'][0]
 | 
			
		||||
            url_map = self._extract_from_m3u8(manifest_url, video_id)
 | 
			
		||||
            video_url_list = self._get_video_url_list(url_map)
 | 
			
		||||
            if not video_url_list:
 | 
			
		||||
                return
 | 
			
		||||
 | 
			
		||||
        else:
 | 
			
		||||
            raise ExtractorError(u'no conn or url_encoded_fmt_stream_map information found in video info')
 | 
			
		||||
 | 
			
		||||
@@ -639,8 +756,9 @@ class YoutubeIE(InfoExtractor):
 | 
			
		||||
            # Extension
 | 
			
		||||
            video_extension = self._video_extensions.get(format_param, 'flv')
 | 
			
		||||
 | 
			
		||||
            video_format = '{0} - {1}'.format(format_param if format_param else video_extension,
 | 
			
		||||
                                              self._video_dimensions.get(format_param, '???'))
 | 
			
		||||
            video_format = '{0} - {1}{2}'.format(format_param if format_param else video_extension,
 | 
			
		||||
                                              self._video_dimensions.get(format_param, '???'),
 | 
			
		||||
                                              ' (3D)' if format_param in self._3d_itags else '')
 | 
			
		||||
 | 
			
		||||
            results.append({
 | 
			
		||||
                'id':       video_id,
 | 
			
		||||
@@ -670,10 +788,10 @@ class YoutubePlaylistIE(InfoExtractor):
 | 
			
		||||
                           \? (?:.*?&)*? (?:p|a|list)=
 | 
			
		||||
                        |  p/
 | 
			
		||||
                        )
 | 
			
		||||
                        ((?:PL|EC|UU)?[0-9A-Za-z-_]{10,})
 | 
			
		||||
                        ((?:PL|EC|UU|FL)?[0-9A-Za-z-_]{10,})
 | 
			
		||||
                        .*
 | 
			
		||||
                     |
 | 
			
		||||
                        ((?:PL|EC|UU)[0-9A-Za-z-_]{10,})
 | 
			
		||||
                        ((?:PL|EC|UU|FL)[0-9A-Za-z-_]{10,})
 | 
			
		||||
                     )"""
 | 
			
		||||
    _TEMPLATE_URL = 'https://gdata.youtube.com/feeds/api/playlists/%s?max-results=%i&start-index=%i&v=2&alt=json&safeSearch=none'
 | 
			
		||||
    _MAX_RESULTS = 50
 | 
			
		||||
@@ -692,11 +810,14 @@ class YoutubePlaylistIE(InfoExtractor):
 | 
			
		||||
 | 
			
		||||
        # Download playlist videos from API
 | 
			
		||||
        playlist_id = mobj.group(1) or mobj.group(2)
 | 
			
		||||
        page_num = 1
 | 
			
		||||
        videos = []
 | 
			
		||||
 | 
			
		||||
        while True:
 | 
			
		||||
            url = self._TEMPLATE_URL % (playlist_id, self._MAX_RESULTS, self._MAX_RESULTS * (page_num - 1) + 1)
 | 
			
		||||
        for page_num in itertools.count(1):
 | 
			
		||||
            start_index = self._MAX_RESULTS * (page_num - 1) + 1
 | 
			
		||||
            if start_index >= 1000:
 | 
			
		||||
                self._downloader.report_warning(u'Max number of results reached')
 | 
			
		||||
                break
 | 
			
		||||
            url = self._TEMPLATE_URL % (playlist_id, self._MAX_RESULTS, start_index)
 | 
			
		||||
            page = self._download_webpage(url, playlist_id, u'Downloading page #%s' % page_num)
 | 
			
		||||
 | 
			
		||||
            try:
 | 
			
		||||
@@ -716,10 +837,6 @@ class YoutubePlaylistIE(InfoExtractor):
 | 
			
		||||
                if 'media$group' in entry and 'media$player' in entry['media$group']:
 | 
			
		||||
                    videos.append((index, entry['media$group']['media$player']['url']))
 | 
			
		||||
 | 
			
		||||
            if len(response['feed']['entry']) < self._MAX_RESULTS:
 | 
			
		||||
                break
 | 
			
		||||
            page_num += 1
 | 
			
		||||
 | 
			
		||||
        videos = [v[1] for v in sorted(videos)]
 | 
			
		||||
 | 
			
		||||
        url_results = [self.url_result(vurl, 'Youtube') for vurl in videos]
 | 
			
		||||
@@ -731,7 +848,7 @@ class YoutubeChannelIE(InfoExtractor):
 | 
			
		||||
    _VALID_URL = r"^(?:https?://)?(?:youtu\.be|(?:\w+\.)?youtube(?:-nocookie)?\.com)/channel/([0-9A-Za-z_-]+)"
 | 
			
		||||
    _TEMPLATE_URL = 'http://www.youtube.com/channel/%s/videos?sort=da&flow=list&view=0&page=%s&gl=US&hl=en'
 | 
			
		||||
    _MORE_PAGES_INDICATOR = 'yt-uix-load-more'
 | 
			
		||||
    _MORE_PAGES_URL = 'http://www.youtube.com/channel_ajax?action_load_more_videos=1&flow=list&paging=%s&view=0&sort=da&channel_id=%s'
 | 
			
		||||
    _MORE_PAGES_URL = 'http://www.youtube.com/c4_browse_ajax?action_load_more_videos=1&flow=list&paging=%s&view=0&sort=da&channel_id=%s'
 | 
			
		||||
    IE_NAME = u'youtube:channel'
 | 
			
		||||
 | 
			
		||||
    def extract_videos_from_page(self, page):
 | 
			
		||||
@@ -762,9 +879,7 @@ class YoutubeChannelIE(InfoExtractor):
 | 
			
		||||
 | 
			
		||||
        # Download any subsequent channel pages using the json-based channel_ajax query
 | 
			
		||||
        if self._MORE_PAGES_INDICATOR in page:
 | 
			
		||||
            while True:
 | 
			
		||||
                pagenum = pagenum + 1
 | 
			
		||||
 | 
			
		||||
            for pagenum in itertools.count(1):
 | 
			
		||||
                url = self._MORE_PAGES_URL % (pagenum, channel_id)
 | 
			
		||||
                page = self._download_webpage(url, channel_id,
 | 
			
		||||
                                              u'Downloading page #%s' % pagenum)
 | 
			
		||||
@@ -807,9 +922,8 @@ class YoutubeUserIE(InfoExtractor):
 | 
			
		||||
        # all of them.
 | 
			
		||||
 | 
			
		||||
        video_ids = []
 | 
			
		||||
        pagenum = 0
 | 
			
		||||
 | 
			
		||||
        while True:
 | 
			
		||||
        for pagenum in itertools.count(0):
 | 
			
		||||
            start_index = pagenum * self._GDATA_PAGE_SIZE + 1
 | 
			
		||||
 | 
			
		||||
            gdata_url = self._GDATA_URL % (username, self._GDATA_PAGE_SIZE, start_index)
 | 
			
		||||
@@ -834,8 +948,6 @@ class YoutubeUserIE(InfoExtractor):
 | 
			
		||||
            if len(ids_in_page) < self._GDATA_PAGE_SIZE:
 | 
			
		||||
                break
 | 
			
		||||
 | 
			
		||||
            pagenum += 1
 | 
			
		||||
 | 
			
		||||
        urls = ['http://www.youtube.com/watch?v=%s' % video_id for video_id in video_ids]
 | 
			
		||||
        url_results = [self.url_result(rurl, 'Youtube') for rurl in urls]
 | 
			
		||||
        return [self.playlist_result(url_results, playlist_title = username)]
 | 
			
		||||
@@ -898,38 +1010,75 @@ class YoutubeShowIE(InfoExtractor):
 | 
			
		||||
        return [self.url_result('https://www.youtube.com' + season.group(1), 'YoutubePlaylist') for season in m_seasons]
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class YoutubeSubscriptionsIE(YoutubeIE):
 | 
			
		||||
    """It's a subclass of YoutubeIE because we need to login"""
 | 
			
		||||
    IE_DESC = u'YouTube.com subscriptions feed, "ytsubs" keyword(requires authentication)'
 | 
			
		||||
    _VALID_URL = r'https?://www\.youtube\.com/feed/subscriptions|:ytsubs(?:criptions)?'
 | 
			
		||||
    IE_NAME = u'youtube:subscriptions'
 | 
			
		||||
    _FEED_TEMPLATE = 'http://www.youtube.com/feed_ajax?action_load_system_feed=1&feed_name=subscriptions&paging=%s'
 | 
			
		||||
class YoutubeFeedsInfoExtractor(YoutubeBaseInfoExtractor):
 | 
			
		||||
    """
 | 
			
		||||
    Base class for extractors that fetch info from
 | 
			
		||||
    http://www.youtube.com/feed_ajax
 | 
			
		||||
    Subclasses must define the _FEED_NAME and _PLAYLIST_TITLE properties.
 | 
			
		||||
    """
 | 
			
		||||
    _LOGIN_REQUIRED = True
 | 
			
		||||
    _PAGING_STEP = 30
 | 
			
		||||
    # use action_load_personal_feed instead of action_load_system_feed
 | 
			
		||||
    _PERSONAL_FEED = False
 | 
			
		||||
 | 
			
		||||
    # Overwrite YoutubeIE properties we don't want
 | 
			
		||||
    _TESTS = []
 | 
			
		||||
    @classmethod
 | 
			
		||||
    def suitable(cls, url):
 | 
			
		||||
        return re.match(cls._VALID_URL, url) is not None
 | 
			
		||||
    @property
 | 
			
		||||
    def _FEED_TEMPLATE(self):
 | 
			
		||||
        action = 'action_load_system_feed'
 | 
			
		||||
        if self._PERSONAL_FEED:
 | 
			
		||||
            action = 'action_load_personal_feed'
 | 
			
		||||
        return 'http://www.youtube.com/feed_ajax?%s=1&feed_name=%s&paging=%%s' % (action, self._FEED_NAME)
 | 
			
		||||
 | 
			
		||||
    @property
 | 
			
		||||
    def IE_NAME(self):
 | 
			
		||||
        return u'youtube:%s' % self._FEED_NAME
 | 
			
		||||
 | 
			
		||||
    def _real_initialize(self):
 | 
			
		||||
        (username, password) = self._get_login_info()
 | 
			
		||||
        if username is None:
 | 
			
		||||
            raise ExtractorError(u'No login info available, needed for downloading the Youtube subscriptions.', expected=True)
 | 
			
		||||
        super(YoutubeSubscriptionsIE, self)._real_initialize()
 | 
			
		||||
        self._login()
 | 
			
		||||
 | 
			
		||||
    def _real_extract(self, url):
 | 
			
		||||
        feed_entries = []
 | 
			
		||||
        # The step argument is available only in 2.7 or higher
 | 
			
		||||
        for i in itertools.count(0):
 | 
			
		||||
            paging = i*self._PAGING_STEP
 | 
			
		||||
            info = self._download_webpage(self._FEED_TEMPLATE % paging, 'feed',
 | 
			
		||||
            info = self._download_webpage(self._FEED_TEMPLATE % paging,
 | 
			
		||||
                                          u'%s feed' % self._FEED_NAME,
 | 
			
		||||
                                          u'Downloading page %s' % i)
 | 
			
		||||
            info = json.loads(info)
 | 
			
		||||
            feed_html = info['feed_html']
 | 
			
		||||
            m_ids = re.finditer(r'"/watch\?v=(.*?)"', feed_html)
 | 
			
		||||
            m_ids = re.finditer(r'"/watch\?v=(.*?)["&]', feed_html)
 | 
			
		||||
            ids = orderedSet(m.group(1) for m in m_ids)
 | 
			
		||||
            feed_entries.extend(self.url_result(id, 'Youtube') for id in ids)
 | 
			
		||||
            if info['paging'] is None:
 | 
			
		||||
                break
 | 
			
		||||
        return self.playlist_result(feed_entries, playlist_title='Youtube Subscriptions')
 | 
			
		||||
        return self.playlist_result(feed_entries, playlist_title=self._PLAYLIST_TITLE)
 | 
			
		||||
 | 
			
		||||
class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
 | 
			
		||||
    IE_DESC = u'YouTube.com subscriptions feed, "ytsubs" keyword(requires authentication)'
 | 
			
		||||
    _VALID_URL = r'https?://www\.youtube\.com/feed/subscriptions|:ytsubs(?:criptions)?'
 | 
			
		||||
    _FEED_NAME = 'subscriptions'
 | 
			
		||||
    _PLAYLIST_TITLE = u'Youtube Subscriptions'
 | 
			
		||||
 | 
			
		||||
class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
 | 
			
		||||
    IE_DESC = u'YouTube.com recommended videos, "ytrec" keyword (requires authentication)'
 | 
			
		||||
    _VALID_URL = r'https?://www\.youtube\.com/feed/recommended|:ytrec(?:ommended)?'
 | 
			
		||||
    _FEED_NAME = 'recommended'
 | 
			
		||||
    _PLAYLIST_TITLE = u'Youtube Recommended videos'
 | 
			
		||||
 | 
			
		||||
class YoutubeWatchLaterIE(YoutubeFeedsInfoExtractor):
 | 
			
		||||
    IE_DESC = u'Youtube watch later list, "ytwatchlater" keyword (requires authentication)'
 | 
			
		||||
    _VALID_URL = r'https?://www\.youtube\.com/feed/watch_later|:ytwatchlater'
 | 
			
		||||
    _FEED_NAME = 'watch_later'
 | 
			
		||||
    _PLAYLIST_TITLE = u'Youtube Watch Later'
 | 
			
		||||
    _PAGING_STEP = 100
 | 
			
		||||
    _PERSONAL_FEED = True
 | 
			
		||||
 | 
			
		||||
class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
 | 
			
		||||
    IE_NAME = u'youtube:favorites'
 | 
			
		||||
    IE_DESC = u'YouTube.com favourite videos, "ytfav" keyword (requires authentication)'
 | 
			
		||||
    _VALID_URL = r'https?://www\.youtube\.com/my_favorites|:ytfav(?:o?rites)?'
 | 
			
		||||
    _LOGIN_REQUIRED = True
 | 
			
		||||
 | 
			
		||||
    def _real_extract(self, url):
 | 
			
		||||
        webpage = self._download_webpage('https://www.youtube.com/my_favorites', 'Youtube Favourites videos')
 | 
			
		||||
        playlist_id = self._search_regex(r'list=(.+?)["&]', webpage, u'favourites playlist id')
 | 
			
		||||
        return self.url_result(playlist_id, 'YoutubePlaylist')
 | 
			
		||||
 
 | 
			
		||||
@@ -207,7 +207,7 @@ if sys.version_info >= (2,7):
 | 
			
		||||
    def find_xpath_attr(node, xpath, key, val):
 | 
			
		||||
        """ Find the xpath xpath[@key=val] """
 | 
			
		||||
        assert re.match(r'^[a-zA-Z]+$', key)
 | 
			
		||||
        assert re.match(r'^[a-zA-Z@]*$', val)
 | 
			
		||||
        assert re.match(r'^[a-zA-Z@\s]*$', val)
 | 
			
		||||
        expr = xpath + u"[@%s='%s']" % (key, val)
 | 
			
		||||
        return node.find(expr)
 | 
			
		||||
else:
 | 
			
		||||
@@ -497,7 +497,7 @@ class ExtractorError(Exception):
 | 
			
		||||
        if sys.exc_info()[0] in (compat_urllib_error.URLError, socket.timeout, UnavailableVideoError):
 | 
			
		||||
            expected = True
 | 
			
		||||
        if not expected:
 | 
			
		||||
            msg = msg + u'; please report this issue on https://yt-dl.org/bug . Be sure to call youtube-dl with the --verbose flag and include its complete output.'
 | 
			
		||||
            msg = msg + u'; please report this issue on https://yt-dl.org/bug . Be sure to call youtube-dl with the --verbose flag and include its complete output. Make sure you are using the latest version; type  youtube-dl -U  to update.'
 | 
			
		||||
        super(ExtractorError, self).__init__(msg)
 | 
			
		||||
 | 
			
		||||
        self.traceback = tb
 | 
			
		||||
 
 | 
			
		||||
@@ -1,2 +1,2 @@
 | 
			
		||||
 | 
			
		||||
__version__ = '2013.07.18'
 | 
			
		||||
__version__ = '2013.08.14'
 | 
			
		||||
 
 | 
			
		||||
		Reference in New Issue
	
	Block a user