Compare commits
	
		
			380 Commits
		
	
	
		
			2016.05.01
			...
			2016.06.11
		
	
	| Author | SHA1 | Date | |
|---|---|---|---|
| 
						 | 
					6626c214e1 | ||
| 
						 | 
					d845622b2e | ||
| 
						 | 
					1058f56e96 | ||
| 
						 | 
					0434358823 | ||
| 
						 | 
					3841256c2c | ||
| 
						 | 
					bdf16f8140 | ||
| 
						 | 
					836ab0c554 | ||
| 
						 | 
					6c0376fe4f | ||
| 
						 | 
					1fa309da40 | ||
| 
						 | 
					daa0df9e8b | ||
| 
						 | 
					09728d5fbc | ||
| 
						 | 
					c16f8a4659 | ||
| 
						 | 
					a225238530 | ||
| 
						 | 
					55b2f099c0 | ||
| 
						 | 
					9631a94fb5 | ||
| 
						 | 
					cc4444662c | ||
| 
						 | 
					de3eb07ed6 | ||
| 
						 | 
					5de008e8c3 | ||
| 
						 | 
					3e74b444e7 | ||
| 
						 | 
					e1e0a10c56 | ||
| 
						 | 
					436214baf7 | ||
| 
						 | 
					506d0e9693 | ||
| 
						 | 
					55290788d3 | ||
| 
						 | 
					bc7e7adf51 | ||
| 
						 | 
					b0aebe702c | ||
| 
						 | 
					416878f41f | ||
| 
						 | 
					c0fed3bda5 | ||
| 
						 | 
					bb1e44cc8e | ||
| 
						 | 
					21efee5f8b | ||
| 
						 | 
					e2713d32f4 | ||
| 
						 | 
					e21c26daf9 | ||
| 
						 | 
					1594a4932f | ||
| 
						 | 
					6869d634c6 | ||
| 
						 | 
					50918c4ee0 | ||
| 
						 | 
					6c33d24b46 | ||
| 
						 | 
					be6217b261 | ||
| 
						 | 
					9d51a0a9a1 | ||
| 
						 | 
					39da509f67 | ||
| 
						 | 
					a479b8f687 | ||
| 
						 | 
					48a5eabc48 | ||
| 
						 | 
					11380753b5 | ||
| 
						 | 
					411c590a1f | ||
| 
						 | 
					6da8d7de69 | ||
| 
						 | 
					c6308b3153 | ||
| 
						 | 
					fc0a45fa41 | ||
| 
						 | 
					e6e90515db | ||
| 
						 | 
					22a0a95247 | ||
| 
						 | 
					50ce1c331c | ||
| 
						 | 
					7264e38591 | ||
| 
						 | 
					33d9f3707c | ||
| 
						 | 
					a26a9d6239 | ||
| 
						 | 
					a4a8201c02 | ||
| 
						 | 
					a6571f1073 | ||
| 
						 | 
					57b6e9652e | ||
| 
						 | 
					3d9b3605a3 | ||
| 
						 | 
					74193838f7 | ||
| 
						 | 
					fb94e260b5 | ||
| 
						 | 
					345dec937f | ||
| 
						 | 
					4315f74fa8 | ||
| 
						 | 
					e67f688025 | ||
| 
						 | 
					db59b37d0b | ||
| 
						 | 
					244fe977fe | ||
| 
						 | 
					7b0d1c2859 | ||
| 
						 | 
					21d0a8e48b | ||
| 
						 | 
					47f12ad3e3 | ||
| 
						 | 
					8f1aaa97a1 | ||
| 
						 | 
					9d78524cbe | ||
| 
						 | 
					bc270284b5 | ||
| 
						 | 
					c93b4eaceb | ||
| 
						 | 
					71b9cb3107 | ||
| 
						 | 
					633b444fd2 | ||
| 
						 | 
					51c4d85ce7 | ||
| 
						 | 
					631d4c87ee | ||
| 
						 | 
					1e236d7e23 | ||
| 
						 | 
					2c34735267 | ||
| 
						 | 
					39b32571df | ||
| 
						 | 
					db56f281d9 | ||
| 
						 | 
					e92b552a10 | ||
| 
						 | 
					1ae6c83bce | ||
| 
						 | 
					0fc832e1b2 | ||
| 
						 | 
					7def35712a | ||
| 
						 | 
					cad88f96dc | ||
| 
						 | 
					762d44c956 | ||
| 
						 | 
					4d8856d511 | ||
| 
						 | 
					c917106be4 | ||
| 
						 | 
					76e9cd7f24 | ||
| 
						 | 
					bf4c6a38e1 | ||
| 
						 | 
					7f3c3dfa52 | ||
| 
						 | 
					9c3c447eb3 | ||
| 
						 | 
					ad73083ff0 | ||
| 
						 | 
					1e8b59243f | ||
| 
						 | 
					c88270271e | ||
| 
						 | 
					b96f007eeb | ||
| 
						 | 
					9a4aec8b7e | ||
| 
						 | 
					54fb199681 | ||
| 
						 | 
					8c32e5dc32 | ||
| 
						 | 
					0ea590076f | ||
| 
						 | 
					4a684895c0 | ||
| 
						 | 
					f4e4aa9b6b | ||
| 
						 | 
					5e3856a2c5 | ||
| 
						 | 
					6e6b9f600f | ||
| 
						 | 
					6a1df4fb5f | ||
| 
						 | 
					dde1ce7c06 | ||
| 
						 | 
					811586ebcf | ||
| 
						 | 
					0ff3749bfe | ||
| 
						 | 
					28bab13348 | ||
| 
						 | 
					877032314f | ||
| 
						 | 
					e7d85c4ef7 | ||
| 
						 | 
					8ec2b2c41c | ||
| 
						 | 
					197a5da1d0 | ||
| 
						 | 
					abbb2938fa | ||
| 
						 | 
					f657b1a5f2 | ||
| 
						 | 
					86a52881c6 | ||
| 
						 | 
					8267423652 | ||
| 
						 | 
					917a3196f8 | ||
| 
						 | 
					56bd028a0f | ||
| 
						 | 
					681b923b5c | ||
| 
						 | 
					9ed6d8c6c5 | ||
| 
						 | 
					f3fb420b82 | ||
| 
						 | 
					165e3561e9 | ||
| 
						 | 
					27f17c0eab | ||
| 
						 | 
					44c8892369 | ||
| 
						 | 
					f574103d7c | ||
| 
						 | 
					6d138e98e3 | ||
| 
						 | 
					2a329110b9 | ||
| 
						 | 
					2bee7b25f3 | ||
| 
						 | 
					92cf872a48 | ||
| 
						 | 
					6461f2b7ec | ||
| 
						 | 
					807cf7b07f | ||
| 
						 | 
					de7d76af52 | ||
| 
						 | 
					11c70deba7 | ||
| 
						 | 
					f36532404d | ||
| 
						 | 
					77b8b4e696 | ||
| 
						 | 
					2615fa7584 | ||
| 
						 | 
					3a686853e1 | ||
| 
						 | 
					949fc42e00 | ||
| 
						 | 
					33a1ff7113 | ||
| 
						 | 
					bec2c14f2c | ||
| 
						 | 
					37f972954d | ||
| 
						 | 
					3874e6ea66 | ||
| 
						 | 
					fac2af3c51 | ||
| 
						 | 
					6f8cb24219 | ||
| 
						 | 
					448bb5f333 | ||
| 
						 | 
					293c255688 | ||
| 
						 | 
					ac88d2316e | ||
| 
						 | 
					5950cb1d6d | ||
| 
						 | 
					761052db92 | ||
| 
						 | 
					240b60453e | ||
| 
						 | 
					85b0fe7d64 | ||
| 
						 | 
					0a5685b26f | ||
| 
						 | 
					6f748df43f | ||
| 
						 | 
					b410cb83d4 | ||
| 
						 | 
					da9d82840a | ||
| 
						 | 
					4ee0b8afdb | ||
| 
						 | 
					1de32771e1 | ||
| 
						 | 
					688c634b7d | ||
| 
						 | 
					0d6ee97508 | ||
| 
						 | 
					6b43132ce9 | ||
| 
						 | 
					a4690b3244 | ||
| 
						 | 
					444417edb5 | ||
| 
						 | 
					277c7465f5 | ||
| 
						 | 
					25bcd3550e | ||
| 
						 | 
					a4760d204f | ||
| 
						 | 
					e8593f346a | ||
| 
						 | 
					05b651e3a5 | ||
| 
						 | 
					42a7439717 | ||
| 
						 | 
					b1e9ebd080 | ||
| 
						 | 
					0c50eeb987 | ||
| 
						 | 
					4b464a6a78 | ||
| 
						 | 
					5db9df622f | ||
| 
						 | 
					5181759c0d | ||
| 
						 | 
					e54373204a | ||
| 
						 | 
					102810ef04 | ||
| 
						 | 
					78d3b3e213 | ||
| 
						 | 
					7a46542f97 | ||
| 
						 | 
					eb7941e3e6 | ||
| 
						 | 
					db3b8b2103 | ||
| 
						 | 
					c5f5155100 | ||
| 
						 | 
					4a12077855 | ||
| 
						 | 
					a4a7c44bd3 | ||
| 
						 | 
					70346165fe | ||
| 
						 | 
					c776b99691 | ||
| 
						 | 
					e9297256d4 | ||
| 
						 | 
					e5871c672b | ||
| 
						 | 
					9b06b0fb92 | ||
| 
						 | 
					4f3a25c2b4 | ||
| 
						 | 
					21a19aa94d | ||
| 
						 | 
					c6b9cf05e1 | ||
| 
						 | 
					4d8819d249 | ||
| 
						 | 
					898f4b49cc | ||
| 
						 | 
					0150a00f33 | ||
| 
						 | 
					c8831015f4 | ||
| 
						 | 
					92d221ad48 | ||
| 
						 | 
					0db9a05f88 | ||
| 
						 | 
					e03b35b8f9 | ||
| 
						 | 
					d2fee3c99e | ||
| 
						 | 
					598869afb1 | ||
| 
						 | 
					7e642e4fd6 | ||
| 
						 | 
					c8cc3745fb | ||
| 
						 | 
					4c718d3c50 | ||
| 
						 | 
					115c65793a | ||
| 
						 | 
					661d46b28f | ||
| 
						 | 
					5ce3d5bd1b | ||
| 
						 | 
					612b5f403e | ||
| 
						 | 
					9f54e692d2 | ||
| 
						 | 
					7b2fcbfd4e | ||
| 
						 | 
					16da9bbc29 | ||
| 
						 | 
					c8602b2f9b | ||
| 
						 | 
					b219f5e51b | ||
| 
						 | 
					1846e9ade0 | ||
| 
						 | 
					6756602be6 | ||
| 
						 | 
					6c114b1210 | ||
| 
						 | 
					7ded6545ed | ||
| 
						 | 
					aa5957ac49 | ||
| 
						 | 
					64413f7563 | ||
| 
						 | 
					45f160a43c | ||
| 
						 | 
					36ca2c55db | ||
| 
						 | 
					f0c96af9cb | ||
| 
						 | 
					31a70191e7 | ||
| 
						 | 
					ad96b4c8f5 | ||
| 
						 | 
					043dc9d36f | ||
| 
						 | 
					52f7c75cff | ||
| 
						 | 
					f6e588afc0 | ||
| 
						 | 
					a001296703 | ||
| 
						 | 
					2cbd8c6781 | ||
| 
						 | 
					8585dc4cdc | ||
| 
						 | 
					dd81769c62 | ||
| 
						 | 
					46bc9b7d7c | ||
| 
						 | 
					b78531a36a | ||
| 
						 | 
					11e6a0b641 | ||
| 
						 | 
					15cda1ef77 | ||
| 
						 | 
					055f0d3d06 | ||
| 
						 | 
					cdd94c2eae | ||
| 
						 | 
					36755d9d69 | ||
| 
						 | 
					f7199423e5 | ||
| 
						 | 
					a0a81918f1 | ||
| 
						 | 
					5572d598a5 | ||
| 
						 | 
					cec9727c7f | ||
| 
						 | 
					79298173c5 | ||
| 
						 | 
					69c9cc2716 | ||
| 
						 | 
					ed56f26039 | ||
| 
						 | 
					6f41b2bcf1 | ||
| 
						 | 
					cda6d47aad | ||
| 
						 | 
					5d39176f6d | ||
| 
						 | 
					5c86bfe70f | ||
| 
						 | 
					364cf465dd | ||
| 
						 | 
					ca950f49e9 | ||
| 
						 | 
					89ac4a19e6 | ||
| 
						 | 
					640eea0a0c | ||
| 
						 | 
					bd1e484448 | ||
| 
						 | 
					a834622b89 | ||
| 
						 | 
					707bb426b1 | ||
| 
						 | 
					66e7ace17a | ||
| 
						 | 
					791ff52f75 | ||
| 
						 | 
					98d560f205 | ||
| 
						 | 
					afcc317800 | ||
| 
						 | 
					b5abf86148 | ||
| 
						 | 
					134c6ea856 | ||
| 
						 | 
					0730be9022 | ||
| 
						 | 
					96c2e3e909 | ||
| 
						 | 
					f196508f7b | ||
| 
						 | 
					cc1028aa6d | ||
| 
						 | 
					ad55e10165 | ||
| 
						 | 
					18cf6381f6 | ||
| 
						 | 
					cdf32ff15d | ||
| 
						 | 
					99d79b8692 | ||
| 
						 | 
					b9e7bc55da | ||
| 
						 | 
					d8d540cf0d | ||
| 
						 | 
					0df79d552a | ||
| 
						 | 
					0db3a66162 | ||
| 
						 | 
					7581bfc958 | ||
| 
						 | 
					f388f616c1 | ||
| 
						 | 
					a3fa6024d6 | ||
| 
						 | 
					1b405bb47d | ||
| 
						 | 
					7e8ddca1bb | ||
| 
						 | 
					778a1ccca7 | ||
| 
						 | 
					4540515cb3 | ||
| 
						 | 
					e0741fd449 | ||
| 
						 | 
					e73b9c65e2 | ||
| 
						 | 
					702ccf2dc0 | ||
| 
						 | 
					28b4f73620 | ||
| 
						 | 
					c2876afafe | ||
| 
						 | 
					6ddb4888d2 | ||
| 
						 | 
					fa5cb8d021 | ||
| 
						 | 
					e21f17fc86 | ||
| 
						 | 
					edaa23f822 | ||
| 
						 | 
					d5ae6bb501 | ||
| 
						 | 
					51fb4995a5 | ||
| 
						 | 
					9e9cd7248d | ||
| 
						 | 
					72f3289ac4 | ||
| 
						 | 
					71aff18809 | ||
| 
						 | 
					dab0daeeb0 | ||
| 
						 | 
					4350b74545 | ||
| 
						 | 
					2937590e8b | ||
| 
						 | 
					fad7bbec3a | ||
| 
						 | 
					e62d9c5caa | ||
| 
						 | 
					20cfdcc910 | ||
| 
						 | 
					1292638754 | ||
| 
						 | 
					fe40f9eef2 | ||
| 
						 | 
					6104cc2985 | ||
| 
						 | 
					c15c47d19b | ||
| 
						 | 
					965fefdcd8 | ||
| 
						 | 
					3951e7eb93 | ||
| 
						 | 
					f1f6f5aa5e | ||
| 
						 | 
					eb785b856f | ||
| 
						 | 
					c52f4efaee | ||
| 
						 | 
					f23a92a0ce | ||
| 
						 | 
					3b01a9fbb6 | ||
| 
						 | 
					93fdb14177 | ||
| 
						 | 
					370d4eb8ad | ||
| 
						 | 
					3452c3a27c | ||
| 
						 | 
					9c072d38c6 | ||
| 
						 | 
					81f35fee2f | ||
| 
						 | 
					0fdbe3146c | ||
| 
						 | 
					3e169233da | ||
| 
						 | 
					f5436c5d9e | ||
| 
						 | 
					5c24873a9e | ||
| 
						 | 
					00c21c225d | ||
| 
						 | 
					d013b26719 | ||
| 
						 | 
					e2eca6f65e | ||
| 
						 | 
					a0904c5d80 | ||
| 
						 | 
					cb1fa58813 | ||
| 
						 | 
					3fd6332c05 | ||
| 
						 | 
					401d147893 | ||
| 
						 | 
					e2ee97dcd5 | ||
| 
						 | 
					f745403b5b | ||
| 
						 | 
					3e80e6f40d | ||
| 
						 | 
					25cb7a0eeb | ||
| 
						 | 
					abc97b5eda | ||
| 
						 | 
					04e88ca2ca | ||
| 
						 | 
					8d93c21466 | ||
| 
						 | 
					1dbfd78754 | ||
| 
						 | 
					22e35adefd | ||
| 
						 | 
					6f59aa934b | ||
| 
						 | 
					109db8ea64 | ||
| 
						 | 
					833b644fff | ||
| 
						 | 
					915620fd68 | ||
| 
						 | 
					ac12e888f9 | ||
| 
						 | 
					b1c6a5bac8 | ||
| 
						 | 
					7d08f6073d | ||
| 
						 | 
					758a059241 | ||
| 
						 | 
					4f8c56eb4e | ||
| 
						 | 
					57cf9b7f06 | ||
| 
						 | 
					9da526aae7 | ||
| 
						 | 
					75b81df3af | ||
| 
						 | 
					aabdc83d6e | ||
| 
						 | 
					2a48e6f01a | ||
| 
						 | 
					203a3c0e6a | ||
| 
						 | 
					d36724cca4 | ||
| 
						 | 
					15fc0658f7 | ||
| 
						 | 
					e960c3c223 | ||
| 
						 | 
					bc7e77a04b | ||
| 
						 | 
					964f49336f | ||
| 
						 | 
					57d8e32a3e | ||
| 
						 | 
					4174552391 | ||
| 
						 | 
					80bc4106af | ||
| 
						 | 
					7759be38da | ||
| 
						 | 
					a0a309b973 | ||
| 
						 | 
					c587cbb793 | ||
| 
						 | 
					6c52a86f54 | ||
| 
						 | 
					8a92e51c60 | ||
| 
						 | 
					f0e14fdd43 | ||
| 
						 | 
					df5f4e8888 | ||
| 
						 | 
					7960b0563b | ||
| 
						 | 
					5c9ced9504 | ||
| 
						 | 
					31c4448f6e | ||
| 
						 | 
					79a2e94e79 | ||
| 
						 | 
					686cc89634 | ||
| 
						 | 
					9508738f9a | ||
| 
						 | 
					78a3ff33ab | ||
| 
						 | 
					881dbc86c4 | ||
| 
						 | 
					8e7d004888 | ||
| 
						 | 
					9618c44824 | ||
| 
						 | 
					516ea41a7d | ||
| 
						 | 
					e2bd301ce7 | ||
| 
						 | 
					0c9d288ba0 | ||
| 
						 | 
					e0da32df6e | ||
| 
						 | 
					0d66bd0eab | ||
| 
						 | 
					14f7a2b8af | ||
| 
						 | 
					c0837a12c8 | 
							
								
								
									
										6
									
								
								.github/ISSUE_TEMPLATE.md
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										6
									
								
								.github/ISSUE_TEMPLATE.md
									
									
									
									
										vendored
									
									
								
							@@ -6,8 +6,8 @@
 | 
			
		||||
 | 
			
		||||
---
 | 
			
		||||
 | 
			
		||||
### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.05.01*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected.
 | 
			
		||||
- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.05.01**
 | 
			
		||||
### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.06.11.1*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected.
 | 
			
		||||
- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.06.11.1**
 | 
			
		||||
 | 
			
		||||
### Before submitting an *issue* make sure you have:
 | 
			
		||||
- [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections
 | 
			
		||||
@@ -35,7 +35,7 @@ $ youtube-dl -v <your command line>
 | 
			
		||||
[debug] User config: []
 | 
			
		||||
[debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']
 | 
			
		||||
[debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
 | 
			
		||||
[debug] youtube-dl version 2016.05.01
 | 
			
		||||
[debug] youtube-dl version 2016.06.11.1
 | 
			
		||||
[debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2
 | 
			
		||||
[debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4
 | 
			
		||||
[debug] Proxy map: {}
 | 
			
		||||
 
 | 
			
		||||
							
								
								
									
										8
									
								
								.gitignore
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										8
									
								
								.gitignore
									
									
									
									
										vendored
									
									
								
							@@ -28,10 +28,16 @@ updates_key.pem
 | 
			
		||||
*.mp4
 | 
			
		||||
*.m4a
 | 
			
		||||
*.m4v
 | 
			
		||||
*.mp3
 | 
			
		||||
*.part
 | 
			
		||||
*.swp
 | 
			
		||||
test/testdata
 | 
			
		||||
test/local_parameters.json
 | 
			
		||||
.tox
 | 
			
		||||
youtube-dl.zsh
 | 
			
		||||
 | 
			
		||||
# IntelliJ related files
 | 
			
		||||
.idea
 | 
			
		||||
.idea/*
 | 
			
		||||
*.iml
 | 
			
		||||
 | 
			
		||||
tmp/
 | 
			
		||||
 
 | 
			
		||||
@@ -7,11 +7,13 @@ python:
 | 
			
		||||
  - "3.4"
 | 
			
		||||
  - "3.5"
 | 
			
		||||
sudo: false
 | 
			
		||||
install:
 | 
			
		||||
  - bash ./devscripts/install_srelay.sh
 | 
			
		||||
  - export PATH=$PATH:$(pwd)/tmp/srelay-0.4.8b6
 | 
			
		||||
script: nosetests test --verbose
 | 
			
		||||
notifications:
 | 
			
		||||
  email:
 | 
			
		||||
    - filippo.valsorda@gmail.com
 | 
			
		||||
    - phihag@phihag.de
 | 
			
		||||
    - yasoob.khld@gmail.com
 | 
			
		||||
#  irc:
 | 
			
		||||
#    channels:
 | 
			
		||||
 
 | 
			
		||||
							
								
								
									
										4
									
								
								AUTHORS
									
									
									
									
									
								
							
							
						
						
									
										4
									
								
								AUTHORS
									
									
									
									
									
								
							@@ -169,3 +169,7 @@ Viťas Strádal
 | 
			
		||||
Kagami Hiiragi
 | 
			
		||||
Philip Huppert
 | 
			
		||||
blahgeek
 | 
			
		||||
Kevin Deldycke
 | 
			
		||||
inondle
 | 
			
		||||
Tomáš Čech
 | 
			
		||||
Déstin Reed
 | 
			
		||||
 
 | 
			
		||||
							
								
								
									
										6
									
								
								Makefile
									
									
									
									
									
								
							
							
						
						
									
										6
									
								
								Makefile
									
									
									
									
									
								
							@@ -1,7 +1,7 @@
 | 
			
		||||
all: youtube-dl README.md CONTRIBUTING.md README.txt youtube-dl.1 youtube-dl.bash-completion youtube-dl.zsh youtube-dl.fish supportedsites
 | 
			
		||||
 | 
			
		||||
clean:
 | 
			
		||||
	rm -rf youtube-dl.1.temp.md youtube-dl.1 youtube-dl.bash-completion README.txt MANIFEST build/ dist/ .coverage cover/ youtube-dl.tar.gz youtube-dl.zsh youtube-dl.fish youtube_dl/extractor/lazy_extractors.py *.dump *.part *.info.json *.mp4 *.flv *.mp3 *.avi CONTRIBUTING.md.tmp ISSUE_TEMPLATE.md.tmp youtube-dl youtube-dl.exe
 | 
			
		||||
	rm -rf youtube-dl.1.temp.md youtube-dl.1 youtube-dl.bash-completion README.txt MANIFEST build/ dist/ .coverage cover/ youtube-dl.tar.gz youtube-dl.zsh youtube-dl.fish youtube_dl/extractor/lazy_extractors.py *.dump *.part *.info.json *.mp4 *.m4a *.flv *.mp3 *.avi *.mkv *.webm *.jpg *.png CONTRIBUTING.md.tmp ISSUE_TEMPLATE.md.tmp youtube-dl youtube-dl.exe
 | 
			
		||||
	find . -name "*.pyc" -delete
 | 
			
		||||
	find . -name "*.class" -delete
 | 
			
		||||
 | 
			
		||||
@@ -37,7 +37,7 @@ test:
 | 
			
		||||
ot: offlinetest
 | 
			
		||||
 | 
			
		||||
offlinetest: codetest
 | 
			
		||||
	$(PYTHON) -m nose --verbose test --exclude test_download.py --exclude test_age_restriction.py --exclude test_subtitles.py --exclude test_write_annotations.py --exclude test_youtube_lists.py --exclude test_iqiyi_sdk_interpreter.py
 | 
			
		||||
	$(PYTHON) -m nose --verbose test --exclude test_download.py --exclude test_age_restriction.py --exclude test_subtitles.py --exclude test_write_annotations.py --exclude test_youtube_lists.py --exclude test_iqiyi_sdk_interpreter.py --exclude test_socks.py
 | 
			
		||||
 | 
			
		||||
tar: youtube-dl.tar.gz
 | 
			
		||||
 | 
			
		||||
@@ -69,7 +69,7 @@ README.txt: README.md
 | 
			
		||||
	pandoc -f markdown -t plain README.md -o README.txt
 | 
			
		||||
 | 
			
		||||
youtube-dl.1: README.md
 | 
			
		||||
	$(PYTHON) devscripts/prepare_manpage.py >youtube-dl.1.temp.md
 | 
			
		||||
	$(PYTHON) devscripts/prepare_manpage.py youtube-dl.1.temp.md
 | 
			
		||||
	pandoc -s -f markdown -t man youtube-dl.1.temp.md -o youtube-dl.1
 | 
			
		||||
	rm -f youtube-dl.1.temp.md
 | 
			
		||||
 | 
			
		||||
 
 | 
			
		||||
							
								
								
									
										67
									
								
								README.md
									
									
									
									
									
								
							
							
						
						
									
										67
									
								
								README.md
									
									
									
									
									
								
							@@ -17,7 +17,7 @@ youtube-dl - download videos from youtube.com or other video platforms
 | 
			
		||||
 | 
			
		||||
To install it right away for all UNIX users (Linux, OS X, etc.), type:
 | 
			
		||||
 | 
			
		||||
    sudo curl https://yt-dl.org/latest/youtube-dl -o /usr/local/bin/youtube-dl
 | 
			
		||||
    sudo curl -L https://yt-dl.org/latest/youtube-dl -o /usr/local/bin/youtube-dl
 | 
			
		||||
    sudo chmod a+rx /usr/local/bin/youtube-dl
 | 
			
		||||
 | 
			
		||||
If you do not have curl, you can alternatively use a recent wget:
 | 
			
		||||
@@ -25,15 +25,21 @@ If you do not have curl, you can alternatively use a recent wget:
 | 
			
		||||
    sudo wget https://yt-dl.org/downloads/latest/youtube-dl -O /usr/local/bin/youtube-dl
 | 
			
		||||
    sudo chmod a+rx /usr/local/bin/youtube-dl
 | 
			
		||||
 | 
			
		||||
Windows users can [download a .exe file](https://yt-dl.org/latest/youtube-dl.exe) and place it in their home directory or any other location on their [PATH](http://en.wikipedia.org/wiki/PATH_%28variable%29).
 | 
			
		||||
 | 
			
		||||
OS X users can install **youtube-dl** with [Homebrew](http://brew.sh/).
 | 
			
		||||
 | 
			
		||||
    brew install youtube-dl
 | 
			
		||||
Windows users can [download an .exe file](https://yt-dl.org/latest/youtube-dl.exe) and place it in any location on their [PATH](http://en.wikipedia.org/wiki/PATH_%28variable%29) except for `%SYSTEMROOT%\System32` (e.g. **do not** put in `C:\Windows\System32`).
 | 
			
		||||
 | 
			
		||||
You can also use pip:
 | 
			
		||||
 | 
			
		||||
    sudo pip install youtube-dl
 | 
			
		||||
    sudo pip install --upgrade youtube-dl
 | 
			
		||||
    
 | 
			
		||||
This command will update youtube-dl if you have already installed it. See the [pypi page](https://pypi.python.org/pypi/youtube_dl) for more information.
 | 
			
		||||
 | 
			
		||||
OS X users can install youtube-dl with [Homebrew](http://brew.sh/):
 | 
			
		||||
 | 
			
		||||
    brew install youtube-dl
 | 
			
		||||
 | 
			
		||||
Or with [MacPorts](https://www.macports.org/):
 | 
			
		||||
 | 
			
		||||
    sudo port install youtube-dl
 | 
			
		||||
 | 
			
		||||
Alternatively, refer to the [developer instructions](#developer-instructions) for how to check out and work with the git repository. For further options, including PGP signatures, see the [youtube-dl Download Page](https://rg3.github.io/youtube-dl/download.html).
 | 
			
		||||
 | 
			
		||||
@@ -73,8 +79,8 @@ which means you can modify it, redistribute it or use it however you like.
 | 
			
		||||
                                     repairs broken URLs, but emits an error if
 | 
			
		||||
                                     this is not possible instead of searching.
 | 
			
		||||
    --ignore-config                  Do not read configuration files. When given
 | 
			
		||||
                                     in the global configuration file /etc
 | 
			
		||||
                                     /youtube-dl.conf: Do not read the user
 | 
			
		||||
                                     in the global configuration file
 | 
			
		||||
                                     /etc/youtube-dl.conf: Do not read the user
 | 
			
		||||
                                     configuration in ~/.config/youtube-
 | 
			
		||||
                                     dl/config (%APPDATA%/youtube-dl/config.txt
 | 
			
		||||
                                     on Windows)
 | 
			
		||||
@@ -85,9 +91,11 @@ which means you can modify it, redistribute it or use it however you like.
 | 
			
		||||
    --no-color                       Do not emit color codes in output
 | 
			
		||||
 | 
			
		||||
## Network Options:
 | 
			
		||||
    --proxy URL                      Use the specified HTTP/HTTPS proxy. Pass in
 | 
			
		||||
                                     an empty string (--proxy "") for direct
 | 
			
		||||
                                     connection
 | 
			
		||||
    --proxy URL                      Use the specified HTTP/HTTPS/SOCKS proxy.
 | 
			
		||||
                                     To enable experimental SOCKS proxy, specify
 | 
			
		||||
                                     a proper scheme. For example
 | 
			
		||||
                                     socks5://127.0.0.1:1080/. Pass in an empty
 | 
			
		||||
                                     string (--proxy "") for direct connection
 | 
			
		||||
    --socket-timeout SECONDS         Time to wait before giving up, in seconds
 | 
			
		||||
    --source-address IP              Client-side IP address to bind to
 | 
			
		||||
                                     (experimental)
 | 
			
		||||
@@ -160,7 +168,7 @@ which means you can modify it, redistribute it or use it however you like.
 | 
			
		||||
                                     (experimental)
 | 
			
		||||
 | 
			
		||||
## Download Options:
 | 
			
		||||
    -r, --rate-limit LIMIT           Maximum download rate in bytes per second
 | 
			
		||||
    -r, --limit-rate RATE            Maximum download rate in bytes per second
 | 
			
		||||
                                     (e.g. 50K or 4.2M)
 | 
			
		||||
    -R, --retries RETRIES            Number of retries (default is 10), or
 | 
			
		||||
                                     "infinite".
 | 
			
		||||
@@ -247,18 +255,19 @@ which means you can modify it, redistribute it or use it however you like.
 | 
			
		||||
    --write-info-json                Write video metadata to a .info.json file
 | 
			
		||||
    --write-annotations              Write video annotations to a
 | 
			
		||||
                                     .annotations.xml file
 | 
			
		||||
    --load-info FILE                 JSON file containing the video information
 | 
			
		||||
    --load-info-json FILE            JSON file containing the video information
 | 
			
		||||
                                     (created with the "--write-info-json"
 | 
			
		||||
                                     option)
 | 
			
		||||
    --cookies FILE                   File to read cookies from and dump cookie
 | 
			
		||||
                                     jar in
 | 
			
		||||
    --cache-dir DIR                  Location in the filesystem where youtube-dl
 | 
			
		||||
                                     can store some downloaded information
 | 
			
		||||
                                     permanently. By default $XDG_CACHE_HOME
 | 
			
		||||
                                     /youtube-dl or ~/.cache/youtube-dl . At the
 | 
			
		||||
                                     moment, only YouTube player files (for
 | 
			
		||||
                                     videos with obfuscated signatures) are
 | 
			
		||||
                                     cached, but that may change.
 | 
			
		||||
                                     permanently. By default
 | 
			
		||||
                                     $XDG_CACHE_HOME/youtube-dl or
 | 
			
		||||
                                     ~/.cache/youtube-dl . At the moment, only
 | 
			
		||||
                                     YouTube player files (for videos with
 | 
			
		||||
                                     obfuscated signatures) are cached, but that
 | 
			
		||||
                                     may change.
 | 
			
		||||
    --no-cache-dir                   Disable filesystem caching
 | 
			
		||||
    --rm-cache-dir                   Delete all filesystem cache files
 | 
			
		||||
 | 
			
		||||
@@ -415,7 +424,7 @@ which means you can modify it, redistribute it or use it however you like.
 | 
			
		||||
 | 
			
		||||
# CONFIGURATION
 | 
			
		||||
 | 
			
		||||
You can configure youtube-dl by placing any supported command line option to a configuration file. On Linux, the system wide configuration file is located at `/etc/youtube-dl.conf` and the user wide configuration file at `~/.config/youtube-dl/config`. On Windows, the user wide configuration file locations are `%APPDATA%\youtube-dl\config.txt` or `C:\Users\<user name>\youtube-dl.conf`.
 | 
			
		||||
You can configure youtube-dl by placing any supported command line option to a configuration file. On Linux and OS X, the system wide configuration file is located at `/etc/youtube-dl.conf` and the user wide configuration file at `~/.config/youtube-dl/config`. On Windows, the user wide configuration file locations are `%APPDATA%\youtube-dl\config.txt` or `C:\Users\<user name>\youtube-dl.conf`.
 | 
			
		||||
 | 
			
		||||
For example, with the following configuration file youtube-dl will always extract the audio, not copy the mtime, use a proxy and save all videos under `Movies` directory in your home directory:
 | 
			
		||||
```
 | 
			
		||||
@@ -431,7 +440,7 @@ You can use `--ignore-config` if you want to disable the configuration file for
 | 
			
		||||
 | 
			
		||||
### Authentication with `.netrc` file
 | 
			
		||||
 | 
			
		||||
You may also want to configure automatic credentials storage for extractors that support authentication (by providing login and password with `--username` and `--password`) in order not to pass credentials as command line arguments on every youtube-dl execution and prevent tracking plain text passwords in the shell command history. You can achieve this using a [`.netrc` file](http://stackoverflow.com/tags/.netrc/info) on per extractor basis. For that you will need to create a`.netrc` file in your `$HOME` and restrict permissions to read/write by you only:
 | 
			
		||||
You may also want to configure automatic credentials storage for extractors that support authentication (by providing login and password with `--username` and `--password`) in order not to pass credentials as command line arguments on every youtube-dl execution and prevent tracking plain text passwords in the shell command history. You can achieve this using a [`.netrc` file](http://stackoverflow.com/tags/.netrc/info) on per extractor basis. For that you will need to create a `.netrc` file in your `$HOME` and restrict permissions to read/write by you only:
 | 
			
		||||
```
 | 
			
		||||
touch $HOME/.netrc
 | 
			
		||||
chmod a-rwx,u+rw $HOME/.netrc
 | 
			
		||||
@@ -465,7 +474,7 @@ The basic usage is not to set any template arguments when downloading a single f
 | 
			
		||||
 - `display_id`: An alternative identifier for the video
 | 
			
		||||
 - `uploader`: Full name of the video uploader
 | 
			
		||||
 - `license`: License name the video is licensed under
 | 
			
		||||
 - `creator`: The main artist who created the video
 | 
			
		||||
 - `creator`: The creator of the video
 | 
			
		||||
 - `release_date`: The date (YYYYMMDD) when the video was released
 | 
			
		||||
 - `timestamp`: UNIX timestamp of the moment the video became available
 | 
			
		||||
 - `upload_date`: Video upload date (YYYYMMDD)
 | 
			
		||||
@@ -691,6 +700,10 @@ hash -r
 | 
			
		||||
 | 
			
		||||
Again, from then on you'll be able to update with `sudo youtube-dl -U`.
 | 
			
		||||
 | 
			
		||||
### youtube-dl is extremely slow to start on Windows
 | 
			
		||||
 | 
			
		||||
Add a file exclusion for `youtube-dl.exe` in Windows Defender settings.
 | 
			
		||||
 | 
			
		||||
### I'm getting an error `Unable to extract OpenGraph title` on YouTube playlists
 | 
			
		||||
 | 
			
		||||
YouTube changed their playlist format in March 2014 and later on, so you'll need at least youtube-dl 2014.07.25 to download all YouTube videos.
 | 
			
		||||
@@ -778,9 +791,9 @@ means you're using an outdated version of Python. Please update to Python 2.6 or
 | 
			
		||||
 | 
			
		||||
Since June 2012 ([#342](https://github.com/rg3/youtube-dl/issues/342)) youtube-dl is packed as an executable zipfile, simply unzip it (might need renaming to `youtube-dl.zip` first on some systems) or clone the git repository, as laid out above. If you modify the code, you can run it by executing the `__main__.py` file. To recompile the executable, run `make youtube-dl`.
 | 
			
		||||
 | 
			
		||||
### The exe throws a *Runtime error from Visual C++*
 | 
			
		||||
### The exe throws an error due to missing `MSVCR100.dll`
 | 
			
		||||
 | 
			
		||||
To run the exe you need to install first the [Microsoft Visual C++ 2008 Redistributable Package](http://www.microsoft.com/en-us/download/details.aspx?id=29).
 | 
			
		||||
To run the exe you need to install first the [Microsoft Visual C++ 2010 Redistributable Package (x86)](https://www.microsoft.com/en-US/download/details.aspx?id=5555).
 | 
			
		||||
 | 
			
		||||
### On Windows, how should I set up ffmpeg and youtube-dl? Where should I put the exe files?
 | 
			
		||||
 | 
			
		||||
@@ -835,6 +848,12 @@ It is *not* possible to detect whether a URL is supported or not. That's because
 | 
			
		||||
 | 
			
		||||
If you want to find out whether a given URL is supported, simply call youtube-dl with it. If you get no videos back, chances are the URL is either not referring to a video or unsupported. You can find out which by examining the output (if you run youtube-dl on the console) or catching an `UnsupportedError` exception if you run it from a Python program.
 | 
			
		||||
 | 
			
		||||
# Why do I need to go through that much red tape when filing bugs?
 | 
			
		||||
 | 
			
		||||
Before we had the issue template, despite our extensive [bug reporting instructions](#bugs), about 80% of the issue reports we got were useless, for instance because people used ancient versions hundreds of releases old, because of simple syntactic errors (not in youtube-dl but in general shell usage), because the problem was already reported multiple times before, because people did not actually read an error message, even if it said "please install ffmpeg", because people did not mention the URL they were trying to download and many more simple, easy-to-avoid problems, many of which were totally unrelated to youtube-dl.
 | 
			
		||||
 | 
			
		||||
youtube-dl is an open-source project manned by too few volunteers, so we'd rather spend time fixing bugs where we are certain none of those simple problems apply, and where we can be reasonably confident to be able to reproduce the issue without asking the reporter repeatedly. As such, the output of `youtube-dl -v YOUR_URL_HERE` is really all that's required to file an issue. The issue template also guides you through some basic steps you can do, such as checking that your version of youtube-dl is current.
 | 
			
		||||
 | 
			
		||||
# DEVELOPER INSTRUCTIONS
 | 
			
		||||
 | 
			
		||||
Most users do not need to build youtube-dl and can [download the builds](http://rg3.github.io/youtube-dl/download.html) or get them from their distribution.
 | 
			
		||||
 
 | 
			
		||||
@@ -1,17 +1,38 @@
 | 
			
		||||
#!/usr/bin/python3
 | 
			
		||||
 | 
			
		||||
from http.server import HTTPServer, BaseHTTPRequestHandler
 | 
			
		||||
from socketserver import ThreadingMixIn
 | 
			
		||||
import argparse
 | 
			
		||||
import ctypes
 | 
			
		||||
import functools
 | 
			
		||||
import shutil
 | 
			
		||||
import subprocess
 | 
			
		||||
import sys
 | 
			
		||||
import tempfile
 | 
			
		||||
import threading
 | 
			
		||||
import traceback
 | 
			
		||||
import os.path
 | 
			
		||||
 | 
			
		||||
sys.path.insert(0, os.path.dirname(os.path.dirname((os.path.abspath(__file__)))))
 | 
			
		||||
from youtube_dl.compat import (
 | 
			
		||||
    compat_input,
 | 
			
		||||
    compat_http_server,
 | 
			
		||||
    compat_str,
 | 
			
		||||
    compat_urlparse,
 | 
			
		||||
)
 | 
			
		||||
 | 
			
		||||
class BuildHTTPServer(ThreadingMixIn, HTTPServer):
 | 
			
		||||
# These are not used outside of buildserver.py thus not in compat.py
 | 
			
		||||
 | 
			
		||||
try:
 | 
			
		||||
    import winreg as compat_winreg
 | 
			
		||||
except ImportError:  # Python 2
 | 
			
		||||
    import _winreg as compat_winreg
 | 
			
		||||
 | 
			
		||||
try:
 | 
			
		||||
    import socketserver as compat_socketserver
 | 
			
		||||
except ImportError:  # Python 2
 | 
			
		||||
    import SocketServer as compat_socketserver
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class BuildHTTPServer(compat_socketserver.ThreadingMixIn, compat_http_server.HTTPServer):
    # HTTP server for the build service; ThreadingMixIn handles each request
    # in its own thread.  allow_reuse_address lets the server rebind to the
    # same port immediately after a restart (skips the TIME_WAIT delay).
    allow_reuse_address = True
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@@ -191,7 +212,7 @@ def main(args=None):
 | 
			
		||||
                        action='store_const', dest='action', const='service',
 | 
			
		||||
                        help='Run as a Windows service')
 | 
			
		||||
    parser.add_argument('-b', '--bind', metavar='<host:port>',
 | 
			
		||||
                        action='store', default='localhost:8142',
 | 
			
		||||
                        action='store', default='0.0.0.0:8142',
 | 
			
		||||
                        help='Bind to host:port (default %default)')
 | 
			
		||||
    options = parser.parse_args(args=args)
 | 
			
		||||
 | 
			
		||||
@@ -216,7 +237,7 @@ def main(args=None):
 | 
			
		||||
    srv = BuildHTTPServer((host, port), BuildHTTPRequestHandler)
 | 
			
		||||
    thr = threading.Thread(target=srv.serve_forever)
 | 
			
		||||
    thr.start()
 | 
			
		||||
    input('Press ENTER to shut down')
 | 
			
		||||
    compat_input('Press ENTER to shut down')
 | 
			
		||||
    srv.shutdown()
 | 
			
		||||
    thr.join()
 | 
			
		||||
 | 
			
		||||
@@ -231,8 +252,6 @@ def rmtree(path):
 | 
			
		||||
            os.remove(fname)
 | 
			
		||||
    os.rmdir(path)
 | 
			
		||||
 | 
			
		||||
#==============================================================================
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class BuildError(Exception):
 | 
			
		||||
    def __init__(self, output, code=500):
 | 
			
		||||
@@ -249,15 +268,25 @@ class HTTPError(BuildError):
 | 
			
		||||
 | 
			
		||||
class PythonBuilder(object):
 | 
			
		||||
    def __init__(self, **kwargs):
 | 
			
		||||
        pythonVersion = kwargs.pop('python', '2.7')
 | 
			
		||||
        try:
 | 
			
		||||
            key = _winreg.OpenKey(_winreg.HKEY_LOCAL_MACHINE, r'SOFTWARE\Python\PythonCore\%s\InstallPath' % pythonVersion)
 | 
			
		||||
        python_version = kwargs.pop('python', '3.4')
 | 
			
		||||
        python_path = None
 | 
			
		||||
        for node in ('Wow6432Node\\', ''):
 | 
			
		||||
            try:
 | 
			
		||||
                self.pythonPath, _ = _winreg.QueryValueEx(key, '')
 | 
			
		||||
            finally:
 | 
			
		||||
                _winreg.CloseKey(key)
 | 
			
		||||
        except Exception:
 | 
			
		||||
            raise BuildError('No such Python version: %s' % pythonVersion)
 | 
			
		||||
                key = compat_winreg.OpenKey(
 | 
			
		||||
                    compat_winreg.HKEY_LOCAL_MACHINE,
 | 
			
		||||
                    r'SOFTWARE\%sPython\PythonCore\%s\InstallPath' % (node, python_version))
 | 
			
		||||
                try:
 | 
			
		||||
                    python_path, _ = compat_winreg.QueryValueEx(key, '')
 | 
			
		||||
                finally:
 | 
			
		||||
                    compat_winreg.CloseKey(key)
 | 
			
		||||
                break
 | 
			
		||||
            except Exception:
 | 
			
		||||
                pass
 | 
			
		||||
 | 
			
		||||
        if not python_path:
 | 
			
		||||
            raise BuildError('No such Python version: %s' % python_version)
 | 
			
		||||
 | 
			
		||||
        self.pythonPath = python_path
 | 
			
		||||
 | 
			
		||||
        super(PythonBuilder, self).__init__(**kwargs)
 | 
			
		||||
 | 
			
		||||
@@ -305,8 +334,10 @@ class YoutubeDLBuilder(object):
 | 
			
		||||
 | 
			
		||||
    def build(self):
 | 
			
		||||
        try:
 | 
			
		||||
            subprocess.check_output([os.path.join(self.pythonPath, 'python.exe'), 'setup.py', 'py2exe'],
 | 
			
		||||
                                    cwd=self.buildPath)
 | 
			
		||||
            proc = subprocess.Popen([os.path.join(self.pythonPath, 'python.exe'), 'setup.py', 'py2exe'], stdin=subprocess.PIPE, cwd=self.buildPath)
 | 
			
		||||
            proc.wait()
 | 
			
		||||
            #subprocess.check_output([os.path.join(self.pythonPath, 'python.exe'), 'setup.py', 'py2exe'],
 | 
			
		||||
            #                        cwd=self.buildPath)
 | 
			
		||||
        except subprocess.CalledProcessError as e:
 | 
			
		||||
            raise BuildError(e.output)
 | 
			
		||||
 | 
			
		||||
@@ -369,12 +400,12 @@ class Builder(PythonBuilder, GITBuilder, YoutubeDLBuilder, DownloadBuilder, Clea
 | 
			
		||||
    pass
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class BuildHTTPRequestHandler(BaseHTTPRequestHandler):
 | 
			
		||||
class BuildHTTPRequestHandler(compat_http_server.BaseHTTPRequestHandler):
 | 
			
		||||
    actionDict = {'build': Builder, 'download': Builder}  # They're the same, no more caching.
 | 
			
		||||
 | 
			
		||||
    def do_GET(self):
 | 
			
		||||
        path = urlparse.urlparse(self.path)
 | 
			
		||||
        paramDict = dict([(key, value[0]) for key, value in urlparse.parse_qs(path.query).items()])
 | 
			
		||||
        path = compat_urlparse.urlparse(self.path)
 | 
			
		||||
        paramDict = dict([(key, value[0]) for key, value in compat_urlparse.parse_qs(path.query).items()])
 | 
			
		||||
        action, _, path = path.path.strip('/').partition('/')
 | 
			
		||||
        if path:
 | 
			
		||||
            path = path.split('/')
 | 
			
		||||
@@ -388,7 +419,7 @@ class BuildHTTPRequestHandler(BaseHTTPRequestHandler):
 | 
			
		||||
                        builder.close()
 | 
			
		||||
                except BuildError as e:
 | 
			
		||||
                    self.send_response(e.code)
 | 
			
		||||
                    msg = unicode(e).encode('UTF-8')
 | 
			
		||||
                    msg = compat_str(e).encode('UTF-8')
 | 
			
		||||
                    self.send_header('Content-Type', 'text/plain; charset=UTF-8')
 | 
			
		||||
                    self.send_header('Content-Length', len(msg))
 | 
			
		||||
                    self.end_headers()
 | 
			
		||||
@@ -400,7 +431,5 @@ class BuildHTTPRequestHandler(BaseHTTPRequestHandler):
 | 
			
		||||
        else:
 | 
			
		||||
            self.send_response(500, 'Malformed URL')
 | 
			
		||||
 | 
			
		||||
#==============================================================================
 | 
			
		||||
 | 
			
		||||
if __name__ == '__main__':
 | 
			
		||||
    main()
 | 
			
		||||
 
 | 
			
		||||
							
								
								
									
										111
									
								
								devscripts/create-github-release.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										111
									
								
								devscripts/create-github-release.py
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,111 @@
 | 
			
		||||
#!/usr/bin/env python
 | 
			
		||||
from __future__ import unicode_literals
 | 
			
		||||
 | 
			
		||||
import base64
 | 
			
		||||
import json
 | 
			
		||||
import mimetypes
 | 
			
		||||
import netrc
 | 
			
		||||
import optparse
 | 
			
		||||
import os
 | 
			
		||||
import sys
 | 
			
		||||
 | 
			
		||||
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
 | 
			
		||||
 | 
			
		||||
from youtube_dl.compat import (
 | 
			
		||||
    compat_basestring,
 | 
			
		||||
    compat_input,
 | 
			
		||||
    compat_getpass,
 | 
			
		||||
    compat_print,
 | 
			
		||||
    compat_urllib_request,
 | 
			
		||||
)
 | 
			
		||||
from youtube_dl.utils import (
 | 
			
		||||
    make_HTTPS_handler,
 | 
			
		||||
    sanitized_Request,
 | 
			
		||||
)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class GitHubReleaser(object):
    """Create GitHub releases for rg3/youtube-dl and upload release assets.

    Credentials are taken from ~/.netrc (machine ``github.com``) when
    available; otherwise the user is prompted interactively.
    """
    _API_URL = 'https://api.github.com/repos/rg3/youtube-dl/releases'
    _UPLOADS_URL = 'https://uploads.github.com/repos/rg3/youtube-dl/releases/%s/assets?name=%s'
    _NETRC_MACHINE = 'github.com'

    def __init__(self, debuglevel=0):
        # debuglevel is forwarded to the HTTPS handler (non-zero dumps traffic).
        self._init_github_account()
        https_handler = make_HTTPS_handler({}, debuglevel=debuglevel)
        self._opener = compat_urllib_request.build_opener(https_handler)

    def _init_github_account(self):
        """Populate self._username / self._password from .netrc, else prompt."""
        try:
            info = netrc.netrc().authenticators(self._NETRC_MACHINE)
            if info is not None:
                self._username = info[0]
                self._password = info[2]
                compat_print('Using GitHub credentials found in .netrc...')
                return
            else:
                compat_print('No GitHub credentials found in .netrc')
        except (IOError, netrc.NetrcParseError):
            compat_print('Unable to parse .netrc')
        self._username = compat_input(
            'Type your GitHub username or email address and press [Return]: ')
        self._password = compat_getpass(
            'Type your GitHub password and press [Return]: ')

    def _call(self, req):
        """Send a request (URL string or Request object) and return parsed JSON."""
        if isinstance(req, compat_basestring):
            req = sanitized_Request(req)
        # Authorizing manually since GitHub does not respond with 401 with
        # WWW-Authenticate header set (see
        # https://developer.github.com/v3/#basic-authentication)
        b64 = base64.b64encode(
            ('%s:%s' % (self._username, self._password)).encode('utf-8')).decode('ascii')
        req.add_header('Authorization', 'Basic %s' % b64)
        response = self._opener.open(req).read().decode('utf-8')
        return json.loads(response)

    def list_releases(self):
        """Return the existing releases as decoded GitHub API JSON."""
        return self._call(self._API_URL)

    def create_release(self, tag_name, name=None, body='', draft=False, prerelease=False):
        """Create a release for tag_name on master; return the API response."""
        data = {
            'tag_name': tag_name,
            'target_commitish': 'master',
            'name': name,
            'body': body,
            'draft': draft,
            'prerelease': prerelease,
        }
        req = sanitized_Request(self._API_URL, json.dumps(data).encode('utf-8'))
        return self._call(req)

    def create_asset(self, release_id, asset):
        """Upload the file at path ``asset`` as an asset of release ``release_id``."""
        asset_name = os.path.basename(asset)
        url = self._UPLOADS_URL % (release_id, asset_name)
        # Our files are small enough to be loaded directly into memory.
        # Use a context manager so the file handle is closed deterministically
        # (the previous open(...).read() left it to garbage collection).
        with open(asset, 'rb') as f:
            data = f.read()
        req = sanitized_Request(url, data)
        mime_type, _ = mimetypes.guess_type(asset_name)
        req.add_header('Content-Type', mime_type or 'application/octet-stream')
        return self._call(req)
 | 
			
		||||
 | 
			
		||||
def main():
    """Command-line entry point.

    Usage: ``%prog VERSION BUILDPATH`` — creates a GitHub release named
    'youtube-dl VERSION' and uploads every file found in BUILDPATH as an
    asset of that release.
    """
    opt_parser = optparse.OptionParser(usage='%prog VERSION BUILDPATH')
    _, positional = opt_parser.parse_args()
    if len(positional) != 2:
        opt_parser.error('Expected a version and a build directory')

    version, build_path = positional

    releaser = GitHubReleaser()
    release_info = releaser.create_release(version, name='youtube-dl %s' % version)
    release_id = release_info['id']

    for filename in os.listdir(build_path):
        compat_print('Uploading %s...' % filename)
        releaser.create_asset(release_id, os.path.join(build_path, filename))


if __name__ == '__main__':
    main()
 | 
			
		||||
							
								
								
									
										8
									
								
								devscripts/install_srelay.sh
									
									
									
									
									
										Executable file
									
								
							
							
						
						
									
										8
									
								
								devscripts/install_srelay.sh
									
									
									
									
									
										Executable file
									
								
							@@ -0,0 +1,8 @@
 | 
			
		||||
#!/bin/bash

# Download and build srelay (a SOCKS proxy relay) into ./tmp/srelay-0.4.8b6,
# used for testing youtube-dl's SOCKS proxy support.
# -N makes wget skip the download if the local copy is already up to date.
mkdir -p tmp && cd tmp
wget -N http://downloads.sourceforge.net/project/socks-relay/socks-relay/srelay-0.4.8/srelay-0.4.8b6.tar.gz
tar zxvf srelay-0.4.8b6.tar.gz
cd srelay-0.4.8b6
./configure
make
 | 
			
		||||
@@ -1,13 +1,46 @@
 | 
			
		||||
from __future__ import unicode_literals
 | 
			
		||||
 | 
			
		||||
import io
 | 
			
		||||
import optparse
 | 
			
		||||
import os.path
 | 
			
		||||
import sys
 | 
			
		||||
import re
 | 
			
		||||
 | 
			
		||||
ROOT_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
 | 
			
		||||
README_FILE = os.path.join(ROOT_DIR, 'README.md')
 | 
			
		||||
 | 
			
		||||
PREFIX = '''%YOUTUBE-DL(1)
 | 
			
		||||
 | 
			
		||||
# NAME
 | 
			
		||||
 | 
			
		||||
youtube\-dl \- download videos from youtube.com or other video platforms
 | 
			
		||||
 | 
			
		||||
# SYNOPSIS
 | 
			
		||||
 | 
			
		||||
**youtube-dl** \[OPTIONS\] URL [URL...]
 | 
			
		||||
 | 
			
		||||
'''
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def main():
    """Turn README.md into pandoc manpage source and write it to OUTFILE.md."""
    opt_parser = optparse.OptionParser(usage='%prog OUTFILE.md')
    _, args = opt_parser.parse_args()
    if len(args) != 1:
        opt_parser.error('Expected an output filename')

    outfile = args[0]

    with io.open(README_FILE, encoding='utf-8') as readme_file:
        text = readme_file.read()

    # Drop everything before the '# DESCRIPTION' heading, remove the
    # synopsis line, then prepend the manpage header.
    text = re.sub(r'(?s)^.*?(?=# DESCRIPTION)', '', text)
    text = re.sub(r'\s+youtube-dl \[OPTIONS\] URL \[URL\.\.\.\]', '', text)
    text = PREFIX + text

    text = filter_options(text)

    with io.open(outfile, 'w', encoding='utf-8') as out_file:
        out_file.write(text)
			
		||||
 | 
			
		||||
 | 
			
		||||
def filter_options(readme):
 | 
			
		||||
    ret = ''
 | 
			
		||||
@@ -37,27 +70,5 @@ def filter_options(readme):
 | 
			
		||||
 | 
			
		||||
    return ret
 | 
			
		||||
 | 
			
		||||
with io.open(README_FILE, encoding='utf-8') as f:
 | 
			
		||||
    readme = f.read()
 | 
			
		||||
 | 
			
		||||
PREFIX = '''%YOUTUBE-DL(1)
 | 
			
		||||
 | 
			
		||||
# NAME
 | 
			
		||||
 | 
			
		||||
youtube\-dl \- download videos from youtube.com or other video platforms
 | 
			
		||||
 | 
			
		||||
# SYNOPSIS
 | 
			
		||||
 | 
			
		||||
**youtube-dl** \[OPTIONS\] URL [URL...]
 | 
			
		||||
 | 
			
		||||
'''
 | 
			
		||||
readme = re.sub(r'(?s)^.*?(?=# DESCRIPTION)', '', readme)
 | 
			
		||||
readme = re.sub(r'\s+youtube-dl \[OPTIONS\] URL \[URL\.\.\.\]', '', readme)
 | 
			
		||||
readme = PREFIX + readme
 | 
			
		||||
 | 
			
		||||
readme = filter_options(readme)
 | 
			
		||||
 | 
			
		||||
if sys.version_info < (3, 0):
 | 
			
		||||
    print(readme.encode('utf-8'))
 | 
			
		||||
else:
 | 
			
		||||
    print(readme)
 | 
			
		||||
if __name__ == '__main__':
 | 
			
		||||
    main()
 | 
			
		||||
 
 | 
			
		||||
@@ -6,7 +6,7 @@
 | 
			
		||||
# * the git config user.signingkey is properly set
 | 
			
		||||
 | 
			
		||||
# You will need
 | 
			
		||||
# pip install coverage nose rsa
 | 
			
		||||
# pip install coverage nose rsa wheel
 | 
			
		||||
 | 
			
		||||
# TODO
 | 
			
		||||
# release notes
 | 
			
		||||
@@ -15,10 +15,28 @@
 | 
			
		||||
set -e
 | 
			
		||||
 | 
			
		||||
skip_tests=true
 | 
			
		||||
if [ "$1" = '--run-tests' ]; then
 | 
			
		||||
    skip_tests=false
 | 
			
		||||
    shift
 | 
			
		||||
fi
 | 
			
		||||
buildserver='localhost:8142'
 | 
			
		||||
 | 
			
		||||
while true
 | 
			
		||||
do
 | 
			
		||||
case "$1" in
 | 
			
		||||
    --run-tests)
 | 
			
		||||
        skip_tests=false
 | 
			
		||||
        shift
 | 
			
		||||
    ;;
 | 
			
		||||
    --buildserver)
 | 
			
		||||
        buildserver="$2"
 | 
			
		||||
        shift 2
 | 
			
		||||
    ;;
 | 
			
		||||
    --*)
 | 
			
		||||
        echo "ERROR: unknown option $1"
 | 
			
		||||
        exit 1
 | 
			
		||||
    ;;
 | 
			
		||||
    *)
 | 
			
		||||
        break
 | 
			
		||||
    ;;
 | 
			
		||||
esac
 | 
			
		||||
done
 | 
			
		||||
 | 
			
		||||
if [ -z "$1" ]; then echo "ERROR: specify version number like this: $0 1994.09.06"; exit 1; fi
 | 
			
		||||
version="$1"
 | 
			
		||||
@@ -33,6 +51,9 @@ if [ ! -z "`git status --porcelain | grep -v CHANGELOG`" ]; then echo 'ERROR: th
 | 
			
		||||
useless_files=$(find youtube_dl -type f -not -name '*.py')
 | 
			
		||||
if [ ! -z "$useless_files" ]; then echo "ERROR: Non-.py files in youtube_dl: $useless_files"; exit 1; fi
 | 
			
		||||
if [ ! -f "updates_key.pem" ]; then echo 'ERROR: updates_key.pem missing'; exit 1; fi
 | 
			
		||||
if ! type pandoc >/dev/null 2>/dev/null; then echo 'ERROR: pandoc is missing'; exit 1; fi
 | 
			
		||||
if ! python3 -c 'import rsa' 2>/dev/null; then echo 'ERROR: python3-rsa is missing'; exit 1; fi
 | 
			
		||||
if ! python3 -c 'import wheel' 2>/dev/null; then echo 'ERROR: wheel is missing'; exit 1; fi
 | 
			
		||||
 | 
			
		||||
/bin/echo -e "\n### First of all, testing..."
 | 
			
		||||
make clean
 | 
			
		||||
@@ -64,7 +85,7 @@ git push origin "$version"
 | 
			
		||||
REV=$(git rev-parse HEAD)
 | 
			
		||||
make youtube-dl youtube-dl.tar.gz
 | 
			
		||||
read -p "VM running? (y/n) " -n 1
 | 
			
		||||
wget "http://localhost:8142/build/rg3/youtube-dl/youtube-dl.exe?rev=$REV" -O youtube-dl.exe
 | 
			
		||||
wget "http://$buildserver/build/rg3/youtube-dl/youtube-dl.exe?rev=$REV" -O youtube-dl.exe
 | 
			
		||||
mkdir -p "build/$version"
 | 
			
		||||
mv youtube-dl youtube-dl.exe "build/$version"
 | 
			
		||||
mv youtube-dl.tar.gz "build/$version/youtube-dl-$version.tar.gz"
 | 
			
		||||
@@ -74,15 +95,16 @@ RELEASE_FILES="youtube-dl youtube-dl.exe youtube-dl-$version.tar.gz"
 | 
			
		||||
(cd build/$version/ && sha256sum $RELEASE_FILES > SHA2-256SUMS)
 | 
			
		||||
(cd build/$version/ && sha512sum $RELEASE_FILES > SHA2-512SUMS)
 | 
			
		||||
 | 
			
		||||
/bin/echo -e "\n### Signing and uploading the new binaries to yt-dl.org ..."
 | 
			
		||||
/bin/echo -e "\n### Signing and uploading the new binaries to GitHub..."
 | 
			
		||||
for f in $RELEASE_FILES; do gpg --passphrase-repeat 5 --detach-sig "build/$version/$f"; done
 | 
			
		||||
scp -r "build/$version" ytdl@yt-dl.org:html/tmp/
 | 
			
		||||
ssh ytdl@yt-dl.org "mv html/tmp/$version html/downloads/"
 | 
			
		||||
 | 
			
		||||
ROOT=$(pwd)
 | 
			
		||||
python devscripts/create-github-release.py $version "$ROOT/build/$version"
 | 
			
		||||
 | 
			
		||||
ssh ytdl@yt-dl.org "sh html/update_latest.sh $version"
 | 
			
		||||
 | 
			
		||||
/bin/echo -e "\n### Now switching to gh-pages..."
 | 
			
		||||
git clone --branch gh-pages --single-branch . build/gh-pages
 | 
			
		||||
ROOT=$(pwd)
 | 
			
		||||
(
 | 
			
		||||
    set -e
 | 
			
		||||
    ORIGIN_URL=$(git config --get remote.origin.url)
 | 
			
		||||
 
 | 
			
		||||
@@ -6,6 +6,7 @@
 | 
			
		||||
 - **22tracks:genre**
 | 
			
		||||
 - **22tracks:track**
 | 
			
		||||
 - **24video**
 | 
			
		||||
 - **3qsdn**: 3Q SDN
 | 
			
		||||
 - **3sat**
 | 
			
		||||
 - **4tube**
 | 
			
		||||
 - **56.com**
 | 
			
		||||
@@ -15,6 +16,8 @@
 | 
			
		||||
 - **9gag**
 | 
			
		||||
 - **abc.net.au**
 | 
			
		||||
 - **Abc7News**
 | 
			
		||||
 - **abcnews**
 | 
			
		||||
 - **abcnews:video**
 | 
			
		||||
 - **AcademicEarth:Course**
 | 
			
		||||
 - **acast**
 | 
			
		||||
 - **acast:channel**
 | 
			
		||||
@@ -25,6 +28,7 @@
 | 
			
		||||
 - **AdobeTVVideo**
 | 
			
		||||
 - **AdultSwim**
 | 
			
		||||
 - **aenetworks**: A+E Networks: A&E, Lifetime, History.com, FYI Network
 | 
			
		||||
 - **AfreecaTV**: afreecatv.com
 | 
			
		||||
 - **Aftonbladet**
 | 
			
		||||
 - **AirMozilla**
 | 
			
		||||
 - **AlJazeera**
 | 
			
		||||
@@ -40,8 +44,8 @@
 | 
			
		||||
 - **appletrailers:section**
 | 
			
		||||
 - **archive.org**: archive.org videos
 | 
			
		||||
 - **ARD**
 | 
			
		||||
 - **ARD:mediathek**: Saarländischer Rundfunk
 | 
			
		||||
 - **ARD:mediathek**
 | 
			
		||||
 - **ARD:mediathek**: Saarländischer Rundfunk
 | 
			
		||||
 - **arte.tv**
 | 
			
		||||
 - **arte.tv:+7**
 | 
			
		||||
 - **arte.tv:cinema**
 | 
			
		||||
@@ -52,6 +56,7 @@
 | 
			
		||||
 - **arte.tv:future**
 | 
			
		||||
 - **arte.tv:info**
 | 
			
		||||
 - **arte.tv:magazine**
 | 
			
		||||
 - **arte.tv:playlist**
 | 
			
		||||
 - **AtresPlayer**
 | 
			
		||||
 - **ATTTechChannel**
 | 
			
		||||
 - **AudiMedia**
 | 
			
		||||
@@ -77,6 +82,7 @@
 | 
			
		||||
 - **Bild**: Bild.de
 | 
			
		||||
 - **BiliBili**
 | 
			
		||||
 - **BioBioChileTV**
 | 
			
		||||
 - **BIQLE**
 | 
			
		||||
 - **BleacherReport**
 | 
			
		||||
 - **BleacherReportCMS**
 | 
			
		||||
 - **blinkx**
 | 
			
		||||
@@ -102,6 +108,7 @@
 | 
			
		||||
 - **CBCPlayer**
 | 
			
		||||
 - **CBS**
 | 
			
		||||
 - **CBSInteractive**
 | 
			
		||||
 - **CBSLocal**
 | 
			
		||||
 - **CBSNews**: CBS News
 | 
			
		||||
 - **CBSNewsLiveVideo**: CBS News Live Videos
 | 
			
		||||
 - **CBSSports**
 | 
			
		||||
@@ -113,7 +120,6 @@
 | 
			
		||||
 - **chirbit**
 | 
			
		||||
 - **chirbit:profile**
 | 
			
		||||
 - **Cinchcast**
 | 
			
		||||
 - **Cinemassacre**
 | 
			
		||||
 - **Clipfish**
 | 
			
		||||
 - **cliphunter**
 | 
			
		||||
 - **ClipRs**
 | 
			
		||||
@@ -127,12 +133,12 @@
 | 
			
		||||
 - **CNN**
 | 
			
		||||
 - **CNNArticle**
 | 
			
		||||
 - **CNNBlogs**
 | 
			
		||||
 - **CollegeHumor**
 | 
			
		||||
 - **CollegeRama**
 | 
			
		||||
 - **ComCarCoff**
 | 
			
		||||
 - **ComedyCentral**
 | 
			
		||||
 - **ComedyCentralShows**: The Daily Show / The Colbert Report
 | 
			
		||||
 - **CondeNast**: Condé Nast media group: Allure, Architectural Digest, Ars Technica, Bon Appétit, Brides, Condé Nast, Condé Nast Traveler, Details, Epicurious, GQ, Glamour, Golf Digest, SELF, Teen Vogue, The New Yorker, Vanity Fair, Vogue, W Magazine, WIRED
 | 
			
		||||
 - **Coub**
 | 
			
		||||
 - **Cracked**
 | 
			
		||||
 - **Crackle**
 | 
			
		||||
 - **Criterion**
 | 
			
		||||
@@ -145,6 +151,7 @@
 | 
			
		||||
 - **culturebox.francetvinfo.fr**
 | 
			
		||||
 - **CultureUnplugged**
 | 
			
		||||
 - **CWTV**
 | 
			
		||||
 - **DailyMail**
 | 
			
		||||
 - **dailymotion**
 | 
			
		||||
 - **dailymotion:playlist**
 | 
			
		||||
 - **dailymotion:user**
 | 
			
		||||
@@ -201,6 +208,7 @@
 | 
			
		||||
 - **exfm**: ex.fm
 | 
			
		||||
 - **ExpoTV**
 | 
			
		||||
 - **ExtremeTube**
 | 
			
		||||
 - **EyedoTV**
 | 
			
		||||
 - **facebook**
 | 
			
		||||
 - **faz.net**
 | 
			
		||||
 - **fc2**
 | 
			
		||||
@@ -212,6 +220,7 @@
 | 
			
		||||
 - **Flickr**
 | 
			
		||||
 - **Folketinget**: Folketinget (ft.dk; Danish parliament)
 | 
			
		||||
 - **FootyRoom**
 | 
			
		||||
 - **Formula1**
 | 
			
		||||
 - **FOX**
 | 
			
		||||
 - **Foxgay**
 | 
			
		||||
 - **FoxNews**: Fox News and Fox Business Video
 | 
			
		||||
@@ -245,6 +254,7 @@
 | 
			
		||||
 - **Globo**
 | 
			
		||||
 - **GloboArticle**
 | 
			
		||||
 - **GodTube**
 | 
			
		||||
 - **GodTV**
 | 
			
		||||
 - **GoldenMoustache**
 | 
			
		||||
 - **Golem**
 | 
			
		||||
 - **GoogleDrive**
 | 
			
		||||
@@ -315,20 +325,24 @@
 | 
			
		||||
 - **la7.tv**
 | 
			
		||||
 - **Laola1Tv**
 | 
			
		||||
 - **Le**: 乐视网
 | 
			
		||||
 - **Learnr**
 | 
			
		||||
 - **Lecture2Go**
 | 
			
		||||
 - **Lemonde**
 | 
			
		||||
 - **LePlaylist**
 | 
			
		||||
 - **LetvCloud**: 乐视云
 | 
			
		||||
 - **Libsyn**
 | 
			
		||||
 - **life**: Life.ru
 | 
			
		||||
 - **life:embed**
 | 
			
		||||
 - **lifenews**: LIFE | NEWS
 | 
			
		||||
 - **limelight**
 | 
			
		||||
 - **limelight:channel**
 | 
			
		||||
 - **limelight:channel_list**
 | 
			
		||||
 - **LiTV**
 | 
			
		||||
 - **LiveLeak**
 | 
			
		||||
 - **livestream**
 | 
			
		||||
 - **livestream:original**
 | 
			
		||||
 - **LnkGo**
 | 
			
		||||
 - **loc**: Library of Congress
 | 
			
		||||
 - **LocalNews8**
 | 
			
		||||
 - **LoveHomePorn**
 | 
			
		||||
 - **lrt.lt**
 | 
			
		||||
 - **lynda**: lynda.com videos
 | 
			
		||||
@@ -374,6 +388,8 @@
 | 
			
		||||
 - **mtvservices:embedded**
 | 
			
		||||
 - **MuenchenTV**: münchen.tv
 | 
			
		||||
 - **MusicPlayOn**
 | 
			
		||||
 - **mva**: Microsoft Virtual Academy videos
 | 
			
		||||
 - **mva:course**: Microsoft Virtual Academy courses
 | 
			
		||||
 - **Mwave**
 | 
			
		||||
 - **MwaveMeetGreet**
 | 
			
		||||
 - **MySpace**
 | 
			
		||||
@@ -463,7 +479,8 @@
 | 
			
		||||
 - **pbs**: Public Broadcasting Service (PBS) and member stations: PBS: Public Broadcasting Service, APT - Alabama Public Television (WBIQ), GPB/Georgia Public Broadcasting (WGTV), Mississippi Public Broadcasting (WMPN), Nashville Public Television (WNPT), WFSU-TV (WFSU), WSRE (WSRE), WTCI (WTCI), WPBA/Channel 30 (WPBA), Alaska Public Media (KAKM), Arizona PBS (KAET), KNME-TV/Channel 5 (KNME), Vegas PBS (KLVX), AETN/ARKANSAS ETV NETWORK (KETS), KET (WKLE), WKNO/Channel 10 (WKNO), LPB/LOUISIANA PUBLIC BROADCASTING (WLPB), OETA (KETA), Ozarks Public Television (KOZK), WSIU Public Broadcasting (WSIU), KEET TV (KEET), KIXE/Channel 9 (KIXE), KPBS San Diego (KPBS), KQED (KQED), KVIE Public Television (KVIE), PBS SoCal/KOCE (KOCE), ValleyPBS (KVPT), CONNECTICUT PUBLIC TELEVISION (WEDH), KNPB Channel 5 (KNPB), SOPTV (KSYS), Rocky Mountain PBS (KRMA), KENW-TV3 (KENW), KUED Channel 7 (KUED), Wyoming PBS (KCWC), Colorado Public Television / KBDI 12 (KBDI), KBYU-TV (KBYU), Thirteen/WNET New York (WNET), WGBH/Channel 2 (WGBH), WGBY (WGBY), NJTV Public Media NJ (WNJT), WLIW21 (WLIW), mpt/Maryland Public Television (WMPB), WETA Television and Radio (WETA), WHYY (WHYY), PBS 39 (WLVT), WVPT - Your Source for PBS and More! 
(WVPT), Howard University Television (WHUT), WEDU PBS (WEDU), WGCU Public Media (WGCU), WPBT2 (WPBT), WUCF TV (WUCF), WUFT/Channel 5 (WUFT), WXEL/Channel 42 (WXEL), WLRN/Channel 17 (WLRN), WUSF Public Broadcasting (WUSF), ETV (WRLK), UNC-TV (WUNC), PBS Hawaii - Oceanic Cable Channel 10 (KHET), Idaho Public Television (KAID), KSPS (KSPS), OPB (KOPB), KWSU/Channel 10 & KTNW/Channel 31 (KWSU), WILL-TV (WILL), Network Knowledge - WSEC/Springfield (WSEC), WTTW11 (WTTW), Iowa Public Television/IPTV (KDIN), Nine Network (KETC), PBS39 Fort Wayne (WFWA), WFYI Indianapolis (WFYI), Milwaukee Public Television (WMVS), WNIN (WNIN), WNIT Public Television (WNIT), WPT (WPNE), WVUT/Channel 22 (WVUT), WEIU/Channel 51 (WEIU), WQPT-TV (WQPT), WYCC PBS Chicago (WYCC), WIPB-TV (WIPB), WTIU (WTIU), CET  (WCET), ThinkTVNetwork (WPTD), WBGU-TV (WBGU), WGVU TV (WGVU), NET1 (KUON), Pioneer Public Television (KWCM), SDPB Television (KUSD), TPT (KTCA), KSMQ (KSMQ), KPTS/Channel 8 (KPTS), KTWU/Channel 11 (KTWU), East Tennessee PBS (WSJK), WCTE-TV (WCTE), WLJT, Channel 11 (WLJT), WOSU TV (WOSU), WOUB/WOUC (WOUB), WVPB (WVPB), WKYU-PBS (WKYU), KERA 13 (KERA), MPBN (WCBB), Mountain Lake PBS (WCFE), NHPTV (WENH), Vermont PBS (WETK), witf (WITF), WQED Multimedia (WQED), WMHT Educational Telecommunications (WMHT), Q-TV (WDCQ), WTVS Detroit Public TV (WTVS), CMU Public Television (WCMU), WKAR-TV (WKAR), WNMU-TV Public TV 13 (WNMU), WDSE - WRPT (WDSE), WGTE TV (WGTE), Lakeland Public Television (KAWE), KMOS-TV - Channels 6.1, 6.2 and 6.3 (KMOS), MontanaPBS (KUSM), KRWG/Channel 22 (KRWG), KACV (KACV), KCOS/Channel 13 (KCOS), WCNY/Channel 24 (WCNY), WNED (WNED), WPBS (WPBS), WSKG Public TV (WSKG), WXXI (WXXI), WPSU (WPSU), WVIA Public Media Studios (WVIA), WTVI (WTVI), Western Reserve PBS (WNEO), WVIZ/PBS ideastream (WVIZ), KCTS 9 (KCTS), Basin PBS (KPBT), KUHT / Channel 8 (KUHT), KLRN (KLRN), KLRU (KLRU), WTJX Channel 12 (WTJX), WCVE PBS (WCVE), KBTC Public Television (KBTC)
 | 
			
		||||
 - **pcmag**
 | 
			
		||||
 - **People**
 | 
			
		||||
 - **Periscope**: Periscope
 | 
			
		||||
 - **periscope**: Periscope
 | 
			
		||||
 - **periscope:user**: Periscope user videos
 | 
			
		||||
 - **PhilharmonieDeParis**: Philharmonie de Paris
 | 
			
		||||
 - **phoenix.de**
 | 
			
		||||
 - **Photobucket**
 | 
			
		||||
@@ -501,6 +518,8 @@
 | 
			
		||||
 - **R7**
 | 
			
		||||
 - **radio.de**
 | 
			
		||||
 - **radiobremen**
 | 
			
		||||
 - **radiocanada**
 | 
			
		||||
 - **RadioCanadaAudioVideo**
 | 
			
		||||
 - **radiofrance**
 | 
			
		||||
 - **RadioJavan**
 | 
			
		||||
 - **Rai**
 | 
			
		||||
@@ -510,8 +529,10 @@
 | 
			
		||||
 - **RedTube**
 | 
			
		||||
 - **RegioTV**
 | 
			
		||||
 - **Restudy**
 | 
			
		||||
 - **Reuters**
 | 
			
		||||
 - **ReverbNation**
 | 
			
		||||
 - **Revision3**
 | 
			
		||||
 - **revision**
 | 
			
		||||
 - **revision3:embed**
 | 
			
		||||
 - **RICE**
 | 
			
		||||
 - **RingTV**
 | 
			
		||||
 - **RottenTomatoes**
 | 
			
		||||
@@ -550,7 +571,9 @@
 | 
			
		||||
 - **ScreencastOMatic**
 | 
			
		||||
 - **ScreenJunkies**
 | 
			
		||||
 - **ScreenwaveMedia**
 | 
			
		||||
 - **Seeker**
 | 
			
		||||
 - **SenateISVP**
 | 
			
		||||
 - **SendtoNews**
 | 
			
		||||
 - **ServingSys**
 | 
			
		||||
 - **Sexu**
 | 
			
		||||
 - **Shahid**
 | 
			
		||||
@@ -670,11 +693,10 @@
 | 
			
		||||
 - **TVCArticle**
 | 
			
		||||
 - **tvigle**: Интернет-телевидение Tvigle.ru
 | 
			
		||||
 - **tvland.com**
 | 
			
		||||
 - **tvp.pl**
 | 
			
		||||
 - **tvp.pl:Series**
 | 
			
		||||
 - **tvp**: Telewizja Polska
 | 
			
		||||
 - **tvp:series**
 | 
			
		||||
 - **TVPlay**: TV3Play and related services
 | 
			
		||||
 - **Tweakers**
 | 
			
		||||
 - **twitch:bookmarks**
 | 
			
		||||
 - **twitch:chapter**
 | 
			
		||||
 - **twitch:past_broadcasts**
 | 
			
		||||
 - **twitch:profile**
 | 
			
		||||
@@ -692,7 +714,8 @@
 | 
			
		||||
 - **USAToday**
 | 
			
		||||
 - **ustream**
 | 
			
		||||
 - **ustream:channel**
 | 
			
		||||
 - **Ustudio**
 | 
			
		||||
 - **ustudio**
 | 
			
		||||
 - **ustudio:embed**
 | 
			
		||||
 - **Varzesh3**
 | 
			
		||||
 - **Vbox7**
 | 
			
		||||
 - **VeeHD**
 | 
			
		||||
@@ -700,6 +723,7 @@
 | 
			
		||||
 - **Vessel**
 | 
			
		||||
 - **Vesti**: Вести.Ru
 | 
			
		||||
 - **Vevo**
 | 
			
		||||
 - **VevoPlaylist**
 | 
			
		||||
 - **VGTV**: VGTV, BTTV, FTV, Aftenposten and Aftonbladet
 | 
			
		||||
 - **vh1.com**
 | 
			
		||||
 - **Vice**
 | 
			
		||||
@@ -716,6 +740,7 @@
 | 
			
		||||
 - **VideoPremium**
 | 
			
		||||
 - **VideoTt**: video.tt - Your True Tube (Currently broken)
 | 
			
		||||
 - **videoweed**: VideoWeed
 | 
			
		||||
 - **Vidio**
 | 
			
		||||
 - **vidme**
 | 
			
		||||
 - **vidme:user**
 | 
			
		||||
 - **vidme:user:likes**
 | 
			
		||||
@@ -751,17 +776,15 @@
 | 
			
		||||
 - **VRT**
 | 
			
		||||
 - **vube**: Vube.com
 | 
			
		||||
 - **VuClip**
 | 
			
		||||
 - **vulture.com**
 | 
			
		||||
 - **Walla**
 | 
			
		||||
 - **WashingtonPost**
 | 
			
		||||
 - **washingtonpost**
 | 
			
		||||
 - **washingtonpost:article**
 | 
			
		||||
 - **wat.tv**
 | 
			
		||||
 - **WatchIndianPorn**: Watch Indian Porn
 | 
			
		||||
 - **WDR**
 | 
			
		||||
 - **wdr:mobile**
 | 
			
		||||
 - **WDRMaus**: Sendung mit der Maus
 | 
			
		||||
 - **WebOfStories**
 | 
			
		||||
 - **WebOfStoriesPlaylist**
 | 
			
		||||
 - **Weibo**
 | 
			
		||||
 - **WeiqiTV**: WQTV
 | 
			
		||||
 - **wholecloud**: WholeCloud
 | 
			
		||||
 - **Wimp**
 | 
			
		||||
@@ -772,7 +795,7 @@
 | 
			
		||||
 - **WSJ**: Wall Street Journal
 | 
			
		||||
 - **XBef**
 | 
			
		||||
 - **XboxClips**
 | 
			
		||||
 - **XFileShare**: XFileShare based sites: GorillaVid.in, daclips.in, movpod.in, fastvideo.in, realvid.net, filehoot.com and vidto.me
 | 
			
		||||
 - **XFileShare**: XFileShare based sites: DaClips, FileHoot, GorillaVid, MovPod, PowerWatch, Rapidvideo.ws, TheVideoBee, Vidto, Streamin.To
 | 
			
		||||
 - **XHamster**
 | 
			
		||||
 - **XHamsterEmbed**
 | 
			
		||||
 - **xiami:album**: 虾米音乐 - 专辑
 | 
			
		||||
@@ -797,6 +820,7 @@
 | 
			
		||||
 - **Ynet**
 | 
			
		||||
 - **YouJizz**
 | 
			
		||||
 - **youku**: 优酷
 | 
			
		||||
 - **youku:show**
 | 
			
		||||
 - **YouPorn**
 | 
			
		||||
 - **YourUpload**
 | 
			
		||||
 - **youtube**: YouTube.com
 | 
			
		||||
 
 | 
			
		||||
@@ -24,8 +24,13 @@ from youtube_dl.utils import (
 | 
			
		||||
def get_params(override=None):
 | 
			
		||||
    PARAMETERS_FILE = os.path.join(os.path.dirname(os.path.abspath(__file__)),
 | 
			
		||||
                                   "parameters.json")
 | 
			
		||||
    LOCAL_PARAMETERS_FILE = os.path.join(os.path.dirname(os.path.abspath(__file__)),
 | 
			
		||||
                                         "local_parameters.json")
 | 
			
		||||
    with io.open(PARAMETERS_FILE, encoding='utf-8') as pf:
 | 
			
		||||
        parameters = json.load(pf)
 | 
			
		||||
    if os.path.exists(LOCAL_PARAMETERS_FILE):
 | 
			
		||||
        with io.open(LOCAL_PARAMETERS_FILE, encoding='utf-8') as pf:
 | 
			
		||||
            parameters.update(json.load(pf))
 | 
			
		||||
    if override:
 | 
			
		||||
        parameters.update(override)
 | 
			
		||||
    return parameters
 | 
			
		||||
 
 | 
			
		||||
@@ -10,13 +10,14 @@ import unittest
 | 
			
		||||
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
from youtube_dl.utils import get_filesystem_encoding
 | 
			
		||||
from youtube_dl.compat import (
 | 
			
		||||
    compat_getenv,
 | 
			
		||||
    compat_setenv,
 | 
			
		||||
    compat_etree_fromstring,
 | 
			
		||||
    compat_expanduser,
 | 
			
		||||
    compat_shlex_split,
 | 
			
		||||
    compat_str,
 | 
			
		||||
    compat_struct_unpack,
 | 
			
		||||
    compat_urllib_parse_unquote,
 | 
			
		||||
    compat_urllib_parse_unquote_plus,
 | 
			
		||||
    compat_urllib_parse_urlencode,
 | 
			
		||||
@@ -26,19 +27,22 @@ from youtube_dl.compat import (
 | 
			
		||||
class TestCompat(unittest.TestCase):
 | 
			
		||||
    def test_compat_getenv(self):
 | 
			
		||||
        test_str = 'тест'
 | 
			
		||||
        os.environ['YOUTUBE-DL-TEST'] = (
 | 
			
		||||
            test_str if sys.version_info >= (3, 0)
 | 
			
		||||
            else test_str.encode(get_filesystem_encoding()))
 | 
			
		||||
        compat_setenv('YOUTUBE-DL-TEST', test_str)
 | 
			
		||||
        self.assertEqual(compat_getenv('YOUTUBE-DL-TEST'), test_str)
 | 
			
		||||
 | 
			
		||||
    def test_compat_setenv(self):
 | 
			
		||||
        test_var = 'YOUTUBE-DL-TEST'
 | 
			
		||||
        test_str = 'тест'
 | 
			
		||||
        compat_setenv(test_var, test_str)
 | 
			
		||||
        compat_getenv(test_var)
 | 
			
		||||
        self.assertEqual(compat_getenv(test_var), test_str)
 | 
			
		||||
 | 
			
		||||
    def test_compat_expanduser(self):
 | 
			
		||||
        old_home = os.environ.get('HOME')
 | 
			
		||||
        test_str = 'C:\Documents and Settings\тест\Application Data'
 | 
			
		||||
        os.environ['HOME'] = (
 | 
			
		||||
            test_str if sys.version_info >= (3, 0)
 | 
			
		||||
            else test_str.encode(get_filesystem_encoding()))
 | 
			
		||||
        compat_setenv('HOME', test_str)
 | 
			
		||||
        self.assertEqual(compat_expanduser('~'), test_str)
 | 
			
		||||
        os.environ['HOME'] = old_home
 | 
			
		||||
        compat_setenv('HOME', old_home or '')
 | 
			
		||||
 | 
			
		||||
    def test_all_present(self):
 | 
			
		||||
        import youtube_dl.compat
 | 
			
		||||
@@ -99,5 +103,15 @@ class TestCompat(unittest.TestCase):
 | 
			
		||||
        self.assertTrue(isinstance(doc.find('chinese').text, compat_str))
 | 
			
		||||
        self.assertTrue(isinstance(doc.find('foo/bar').text, compat_str))
 | 
			
		||||
 | 
			
		||||
    def test_compat_etree_fromstring_doctype(self):
 | 
			
		||||
        xml = '''<?xml version="1.0"?>
 | 
			
		||||
<!DOCTYPE smil PUBLIC "-//W3C//DTD SMIL 2.0//EN" "http://www.w3.org/2001/SMIL20/SMIL20.dtd">
 | 
			
		||||
<smil xmlns="http://www.w3.org/2001/SMIL20/Language"></smil>'''
 | 
			
		||||
        compat_etree_fromstring(xml)
 | 
			
		||||
 | 
			
		||||
    def test_struct_unpack(self):
 | 
			
		||||
        self.assertEqual(compat_struct_unpack('!B', b'\x00'), (0,))
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
if __name__ == '__main__':
 | 
			
		||||
    unittest.main()
 | 
			
		||||
 
 | 
			
		||||
@@ -16,6 +16,15 @@ import threading
 | 
			
		||||
TEST_DIR = os.path.dirname(os.path.abspath(__file__))
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def http_server_port(httpd):
 | 
			
		||||
    if os.name == 'java' and isinstance(httpd.socket, ssl.SSLSocket):
 | 
			
		||||
        # In Jython SSLSocket is not a subclass of socket.socket
 | 
			
		||||
        sock = httpd.socket.sock
 | 
			
		||||
    else:
 | 
			
		||||
        sock = httpd.socket
 | 
			
		||||
    return sock.getsockname()[1]
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class HTTPTestRequestHandler(compat_http_server.BaseHTTPRequestHandler):
 | 
			
		||||
    def log_message(self, format, *args):
 | 
			
		||||
        pass
 | 
			
		||||
@@ -31,6 +40,22 @@ class HTTPTestRequestHandler(compat_http_server.BaseHTTPRequestHandler):
 | 
			
		||||
            self.send_header('Content-Type', 'video/mp4')
 | 
			
		||||
            self.end_headers()
 | 
			
		||||
            self.wfile.write(b'\x00\x00\x00\x00\x20\x66\x74[video]')
 | 
			
		||||
        elif self.path == '/302':
 | 
			
		||||
            if sys.version_info[0] == 3:
 | 
			
		||||
                # XXX: Python 3 http server does not allow non-ASCII header values
 | 
			
		||||
                self.send_response(404)
 | 
			
		||||
                self.end_headers()
 | 
			
		||||
                return
 | 
			
		||||
 | 
			
		||||
            new_url = 'http://localhost:%d/中文.html' % http_server_port(self.server)
 | 
			
		||||
            self.send_response(302)
 | 
			
		||||
            self.send_header(b'Location', new_url.encode('utf-8'))
 | 
			
		||||
            self.end_headers()
 | 
			
		||||
        elif self.path == '/%E4%B8%AD%E6%96%87.html':
 | 
			
		||||
            self.send_response(200)
 | 
			
		||||
            self.send_header('Content-Type', 'text/html; charset=utf-8')
 | 
			
		||||
            self.end_headers()
 | 
			
		||||
            self.wfile.write(b'<html><video src="/vid.mp4" /></html>')
 | 
			
		||||
        else:
 | 
			
		||||
            assert False
 | 
			
		||||
 | 
			
		||||
@@ -47,18 +72,32 @@ class FakeLogger(object):
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class TestHTTP(unittest.TestCase):
 | 
			
		||||
    def setUp(self):
 | 
			
		||||
        self.httpd = compat_http_server.HTTPServer(
 | 
			
		||||
            ('localhost', 0), HTTPTestRequestHandler)
 | 
			
		||||
        self.port = http_server_port(self.httpd)
 | 
			
		||||
        self.server_thread = threading.Thread(target=self.httpd.serve_forever)
 | 
			
		||||
        self.server_thread.daemon = True
 | 
			
		||||
        self.server_thread.start()
 | 
			
		||||
 | 
			
		||||
    def test_unicode_path_redirection(self):
 | 
			
		||||
        # XXX: Python 3 http server does not allow non-ASCII header values
 | 
			
		||||
        if sys.version_info[0] == 3:
 | 
			
		||||
            return
 | 
			
		||||
 | 
			
		||||
        ydl = YoutubeDL({'logger': FakeLogger()})
 | 
			
		||||
        r = ydl.extract_info('http://localhost:%d/302' % self.port)
 | 
			
		||||
        self.assertEqual(r['url'], 'http://localhost:%d/vid.mp4' % self.port)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class TestHTTPS(unittest.TestCase):
 | 
			
		||||
    def setUp(self):
 | 
			
		||||
        certfn = os.path.join(TEST_DIR, 'testcert.pem')
 | 
			
		||||
        self.httpd = compat_http_server.HTTPServer(
 | 
			
		||||
            ('localhost', 0), HTTPTestRequestHandler)
 | 
			
		||||
        self.httpd.socket = ssl.wrap_socket(
 | 
			
		||||
            self.httpd.socket, certfile=certfn, server_side=True)
 | 
			
		||||
        if os.name == 'java':
 | 
			
		||||
            # In Jython SSLSocket is not a subclass of socket.socket
 | 
			
		||||
            sock = self.httpd.socket.sock
 | 
			
		||||
        else:
 | 
			
		||||
            sock = self.httpd.socket
 | 
			
		||||
        self.port = sock.getsockname()[1]
 | 
			
		||||
        self.port = http_server_port(self.httpd)
 | 
			
		||||
        self.server_thread = threading.Thread(target=self.httpd.serve_forever)
 | 
			
		||||
        self.server_thread.daemon = True
 | 
			
		||||
        self.server_thread.start()
 | 
			
		||||
@@ -94,14 +133,14 @@ class TestProxy(unittest.TestCase):
 | 
			
		||||
    def setUp(self):
 | 
			
		||||
        self.proxy = compat_http_server.HTTPServer(
 | 
			
		||||
            ('localhost', 0), _build_proxy_handler('normal'))
 | 
			
		||||
        self.port = self.proxy.socket.getsockname()[1]
 | 
			
		||||
        self.port = http_server_port(self.proxy)
 | 
			
		||||
        self.proxy_thread = threading.Thread(target=self.proxy.serve_forever)
 | 
			
		||||
        self.proxy_thread.daemon = True
 | 
			
		||||
        self.proxy_thread.start()
 | 
			
		||||
 | 
			
		||||
        self.cn_proxy = compat_http_server.HTTPServer(
 | 
			
		||||
            ('localhost', 0), _build_proxy_handler('cn'))
 | 
			
		||||
        self.cn_port = self.cn_proxy.socket.getsockname()[1]
 | 
			
		||||
        self.cn_port = http_server_port(self.cn_proxy)
 | 
			
		||||
        self.cn_proxy_thread = threading.Thread(target=self.cn_proxy.serve_forever)
 | 
			
		||||
        self.cn_proxy_thread.daemon = True
 | 
			
		||||
        self.cn_proxy_thread.start()
 | 
			
		||||
 
 | 
			
		||||
							
								
								
									
										118
									
								
								test/test_socks.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										118
									
								
								test/test_socks.py
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,118 @@
 | 
			
		||||
#!/usr/bin/env python
 | 
			
		||||
# coding: utf-8
 | 
			
		||||
from __future__ import unicode_literals
 | 
			
		||||
 | 
			
		||||
# Allow direct execution
 | 
			
		||||
import os
 | 
			
		||||
import sys
 | 
			
		||||
import unittest
 | 
			
		||||
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
 | 
			
		||||
 | 
			
		||||
import random
 | 
			
		||||
import subprocess
 | 
			
		||||
 | 
			
		||||
from test.helper import (
 | 
			
		||||
    FakeYDL,
 | 
			
		||||
    get_params,
 | 
			
		||||
)
 | 
			
		||||
from youtube_dl.compat import (
 | 
			
		||||
    compat_str,
 | 
			
		||||
    compat_urllib_request,
 | 
			
		||||
)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class TestMultipleSocks(unittest.TestCase):
 | 
			
		||||
    @staticmethod
 | 
			
		||||
    def _check_params(attrs):
 | 
			
		||||
        params = get_params()
 | 
			
		||||
        for attr in attrs:
 | 
			
		||||
            if attr not in params:
 | 
			
		||||
                print('Missing %s. Skipping.' % attr)
 | 
			
		||||
                return
 | 
			
		||||
        return params
 | 
			
		||||
 | 
			
		||||
    def test_proxy_http(self):
 | 
			
		||||
        params = self._check_params(['primary_proxy', 'primary_server_ip'])
 | 
			
		||||
        if params is None:
 | 
			
		||||
            return
 | 
			
		||||
        ydl = FakeYDL({
 | 
			
		||||
            'proxy': params['primary_proxy']
 | 
			
		||||
        })
 | 
			
		||||
        self.assertEqual(
 | 
			
		||||
            ydl.urlopen('http://yt-dl.org/ip').read().decode('utf-8'),
 | 
			
		||||
            params['primary_server_ip'])
 | 
			
		||||
 | 
			
		||||
    def test_proxy_https(self):
 | 
			
		||||
        params = self._check_params(['primary_proxy', 'primary_server_ip'])
 | 
			
		||||
        if params is None:
 | 
			
		||||
            return
 | 
			
		||||
        ydl = FakeYDL({
 | 
			
		||||
            'proxy': params['primary_proxy']
 | 
			
		||||
        })
 | 
			
		||||
        self.assertEqual(
 | 
			
		||||
            ydl.urlopen('https://yt-dl.org/ip').read().decode('utf-8'),
 | 
			
		||||
            params['primary_server_ip'])
 | 
			
		||||
 | 
			
		||||
    def test_secondary_proxy_http(self):
 | 
			
		||||
        params = self._check_params(['secondary_proxy', 'secondary_server_ip'])
 | 
			
		||||
        if params is None:
 | 
			
		||||
            return
 | 
			
		||||
        ydl = FakeYDL()
 | 
			
		||||
        req = compat_urllib_request.Request('http://yt-dl.org/ip')
 | 
			
		||||
        req.add_header('Ytdl-request-proxy', params['secondary_proxy'])
 | 
			
		||||
        self.assertEqual(
 | 
			
		||||
            ydl.urlopen(req).read().decode('utf-8'),
 | 
			
		||||
            params['secondary_server_ip'])
 | 
			
		||||
 | 
			
		||||
    def test_secondary_proxy_https(self):
 | 
			
		||||
        params = self._check_params(['secondary_proxy', 'secondary_server_ip'])
 | 
			
		||||
        if params is None:
 | 
			
		||||
            return
 | 
			
		||||
        ydl = FakeYDL()
 | 
			
		||||
        req = compat_urllib_request.Request('https://yt-dl.org/ip')
 | 
			
		||||
        req.add_header('Ytdl-request-proxy', params['secondary_proxy'])
 | 
			
		||||
        self.assertEqual(
 | 
			
		||||
            ydl.urlopen(req).read().decode('utf-8'),
 | 
			
		||||
            params['secondary_server_ip'])
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class TestSocks(unittest.TestCase):
 | 
			
		||||
    _SKIP_SOCKS_TEST = True
 | 
			
		||||
 | 
			
		||||
    def setUp(self):
 | 
			
		||||
        if self._SKIP_SOCKS_TEST:
 | 
			
		||||
            return
 | 
			
		||||
 | 
			
		||||
        self.port = random.randint(20000, 30000)
 | 
			
		||||
        self.server_process = subprocess.Popen([
 | 
			
		||||
            'srelay', '-f', '-i', '127.0.0.1:%d' % self.port],
 | 
			
		||||
            stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
 | 
			
		||||
 | 
			
		||||
    def tearDown(self):
 | 
			
		||||
        if self._SKIP_SOCKS_TEST:
 | 
			
		||||
            return
 | 
			
		||||
 | 
			
		||||
        self.server_process.terminate()
 | 
			
		||||
        self.server_process.communicate()
 | 
			
		||||
 | 
			
		||||
    def _get_ip(self, protocol):
 | 
			
		||||
        if self._SKIP_SOCKS_TEST:
 | 
			
		||||
            return '127.0.0.1'
 | 
			
		||||
 | 
			
		||||
        ydl = FakeYDL({
 | 
			
		||||
            'proxy': '%s://127.0.0.1:%d' % (protocol, self.port),
 | 
			
		||||
        })
 | 
			
		||||
        return ydl.urlopen('http://yt-dl.org/ip').read().decode('utf-8')
 | 
			
		||||
 | 
			
		||||
    def test_socks4(self):
 | 
			
		||||
        self.assertTrue(isinstance(self._get_ip('socks4'), compat_str))
 | 
			
		||||
 | 
			
		||||
    def test_socks4a(self):
 | 
			
		||||
        self.assertTrue(isinstance(self._get_ip('socks4a'), compat_str))
 | 
			
		||||
 | 
			
		||||
    def test_socks5(self):
 | 
			
		||||
        self.assertTrue(isinstance(self._get_ip('socks5'), compat_str))
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
if __name__ == '__main__':
 | 
			
		||||
    unittest.main()
 | 
			
		||||
@@ -50,12 +50,13 @@ from youtube_dl.utils import (
 | 
			
		||||
    sanitize_path,
 | 
			
		||||
    prepend_extension,
 | 
			
		||||
    replace_extension,
 | 
			
		||||
    remove_start,
 | 
			
		||||
    remove_end,
 | 
			
		||||
    remove_quotes,
 | 
			
		||||
    shell_quote,
 | 
			
		||||
    smuggle_url,
 | 
			
		||||
    str_to_int,
 | 
			
		||||
    strip_jsonp,
 | 
			
		||||
    struct_unpack,
 | 
			
		||||
    timeconvert,
 | 
			
		||||
    unescapeHTML,
 | 
			
		||||
    unified_strdate,
 | 
			
		||||
@@ -139,8 +140,8 @@ class TestUtil(unittest.TestCase):
 | 
			
		||||
        self.assertEqual('yes_no', sanitize_filename('yes? no', restricted=True))
 | 
			
		||||
        self.assertEqual('this_-_that', sanitize_filename('this: that', restricted=True))
 | 
			
		||||
 | 
			
		||||
        tests = 'a\xe4b\u4e2d\u56fd\u7684c'
 | 
			
		||||
        self.assertEqual(sanitize_filename(tests, restricted=True), 'a_b_c')
 | 
			
		||||
        tests = 'aäb\u4e2d\u56fd\u7684c'
 | 
			
		||||
        self.assertEqual(sanitize_filename(tests, restricted=True), 'aab_c')
 | 
			
		||||
        self.assertTrue(sanitize_filename('\xf6', restricted=True) != '')  # No empty filename
 | 
			
		||||
 | 
			
		||||
        forbidden = '"\0\\/&!: \'\t\n()[]{}$;`^,#'
 | 
			
		||||
@@ -155,6 +156,10 @@ class TestUtil(unittest.TestCase):
 | 
			
		||||
        self.assertTrue(sanitize_filename('-', restricted=True) != '')
 | 
			
		||||
        self.assertTrue(sanitize_filename(':', restricted=True) != '')
 | 
			
		||||
 | 
			
		||||
        self.assertEqual(sanitize_filename(
 | 
			
		||||
            'ÂÃÄÀÁÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖŐØŒÙÚÛÜŰÝÞßàáâãäåæçèéêëìíîïðñòóôõöőøœùúûüűýþÿ', restricted=True),
 | 
			
		||||
            'AAAAAAAECEEEEIIIIDNOOOOOOOOEUUUUUYPssaaaaaaaeceeeeiiiionooooooooeuuuuuypy')
 | 
			
		||||
 | 
			
		||||
    def test_sanitize_ids(self):
 | 
			
		||||
        self.assertEqual(sanitize_filename('_n_cd26wFpw', is_id=True), '_n_cd26wFpw')
 | 
			
		||||
        self.assertEqual(sanitize_filename('_BD_eEpuzXw', is_id=True), '_BD_eEpuzXw')
 | 
			
		||||
@@ -212,6 +217,16 @@ class TestUtil(unittest.TestCase):
 | 
			
		||||
        self.assertEqual(replace_extension('.abc', 'temp'), '.abc.temp')
 | 
			
		||||
        self.assertEqual(replace_extension('.abc.ext', 'temp'), '.abc.temp')
 | 
			
		||||
 | 
			
		||||
    def test_remove_start(self):
 | 
			
		||||
        self.assertEqual(remove_start(None, 'A - '), None)
 | 
			
		||||
        self.assertEqual(remove_start('A - B', 'A - '), 'B')
 | 
			
		||||
        self.assertEqual(remove_start('B - A', 'A - '), 'B - A')
 | 
			
		||||
 | 
			
		||||
    def test_remove_end(self):
 | 
			
		||||
        self.assertEqual(remove_end(None, ' - B'), None)
 | 
			
		||||
        self.assertEqual(remove_end('A - B', ' - B'), 'A')
 | 
			
		||||
        self.assertEqual(remove_end('B - A', ' - B'), 'B - A')
 | 
			
		||||
 | 
			
		||||
    def test_remove_quotes(self):
 | 
			
		||||
        self.assertEqual(remove_quotes(None), None)
 | 
			
		||||
        self.assertEqual(remove_quotes('"'), '"')
 | 
			
		||||
@@ -234,6 +249,8 @@ class TestUtil(unittest.TestCase):
 | 
			
		||||
        self.assertEqual(unescapeHTML('/'), '/')
 | 
			
		||||
        self.assertEqual(unescapeHTML('é'), 'é')
 | 
			
		||||
        self.assertEqual(unescapeHTML('�'), '�')
 | 
			
		||||
        # HTML5 entities
 | 
			
		||||
        self.assertEqual(unescapeHTML('.''), '.\'')
 | 
			
		||||
 | 
			
		||||
    def test_date_from_str(self):
 | 
			
		||||
        self.assertEqual(date_from_str('yesterday'), date_from_str('now-1day'))
 | 
			
		||||
@@ -453,9 +470,6 @@ class TestUtil(unittest.TestCase):
 | 
			
		||||
        testPL(5, 2, (2, 99), [2, 3, 4])
 | 
			
		||||
        testPL(5, 2, (20, 99), [])
 | 
			
		||||
 | 
			
		||||
    def test_struct_unpack(self):
 | 
			
		||||
        self.assertEqual(struct_unpack('!B', b'\x00'), (0,))
 | 
			
		||||
 | 
			
		||||
    def test_read_batch_urls(self):
 | 
			
		||||
        f = io.StringIO('''\xef\xbb\xbf foo
 | 
			
		||||
            bar\r
 | 
			
		||||
@@ -617,6 +631,15 @@ class TestUtil(unittest.TestCase):
 | 
			
		||||
        json_code = js_to_json(inp)
 | 
			
		||||
        self.assertEqual(json.loads(json_code), json.loads(inp))
 | 
			
		||||
 | 
			
		||||
        inp = '''{
 | 
			
		||||
            0:{src:'skipped', type: 'application/dash+xml'},
 | 
			
		||||
            1:{src:'skipped', type: 'application/vnd.apple.mpegURL'},
 | 
			
		||||
        }'''
 | 
			
		||||
        self.assertEqual(js_to_json(inp), '''{
 | 
			
		||||
            "0":{"src":"skipped", "type": "application/dash+xml"},
 | 
			
		||||
            "1":{"src":"skipped", "type": "application/vnd.apple.mpegURL"}
 | 
			
		||||
        }''')
 | 
			
		||||
 | 
			
		||||
    def test_js_to_json_edgecases(self):
 | 
			
		||||
        on = js_to_json("{abc_def:'1\\'\\\\2\\\\\\'3\"4'}")
 | 
			
		||||
        self.assertEqual(json.loads(on), {"abc_def": "1'\\2\\'3\"4"})
 | 
			
		||||
@@ -640,6 +663,27 @@ class TestUtil(unittest.TestCase):
 | 
			
		||||
        on = js_to_json('{"abc": "def",}')
 | 
			
		||||
        self.assertEqual(json.loads(on), {'abc': 'def'})
 | 
			
		||||
 | 
			
		||||
        on = js_to_json('{ 0: /* " \n */ ",]" , }')
 | 
			
		||||
        self.assertEqual(json.loads(on), {'0': ',]'})
 | 
			
		||||
 | 
			
		||||
        on = js_to_json(r'["<p>x<\/p>"]')
 | 
			
		||||
        self.assertEqual(json.loads(on), ['<p>x</p>'])
 | 
			
		||||
 | 
			
		||||
        on = js_to_json(r'["\xaa"]')
 | 
			
		||||
        self.assertEqual(json.loads(on), ['\u00aa'])
 | 
			
		||||
 | 
			
		||||
        on = js_to_json("['a\\\nb']")
 | 
			
		||||
        self.assertEqual(json.loads(on), ['ab'])
 | 
			
		||||
 | 
			
		||||
        on = js_to_json('{0xff:0xff}')
 | 
			
		||||
        self.assertEqual(json.loads(on), {'255': 255})
 | 
			
		||||
 | 
			
		||||
        on = js_to_json('{077:077}')
 | 
			
		||||
        self.assertEqual(json.loads(on), {'63': 63})
 | 
			
		||||
 | 
			
		||||
        on = js_to_json('{42:42}')
 | 
			
		||||
        self.assertEqual(json.loads(on), {'42': 42})
 | 
			
		||||
 | 
			
		||||
    def test_extract_attributes(self):
 | 
			
		||||
        self.assertEqual(extract_attributes('<e x="y">'), {'x': 'y'})
 | 
			
		||||
        self.assertEqual(extract_attributes("<e x='y'>"), {'x': 'y'})
 | 
			
		||||
 
 | 
			
		||||
							
								
								
									
										1
									
								
								tox.ini
									
									
									
									
									
								
							
							
						
						
									
										1
									
								
								tox.ini
									
									
									
									
									
								
							@@ -9,5 +9,6 @@ passenv = HOME
 | 
			
		||||
defaultargs = test --exclude test_download.py --exclude test_age_restriction.py
 | 
			
		||||
    --exclude test_subtitles.py --exclude test_write_annotations.py
 | 
			
		||||
    --exclude test_youtube_lists.py --exclude test_iqiyi_sdk_interpreter.py
 | 
			
		||||
    --exclude test_socks.py
 | 
			
		||||
commands = nosetests --verbose {posargs:{[testenv]defaultargs}}  # --with-coverage --cover-package=youtube_dl --cover-html
 | 
			
		||||
                                               # test.test_download:TestDownload.test_NowVideo
 | 
			
		||||
 
 | 
			
		||||
@@ -64,6 +64,7 @@ from .utils import (
 | 
			
		||||
    PostProcessingError,
 | 
			
		||||
    preferredencoding,
 | 
			
		||||
    prepend_extension,
 | 
			
		||||
    register_socks_protocols,
 | 
			
		||||
    render_table,
 | 
			
		||||
    replace_extension,
 | 
			
		||||
    SameFileError,
 | 
			
		||||
@@ -325,7 +326,7 @@ class YoutubeDL(object):
 | 
			
		||||
                        ['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
 | 
			
		||||
                self._output_channel = os.fdopen(master, 'rb')
 | 
			
		||||
            except OSError as ose:
 | 
			
		||||
                if ose.errno == 2:
 | 
			
		||||
                if ose.errno == errno.ENOENT:
 | 
			
		||||
                    self.report_warning('Could not find fribidi executable, ignoring --bidi-workaround . Make sure that  fribidi  is an executable file in one of the directories in your $PATH.')
 | 
			
		||||
                else:
 | 
			
		||||
                    raise
 | 
			
		||||
@@ -361,6 +362,8 @@ class YoutubeDL(object):
 | 
			
		||||
        for ph in self.params.get('progress_hooks', []):
 | 
			
		||||
            self.add_progress_hook(ph)
 | 
			
		||||
 | 
			
		||||
        register_socks_protocols()
 | 
			
		||||
 | 
			
		||||
    def warn_if_short_id(self, argv):
 | 
			
		||||
        # short YouTube ID starting with dash?
 | 
			
		||||
        idxs = [
 | 
			
		||||
@@ -580,7 +583,7 @@ class YoutubeDL(object):
 | 
			
		||||
                is_id=(k == 'id'))
 | 
			
		||||
            template_dict = dict((k, sanitize(k, v))
 | 
			
		||||
                                 for k, v in template_dict.items()
 | 
			
		||||
                                 if v is not None)
 | 
			
		||||
                                 if v is not None and not isinstance(v, (list, tuple, dict)))
 | 
			
		||||
            template_dict = collections.defaultdict(lambda: 'NA', template_dict)
 | 
			
		||||
 | 
			
		||||
            outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
 | 
			
		||||
@@ -717,6 +720,7 @@ class YoutubeDL(object):
 | 
			
		||||
        result_type = ie_result.get('_type', 'video')
 | 
			
		||||
 | 
			
		||||
        if result_type in ('url', 'url_transparent'):
 | 
			
		||||
            ie_result['url'] = sanitize_url(ie_result['url'])
 | 
			
		||||
            extract_flat = self.params.get('extract_flat', False)
 | 
			
		||||
            if ((extract_flat == 'in_playlist' and 'playlist' in extra_info) or
 | 
			
		||||
                    extract_flat is True):
 | 
			
		||||
@@ -1219,6 +1223,10 @@ class YoutubeDL(object):
 | 
			
		||||
        if 'title' not in info_dict:
 | 
			
		||||
            raise ExtractorError('Missing "title" field in extractor result')
 | 
			
		||||
 | 
			
		||||
        if not isinstance(info_dict['id'], compat_str):
 | 
			
		||||
            self.report_warning('"id" field is not a string - forcing string conversion')
 | 
			
		||||
            info_dict['id'] = compat_str(info_dict['id'])
 | 
			
		||||
 | 
			
		||||
        if 'playlist' not in info_dict:
 | 
			
		||||
            # It isn't part of a playlist
 | 
			
		||||
            info_dict['playlist'] = None
 | 
			
		||||
@@ -1639,7 +1647,7 @@ class YoutubeDL(object):
 | 
			
		||||
                    # Just a single file
 | 
			
		||||
                    success = dl(filename, info_dict)
 | 
			
		||||
            except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
 | 
			
		||||
                self.report_error('unable to download video data: %s' % str(err))
 | 
			
		||||
                self.report_error('unable to download video data: %s' % error_to_compat_str(err))
 | 
			
		||||
                return
 | 
			
		||||
            except (OSError, IOError) as err:
 | 
			
		||||
                raise UnavailableVideoError(err)
 | 
			
		||||
@@ -2018,6 +2026,7 @@ class YoutubeDL(object):
 | 
			
		||||
        if opts_cookiefile is None:
 | 
			
		||||
            self.cookiejar = compat_cookiejar.CookieJar()
 | 
			
		||||
        else:
 | 
			
		||||
            opts_cookiefile = compat_expanduser(opts_cookiefile)
 | 
			
		||||
            self.cookiejar = compat_cookiejar.MozillaCookieJar(
 | 
			
		||||
                opts_cookiefile)
 | 
			
		||||
            if os.access(opts_cookiefile, os.R_OK):
 | 
			
		||||
 
 | 
			
		||||
@@ -18,7 +18,6 @@ from .options import (
 | 
			
		||||
from .compat import (
 | 
			
		||||
    compat_expanduser,
 | 
			
		||||
    compat_getpass,
 | 
			
		||||
    compat_print,
 | 
			
		||||
    compat_shlex_split,
 | 
			
		||||
    workaround_optparse_bug9161,
 | 
			
		||||
)
 | 
			
		||||
@@ -67,16 +66,16 @@ def _real_main(argv=None):
 | 
			
		||||
    # Custom HTTP headers
 | 
			
		||||
    if opts.headers is not None:
 | 
			
		||||
        for h in opts.headers:
 | 
			
		||||
            if h.find(':', 1) < 0:
 | 
			
		||||
            if ':' not in h:
 | 
			
		||||
                parser.error('wrong header formatting, it should be key:value, not "%s"' % h)
 | 
			
		||||
            key, value = h.split(':', 2)
 | 
			
		||||
            key, value = h.split(':', 1)
 | 
			
		||||
            if opts.verbose:
 | 
			
		||||
                write_string('[debug] Adding header from command line option %s:%s\n' % (key, value))
 | 
			
		||||
            std_headers[key] = value
 | 
			
		||||
 | 
			
		||||
    # Dump user agent
 | 
			
		||||
    if opts.dump_user_agent:
 | 
			
		||||
        compat_print(std_headers['User-Agent'])
 | 
			
		||||
        write_string(std_headers['User-Agent'] + '\n', out=sys.stdout)
 | 
			
		||||
        sys.exit(0)
 | 
			
		||||
 | 
			
		||||
    # Batch file verification
 | 
			
		||||
@@ -86,7 +85,9 @@ def _real_main(argv=None):
 | 
			
		||||
            if opts.batchfile == '-':
 | 
			
		||||
                batchfd = sys.stdin
 | 
			
		||||
            else:
 | 
			
		||||
                batchfd = io.open(opts.batchfile, 'r', encoding='utf-8', errors='ignore')
 | 
			
		||||
                batchfd = io.open(
 | 
			
		||||
                    compat_expanduser(opts.batchfile),
 | 
			
		||||
                    'r', encoding='utf-8', errors='ignore')
 | 
			
		||||
            batch_urls = read_batch_urls(batchfd)
 | 
			
		||||
            if opts.verbose:
 | 
			
		||||
                write_string('[debug] Batch file urls: ' + repr(batch_urls) + '\n')
 | 
			
		||||
@@ -99,10 +100,10 @@ def _real_main(argv=None):
 | 
			
		||||
 | 
			
		||||
    if opts.list_extractors:
 | 
			
		||||
        for ie in list_extractors(opts.age_limit):
 | 
			
		||||
            compat_print(ie.IE_NAME + (' (CURRENTLY BROKEN)' if not ie._WORKING else ''))
 | 
			
		||||
            write_string(ie.IE_NAME + (' (CURRENTLY BROKEN)' if not ie._WORKING else '') + '\n', out=sys.stdout)
 | 
			
		||||
            matchedUrls = [url for url in all_urls if ie.suitable(url)]
 | 
			
		||||
            for mu in matchedUrls:
 | 
			
		||||
                compat_print('  ' + mu)
 | 
			
		||||
                write_string('  ' + mu + '\n', out=sys.stdout)
 | 
			
		||||
        sys.exit(0)
 | 
			
		||||
    if opts.list_extractor_descriptions:
 | 
			
		||||
        for ie in list_extractors(opts.age_limit):
 | 
			
		||||
@@ -115,7 +116,7 @@ def _real_main(argv=None):
 | 
			
		||||
                _SEARCHES = ('cute kittens', 'slithering pythons', 'falling cat', 'angry poodle', 'purple fish', 'running tortoise', 'sleeping bunny', 'burping cow')
 | 
			
		||||
                _COUNTS = ('', '5', '10', 'all')
 | 
			
		||||
                desc += ' (Example: "%s%s:%s" )' % (ie.SEARCH_KEY, random.choice(_COUNTS), random.choice(_SEARCHES))
 | 
			
		||||
            compat_print(desc)
 | 
			
		||||
            write_string(desc + '\n', out=sys.stdout)
 | 
			
		||||
        sys.exit(0)
 | 
			
		||||
 | 
			
		||||
    # Conflicting, missing and erroneous options
 | 
			
		||||
@@ -404,7 +405,7 @@ def _real_main(argv=None):
 | 
			
		||||
 | 
			
		||||
        try:
 | 
			
		||||
            if opts.load_info_filename is not None:
 | 
			
		||||
                retcode = ydl.download_with_info_file(opts.load_info_filename)
 | 
			
		||||
                retcode = ydl.download_with_info_file(compat_expanduser(opts.load_info_filename))
 | 
			
		||||
            else:
 | 
			
		||||
                retcode = ydl.download(all_urls)
 | 
			
		||||
        except MaxDownloadsReached:
 | 
			
		||||
 
 | 
			
		||||
							
								
								
									
										2315
									
								
								youtube_dl/compat.py
									
									
									
									
									
								
							
							
						
						
									
										2315
									
								
								youtube_dl/compat.py
									
									
									
									
									
								
							
										
											
												File diff suppressed because it is too large
												Load Diff
											
										
									
								
							@@ -6,6 +6,7 @@ import sys
 | 
			
		||||
import re
 | 
			
		||||
 | 
			
		||||
from .common import FileDownloader
 | 
			
		||||
from ..compat import compat_setenv
 | 
			
		||||
from ..postprocessor.ffmpeg import FFmpegPostProcessor, EXT_TO_OUT_FORMATS
 | 
			
		||||
from ..utils import (
 | 
			
		||||
    cli_option,
 | 
			
		||||
@@ -198,6 +199,19 @@ class FFmpegFD(ExternalFD):
 | 
			
		||||
                '-headers',
 | 
			
		||||
                ''.join('%s: %s\r\n' % (key, val) for key, val in headers.items())]
 | 
			
		||||
 | 
			
		||||
        env = None
 | 
			
		||||
        proxy = self.params.get('proxy')
 | 
			
		||||
        if proxy:
 | 
			
		||||
            if not re.match(r'^[\da-zA-Z]+://', proxy):
 | 
			
		||||
                proxy = 'http://%s' % proxy
 | 
			
		||||
            # Since December 2015 ffmpeg supports -http_proxy option (see
 | 
			
		||||
            # http://git.videolan.org/?p=ffmpeg.git;a=commit;h=b4eb1f29ebddd60c41a2eb39f5af701e38e0d3fd)
 | 
			
		||||
            # We could switch to the following code if we are able to detect version properly
 | 
			
		||||
            # args += ['-http_proxy', proxy]
 | 
			
		||||
            env = os.environ.copy()
 | 
			
		||||
            compat_setenv('HTTP_PROXY', proxy, env=env)
 | 
			
		||||
            compat_setenv('http_proxy', proxy, env=env)
 | 
			
		||||
 | 
			
		||||
        protocol = info_dict.get('protocol')
 | 
			
		||||
 | 
			
		||||
        if protocol == 'rtmp':
 | 
			
		||||
@@ -224,7 +238,7 @@ class FFmpegFD(ExternalFD):
 | 
			
		||||
                args += ['-rtmp_live', 'live']
 | 
			
		||||
 | 
			
		||||
        args += ['-i', url, '-c', 'copy']
 | 
			
		||||
        if protocol == 'm3u8':
 | 
			
		||||
        if protocol in ('m3u8', 'm3u8_native'):
 | 
			
		||||
            if self.params.get('hls_use_mpegts', False) or tmpfilename == '-':
 | 
			
		||||
                args += ['-f', 'mpegts']
 | 
			
		||||
            else:
 | 
			
		||||
@@ -239,7 +253,7 @@ class FFmpegFD(ExternalFD):
 | 
			
		||||
 | 
			
		||||
        self._debug_cmd(args)
 | 
			
		||||
 | 
			
		||||
        proc = subprocess.Popen(args, stdin=subprocess.PIPE)
 | 
			
		||||
        proc = subprocess.Popen(args, stdin=subprocess.PIPE, env=env)
 | 
			
		||||
        try:
 | 
			
		||||
            retval = proc.wait()
 | 
			
		||||
        except KeyboardInterrupt:
 | 
			
		||||
 
 | 
			
		||||
@@ -12,37 +12,49 @@ from ..compat import (
 | 
			
		||||
    compat_urlparse,
 | 
			
		||||
    compat_urllib_error,
 | 
			
		||||
    compat_urllib_parse_urlparse,
 | 
			
		||||
    compat_struct_pack,
 | 
			
		||||
    compat_struct_unpack,
 | 
			
		||||
)
 | 
			
		||||
from ..utils import (
 | 
			
		||||
    encodeFilename,
 | 
			
		||||
    fix_xml_ampersands,
 | 
			
		||||
    sanitize_open,
 | 
			
		||||
    struct_pack,
 | 
			
		||||
    struct_unpack,
 | 
			
		||||
    xpath_text,
 | 
			
		||||
)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class DataTruncatedError(Exception):
 | 
			
		||||
    pass
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class FlvReader(io.BytesIO):
 | 
			
		||||
    """
 | 
			
		||||
    Reader for Flv files
 | 
			
		||||
    The file format is documented in https://www.adobe.com/devnet/f4v.html
 | 
			
		||||
    """
 | 
			
		||||
 | 
			
		||||
    def read_bytes(self, n):
 | 
			
		||||
        data = self.read(n)
 | 
			
		||||
        if len(data) < n:
 | 
			
		||||
            raise DataTruncatedError(
 | 
			
		||||
                'FlvReader error: need %d bytes while only %d bytes got' % (
 | 
			
		||||
                    n, len(data)))
 | 
			
		||||
        return data
 | 
			
		||||
 | 
			
		||||
    # Utility functions for reading numbers and strings
 | 
			
		||||
    def read_unsigned_long_long(self):
 | 
			
		||||
        return struct_unpack('!Q', self.read(8))[0]
 | 
			
		||||
        return compat_struct_unpack('!Q', self.read_bytes(8))[0]
 | 
			
		||||
 | 
			
		||||
    def read_unsigned_int(self):
 | 
			
		||||
        return struct_unpack('!I', self.read(4))[0]
 | 
			
		||||
        return compat_struct_unpack('!I', self.read_bytes(4))[0]
 | 
			
		||||
 | 
			
		||||
    def read_unsigned_char(self):
 | 
			
		||||
        return struct_unpack('!B', self.read(1))[0]
 | 
			
		||||
        return compat_struct_unpack('!B', self.read_bytes(1))[0]
 | 
			
		||||
 | 
			
		||||
    def read_string(self):
 | 
			
		||||
        res = b''
 | 
			
		||||
        while True:
 | 
			
		||||
            char = self.read(1)
 | 
			
		||||
            char = self.read_bytes(1)
 | 
			
		||||
            if char == b'\x00':
 | 
			
		||||
                break
 | 
			
		||||
            res += char
 | 
			
		||||
@@ -53,18 +65,18 @@ class FlvReader(io.BytesIO):
 | 
			
		||||
        Read a box and return the info as a tuple: (box_size, box_type, box_data)
 | 
			
		||||
        """
 | 
			
		||||
        real_size = size = self.read_unsigned_int()
 | 
			
		||||
        box_type = self.read(4)
 | 
			
		||||
        box_type = self.read_bytes(4)
 | 
			
		||||
        header_end = 8
 | 
			
		||||
        if size == 1:
 | 
			
		||||
            real_size = self.read_unsigned_long_long()
 | 
			
		||||
            header_end = 16
 | 
			
		||||
        return real_size, box_type, self.read(real_size - header_end)
 | 
			
		||||
        return real_size, box_type, self.read_bytes(real_size - header_end)
 | 
			
		||||
 | 
			
		||||
    def read_asrt(self):
 | 
			
		||||
        # version
 | 
			
		||||
        self.read_unsigned_char()
 | 
			
		||||
        # flags
 | 
			
		||||
        self.read(3)
 | 
			
		||||
        self.read_bytes(3)
 | 
			
		||||
        quality_entry_count = self.read_unsigned_char()
 | 
			
		||||
        # QualityEntryCount
 | 
			
		||||
        for i in range(quality_entry_count):
 | 
			
		||||
@@ -85,7 +97,7 @@ class FlvReader(io.BytesIO):
 | 
			
		||||
        # version
 | 
			
		||||
        self.read_unsigned_char()
 | 
			
		||||
        # flags
 | 
			
		||||
        self.read(3)
 | 
			
		||||
        self.read_bytes(3)
 | 
			
		||||
        # time scale
 | 
			
		||||
        self.read_unsigned_int()
 | 
			
		||||
 | 
			
		||||
@@ -119,7 +131,7 @@ class FlvReader(io.BytesIO):
 | 
			
		||||
        # version
 | 
			
		||||
        self.read_unsigned_char()
 | 
			
		||||
        # flags
 | 
			
		||||
        self.read(3)
 | 
			
		||||
        self.read_bytes(3)
 | 
			
		||||
 | 
			
		||||
        self.read_unsigned_int()  # BootstrapinfoVersion
 | 
			
		||||
        # Profile,Live,Update,Reserved
 | 
			
		||||
@@ -194,11 +206,11 @@ def build_fragments_list(boot_info):
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def write_unsigned_int(stream, val):
 | 
			
		||||
    stream.write(struct_pack('!I', val))
 | 
			
		||||
    stream.write(compat_struct_pack('!I', val))
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def write_unsigned_int_24(stream, val):
 | 
			
		||||
    stream.write(struct_pack('!I', val)[1:])
 | 
			
		||||
    stream.write(compat_struct_pack('!I', val)[1:])
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def write_flv_header(stream):
 | 
			
		||||
@@ -307,7 +319,7 @@ class F4mFD(FragmentFD):
 | 
			
		||||
        doc = compat_etree_fromstring(manifest)
 | 
			
		||||
        formats = [(int(f.attrib.get('bitrate', -1)), f)
 | 
			
		||||
                   for f in self._get_unencrypted_media(doc)]
 | 
			
		||||
        if requested_bitrate is None:
 | 
			
		||||
        if requested_bitrate is None or len(formats) == 1:
 | 
			
		||||
            # get the best format
 | 
			
		||||
            formats = sorted(formats, key=lambda f: f[0])
 | 
			
		||||
            rate, media = formats[-1]
 | 
			
		||||
@@ -374,7 +386,17 @@ class F4mFD(FragmentFD):
 | 
			
		||||
                down.close()
 | 
			
		||||
                reader = FlvReader(down_data)
 | 
			
		||||
                while True:
 | 
			
		||||
                    _, box_type, box_data = reader.read_box_info()
 | 
			
		||||
                    try:
 | 
			
		||||
                        _, box_type, box_data = reader.read_box_info()
 | 
			
		||||
                    except DataTruncatedError:
 | 
			
		||||
                        if test:
 | 
			
		||||
                            # In tests, segments may be truncated, and thus
 | 
			
		||||
                            # FlvReader may not be able to parse the whole
 | 
			
		||||
                            # chunk. If so, write the segment as is
 | 
			
		||||
                            # See https://github.com/rg3/youtube-dl/issues/9214
 | 
			
		||||
                            dest_stream.write(down_data)
 | 
			
		||||
                            break
 | 
			
		||||
                        raise
 | 
			
		||||
                    if box_type == b'mdat':
 | 
			
		||||
                        dest_stream.write(box_data)
 | 
			
		||||
                        break
 | 
			
		||||
 
 | 
			
		||||
@@ -4,6 +4,7 @@ import os.path
 | 
			
		||||
import re
 | 
			
		||||
 | 
			
		||||
from .fragment import FragmentFD
 | 
			
		||||
from .external import FFmpegFD
 | 
			
		||||
 | 
			
		||||
from ..compat import compat_urlparse
 | 
			
		||||
from ..utils import (
 | 
			
		||||
@@ -17,12 +18,45 @@ class HlsFD(FragmentFD):
 | 
			
		||||
 | 
			
		||||
    FD_NAME = 'hlsnative'
 | 
			
		||||
 | 
			
		||||
    @staticmethod
 | 
			
		||||
    def can_download(manifest):
 | 
			
		||||
        UNSUPPORTED_FEATURES = (
 | 
			
		||||
            r'#EXT-X-KEY:METHOD=(?!NONE)',  # encrypted streams [1]
 | 
			
		||||
            r'#EXT-X-BYTERANGE',  # playlists composed of byte ranges of media files [2]
 | 
			
		||||
 | 
			
		||||
            # Live streams heuristic does not always work (e.g. geo restricted to Germany
 | 
			
		||||
            # http://hls-geo.daserste.de/i/videoportal/Film/c_620000/622873/format,716451,716457,716450,716458,716459,.mp4.csmil/index_4_av.m3u8?null=0)
 | 
			
		||||
            # r'#EXT-X-MEDIA-SEQUENCE:(?!0$)',  # live streams [3]
 | 
			
		||||
 | 
			
		||||
            # This heuristic also is not correct since segments may not be appended as well.
 | 
			
		||||
            # Twitch vods of finished streams have EXT-X-PLAYLIST-TYPE:EVENT despite
 | 
			
		||||
            # no segments will definitely be appended to the end of the playlist.
 | 
			
		||||
            # r'#EXT-X-PLAYLIST-TYPE:EVENT',  # media segments may be appended to the end of
 | 
			
		||||
            #                                 # event media playlists [4]
 | 
			
		||||
 | 
			
		||||
            # 1. https://tools.ietf.org/html/draft-pantos-http-live-streaming-17#section-4.3.2.4
 | 
			
		||||
            # 2. https://tools.ietf.org/html/draft-pantos-http-live-streaming-17#section-4.3.2.2
 | 
			
		||||
            # 3. https://tools.ietf.org/html/draft-pantos-http-live-streaming-17#section-4.3.3.2
 | 
			
		||||
            # 4. https://tools.ietf.org/html/draft-pantos-http-live-streaming-17#section-4.3.3.5
 | 
			
		||||
        )
 | 
			
		||||
        return all(not re.search(feature, manifest) for feature in UNSUPPORTED_FEATURES)
 | 
			
		||||
 | 
			
		||||
    def real_download(self, filename, info_dict):
 | 
			
		||||
        man_url = info_dict['url']
 | 
			
		||||
        self.to_screen('[%s] Downloading m3u8 manifest' % self.FD_NAME)
 | 
			
		||||
        manifest = self.ydl.urlopen(man_url).read()
 | 
			
		||||
 | 
			
		||||
        s = manifest.decode('utf-8', 'ignore')
 | 
			
		||||
 | 
			
		||||
        if not self.can_download(s):
 | 
			
		||||
            self.report_warning(
 | 
			
		||||
                'hlsnative has detected features it does not support, '
 | 
			
		||||
                'extraction will be delegated to ffmpeg')
 | 
			
		||||
            fd = FFmpegFD(self.ydl, self.params)
 | 
			
		||||
            for ph in self._progress_hooks:
 | 
			
		||||
                fd.add_progress_hook(ph)
 | 
			
		||||
            return fd.real_download(filename, info_dict)
 | 
			
		||||
 | 
			
		||||
        fragment_urls = []
 | 
			
		||||
        for line in s.splitlines():
 | 
			
		||||
            line = line.strip()
 | 
			
		||||
 
 | 
			
		||||
							
								
								
									
										135
									
								
								youtube_dl/extractor/abcnews.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										135
									
								
								youtube_dl/extractor/abcnews.py
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,135 @@
 | 
			
		||||
# coding: utf-8
 | 
			
		||||
from __future__ import unicode_literals
 | 
			
		||||
 | 
			
		||||
import calendar
 | 
			
		||||
import re
 | 
			
		||||
import time
 | 
			
		||||
 | 
			
		||||
from .amp import AMPIE
 | 
			
		||||
from .common import InfoExtractor
 | 
			
		||||
from ..compat import compat_urlparse
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class AbcNewsVideoIE(AMPIE):
 | 
			
		||||
    IE_NAME = 'abcnews:video'
 | 
			
		||||
    _VALID_URL = 'http://abcnews.go.com/[^/]+/video/(?P<display_id>[0-9a-z-]+)-(?P<id>\d+)'
 | 
			
		||||
 | 
			
		||||
    _TESTS = [{
 | 
			
		||||
        'url': 'http://abcnews.go.com/ThisWeek/video/week-exclusive-irans-foreign-minister-zarif-20411932',
 | 
			
		||||
        'info_dict': {
 | 
			
		||||
            'id': '20411932',
 | 
			
		||||
            'ext': 'mp4',
 | 
			
		||||
            'display_id': 'week-exclusive-irans-foreign-minister-zarif',
 | 
			
		||||
            'title': '\'This Week\' Exclusive: Iran\'s Foreign Minister Zarif',
 | 
			
		||||
            'description': 'George Stephanopoulos goes one-on-one with Iranian Foreign Minister Dr. Javad Zarif.',
 | 
			
		||||
            'duration': 180,
 | 
			
		||||
            'thumbnail': 're:^https?://.*\.jpg$',
 | 
			
		||||
        },
 | 
			
		||||
        'params': {
 | 
			
		||||
            # m3u8 download
 | 
			
		||||
            'skip_download': True,
 | 
			
		||||
        },
 | 
			
		||||
    }, {
 | 
			
		||||
        'url': 'http://abcnews.go.com/2020/video/2020-husband-stands-teacher-jail-student-affairs-26119478',
 | 
			
		||||
        'only_matching': True,
 | 
			
		||||
    }]
 | 
			
		||||
 | 
			
		||||
    def _real_extract(self, url):
 | 
			
		||||
        mobj = re.match(self._VALID_URL, url)
 | 
			
		||||
        display_id = mobj.group('display_id')
 | 
			
		||||
        video_id = mobj.group('id')
 | 
			
		||||
        info_dict = self._extract_feed_info(
 | 
			
		||||
            'http://abcnews.go.com/video/itemfeed?id=%s' % video_id)
 | 
			
		||||
        info_dict.update({
 | 
			
		||||
            'id': video_id,
 | 
			
		||||
            'display_id': display_id,
 | 
			
		||||
        })
 | 
			
		||||
        return info_dict
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class AbcNewsIE(InfoExtractor):
 | 
			
		||||
    IE_NAME = 'abcnews'
 | 
			
		||||
    _VALID_URL = 'https?://abcnews\.go\.com/(?:[^/]+/)+(?P<display_id>[0-9a-z-]+)/story\?id=(?P<id>\d+)'
 | 
			
		||||
 | 
			
		||||
    _TESTS = [{
 | 
			
		||||
        'url': 'http://abcnews.go.com/Blotter/News/dramatic-video-rare-death-job-america/story?id=10498713#.UIhwosWHLjY',
 | 
			
		||||
        'info_dict': {
 | 
			
		||||
            'id': '10498713',
 | 
			
		||||
            'ext': 'flv',
 | 
			
		||||
            'display_id': 'dramatic-video-rare-death-job-america',
 | 
			
		||||
            'title': 'Occupational Hazards',
 | 
			
		||||
            'description': 'Nightline investigates the dangers that lurk at various jobs.',
 | 
			
		||||
            'thumbnail': 're:^https?://.*\.jpg$',
 | 
			
		||||
            'upload_date': '20100428',
 | 
			
		||||
            'timestamp': 1272412800,
 | 
			
		||||
        },
 | 
			
		||||
        'add_ie': ['AbcNewsVideo'],
 | 
			
		||||
    }, {
 | 
			
		||||
        'url': 'http://abcnews.go.com/Entertainment/justin-timberlake-performs-stop-feeling-eurovision-2016/story?id=39125818',
 | 
			
		||||
        'info_dict': {
 | 
			
		||||
            'id': '39125818',
 | 
			
		||||
            'ext': 'mp4',
 | 
			
		||||
            'display_id': 'justin-timberlake-performs-stop-feeling-eurovision-2016',
 | 
			
		||||
            'title': 'Justin Timberlake Drops Hints For Secret Single',
 | 
			
		||||
            'description': 'Lara Spencer reports the buzziest stories of the day in "GMA" Pop News.',
 | 
			
		||||
            'upload_date': '20160515',
 | 
			
		||||
            'timestamp': 1463329500,
 | 
			
		||||
        },
 | 
			
		||||
        'params': {
 | 
			
		||||
            # m3u8 download
 | 
			
		||||
            'skip_download': True,
 | 
			
		||||
            # The embedded YouTube video is blocked due to copyright issues
 | 
			
		||||
            'playlist_items': '1',
 | 
			
		||||
        },
 | 
			
		||||
        'add_ie': ['AbcNewsVideo'],
 | 
			
		||||
    }, {
 | 
			
		||||
        'url': 'http://abcnews.go.com/Technology/exclusive-apple-ceo-tim-cook-iphone-cracking-software/story?id=37173343',
 | 
			
		||||
        'only_matching': True,
 | 
			
		||||
    }]
 | 
			
		||||
 | 
			
		||||
    def _real_extract(self, url):
 | 
			
		||||
        mobj = re.match(self._VALID_URL, url)
 | 
			
		||||
        display_id = mobj.group('display_id')
 | 
			
		||||
        video_id = mobj.group('id')
 | 
			
		||||
 | 
			
		||||
        webpage = self._download_webpage(url, video_id)
 | 
			
		||||
        video_url = self._search_regex(
 | 
			
		||||
            r'window\.abcnvideo\.url\s*=\s*"([^"]+)"', webpage, 'video URL')
 | 
			
		||||
        full_video_url = compat_urlparse.urljoin(url, video_url)
 | 
			
		||||
 | 
			
		||||
        youtube_url = self._html_search_regex(
 | 
			
		||||
            r'<iframe[^>]+src="(https://www\.youtube\.com/embed/[^"]+)"',
 | 
			
		||||
            webpage, 'YouTube URL', default=None)
 | 
			
		||||
 | 
			
		||||
        timestamp = None
 | 
			
		||||
        date_str = self._html_search_regex(
 | 
			
		||||
            r'<span[^>]+class="timestamp">([^<]+)</span>',
 | 
			
		||||
            webpage, 'timestamp', fatal=False)
 | 
			
		||||
        if date_str:
 | 
			
		||||
            tz_offset = 0
 | 
			
		||||
            if date_str.endswith(' ET'):  # Eastern Time
 | 
			
		||||
                tz_offset = -5
 | 
			
		||||
                date_str = date_str[:-3]
 | 
			
		||||
            date_formats = ['%b. %d, %Y', '%b %d, %Y, %I:%M %p']
 | 
			
		||||
            for date_format in date_formats:
 | 
			
		||||
                try:
 | 
			
		||||
                    timestamp = calendar.timegm(time.strptime(date_str.strip(), date_format))
 | 
			
		||||
                except ValueError:
 | 
			
		||||
                    continue
 | 
			
		||||
            if timestamp is not None:
 | 
			
		||||
                timestamp -= tz_offset * 3600
 | 
			
		||||
 | 
			
		||||
        entry = {
 | 
			
		||||
            '_type': 'url_transparent',
 | 
			
		||||
            'ie_key': AbcNewsVideoIE.ie_key(),
 | 
			
		||||
            'url': full_video_url,
 | 
			
		||||
            'id': video_id,
 | 
			
		||||
            'display_id': display_id,
 | 
			
		||||
            'timestamp': timestamp,
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
        if youtube_url:
 | 
			
		||||
            entries = [entry, self.url_result(youtube_url, 'Youtube')]
 | 
			
		||||
            return self.playlist_result(entries)
 | 
			
		||||
 | 
			
		||||
        return entry
 | 
			
		||||
							
								
								
									
										133
									
								
								youtube_dl/extractor/afreecatv.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										133
									
								
								youtube_dl/extractor/afreecatv.py
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,133 @@
 | 
			
		||||
# coding: utf-8
 | 
			
		||||
from __future__ import unicode_literals
 | 
			
		||||
 | 
			
		||||
import re
 | 
			
		||||
 | 
			
		||||
from .common import InfoExtractor
 | 
			
		||||
from ..compat import (
 | 
			
		||||
    compat_urllib_parse_urlparse,
 | 
			
		||||
    compat_urlparse,
 | 
			
		||||
)
 | 
			
		||||
from ..utils import (
 | 
			
		||||
    ExtractorError,
 | 
			
		||||
    int_or_none,
 | 
			
		||||
    xpath_element,
 | 
			
		||||
    xpath_text,
 | 
			
		||||
)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class AfreecaTVIE(InfoExtractor):
 | 
			
		||||
    IE_DESC = 'afreecatv.com'
 | 
			
		||||
    _VALID_URL = r'''(?x)^
 | 
			
		||||
        https?://(?:(live|afbbs|www)\.)?afreeca(?:tv)?\.com(?::\d+)?
 | 
			
		||||
        (?:
 | 
			
		||||
            /app/(?:index|read_ucc_bbs)\.cgi|
 | 
			
		||||
            /player/[Pp]layer\.(?:swf|html))
 | 
			
		||||
        \?.*?\bnTitleNo=(?P<id>\d+)'''
 | 
			
		||||
    _TESTS = [{
 | 
			
		||||
        'url': 'http://live.afreecatv.com:8079/app/index.cgi?szType=read_ucc_bbs&szBjId=dailyapril&nStationNo=16711924&nBbsNo=18605867&nTitleNo=36164052&szSkin=',
 | 
			
		||||
        'md5': 'f72c89fe7ecc14c1b5ce506c4996046e',
 | 
			
		||||
        'info_dict': {
 | 
			
		||||
            'id': '36164052',
 | 
			
		||||
            'ext': 'mp4',
 | 
			
		||||
            'title': '데일리 에이프릴 요정들의 시상식!',
 | 
			
		||||
            'thumbnail': 're:^https?://(?:video|st)img.afreecatv.com/.*$',
 | 
			
		||||
            'uploader': 'dailyapril',
 | 
			
		||||
            'uploader_id': 'dailyapril',
 | 
			
		||||
            'upload_date': '20160503',
 | 
			
		||||
        }
 | 
			
		||||
    }, {
 | 
			
		||||
        'url': 'http://afbbs.afreecatv.com:8080/app/read_ucc_bbs.cgi?nStationNo=16711924&nTitleNo=36153164&szBjId=dailyapril&nBbsNo=18605867',
 | 
			
		||||
        'info_dict': {
 | 
			
		||||
            'id': '36153164',
 | 
			
		||||
            'title': "BJ유트루와 함께하는 '팅커벨 메이크업!'",
 | 
			
		||||
            'thumbnail': 're:^https?://(?:video|st)img.afreecatv.com/.*$',
 | 
			
		||||
            'uploader': 'dailyapril',
 | 
			
		||||
            'uploader_id': 'dailyapril',
 | 
			
		||||
        },
 | 
			
		||||
        'playlist_count': 2,
 | 
			
		||||
        'playlist': [{
 | 
			
		||||
            'md5': 'd8b7c174568da61d774ef0203159bf97',
 | 
			
		||||
            'info_dict': {
 | 
			
		||||
                'id': '36153164_1',
 | 
			
		||||
                'ext': 'mp4',
 | 
			
		||||
                'title': "BJ유트루와 함께하는 '팅커벨 메이크업!'",
 | 
			
		||||
                'upload_date': '20160502',
 | 
			
		||||
            },
 | 
			
		||||
        }, {
 | 
			
		||||
            'md5': '58f2ce7f6044e34439ab2d50612ab02b',
 | 
			
		||||
            'info_dict': {
 | 
			
		||||
                'id': '36153164_2',
 | 
			
		||||
                'ext': 'mp4',
 | 
			
		||||
                'title': "BJ유트루와 함께하는 '팅커벨 메이크업!'",
 | 
			
		||||
                'upload_date': '20160502',
 | 
			
		||||
            },
 | 
			
		||||
        }],
 | 
			
		||||
    }, {
 | 
			
		||||
        'url': 'http://www.afreecatv.com/player/Player.swf?szType=szBjId=djleegoon&nStationNo=11273158&nBbsNo=13161095&nTitleNo=36327652',
 | 
			
		||||
        'only_matching': True,
 | 
			
		||||
    }]
 | 
			
		||||
 | 
			
		||||
    @staticmethod
 | 
			
		||||
    def parse_video_key(key):
 | 
			
		||||
        video_key = {}
 | 
			
		||||
        m = re.match(r'^(?P<upload_date>\d{8})_\w+_(?P<part>\d+)$', key)
 | 
			
		||||
        if m:
 | 
			
		||||
            video_key['upload_date'] = m.group('upload_date')
 | 
			
		||||
            video_key['part'] = m.group('part')
 | 
			
		||||
        return video_key
 | 
			
		||||
 | 
			
		||||
    def _real_extract(self, url):
 | 
			
		||||
        video_id = self._match_id(url)
 | 
			
		||||
        parsed_url = compat_urllib_parse_urlparse(url)
 | 
			
		||||
        info_url = compat_urlparse.urlunparse(parsed_url._replace(
 | 
			
		||||
            netloc='afbbs.afreecatv.com:8080',
 | 
			
		||||
            path='/api/video/get_video_info.php'))
 | 
			
		||||
        video_xml = self._download_xml(info_url, video_id)
 | 
			
		||||
 | 
			
		||||
        if xpath_element(video_xml, './track/video/file') is None:
 | 
			
		||||
            raise ExtractorError('Specified AfreecaTV video does not exist',
 | 
			
		||||
                                 expected=True)
 | 
			
		||||
 | 
			
		||||
        title = xpath_text(video_xml, './track/title', 'title')
 | 
			
		||||
        uploader = xpath_text(video_xml, './track/nickname', 'uploader')
 | 
			
		||||
        uploader_id = xpath_text(video_xml, './track/bj_id', 'uploader id')
 | 
			
		||||
        duration = int_or_none(xpath_text(video_xml, './track/duration',
 | 
			
		||||
                                          'duration'))
 | 
			
		||||
        thumbnail = xpath_text(video_xml, './track/titleImage', 'thumbnail')
 | 
			
		||||
 | 
			
		||||
        entries = []
 | 
			
		||||
        for i, video_file in enumerate(video_xml.findall('./track/video/file')):
 | 
			
		||||
            video_key = self.parse_video_key(video_file.get('key', ''))
 | 
			
		||||
            if not video_key:
 | 
			
		||||
                continue
 | 
			
		||||
            entries.append({
 | 
			
		||||
                'id': '%s_%s' % (video_id, video_key.get('part', i + 1)),
 | 
			
		||||
                'title': title,
 | 
			
		||||
                'upload_date': video_key.get('upload_date'),
 | 
			
		||||
                'duration': int_or_none(video_file.get('duration')),
 | 
			
		||||
                'url': video_file.text,
 | 
			
		||||
            })
 | 
			
		||||
 | 
			
		||||
        info = {
 | 
			
		||||
            'id': video_id,
 | 
			
		||||
            'title': title,
 | 
			
		||||
            'uploader': uploader,
 | 
			
		||||
            'uploader_id': uploader_id,
 | 
			
		||||
            'duration': duration,
 | 
			
		||||
            'thumbnail': thumbnail,
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
        if len(entries) > 1:
 | 
			
		||||
            info['_type'] = 'multi_video'
 | 
			
		||||
            info['entries'] = entries
 | 
			
		||||
        elif len(entries) == 1:
 | 
			
		||||
            info['url'] = entries[0]['url']
 | 
			
		||||
            info['upload_date'] = entries[0].get('upload_date')
 | 
			
		||||
        else:
 | 
			
		||||
            raise ExtractorError(
 | 
			
		||||
                'No files found for the specified AfreecaTV video, either'
 | 
			
		||||
                ' the URL is incorrect or the video has been made private.',
 | 
			
		||||
                expected=True)
 | 
			
		||||
 | 
			
		||||
        return info
 | 
			
		||||
@@ -52,7 +52,7 @@ class AMPIE(InfoExtractor):
 | 
			
		||||
        for media_data in media_content:
 | 
			
		||||
            media = media_data['@attributes']
 | 
			
		||||
            media_type = media['type']
 | 
			
		||||
            if media_type == 'video/f4m':
 | 
			
		||||
            if media_type in ('video/f4m', 'application/f4m+xml'):
 | 
			
		||||
                formats.extend(self._extract_f4m_formats(
 | 
			
		||||
                    media['url'] + '?hdcore=3.4.0&plugin=aasp-3.4.0.132.124',
 | 
			
		||||
                    video_id, f4m_id='hds', fatal=False))
 | 
			
		||||
@@ -61,7 +61,7 @@ class AMPIE(InfoExtractor):
 | 
			
		||||
                    media['url'], video_id, 'mp4', m3u8_id='hls', fatal=False))
 | 
			
		||||
            else:
 | 
			
		||||
                formats.append({
 | 
			
		||||
                    'format_id': media_data['media-category']['@attributes']['label'],
 | 
			
		||||
                    'format_id': media_data.get('media-category', {}).get('@attributes', {}).get('label'),
 | 
			
		||||
                    'url': media['url'],
 | 
			
		||||
                    'tbr': int_or_none(media.get('bitrate')),
 | 
			
		||||
                    'filesize': int_or_none(media.get('fileSize')),
 | 
			
		||||
 
 | 
			
		||||
							
								
								
									
										224
									
								
								youtube_dl/extractor/anvato.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										224
									
								
								youtube_dl/extractor/anvato.py
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,224 @@
 | 
			
		||||
# coding: utf-8
 | 
			
		||||
from __future__ import unicode_literals
 | 
			
		||||
 | 
			
		||||
import base64
 | 
			
		||||
import hashlib
 | 
			
		||||
import json
 | 
			
		||||
import random
 | 
			
		||||
import time
 | 
			
		||||
 | 
			
		||||
from .common import InfoExtractor
 | 
			
		||||
from ..aes import aes_encrypt
 | 
			
		||||
from ..compat import compat_str
 | 
			
		||||
from ..utils import (
 | 
			
		||||
    bytes_to_intlist,
 | 
			
		||||
    determine_ext,
 | 
			
		||||
    intlist_to_bytes,
 | 
			
		||||
    int_or_none,
 | 
			
		||||
    strip_jsonp,
 | 
			
		||||
)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def md5_text(s):
    """Return the hex MD5 digest of *s*, coercing non-string values first.

    Accepts anything (e.g. int timestamps); values that are not already a
    text string are converted via compat_str before UTF-8 encoding.
    """
    if not isinstance(s, compat_str):
        s = compat_str(s)
    digest = hashlib.md5(s.encode('utf-8'))
    return digest.hexdigest()
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class AnvatoIE(InfoExtractor):
 | 
			
		||||
    # Copied from anvplayer.min.js
 | 
			
		||||
    _ANVACK_TABLE = {
 | 
			
		||||
        'nbcu_nbcd_desktop_web_prod_93d8ead38ce2024f8f544b78306fbd15895ae5e6': 'NNemUkySjxLyPTKvZRiGntBIjEyK8uqicjMakIaQ',
 | 
			
		||||
        'nbcu_nbcd_desktop_web_qa_1a6f01bdd0dc45a439043b694c8a031d': 'eSxJUbA2UUKBTXryyQ2d6NuM8oEqaPySvaPzfKNA',
 | 
			
		||||
        'nbcu_nbcd_desktop_web_acc_eb2ff240a5d4ae9a63d4c297c32716b6c523a129': '89JR3RtUGbvKuuJIiKOMK0SoarLb5MUx8v89RcbP',
 | 
			
		||||
        'nbcu_nbcd_watchvod_web_prod_e61107507180976724ec8e8319fe24ba5b4b60e1': 'Uc7dFt7MJ9GsBWB5T7iPvLaMSOt8BBxv4hAXk5vv',
 | 
			
		||||
        'nbcu_nbcd_watchvod_web_qa_42afedba88a36203db5a4c09a5ba29d045302232': 'T12oDYVFP2IaFvxkmYMy5dKxswpLHtGZa4ZAXEi7',
 | 
			
		||||
        'nbcu_nbcd_watchvod_web_acc_9193214448e2e636b0ffb78abacfd9c4f937c6ca': 'MmobcxUxMedUpohNWwXaOnMjlbiyTOBLL6d46ZpR',
 | 
			
		||||
        'nbcu_local_monitor_web_acc_f998ad54eaf26acd8ee033eb36f39a7b791c6335': 'QvfIoPYrwsjUCcASiw3AIkVtQob2LtJHfidp9iWg',
 | 
			
		||||
        'nbcu_cable_monitor_web_acc_a413759603e8bedfcd3c61b14767796e17834077': 'uwVPJLShvJWSs6sWEIuVem7MTF8A4IknMMzIlFto',
 | 
			
		||||
        'nbcu_nbcd_mcpstage_web_qa_4c43a8f6e95a88dbb40276c0630ba9f693a63a4e': 'PxVYZVwjhgd5TeoPRxL3whssb5OUPnM3zyAzq8GY',
 | 
			
		||||
        'nbcu_comcast_comcast_web_prod_074080762ad4ce956b26b43fb22abf153443a8c4': 'afnaRZfDyg1Z3WZHdupKfy6xrbAG2MHqe3VfuSwh',
 | 
			
		||||
        'nbcu_comcast_comcast_web_qa_706103bb93ead3ef70b1de12a0e95e3c4481ade0': 'DcjsVbX9b3uoPlhdriIiovgFQZVxpISZwz0cx1ZK',
 | 
			
		||||
        'nbcu_comcast_comcastcable_web_prod_669f04817536743563d7331c9293e59fbdbe3d07': '0RwMN2cWy10qhAhOscq3eK7aEe0wqnKt3vJ0WS4D',
 | 
			
		||||
        'nbcu_comcast_comcastcable_web_qa_3d9d2d66219094127f0f6b09cc3c7bb076e3e1ca': '2r8G9DEya7PCqBceKZgrn2XkXgASjwLMuaFE1Aad',
 | 
			
		||||
        'hearst_hearst_demo_web_stage_960726dfef3337059a01a78816e43b29ec04dfc7': 'cuZBPXTR6kSdoTCVXwk5KGA8rk3NrgGn4H6e9Dsp',
 | 
			
		||||
        'anvato_mcpqa_demo_web_stage_18b55e00db5a13faa8d03ae6e41f6f5bcb15b922': 'IOaaLQ8ymqVyem14QuAvE5SndQynTcH5CrLkU2Ih',
 | 
			
		||||
        'anvato_nextmedia_demo_web_stage_9787d56a02ff6b9f43e9a2b0920d8ca88beb5818': 'Pqu9zVzI1ApiIzbVA3VkGBEQHvdKSUuKpD6s2uaR',
 | 
			
		||||
        'anvato_scripps_app_web_prod_0837996dbe373629133857ae9eb72e740424d80a': 'du1ccmn7RxzgizwbWU7hyUaGodNlJn7HtXI0WgXW',
 | 
			
		||||
        'anvato_scripps_app_web_stage_360797e00fe2826be142155c4618cc52fce6c26c': '2PMrQ0BRoqCWl7nzphj0GouIMEh2mZYivAT0S1Su',
 | 
			
		||||
        'fs2go_fs2go_go_all_prod_21934911ccfafc03a075894ead2260d11e2ddd24': 'RcuHlKikW2IJw6HvVoEkqq2UsuEJlbEl11pWXs4Q',
 | 
			
		||||
        'fs2go_fs2go_go_web_prod_ead4b0eec7460c1a07783808db21b49cf1f2f9a7': '4K0HTT2u1zkQA2MaGaZmkLa1BthGSBdr7jllrhk5',
 | 
			
		||||
        'fs2go_fs2go_go_web_stage_407585454a4400355d4391691c67f361': 'ftnc37VKRJBmHfoGGi3kT05bHyeJzilEzhKJCyl3',
 | 
			
		||||
        'fs2go_fs2go_go_android_stage_44b714db6f8477f29afcba15a41e1d30': 'CtxpPvVpo6AbZGomYUhkKs7juHZwNml9b9J0J2gI',
 | 
			
		||||
        'anvato_cbslocal_app_web_prod_547f3e49241ef0e5d30c79b2efbca5d92c698f67': 'Pw0XX5KBDsyRnPS0R2JrSrXftsy8Jnz5pAjaYC8s',
 | 
			
		||||
        'anvato_cbslocal_app_web_stage_547a5f096594cd3e00620c6f825cad1096d28c80': '37OBUhX2uwNyKhhrNzSSNHSRPZpApC3trdqDBpuz',
 | 
			
		||||
        'fs2go_att_att_web_prod_1042dddd089a05438b6a08f972941176f699ffd8': 'JLcF20JwYvpv6uAGcLWIaV12jKwaL1R8us4b6Zkg',
 | 
			
		||||
        'fs2go_att_att_web_stage_807c5001955fc114a3331fe027ddc76e': 'gbu1oO1y0JiOFh4SUipt86P288JHpyjSqolrrT1x',
 | 
			
		||||
        'fs2go_fs2go_tudor_web_prod_a7dd8e5a7cdc830cae55eae6f3e9fee5ee49eb9b': 'ipcp87VCEZXPPe868j3orLqzc03oTy7DXsGkAXXH',
 | 
			
		||||
        'anvato_mhz_app_web_prod_b808218b30de7fdf60340cbd9831512bc1bf6d37': 'Stlm5Gs6BEhJLRTZHcNquyzxGqr23EuFmE5DCgjX',
 | 
			
		||||
        'fs2go_charter_charter_web_stage_c2c6e5a68375a1bf00fff213d3ff8f61a835a54c': 'Lz4hbJp1fwL6jlcz4M2PMzghM4jp4aAmybtT5dPc',
 | 
			
		||||
        'fs2go_charter_charter_web_prod_ebfe3b10f1af215a7321cd3d629e0b81dfa6fa8c': 'vUJsK345A1bVmyYDRhZX0lqFIgVXuqhmuyp1EtPK',
 | 
			
		||||
        'anvato_epfox_app_web_prod_b3373168e12f423f41504f207000188daf88251b': 'GDKq1ixvX3MoBNdU5IOYmYa2DTUXYOozPjrCJnW7',
 | 
			
		||||
        'anvato_epfox_app_web_stage_a3c2ce60f8f83ef374a88b68ee73a950f8ab87ce': '2jz2NH4BsXMaDsoJ5qkHMbcczAfIReo2eFYuVC1C',
 | 
			
		||||
        'fs2go_verizon_verizon_web_stage_08e6df0354a4803f1b1f2428b5a9a382e8dbcd62': 'rKTVapNaAcmnUbGL4ZcuOoY4SE7VmZSQsblPFr7e',
 | 
			
		||||
        'fs2go_verizon_verizon_web_prod_f909564cb606eff1f731b5e22e0928676732c445': 'qLSUuHerM3u9eNPzaHyUK52obai5MvE4XDJfqYe1',
 | 
			
		||||
        'fs2go_foxcom_synd_web_stage_f7b9091f00ea25a4fdaaae77fca5b54cdc7e7043': '96VKF2vLd24fFiDfwPFpzM5llFN4TiIGAlodE0Re',
 | 
			
		||||
        'fs2go_foxcom_synd_web_prod_0f2cdd64d87e4ab6a1d54aada0ff7a7c8387a064': 'agiPjbXEyEZUkbuhcnmVPhe9NNVbDjCFq2xkcx51',
 | 
			
		||||
        'anvato_own_app_web_stage_1214ade5d28422c4dae9d03c1243aba0563c4dba': 'mzhamNac3swG4WsJAiUTacnGIODi6SWeVWk5D7ho',
 | 
			
		||||
        'anvato_own_app_web_prod_944e162ed927ec3e9ed13eb68ed2f1008ee7565e': '9TSxh6G2TXOLBoYm9ro3LdNjjvnXpKb8UR8KoIP9',
 | 
			
		||||
        'anvato_scripps_app_ftv_prod_a10a10468edd5afb16fb48171c03b956176afad1': 'COJ2i2UIPK7xZqIWswxe7FaVBOVgRkP1F6O6qGoH',
 | 
			
		||||
        'anvato_scripps_app_ftv_stage_77d3ad2bdb021ec37ca2e35eb09acd396a974c9a': 'Q7nnopNLe2PPfGLOTYBqxSaRpl209IhqaEuDZi1F',
 | 
			
		||||
        'anvato_univision_app_web_stage_551236ef07a0e17718c3995c35586b5ed8cb5031': 'D92PoLS6UitwxDRA191HUGT9OYcOjV6mPMa5wNyo',
 | 
			
		||||
        'anvato_univision_app_web_prod_039a5c0a6009e637ae8ac906718a79911e0e65e1': '5mVS5u4SQjtw6NGw2uhMbKEIONIiLqRKck5RwQLR',
 | 
			
		||||
        'nbcu_cnbc_springfield_ios_prod_670207fae43d6e9a94c351688851a2ce': 'M7fqCCIP9lW53oJbHs19OlJlpDrVyc2OL8gNeuTa',
 | 
			
		||||
        'nbcu_cnbc_springfieldvod_ios_prod_7a5f04b1ceceb0e9c9e2264a44aa236e08e034c2': 'Yia6QbJahW0S7K1I0drksimhZb4UFq92xLBmmMvk',
 | 
			
		||||
        'anvato_cox_app_web_prod_ce45cda237969f93e7130f50ee8bb6280c1484ab': 'cc0miZexpFtdoqZGvdhfXsLy7FXjRAOgb9V0f5fZ',
 | 
			
		||||
        'anvato_cox_app_web_stage_c23dbe016a8e9d8c7101d10172b92434f6088bf9': 'yivU3MYHd2eDZcOfmLbINVtqxyecKTOp8OjOuoGJ',
 | 
			
		||||
        'anvato_chnzero_app_web_stage_b1164d1352b579e792e542fddf13ee34c0eeb46b': 'A76QkXMmVH8lTCfU15xva1mZnSVcqeY4Xb22Kp7m',
 | 
			
		||||
        'anvato_chnzero_app_web_prod_253d358928dc08ec161eda2389d53707288a730c': 'OA5QI3ZWZZkdtUEDqh28AH8GedsF6FqzJI32596b',
 | 
			
		||||
        'anvato_discovery_vodpoc_web_stage_9fa7077b5e8af1f8355f65d4fb8d2e0e9d54e2b7': 'q3oT191tTQ5g3JCP67PkjLASI9s16DuWZ6fYmry3',
 | 
			
		||||
        'anvato_discovery_vodpoc_web_prod_688614983167a1af6cdf6d76343fda10a65223c1': 'qRvRQCTVHd0VVOHsMvvfidyWmlYVrTbjby7WqIuK',
 | 
			
		||||
        'nbcu_cnbc_springfieldvod_ftv_stage_826040aad1925a46ac5dfb4b3c5143e648c6a30d': 'JQaSb5a8Tz0PT4ti329DNmzDO30TnngTHmvX8Vua',
 | 
			
		||||
        'nbcu_cnbc_springfield_ftv_stage_826040aad1925a46ac5dfb4b3c5143e648c6a30d': 'JQaSb5a8Tz0PT4ti329DNmzDO30TnngTHmvX8Vua',
 | 
			
		||||
        'nbcu_nbcd_capture_web_stage_4dd9d585bfb984ebf856dee35db027b2465cc4ae': '0j1Ov4Vopyi2HpBZJYdL2m8ERJVGYh3nNpzPiO8F',
 | 
			
		||||
        'nbcu_nbcd_watch3_android_prod_7712ca5fcf1c22f19ec1870a9650f9c37db22dcf': '3LN2UB3rPUAMu7ZriWkHky9vpLMXYha8JbSnxBlx',
 | 
			
		||||
        'nbcu_nbcd_watchvod3_android_prod_0910a3a4692d57c0b5ff4316075bc5d096be45b9': 'mJagcQ2II30vUOAauOXne7ERwbf5S9nlB3IP17lQ',
 | 
			
		||||
        'anvato_scripps_app_atv_prod_790deda22e16e71e83df58f880cd389908a45d52': 'CB6trI1mpoDIM5o54DNTsji90NDBQPZ4z4RqBNSH',
 | 
			
		||||
        'nbcu_nbcd_watchv4_android_prod_ff67cef9cb409158c6f8c3533edddadd0b750507': 'j8CHQCUWjlYERj4NFRmUYOND85QNbHViH09UwuKm',
 | 
			
		||||
        'nbcu_nbcd_watchvodv4_android_prod_a814d781609989dea6a629d50ae4c7ad8cc8e907': 'rkVnUXxdA9rawVLUlDQtMue9Y4Q7lFEaIotcUhjt',
 | 
			
		||||
        'rvVKpA50qlOPLFxMjrCGf5pdkdQDm7qn': '1J7ZkY5Qz5lMLi93QOH9IveE7EYB3rLl',
 | 
			
		||||
        'nbcu_dtv_local_web_prod_b266cf49defe255fd4426a97e27c09e513e9f82f': 'HuLnJDqzLa4saCzYMJ79zDRSQpEduw1TzjMNQu2b',
 | 
			
		||||
        'nbcu_att_local_web_prod_4cef038b2d969a6b7d700a56a599040b6a619f67': 'Q0Em5VDc2KpydUrVwzWRXAwoNBulWUxCq2faK0AV',
 | 
			
		||||
        'nbcu_dish_local_web_prod_c56dcaf2da2e9157a4266c82a78195f1dd570f6b': 'bC1LWmRz9ayj2AlzizeJ1HuhTfIaJGsDBnZNgoRg',
 | 
			
		||||
        'nbcu_verizon_local_web_prod_88bebd2ce006d4ed980de8133496f9a74cb9b3e1': 'wzhDKJZpgvUSS1EQvpCQP8Q59qVzcPixqDGJefSk',
 | 
			
		||||
        'nbcu_charter_local_web_prod_9ad90f7fc4023643bb718f0fe0fd5beea2382a50': 'PyNbxNhEWLzy1ZvWEQelRuIQY88Eub7xbSVRMdfT',
 | 
			
		||||
        'nbcu_suddenlink_local_web_prod_20fb711725cac224baa1c1cb0b1c324d25e97178': '0Rph41lPXZbb3fqeXtHjjbxfSrNbtZp1Ygq7Jypa',
 | 
			
		||||
        'nbcu_wow_local_web_prod_652d9ce4f552d9c2e7b5b1ed37b8cb48155174ad': 'qayIBZ70w1dItm2zS42AptXnxW15mkjRrwnBjMPv',
 | 
			
		||||
        'nbcu_centurylink_local_web_prod_2034402b029bf3e837ad46814d9e4b1d1345ccd5': 'StePcPMkjsX51PcizLdLRMzxMEl5k2FlsMLUNV4k',
 | 
			
		||||
        'nbcu_atlanticbrd_local_web_prod_8d5f5ecbf7f7b2f5e6d908dd75d90ae3565f682e': 'NtYLb4TFUS0pRs3XTkyO5sbVGYjVf17bVbjaGscI',
 | 
			
		||||
        'nbcu_nbcd_watchvod_web_dev_08bc05699be47c4f31d5080263a8cfadc16d0f7c': 'hwxi2dgDoSWgfmVVXOYZm14uuvku4QfopstXckhr',
 | 
			
		||||
        'anvato_nextmedia_app_web_prod_a4fa8c7204aa65e71044b57aaf63711980cfe5a0': 'tQN1oGPYY1nM85rJYePWGcIb92TG0gSqoVpQTWOw',
 | 
			
		||||
        'anvato_mcp_lin_web_prod_4c36fbfd4d8d8ecae6488656e21ac6d1ac972749': 'GUXNf5ZDX2jFUpu4WT2Go4DJ5nhUCzpnwDRRUx1K',
 | 
			
		||||
        'anvato_mcp_univision_web_prod_37fe34850c99a3b5cdb71dab10a417dd5cdecafa': 'bLDYF8JqfG42b7bwKEgQiU9E2LTIAtnKzSgYpFUH',
 | 
			
		||||
        'anvato_mcp_fs2go_web_prod_c7b90a93e171469cdca00a931211a2f556370d0a': 'icgGoYGipQMMSEvhplZX1pwbN69srwKYWksz3xWK',
 | 
			
		||||
        'anvato_mcp_sps_web_prod_54bdc90dd6ba21710e9f7074338365bba28da336': 'fA2iQdI7RDpynqzQYIpXALVS83NTPr8LLFK4LFsu',
 | 
			
		||||
        'anvato_mcp_anv_web_prod_791407490f4c1ef2a4bcb21103e0cb1bcb3352b3': 'rMOUZqe9lwcGq2mNgG3EDusm6lKgsUnczoOX3mbg',
 | 
			
		||||
        'anvato_mcp_gray_web_prod_4c10f067c393ed8fc453d3930f8ab2b159973900': 'rMOUZqe9lwcGq2mNgG3EDusm6lKgsUnczoOX3mbg',
 | 
			
		||||
        'anvato_mcp_hearst_web_prod_5356c3de0fc7c90a3727b4863ca7fec3a4524a99': 'P3uXJ0fXXditBPCGkfvlnVScpPEfKmc64Zv7ZgbK',
 | 
			
		||||
        'anvato_mcp_cbs_web_prod_02f26581ff80e5bda7aad28226a8d369037f2cbe': 'mGPvo5ZA5SgjOFAPEPXv7AnOpFUICX8hvFQVz69n',
 | 
			
		||||
        'anvato_mcp_telemundo_web_prod_c5278d51ad46fda4b6ca3d0ea44a7846a054f582': 'qyT6PXXLjVNCrHaRVj0ugAhalNRS7Ee9BP7LUokD',
 | 
			
		||||
        'nbcu_nbcd_watchvodv4_web_stage_4108362fba2d4ede21f262fea3c4162cbafd66c7': 'DhaU5lj0W2gEdcSSsnxURq8t7KIWtJfD966crVDk',
 | 
			
		||||
        'anvato_scripps_app_ios_prod_409c41960c60b308db43c3cc1da79cab9f1c3d93': 'WPxj5GraLTkYCyj3M7RozLqIycjrXOEcDGFMIJPn',
 | 
			
		||||
        'EZqvRyKBJLrgpClDPDF8I7Xpdp40Vx73': '4OxGd2dEakylntVKjKF0UK9PDPYB6A9W',
 | 
			
		||||
        'M2v78QkpleXm9hPp9jUXI63x5vA6BogR': 'ka6K32k7ZALmpINkjJUGUo0OE42Md1BQ',
 | 
			
		||||
        'nbcu_nbcd_desktop_web_prod_93d8ead38ce2024f8f544b78306fbd15895ae5e6_secure': 'NNemUkySjxLyPTKvZRiGntBIjEyK8uqicjMakIaQ'
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    _AUTH_KEY = b'\x31\xc2\x42\x84\x9e\x73\xa0\xce'
 | 
			
		||||
 | 
			
		||||
    def __init__(self, *args, **kwargs):
        """Initialize the extractor; the Anvato server time is fetched lazily."""
        super(AnvatoIE, self).__init__(*args, **kwargs)
        # Cache for _server_time(); populated on the first API request.
        self.__server_time = None
 | 
			
		||||
 | 
			
		||||
    def _server_time(self, access_key, video_id):
        """Return the Anvato API's clock as an integer timestamp.

        The value is downloaded once per extractor instance and memoized in
        self.__server_time; it is used to sign subsequent API requests.
        """
        if self.__server_time is None:
            time_url = self._api_prefix(access_key) + 'server_time?anvack=' + access_key
            self.__server_time = int(self._download_json(
                time_url, video_id,
                note='Fetching server time')['server_time'])
        return self.__server_time
 | 
			
		||||
 | 
			
		||||
    def _api_prefix(self, access_key):
        """Build the tkx2 REST base URL, inferring prod vs stage from the key name."""
        env = 'prod' if 'prod' in access_key else 'stage'
        return 'https://tkx2-%s.anvato.net/rest/v2/' % env
 | 
			
		||||
 | 
			
		||||
    def _get_video_json(self, access_key, video_id):
        """POST a signed request for the video's JSON metadata and return it.

        Mirrors et() in anvplayer.min.js (an alias of getVideoJSON()): the URL
        carries an AES-based auth token, and the JSON payload carries a random
        request id plus an MD5 signature over key/id/time/secret.
        """
        data_url = self._api_prefix(access_key) + 'mcp/video/%s?anvack=%s' % (video_id, access_key)
        server_time = self._server_time(access_key, video_id)

        # Token input: time ~ md5(url) ~ md5(time); only the first 64 chars
        # are AES-encrypted with the static key to form the URL auth blob.
        token_input = '%d~%s~%s' % (server_time, md5_text(data_url), md5_text(server_time))
        secret = intlist_to_bytes(aes_encrypt(
            bytes_to_intlist(token_input[:64]), bytes_to_intlist(self._AUTH_KEY)))
        data_url += '&X-Anvato-Adst-Auth=' + base64.b64encode(secret).decode('ascii')

        # 30-char pseudo-random request id, derived from the wall clock.
        request_id = md5_text(time.time() * 1000 * random.random())[:30]
        payload = {
            'api': {
                'anvrid': request_id,
                'anvstk': md5_text('%s|%s|%d|%s' % (
                    access_key, request_id, server_time, self._ANVACK_TABLE[access_key])),
                'anvts': server_time,
            },
        }

        return self._download_json(
            data_url, video_id, transform_source=strip_jsonp,
            data=json.dumps(payload).encode('utf-8'))
 | 
			
		||||
 | 
			
		||||
    def _extract_anvato_videos(self, webpage, video_id):
        """Extract formats and subtitles for the Anvato player embedded in *webpage*.

        Reads the player's data-anvp JSON attribute to obtain the real video id
        and access key, fetches the video JSON, and builds one format per
        published URL (SMIL/HLS/MP3/progressive).
        """
        player_data = self._parse_json(self._html_search_regex(
            r'<script[^>]+data-anvp=\'([^\']+)\'', webpage,
            'Anvato player data'), video_id)

        video_id = player_data['video']
        access_key = player_data['accessKey']

        video_data = self._get_video_json(access_key, video_id)

        formats = []
        for source in video_data['published_urls']:
            source_url = source['embed_url']
            ext = determine_ext(source_url)

            if ext == 'smil':
                formats.extend(self._extract_smil_formats(source_url, video_id))
                continue

            tbr = int_or_none(source.get('kbps'))
            fmt = {
                'url': source_url,
                'format_id': ('-'.join(filter(None, ['http', source.get('cdn_name')]))).lower(),
                # A bitrate of 0 is meaningless; treat it as unknown.
                'tbr': tbr if tbr != 0 else None,
            }

            if ext == 'm3u8':
                # Not using _extract_m3u8_formats here as individual media
                # playlists are also included in published_urls.
                if tbr is None:
                    # Bitrate-less entry: the master playlist.
                    formats.append(self._m3u8_meta_format(source_url, ext='mp4', m3u8_id='hls'))
                    continue
                fmt.update({
                    'format_id': '-'.join(filter(None, ['hls', compat_str(tbr)])),
                    'ext': 'mp4',
                })
            elif ext == 'mp3':
                fmt['vcodec'] = 'none'
            else:
                fmt.update({
                    'width': int_or_none(source.get('width')),
                    'height': int_or_none(source.get('height')),
                })
            formats.append(fmt)

        self._sort_formats(formats)

        subtitles = {}
        for caption in video_data.get('captions', []):
            sub = {
                'url': caption['url'],
                'ext': 'tt' if caption.get('format') == 'SMPTE-TT' else None
            }
            subtitles.setdefault(caption['language'], []).append(sub)

        return {
            'id': video_id,
            'formats': formats,
            'title': video_data.get('def_title'),
            'description': video_data.get('def_description'),
            'categories': video_data.get('categories'),
            'thumbnail': video_data.get('thumbnail'),
            'subtitles': subtitles,
        }
 | 
			
		||||
@@ -12,7 +12,7 @@ from ..utils import (
 | 
			
		||||
 | 
			
		||||
class AolIE(InfoExtractor):
 | 
			
		||||
    IE_NAME = 'on.aol.com'
 | 
			
		||||
    _VALID_URL = r'(?:aol-video:|https?://on\.aol\.com/.*-)(?P<id>[^/?-]+)'
 | 
			
		||||
    _VALID_URL = r'(?:aol-video:|https?://on\.aol\.com/(?:[^/]+/)*(?:[^/?#&]+-)?)(?P<id>[^/?#&]+)'
 | 
			
		||||
 | 
			
		||||
    _TESTS = [{
 | 
			
		||||
        # video with 5min ID
 | 
			
		||||
@@ -53,6 +53,12 @@ class AolIE(InfoExtractor):
 | 
			
		||||
    }, {
 | 
			
		||||
        'url': 'http://on.aol.com/shows/park-bench-shw518173474-559a1b9be4b0c3bfad3357a7?context=SH:SHW518173474:PL4327:1460619712763',
 | 
			
		||||
        'only_matching': True,
 | 
			
		||||
    }, {
 | 
			
		||||
        'url': 'http://on.aol.com/video/519442220',
 | 
			
		||||
        'only_matching': True,
 | 
			
		||||
    }, {
 | 
			
		||||
        'url': 'aol-video:5707d6b8e4b090497b04f706',
 | 
			
		||||
        'only_matching': True,
 | 
			
		||||
    }]
 | 
			
		||||
 | 
			
		||||
    def _real_extract(self, url):
 | 
			
		||||
 
 | 
			
		||||
@@ -61,10 +61,7 @@ class ArteTvIE(InfoExtractor):
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class ArteTVPlus7IE(InfoExtractor):
 | 
			
		||||
    IE_NAME = 'arte.tv:+7'
 | 
			
		||||
    _VALID_URL = r'https?://(?:www\.)?arte\.tv/guide/(?P<lang>fr|de|en|es)/(?:(?:sendungen|emissions|embed)/)?(?P<id>[^/]+)/(?P<name>[^/?#&]+)'
 | 
			
		||||
 | 
			
		||||
class ArteTVBaseIE(InfoExtractor):
 | 
			
		||||
    @classmethod
 | 
			
		||||
    def _extract_url_info(cls, url):
 | 
			
		||||
        mobj = re.match(cls._VALID_URL, url)
 | 
			
		||||
@@ -78,6 +75,122 @@ class ArteTVPlus7IE(InfoExtractor):
 | 
			
		||||
            video_id = mobj.group('id')
 | 
			
		||||
        return video_id, lang
 | 
			
		||||
 | 
			
		||||
    def _extract_from_json_url(self, json_url, video_id, lang, title=None):
        """Build an info dict from an arte.tv player JSON URL.

        Downloads the videoJsonPlayer object, derives title/upload date, and
        ranks each VSR format by how well its versionCode matches the
        requested language (see PREFERENCES below).

        Fixes vs previous revision: entries whose versionCode is missing no
        longer crash the regex matching (they get the lowest language
        preference, -1), and the preference pattern tuple is built once
        instead of once per format.
        """
        info = self._download_json(json_url, video_id)
        player_info = info['videoJsonPlayer']

        upload_date_str = player_info.get('shootingDate')
        if not upload_date_str:
            # Fall back to VRA/VDA; both look like "date time", keep the date part.
            upload_date_str = (player_info.get('VRA') or player_info.get('VDA') or '').split(' ')[0]

        title = (player_info.get('VTI') or title or player_info['VID']).strip()
        subtitle = player_info.get('VSU', '').strip()
        if subtitle:
            title += ' - %s' % subtitle

        info_dict = {
            'id': player_info['VID'],
            'title': title,
            'description': player_info.get('VDE'),
            'upload_date': unified_strdate(upload_date_str),
            'thumbnail': player_info.get('programImage') or player_info.get('VTU', {}).get('IUR'),
        }
        qfunc = qualities(['HQ', 'MQ', 'EQ', 'SQ'])

        LANGS = {
            'fr': 'F',
            'de': 'A',
            'en': 'E[ANG]',
            'es': 'E[ESP]',
        }

        langcode = LANGS.get(lang, lang)
        # The escaped language code and the derived patterns are loop-invariant,
        # so build them once instead of per format.
        l = re.escape(langcode)

        # Language preference from most to least priority
        # Reference: section 5.6.3 of
        # http://www.arte.tv/sites/en/corporate/files/complete-technical-guidelines-arte-geie-v1-05.pdf
        PREFERENCES = (
            # original version in requested language, without subtitles
            r'VO{0}$'.format(l),
            # original version in requested language, with partial subtitles in requested language
            r'VO{0}-ST{0}$'.format(l),
            # original version in requested language, with subtitles for the deaf and hard-of-hearing in requested language
            r'VO{0}-STM{0}$'.format(l),
            # non-original (dubbed) version in requested language, without subtitles
            r'V{0}$'.format(l),
            # non-original (dubbed) version in requested language, with subtitles partial subtitles in requested language
            r'V{0}-ST{0}$'.format(l),
            # non-original (dubbed) version in requested language, with subtitles for the deaf and hard-of-hearing in requested language
            r'V{0}-STM{0}$'.format(l),
            # original version in requested language, with partial subtitles in different language
            r'VO{0}-ST(?!{0}).+?$'.format(l),
            # original version in requested language, with subtitles for the deaf and hard-of-hearing in different language
            r'VO{0}-STM(?!{0}).+?$'.format(l),
            # original version in different language, with partial subtitles in requested language
            r'VO(?:(?!{0}).+?)?-ST{0}$'.format(l),
            # original version in different language, with subtitles for the deaf and hard-of-hearing in requested language
            r'VO(?:(?!{0}).+?)?-STM{0}$'.format(l),
            # original version in different language, without subtitles
            r'VO(?:(?!{0}))?$'.format(l),
            # original version in different language, with partial subtitles in different language
            r'VO(?:(?!{0}).+?)?-ST(?!{0}).+?$'.format(l),
            # original version in different language, with subtitles for the deaf and hard-of-hearing in different language
            r'VO(?:(?!{0}).+?)?-STM(?!{0}).+?$'.format(l),
        )

        formats = []
        for format_id, format_dict in player_info['VSR'].items():
            f = dict(format_dict)
            versionCode = f.get('versionCode')

            # Guard: re.match(pattern, None) raises TypeError, so entries
            # without a versionCode fall straight to the lowest preference.
            lang_pref = -1
            if versionCode is not None:
                for pref, p in enumerate(PREFERENCES):
                    if re.match(p, versionCode):
                        lang_pref = len(PREFERENCES) - pref
                        break

            format = {
                'format_id': format_id,
                # HLS variants are de-prioritized in favor of direct URLs.
                'preference': -10 if f.get('videoFormat') == 'M3U8' else None,
                'language_preference': lang_pref,
                'format_note': '%s, %s' % (f.get('versionCode'), f.get('versionLibelle')),
                'width': int_or_none(f.get('width')),
                'height': int_or_none(f.get('height')),
                'tbr': int_or_none(f.get('bitrate')),
                'quality': qfunc(f.get('quality')),
            }

            if f.get('mediaType') == 'rtmp':
                format['url'] = f['streamer']
                format['play_path'] = 'mp4:' + f['url']
                format['ext'] = 'flv'
            else:
                format['url'] = f['url']

            formats.append(format)

        self._check_formats(formats, video_id)
        self._sort_formats(formats)

        info_dict['formats'] = formats
        return info_dict
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class ArteTVPlus7IE(ArteTVBaseIE):
 | 
			
		||||
    IE_NAME = 'arte.tv:+7'
 | 
			
		||||
    _VALID_URL = r'https?://(?:www\.)?arte\.tv/guide/(?P<lang>fr|de|en|es)/(?:(?:sendungen|emissions|embed)/)?(?P<id>[^/]+)/(?P<name>[^/?#&]+)'
 | 
			
		||||
 | 
			
		||||
    _TESTS = [{
 | 
			
		||||
        'url': 'http://www.arte.tv/guide/de/sendungen/XEN/xenius/?vid=055918-015_PLUS7-D',
 | 
			
		||||
        'only_matching': True,
 | 
			
		||||
    }]
 | 
			
		||||
 | 
			
		||||
    @classmethod
 | 
			
		||||
    def suitable(cls, url):
 | 
			
		||||
        return False if ArteTVPlaylistIE.suitable(url) else super(ArteTVPlus7IE, cls).suitable(url)
 | 
			
		||||
 | 
			
		||||
    def _real_extract(self, url):
 | 
			
		||||
        video_id, lang = self._extract_url_info(url)
 | 
			
		||||
        webpage = self._download_webpage(url, video_id)
 | 
			
		||||
@@ -132,80 +245,6 @@ class ArteTVPlus7IE(InfoExtractor):
 | 
			
		||||
            webpage, 'embed url', group='url')
 | 
			
		||||
        return self.url_result(embed_url)
 | 
			
		||||
 | 
			
		||||
    def _extract_from_json_url(self, json_url, video_id, lang, title=None):
 | 
			
		||||
        info = self._download_json(json_url, video_id)
 | 
			
		||||
        player_info = info['videoJsonPlayer']
 | 
			
		||||
 | 
			
		||||
        upload_date_str = player_info.get('shootingDate')
 | 
			
		||||
        if not upload_date_str:
 | 
			
		||||
            upload_date_str = (player_info.get('VRA') or player_info.get('VDA') or '').split(' ')[0]
 | 
			
		||||
 | 
			
		||||
        title = (player_info.get('VTI') or title or player_info['VID']).strip()
 | 
			
		||||
        subtitle = player_info.get('VSU', '').strip()
 | 
			
		||||
        if subtitle:
 | 
			
		||||
            title += ' - %s' % subtitle
 | 
			
		||||
 | 
			
		||||
        info_dict = {
 | 
			
		||||
            'id': player_info['VID'],
 | 
			
		||||
            'title': title,
 | 
			
		||||
            'description': player_info.get('VDE'),
 | 
			
		||||
            'upload_date': unified_strdate(upload_date_str),
 | 
			
		||||
            'thumbnail': player_info.get('programImage') or player_info.get('VTU', {}).get('IUR'),
 | 
			
		||||
        }
 | 
			
		||||
        qfunc = qualities(['HQ', 'MQ', 'EQ', 'SQ'])
 | 
			
		||||
 | 
			
		||||
        LANGS = {
 | 
			
		||||
            'fr': 'F',
 | 
			
		||||
            'de': 'A',
 | 
			
		||||
            'en': 'E[ANG]',
 | 
			
		||||
            'es': 'E[ESP]',
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
        formats = []
 | 
			
		||||
        for format_id, format_dict in player_info['VSR'].items():
 | 
			
		||||
            f = dict(format_dict)
 | 
			
		||||
            versionCode = f.get('versionCode')
 | 
			
		||||
            langcode = LANGS.get(lang, lang)
 | 
			
		||||
            lang_rexs = [r'VO?%s-' % re.escape(langcode), r'VO?.-ST%s$' % re.escape(langcode)]
 | 
			
		||||
            lang_pref = None
 | 
			
		||||
            if versionCode:
 | 
			
		||||
                matched_lang_rexs = [r for r in lang_rexs if re.match(r, versionCode)]
 | 
			
		||||
                lang_pref = -10 if not matched_lang_rexs else 10 * len(matched_lang_rexs)
 | 
			
		||||
            source_pref = 0
 | 
			
		||||
            if versionCode is not None:
 | 
			
		||||
                # The original version with subtitles has lower relevance
 | 
			
		||||
                if re.match(r'VO-ST(F|A|E)', versionCode):
 | 
			
		||||
                    source_pref -= 10
 | 
			
		||||
                # The version with sourds/mal subtitles has also lower relevance
 | 
			
		||||
                elif re.match(r'VO?(F|A|E)-STM\1', versionCode):
 | 
			
		||||
                    source_pref -= 9
 | 
			
		||||
            format = {
 | 
			
		||||
                'format_id': format_id,
 | 
			
		||||
                'preference': -10 if f.get('videoFormat') == 'M3U8' else None,
 | 
			
		||||
                'language_preference': lang_pref,
 | 
			
		||||
                'format_note': '%s, %s' % (f.get('versionCode'), f.get('versionLibelle')),
 | 
			
		||||
                'width': int_or_none(f.get('width')),
 | 
			
		||||
                'height': int_or_none(f.get('height')),
 | 
			
		||||
                'tbr': int_or_none(f.get('bitrate')),
 | 
			
		||||
                'quality': qfunc(f.get('quality')),
 | 
			
		||||
                'source_preference': source_pref,
 | 
			
		||||
            }
 | 
			
		||||
 | 
			
		||||
            if f.get('mediaType') == 'rtmp':
 | 
			
		||||
                format['url'] = f['streamer']
 | 
			
		||||
                format['play_path'] = 'mp4:' + f['url']
 | 
			
		||||
                format['ext'] = 'flv'
 | 
			
		||||
            else:
 | 
			
		||||
                format['url'] = f['url']
 | 
			
		||||
 | 
			
		||||
            formats.append(format)
 | 
			
		||||
 | 
			
		||||
        self._check_formats(formats, video_id)
 | 
			
		||||
        self._sort_formats(formats)
 | 
			
		||||
 | 
			
		||||
        info_dict['formats'] = formats
 | 
			
		||||
        return info_dict
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
# It also uses the arte_vp_url url from the webpage to extract the information
 | 
			
		||||
class ArteTVCreativeIE(ArteTVPlus7IE):
 | 
			
		||||
@@ -239,7 +278,7 @@ class ArteTVInfoIE(ArteTVPlus7IE):
 | 
			
		||||
    IE_NAME = 'arte.tv:info'
 | 
			
		||||
    _VALID_URL = r'https?://info\.arte\.tv/(?P<lang>fr|de|en|es)/(?:[^/]+/)*(?P<id>[^/?#&]+)'
 | 
			
		||||
 | 
			
		||||
    _TEST = {
 | 
			
		||||
    _TESTS = [{
 | 
			
		||||
        'url': 'http://info.arte.tv/fr/service-civique-un-cache-misere',
 | 
			
		||||
        'info_dict': {
 | 
			
		||||
            'id': '067528-000-A',
 | 
			
		||||
@@ -247,7 +286,7 @@ class ArteTVInfoIE(ArteTVPlus7IE):
 | 
			
		||||
            'title': 'Service civique, un cache misère ?',
 | 
			
		||||
            'upload_date': '20160403',
 | 
			
		||||
        },
 | 
			
		||||
    }
 | 
			
		||||
    }]
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class ArteTVFutureIE(ArteTVPlus7IE):
 | 
			
		||||
@@ -272,6 +311,8 @@ class ArteTVDDCIE(ArteTVPlus7IE):
 | 
			
		||||
    IE_NAME = 'arte.tv:ddc'
 | 
			
		||||
    _VALID_URL = r'https?://ddc\.arte\.tv/(?P<lang>emission|folge)/(?P<id>[^/?#&]+)'
 | 
			
		||||
 | 
			
		||||
    _TESTS = []
 | 
			
		||||
 | 
			
		||||
    def _real_extract(self, url):
 | 
			
		||||
        video_id, lang = self._extract_url_info(url)
 | 
			
		||||
        if lang == 'folge':
 | 
			
		||||
@@ -290,7 +331,7 @@ class ArteTVConcertIE(ArteTVPlus7IE):
 | 
			
		||||
    IE_NAME = 'arte.tv:concert'
 | 
			
		||||
    _VALID_URL = r'https?://concert\.arte\.tv/(?P<lang>fr|de|en|es)/(?P<id>[^/?#&]+)'
 | 
			
		||||
 | 
			
		||||
    _TEST = {
 | 
			
		||||
    _TESTS = [{
 | 
			
		||||
        'url': 'http://concert.arte.tv/de/notwist-im-pariser-konzertclub-divan-du-monde',
 | 
			
		||||
        'md5': '9ea035b7bd69696b67aa2ccaaa218161',
 | 
			
		||||
        'info_dict': {
 | 
			
		||||
@@ -300,14 +341,14 @@ class ArteTVConcertIE(ArteTVPlus7IE):
 | 
			
		||||
            'upload_date': '20140128',
 | 
			
		||||
            'description': 'md5:486eb08f991552ade77439fe6d82c305',
 | 
			
		||||
        },
 | 
			
		||||
    }
 | 
			
		||||
    }]
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class ArteTVCinemaIE(ArteTVPlus7IE):
 | 
			
		||||
    IE_NAME = 'arte.tv:cinema'
 | 
			
		||||
    _VALID_URL = r'https?://cinema\.arte\.tv/(?P<lang>fr|de|en|es)/(?P<id>.+)'
 | 
			
		||||
 | 
			
		||||
    _TEST = {
 | 
			
		||||
    _TESTS = [{
 | 
			
		||||
        'url': 'http://cinema.arte.tv/de/node/38291',
 | 
			
		||||
        'md5': '6b275511a5107c60bacbeeda368c3aa1',
 | 
			
		||||
        'info_dict': {
 | 
			
		||||
@@ -317,7 +358,7 @@ class ArteTVCinemaIE(ArteTVPlus7IE):
 | 
			
		||||
            'upload_date': '20160122',
 | 
			
		||||
            'description': 'md5:7f749bbb77d800ef2be11d54529b96bc',
 | 
			
		||||
        },
 | 
			
		||||
    }
 | 
			
		||||
    }]
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class ArteTVMagazineIE(ArteTVPlus7IE):
 | 
			
		||||
@@ -362,9 +403,41 @@ class ArteTVEmbedIE(ArteTVPlus7IE):
 | 
			
		||||
        )
 | 
			
		||||
    '''
 | 
			
		||||
 | 
			
		||||
    _TESTS = []
 | 
			
		||||
 | 
			
		||||
    def _real_extract(self, url):
 | 
			
		||||
        mobj = re.match(self._VALID_URL, url)
 | 
			
		||||
        video_id = mobj.group('id')
 | 
			
		||||
        lang = mobj.group('lang')
 | 
			
		||||
        json_url = mobj.group('json_url')
 | 
			
		||||
        return self._extract_from_json_url(json_url, video_id, lang)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class ArteTVPlaylistIE(ArteTVBaseIE):
 | 
			
		||||
    IE_NAME = 'arte.tv:playlist'
 | 
			
		||||
    _VALID_URL = r'https?://(?:www\.)?arte\.tv/guide/(?P<lang>fr|de|en|es)/[^#]*#collection/(?P<id>PL-\d+)'
 | 
			
		||||
 | 
			
		||||
    _TESTS = [{
 | 
			
		||||
        'url': 'http://www.arte.tv/guide/de/plus7/?country=DE#collection/PL-013263/ARTETV',
 | 
			
		||||
        'info_dict': {
 | 
			
		||||
            'id': 'PL-013263',
 | 
			
		||||
            'title': 'Areva & Uramin',
 | 
			
		||||
        },
 | 
			
		||||
        'playlist_mincount': 6,
 | 
			
		||||
    }, {
 | 
			
		||||
        'url': 'http://www.arte.tv/guide/de/playlists?country=DE#collection/PL-013190/ARTETV',
 | 
			
		||||
        'only_matching': True,
 | 
			
		||||
    }]
 | 
			
		||||
 | 
			
		||||
    def _real_extract(self, url):
 | 
			
		||||
        playlist_id, lang = self._extract_url_info(url)
 | 
			
		||||
        collection = self._download_json(
 | 
			
		||||
            'https://api.arte.tv/api/player/v1/collectionData/%s/%s?source=videos'
 | 
			
		||||
            % (lang, playlist_id), playlist_id)
 | 
			
		||||
        title = collection.get('title')
 | 
			
		||||
        description = collection.get('shortDescription') or collection.get('teaserText')
 | 
			
		||||
        entries = [
 | 
			
		||||
            self._extract_from_json_url(
 | 
			
		||||
                video['jsonUrl'], video.get('programId') or playlist_id, lang)
 | 
			
		||||
            for video in collection['videos'] if video.get('jsonUrl')]
 | 
			
		||||
        return self.playlist_result(entries, playlist_id, title, description)
 | 
			
		||||
 
 | 
			
		||||
@@ -6,6 +6,7 @@ import time
 | 
			
		||||
 | 
			
		||||
from .common import InfoExtractor
 | 
			
		||||
from .soundcloud import SoundcloudIE
 | 
			
		||||
from ..compat import compat_str
 | 
			
		||||
from ..utils import (
 | 
			
		||||
    ExtractorError,
 | 
			
		||||
    url_basename,
 | 
			
		||||
@@ -136,7 +137,7 @@ class AudiomackAlbumIE(InfoExtractor):
 | 
			
		||||
                        result[resultkey] = api_response[apikey]
 | 
			
		||||
                song_id = url_basename(api_response['url']).rpartition('.')[0]
 | 
			
		||||
                result['entries'].append({
 | 
			
		||||
                    'id': api_response.get('id', song_id),
 | 
			
		||||
                    'id': compat_str(api_response.get('id', song_id)),
 | 
			
		||||
                    'uploader': api_response.get('artist'),
 | 
			
		||||
                    'title': api_response.get('title', song_id),
 | 
			
		||||
                    'url': api_response['url'],
 | 
			
		||||
 
 | 
			
		||||
@@ -29,7 +29,7 @@ class BandcampIE(InfoExtractor):
 | 
			
		||||
        '_skip': 'There is a limit of 200 free downloads / month for the test song'
 | 
			
		||||
    }, {
 | 
			
		||||
        'url': 'http://benprunty.bandcamp.com/track/lanius-battle',
 | 
			
		||||
        'md5': '2b68e5851514c20efdff2afc5603b8b4',
 | 
			
		||||
        'md5': '73d0b3171568232574e45652f8720b5c',
 | 
			
		||||
        'info_dict': {
 | 
			
		||||
            'id': '2650410135',
 | 
			
		||||
            'ext': 'mp3',
 | 
			
		||||
@@ -48,6 +48,10 @@ class BandcampIE(InfoExtractor):
 | 
			
		||||
            if m_trackinfo:
 | 
			
		||||
                json_code = m_trackinfo.group(1)
 | 
			
		||||
                data = json.loads(json_code)[0]
 | 
			
		||||
                track_id = compat_str(data['id'])
 | 
			
		||||
 | 
			
		||||
                if not data.get('file'):
 | 
			
		||||
                    raise ExtractorError('Not streamable', video_id=track_id, expected=True)
 | 
			
		||||
 | 
			
		||||
                formats = []
 | 
			
		||||
                for format_id, format_url in data['file'].items():
 | 
			
		||||
@@ -64,7 +68,7 @@ class BandcampIE(InfoExtractor):
 | 
			
		||||
                self._sort_formats(formats)
 | 
			
		||||
 | 
			
		||||
                return {
 | 
			
		||||
                    'id': compat_str(data['id']),
 | 
			
		||||
                    'id': track_id,
 | 
			
		||||
                    'title': data['title'],
 | 
			
		||||
                    'formats': formats,
 | 
			
		||||
                    'duration': float_or_none(data.get('duration')),
 | 
			
		||||
 
 | 
			
		||||
@@ -1,34 +1,42 @@
 | 
			
		||||
# coding: utf-8
 | 
			
		||||
from __future__ import unicode_literals
 | 
			
		||||
 | 
			
		||||
import calendar
 | 
			
		||||
import datetime
 | 
			
		||||
import re
 | 
			
		||||
 | 
			
		||||
from .common import InfoExtractor
 | 
			
		||||
from ..compat import compat_str
 | 
			
		||||
from ..compat import (
 | 
			
		||||
    compat_etree_fromstring,
 | 
			
		||||
    compat_str,
 | 
			
		||||
    compat_parse_qs,
 | 
			
		||||
    compat_xml_parse_error,
 | 
			
		||||
)
 | 
			
		||||
from ..utils import (
 | 
			
		||||
    int_or_none,
 | 
			
		||||
    unescapeHTML,
 | 
			
		||||
    ExtractorError,
 | 
			
		||||
    int_or_none,
 | 
			
		||||
    float_or_none,
 | 
			
		||||
    xpath_text,
 | 
			
		||||
)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class BiliBiliIE(InfoExtractor):
 | 
			
		||||
    _VALID_URL = r'https?://www\.bilibili\.(?:tv|com)/video/av(?P<id>\d+)(?:/index_(?P<page_num>\d+).html)?'
 | 
			
		||||
    _VALID_URL = r'https?://www\.bilibili\.(?:tv|com)/video/av(?P<id>\d+)'
 | 
			
		||||
 | 
			
		||||
    _TESTS = [{
 | 
			
		||||
        'url': 'http://www.bilibili.tv/video/av1074402/',
 | 
			
		||||
        'md5': '2c301e4dab317596e837c3e7633e7d86',
 | 
			
		||||
        'md5': '5f7d29e1a2872f3df0cf76b1f87d3788',
 | 
			
		||||
        'info_dict': {
 | 
			
		||||
            'id': '1554319',
 | 
			
		||||
            'ext': 'flv',
 | 
			
		||||
            'title': '【金坷垃】金泡沫',
 | 
			
		||||
            'duration': 308313,
 | 
			
		||||
            'description': 'md5:ce18c2a2d2193f0df2917d270f2e5923',
 | 
			
		||||
            'duration': 308.067,
 | 
			
		||||
            'timestamp': 1398012660,
 | 
			
		||||
            'upload_date': '20140420',
 | 
			
		||||
            'thumbnail': 're:^https?://.+\.jpg',
 | 
			
		||||
            'description': 'md5:ce18c2a2d2193f0df2917d270f2e5923',
 | 
			
		||||
            'timestamp': 1397983878,
 | 
			
		||||
            'uploader': '菊子桑',
 | 
			
		||||
            'uploader_id': '156160',
 | 
			
		||||
        },
 | 
			
		||||
    }, {
 | 
			
		||||
        'url': 'http://www.bilibili.com/video/av1041170/',
 | 
			
		||||
@@ -36,75 +44,186 @@ class BiliBiliIE(InfoExtractor):
 | 
			
		||||
            'id': '1041170',
 | 
			
		||||
            'title': '【BD1080P】刀语【诸神&异域】',
 | 
			
		||||
            'description': '这是个神奇的故事~每个人不留弹幕不给走哦~切利哦!~',
 | 
			
		||||
            'uploader': '枫叶逝去',
 | 
			
		||||
            'timestamp': 1396501299,
 | 
			
		||||
        },
 | 
			
		||||
        'playlist_count': 9,
 | 
			
		||||
    }, {
 | 
			
		||||
        'url': 'http://www.bilibili.com/video/av4808130/',
 | 
			
		||||
        'info_dict': {
 | 
			
		||||
            'id': '4808130',
 | 
			
		||||
            'title': '【长篇】哆啦A梦443【钉铛】',
 | 
			
		||||
            'description': '(2016.05.27)来组合客人的脸吧&amp;寻母六千里锭 抱歉,又轮到周日上班现在才到家 封面www.pixiv.net/member_illust.php?mode=medium&amp;illust_id=56912929',
 | 
			
		||||
        },
 | 
			
		||||
        'playlist': [{
 | 
			
		||||
            'md5': '55cdadedf3254caaa0d5d27cf20a8f9c',
 | 
			
		||||
            'info_dict': {
 | 
			
		||||
                'id': '4808130_part1',
 | 
			
		||||
                'ext': 'flv',
 | 
			
		||||
                'title': '【长篇】哆啦A梦443【钉铛】',
 | 
			
		||||
                'description': '(2016.05.27)来组合客人的脸吧&amp;寻母六千里锭 抱歉,又轮到周日上班现在才到家 封面www.pixiv.net/member_illust.php?mode=medium&amp;illust_id=56912929',
 | 
			
		||||
                'timestamp': 1464564180,
 | 
			
		||||
                'upload_date': '20160529',
 | 
			
		||||
                'uploader': '喜欢拉面',
 | 
			
		||||
                'uploader_id': '151066',
 | 
			
		||||
            },
 | 
			
		||||
        }, {
 | 
			
		||||
            'md5': '926f9f67d0c482091872fbd8eca7ea3d',
 | 
			
		||||
            'info_dict': {
 | 
			
		||||
                'id': '4808130_part2',
 | 
			
		||||
                'ext': 'flv',
 | 
			
		||||
                'title': '【长篇】哆啦A梦443【钉铛】',
 | 
			
		||||
                'description': '(2016.05.27)来组合客人的脸吧&amp;寻母六千里锭 抱歉,又轮到周日上班现在才到家 封面www.pixiv.net/member_illust.php?mode=medium&amp;illust_id=56912929',
 | 
			
		||||
                'timestamp': 1464564180,
 | 
			
		||||
                'upload_date': '20160529',
 | 
			
		||||
                'uploader': '喜欢拉面',
 | 
			
		||||
                'uploader_id': '151066',
 | 
			
		||||
            },
 | 
			
		||||
        }, {
 | 
			
		||||
            'md5': '4b7b225b968402d7c32348c646f1fd83',
 | 
			
		||||
            'info_dict': {
 | 
			
		||||
                'id': '4808130_part3',
 | 
			
		||||
                'ext': 'flv',
 | 
			
		||||
                'title': '【长篇】哆啦A梦443【钉铛】',
 | 
			
		||||
                'description': '(2016.05.27)来组合客人的脸吧&amp;寻母六千里锭 抱歉,又轮到周日上班现在才到家 封面www.pixiv.net/member_illust.php?mode=medium&amp;illust_id=56912929',
 | 
			
		||||
                'timestamp': 1464564180,
 | 
			
		||||
                'upload_date': '20160529',
 | 
			
		||||
                'uploader': '喜欢拉面',
 | 
			
		||||
                'uploader_id': '151066',
 | 
			
		||||
            },
 | 
			
		||||
        }, {
 | 
			
		||||
            'md5': '7b795e214166501e9141139eea236e91',
 | 
			
		||||
            'info_dict': {
 | 
			
		||||
                'id': '4808130_part4',
 | 
			
		||||
                'ext': 'flv',
 | 
			
		||||
                'title': '【长篇】哆啦A梦443【钉铛】',
 | 
			
		||||
                'description': '(2016.05.27)来组合客人的脸吧&amp;寻母六千里锭 抱歉,又轮到周日上班现在才到家 封面www.pixiv.net/member_illust.php?mode=medium&amp;illust_id=56912929',
 | 
			
		||||
                'timestamp': 1464564180,
 | 
			
		||||
                'upload_date': '20160529',
 | 
			
		||||
                'uploader': '喜欢拉面',
 | 
			
		||||
                'uploader_id': '151066',
 | 
			
		||||
            },
 | 
			
		||||
        }],
 | 
			
		||||
    }, {
 | 
			
		||||
        # Missing upload time
 | 
			
		||||
        'url': 'http://www.bilibili.com/video/av1867637/',
 | 
			
		||||
        'info_dict': {
 | 
			
		||||
            'id': '2880301',
 | 
			
		||||
            'ext': 'flv',
 | 
			
		||||
            'title': '【HDTV】【喜剧】岳父岳母真难当 (2014)【法国票房冠军】',
 | 
			
		||||
            'description': '一个信奉天主教的法国旧式传统资产阶级家庭中有四个女儿。三个女儿却分别找了阿拉伯、犹太、中国丈夫,老夫老妻唯独期盼剩下未嫁的小女儿能找一个信奉天主教的法国白人,结果没想到小女儿找了一位非裔黑人……【这次应该不会跳帧了】',
 | 
			
		||||
            'uploader': '黑夜为猫',
 | 
			
		||||
            'uploader_id': '610729',
 | 
			
		||||
        },
 | 
			
		||||
        'params': {
 | 
			
		||||
            # Just to test metadata extraction
 | 
			
		||||
            'skip_download': True,
 | 
			
		||||
        },
 | 
			
		||||
        'expected_warnings': ['upload time'],
 | 
			
		||||
    }]
 | 
			
		||||
 | 
			
		||||
    # BiliBili blocks keys from time to time. The current key is extracted from
 | 
			
		||||
    # the Android client
 | 
			
		||||
    # TODO: find the sign algorithm used in the flash player
 | 
			
		||||
    _APP_KEY = '86385cdc024c0f6c'
 | 
			
		||||
 | 
			
		||||
    def _real_extract(self, url):
 | 
			
		||||
        mobj = re.match(self._VALID_URL, url)
 | 
			
		||||
        video_id = mobj.group('id')
 | 
			
		||||
        page_num = mobj.group('page_num') or '1'
 | 
			
		||||
 | 
			
		||||
        view_data = self._download_json(
 | 
			
		||||
            'http://api.bilibili.com/view?type=json&appkey=8e9fc618fbd41e28&id=%s&page=%s' % (video_id, page_num),
 | 
			
		||||
            video_id)
 | 
			
		||||
        if 'error' in view_data:
 | 
			
		||||
            raise ExtractorError('%s said: %s' % (self.IE_NAME, view_data['error']), expected=True)
 | 
			
		||||
        webpage = self._download_webpage(url, video_id)
 | 
			
		||||
 | 
			
		||||
        cid = view_data['cid']
 | 
			
		||||
        title = unescapeHTML(view_data['title'])
 | 
			
		||||
        params = compat_parse_qs(self._search_regex(
 | 
			
		||||
            [r'EmbedPlayer\([^)]+,\s*"([^"]+)"\)',
 | 
			
		||||
             r'<iframe[^>]+src="https://secure\.bilibili\.com/secure,([^"]+)"'],
 | 
			
		||||
            webpage, 'player parameters'))
 | 
			
		||||
        cid = params['cid'][0]
 | 
			
		||||
 | 
			
		||||
        doc = self._download_xml(
 | 
			
		||||
            'http://interface.bilibili.com/v_cdn_play?appkey=8e9fc618fbd41e28&cid=%s' % cid,
 | 
			
		||||
            cid,
 | 
			
		||||
            'Downloading page %s/%s' % (page_num, view_data['pages'])
 | 
			
		||||
        )
 | 
			
		||||
        info_xml_str = self._download_webpage(
 | 
			
		||||
            'http://interface.bilibili.com/v_cdn_play',
 | 
			
		||||
            cid, query={'appkey': self._APP_KEY, 'cid': cid},
 | 
			
		||||
            note='Downloading video info page')
 | 
			
		||||
 | 
			
		||||
        if xpath_text(doc, './result') == 'error':
 | 
			
		||||
            raise ExtractorError('%s said: %s' % (self.IE_NAME, xpath_text(doc, './message')), expected=True)
 | 
			
		||||
        err_msg = None
 | 
			
		||||
        durls = None
 | 
			
		||||
        info_xml = None
 | 
			
		||||
        try:
 | 
			
		||||
            info_xml = compat_etree_fromstring(info_xml_str.encode('utf-8'))
 | 
			
		||||
        except compat_xml_parse_error:
 | 
			
		||||
            info_json = self._parse_json(info_xml_str, video_id, fatal=False)
 | 
			
		||||
            err_msg = (info_json or {}).get('error_text')
 | 
			
		||||
        else:
 | 
			
		||||
            err_msg = xpath_text(info_xml, './message')
 | 
			
		||||
 | 
			
		||||
        if info_xml is not None:
 | 
			
		||||
            durls = info_xml.findall('./durl')
 | 
			
		||||
        if not durls:
 | 
			
		||||
            if err_msg:
 | 
			
		||||
                raise ExtractorError('%s said: %s' % (self.IE_NAME, err_msg), expected=True)
 | 
			
		||||
            else:
 | 
			
		||||
                raise ExtractorError('No videos found!')
 | 
			
		||||
 | 
			
		||||
        entries = []
 | 
			
		||||
 | 
			
		||||
        for durl in doc.findall('./durl'):
 | 
			
		||||
        for durl in durls:
 | 
			
		||||
            size = xpath_text(durl, ['./filesize', './size'])
 | 
			
		||||
            formats = [{
 | 
			
		||||
                'url': durl.find('./url').text,
 | 
			
		||||
                'filesize': int_or_none(size),
 | 
			
		||||
                'ext': 'flv',
 | 
			
		||||
            }]
 | 
			
		||||
            backup_urls = durl.find('./backup_url')
 | 
			
		||||
            if backup_urls is not None:
 | 
			
		||||
                for backup_url in backup_urls.findall('./url'):
 | 
			
		||||
                    formats.append({'url': backup_url.text})
 | 
			
		||||
            formats.reverse()
 | 
			
		||||
            for backup_url in durl.findall('./backup_url/url'):
 | 
			
		||||
                formats.append({
 | 
			
		||||
                    'url': backup_url.text,
 | 
			
		||||
                    # backup URLs have lower priorities
 | 
			
		||||
                    'preference': -2 if 'hd.mp4' in backup_url.text else -3,
 | 
			
		||||
                })
 | 
			
		||||
 | 
			
		||||
            self._sort_formats(formats)
 | 
			
		||||
 | 
			
		||||
            entries.append({
 | 
			
		||||
                'id': '%s_part%s' % (cid, xpath_text(durl, './order')),
 | 
			
		||||
                'title': title,
 | 
			
		||||
                'duration': int_or_none(xpath_text(durl, './length'), 1000),
 | 
			
		||||
                'formats': formats,
 | 
			
		||||
            })
 | 
			
		||||
 | 
			
		||||
        title = self._html_search_regex('<h1[^>]+title="([^"]+)">', webpage, 'title')
 | 
			
		||||
        description = self._html_search_meta('description', webpage)
 | 
			
		||||
        datetime_str = self._html_search_regex(
 | 
			
		||||
            r'<time[^>]+datetime="([^"]+)"', webpage, 'upload time', fatal=False)
 | 
			
		||||
        timestamp = None
 | 
			
		||||
        if datetime_str:
 | 
			
		||||
            timestamp = calendar.timegm(datetime.datetime.strptime(datetime_str, '%Y-%m-%dT%H:%M').timetuple())
 | 
			
		||||
 | 
			
		||||
        # TODO 'view_count' requires deobfuscating Javascript
 | 
			
		||||
        info = {
 | 
			
		||||
            'id': compat_str(cid),
 | 
			
		||||
            'title': title,
 | 
			
		||||
            'description': view_data.get('description'),
 | 
			
		||||
            'thumbnail': view_data.get('pic'),
 | 
			
		||||
            'uploader': view_data.get('author'),
 | 
			
		||||
            'timestamp': int_or_none(view_data.get('created')),
 | 
			
		||||
            'view_count': int_or_none(view_data.get('play')),
 | 
			
		||||
            'duration': int_or_none(xpath_text(doc, './timelength')),
 | 
			
		||||
            'description': description,
 | 
			
		||||
            'timestamp': timestamp,
 | 
			
		||||
            'thumbnail': self._html_search_meta('thumbnailUrl', webpage),
 | 
			
		||||
            'duration': float_or_none(xpath_text(info_xml, './timelength'), scale=1000),
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
        uploader_mobj = re.search(
 | 
			
		||||
            r'<a[^>]+href="https?://space\.bilibili\.com/(?P<id>\d+)"[^>]+title="(?P<name>[^"]+)"',
 | 
			
		||||
            webpage)
 | 
			
		||||
        if uploader_mobj:
 | 
			
		||||
            info.update({
 | 
			
		||||
                'uploader': uploader_mobj.group('name'),
 | 
			
		||||
                'uploader_id': uploader_mobj.group('id'),
 | 
			
		||||
            })
 | 
			
		||||
 | 
			
		||||
        for entry in entries:
 | 
			
		||||
            entry.update(info)
 | 
			
		||||
 | 
			
		||||
        if len(entries) == 1:
 | 
			
		||||
            entries[0].update(info)
 | 
			
		||||
            return entries[0]
 | 
			
		||||
        else:
 | 
			
		||||
            info.update({
 | 
			
		||||
            for idx, entry in enumerate(entries):
 | 
			
		||||
                entry['id'] = '%s_part%d' % (video_id, (idx + 1))
 | 
			
		||||
 | 
			
		||||
            return {
 | 
			
		||||
                '_type': 'multi_video',
 | 
			
		||||
                'id': video_id,
 | 
			
		||||
                'title': title,
 | 
			
		||||
                'description': description,
 | 
			
		||||
                'entries': entries,
 | 
			
		||||
            })
 | 
			
		||||
            return info
 | 
			
		||||
            }
 | 
			
		||||
 
 | 
			
		||||
							
								
								
									
										39
									
								
								youtube_dl/extractor/biqle.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										39
									
								
								youtube_dl/extractor/biqle.py
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,39 @@
 | 
			
		||||
# coding: utf-8
 | 
			
		||||
from __future__ import unicode_literals
 | 
			
		||||
 | 
			
		||||
from .common import InfoExtractor
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class BIQLEIE(InfoExtractor):
 | 
			
		||||
    _VALID_URL = r'https?://(?:www\.)?biqle\.(?:com|org|ru)/watch/(?P<id>-?\d+_\d+)'
 | 
			
		||||
    _TESTS = [{
 | 
			
		||||
        'url': 'http://www.biqle.ru/watch/847655_160197695',
 | 
			
		||||
        'md5': 'ad5f746a874ccded7b8f211aeea96637',
 | 
			
		||||
        'info_dict': {
 | 
			
		||||
            'id': '160197695',
 | 
			
		||||
            'ext': 'mp4',
 | 
			
		||||
            'title': 'Foo Fighters - The Pretender (Live at Wembley Stadium)',
 | 
			
		||||
            'uploader': 'Andrey Rogozin',
 | 
			
		||||
            'upload_date': '20110605',
 | 
			
		||||
        }
 | 
			
		||||
    }, {
 | 
			
		||||
        'url': 'https://biqle.org/watch/-44781847_168547604',
 | 
			
		||||
        'md5': '7f24e72af1db0edf7c1aaba513174f97',
 | 
			
		||||
        'info_dict': {
 | 
			
		||||
            'id': '168547604',
 | 
			
		||||
            'ext': 'mp4',
 | 
			
		||||
            'title': 'Ребенок в шоке от автоматической мойки',
 | 
			
		||||
            'uploader': 'Dmitry Kotov',
 | 
			
		||||
        }
 | 
			
		||||
    }]
 | 
			
		||||
 | 
			
		||||
    def _real_extract(self, url):
 | 
			
		||||
        video_id = self._match_id(url)
 | 
			
		||||
        webpage = self._download_webpage(url, video_id)
 | 
			
		||||
        embed_url = self._proto_relative_url(self._search_regex(
 | 
			
		||||
            r'<iframe.+?src="((?:http:)?//daxab\.com/[^"]+)".*?></iframe>', webpage, 'embed url'))
 | 
			
		||||
 | 
			
		||||
        return {
 | 
			
		||||
            '_type': 'url_transparent',
 | 
			
		||||
            'url': embed_url,
 | 
			
		||||
        }
 | 
			
		||||
@@ -17,6 +17,9 @@ class BloombergIE(InfoExtractor):
 | 
			
		||||
            'title': 'Shah\'s Presentation on Foreign-Exchange Strategies',
 | 
			
		||||
            'description': 'md5:a8ba0302912d03d246979735c17d2761',
 | 
			
		||||
        },
 | 
			
		||||
        'params': {
 | 
			
		||||
            'format': 'best[format_id^=hds]',
 | 
			
		||||
        },
 | 
			
		||||
    }, {
 | 
			
		||||
        'url': 'http://www.bloomberg.com/news/articles/2015-11-12/five-strange-things-that-have-been-happening-in-financial-markets',
 | 
			
		||||
        'only_matching': True,
 | 
			
		||||
 
 | 
			
		||||
@@ -307,9 +307,10 @@ class BrightcoveLegacyIE(InfoExtractor):
 | 
			
		||||
                                    playlist_title=playlist_info['mediaCollectionDTO']['displayName'])
 | 
			
		||||
 | 
			
		||||
    def _extract_video_info(self, video_info):
 | 
			
		||||
        video_id = compat_str(video_info['id'])
 | 
			
		||||
        publisher_id = video_info.get('publisherId')
 | 
			
		||||
        info = {
 | 
			
		||||
            'id': compat_str(video_info['id']),
 | 
			
		||||
            'id': video_id,
 | 
			
		||||
            'title': video_info['displayName'].strip(),
 | 
			
		||||
            'description': video_info.get('shortDescription'),
 | 
			
		||||
            'thumbnail': video_info.get('videoStillURL') or video_info.get('thumbnailURL'),
 | 
			
		||||
@@ -331,7 +332,8 @@ class BrightcoveLegacyIE(InfoExtractor):
 | 
			
		||||
                    url_comp = compat_urllib_parse_urlparse(url)
 | 
			
		||||
                    if url_comp.path.endswith('.m3u8'):
 | 
			
		||||
                        formats.extend(
 | 
			
		||||
                            self._extract_m3u8_formats(url, info['id'], 'mp4'))
 | 
			
		||||
                            self._extract_m3u8_formats(
 | 
			
		||||
                                url, video_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False))
 | 
			
		||||
                        continue
 | 
			
		||||
                    elif 'akamaihd.net' in url_comp.netloc:
 | 
			
		||||
                        # This type of renditions are served through
 | 
			
		||||
@@ -365,7 +367,7 @@ class BrightcoveLegacyIE(InfoExtractor):
 | 
			
		||||
                    a_format.update({
 | 
			
		||||
                        'format_id': 'hls%s' % ('-%s' % tbr if tbr else ''),
 | 
			
		||||
                        'ext': 'mp4',
 | 
			
		||||
                        'protocol': 'm3u8',
 | 
			
		||||
                        'protocol': 'm3u8_native',
 | 
			
		||||
                    })
 | 
			
		||||
 | 
			
		||||
                formats.append(a_format)
 | 
			
		||||
@@ -395,7 +397,7 @@ class BrightcoveLegacyIE(InfoExtractor):
 | 
			
		||||
                    return ad_info
 | 
			
		||||
 | 
			
		||||
        if 'url' not in info and not info.get('formats'):
 | 
			
		||||
            raise ExtractorError('Unable to extract video url for %s' % info['id'])
 | 
			
		||||
            raise ExtractorError('Unable to extract video url for %s' % video_id)
 | 
			
		||||
        return info
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@@ -442,6 +444,10 @@ class BrightcoveNewIE(InfoExtractor):
 | 
			
		||||
        # non numeric ref: prefixed video id
 | 
			
		||||
        'url': 'http://players.brightcove.net/710858724001/default_default/index.html?videoId=ref:event-stream-356',
 | 
			
		||||
        'only_matching': True,
 | 
			
		||||
    }, {
 | 
			
		||||
        # unavailable video without message but with error_code
 | 
			
		||||
        'url': 'http://players.brightcove.net/1305187701/c832abfb-641b-44eb-9da0-2fe76786505f_default/index.html?videoId=4377407326001',
 | 
			
		||||
        'only_matching': True,
 | 
			
		||||
    }]
 | 
			
		||||
 | 
			
		||||
    @staticmethod
 | 
			
		||||
@@ -512,8 +518,9 @@ class BrightcoveNewIE(InfoExtractor):
 | 
			
		||||
            })
 | 
			
		||||
        except ExtractorError as e:
 | 
			
		||||
            if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403:
 | 
			
		||||
                json_data = self._parse_json(e.cause.read().decode(), video_id)
 | 
			
		||||
                raise ExtractorError(json_data[0]['message'], expected=True)
 | 
			
		||||
                json_data = self._parse_json(e.cause.read().decode(), video_id)[0]
 | 
			
		||||
                raise ExtractorError(
 | 
			
		||||
                    json_data.get('message') or json_data['error_code'], expected=True)
 | 
			
		||||
            raise
 | 
			
		||||
 | 
			
		||||
        title = json_data['name'].strip()
 | 
			
		||||
@@ -527,7 +534,7 @@ class BrightcoveNewIE(InfoExtractor):
 | 
			
		||||
                if not src:
 | 
			
		||||
                    continue
 | 
			
		||||
                formats.extend(self._extract_m3u8_formats(
 | 
			
		||||
                    src, video_id, 'mp4', m3u8_id='hls', fatal=False))
 | 
			
		||||
                    src, video_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False))
 | 
			
		||||
            elif source_type == 'application/dash+xml':
 | 
			
		||||
                if not src:
 | 
			
		||||
                    continue
 | 
			
		||||
 
 | 
			
		||||
@@ -11,6 +11,7 @@ class BYUtvIE(InfoExtractor):
 | 
			
		||||
    _VALID_URL = r'^https?://(?:www\.)?byutv.org/watch/[0-9a-f-]+/(?P<video_id>[^/?#]+)'
 | 
			
		||||
    _TEST = {
 | 
			
		||||
        'url': 'http://www.byutv.org/watch/6587b9a3-89d2-42a6-a7f7-fd2f81840a7d/studio-c-season-5-episode-5',
 | 
			
		||||
        'md5': '05850eb8c749e2ee05ad5a1c34668493',
 | 
			
		||||
        'info_dict': {
 | 
			
		||||
            'id': 'studio-c-season-5-episode-5',
 | 
			
		||||
            'ext': 'mp4',
 | 
			
		||||
@@ -21,7 +22,8 @@ class BYUtvIE(InfoExtractor):
 | 
			
		||||
        },
 | 
			
		||||
        'params': {
 | 
			
		||||
            'skip_download': True,
 | 
			
		||||
        }
 | 
			
		||||
        },
 | 
			
		||||
        'add_ie': ['Ooyala'],
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    def _real_extract(self, url):
 | 
			
		||||
 
 | 
			
		||||
@@ -4,11 +4,11 @@ from __future__ import unicode_literals
 | 
			
		||||
import re
 | 
			
		||||
 | 
			
		||||
from .common import InfoExtractor
 | 
			
		||||
from ..compat import compat_urllib_parse_urlparse
 | 
			
		||||
from ..utils import (
 | 
			
		||||
    ExtractorError,
 | 
			
		||||
    HEADRequest,
 | 
			
		||||
    unified_strdate,
 | 
			
		||||
    url_basename,
 | 
			
		||||
    qualities,
 | 
			
		||||
    int_or_none,
 | 
			
		||||
)
 | 
			
		||||
@@ -16,24 +16,38 @@ from ..utils import (
 | 
			
		||||
 | 
			
		||||
class CanalplusIE(InfoExtractor):
 | 
			
		||||
    IE_DESC = 'canalplus.fr, piwiplus.fr and d8.tv'
 | 
			
		||||
    _VALID_URL = r'https?://(?:www\.(?P<site>canalplus\.fr|piwiplus\.fr|d8\.tv|itele\.fr)/.*?/(?P<path>.*)|player\.canalplus\.fr/#/(?P<id>[0-9]+))'
 | 
			
		||||
    _VALID_URL = r'''(?x)
 | 
			
		||||
                        https?://
 | 
			
		||||
                            (?:
 | 
			
		||||
                                (?:
 | 
			
		||||
                                    (?:(?:www|m)\.)?canalplus\.fr|
 | 
			
		||||
                                    (?:www\.)?piwiplus\.fr|
 | 
			
		||||
                                    (?:www\.)?d8\.tv|
 | 
			
		||||
                                    (?:www\.)?d17\.tv|
 | 
			
		||||
                                    (?:www\.)?itele\.fr
 | 
			
		||||
                                )/(?:(?:[^/]+/)*(?P<display_id>[^/?#&]+))?(?:\?.*\bvid=(?P<vid>\d+))?|
 | 
			
		||||
                                player\.canalplus\.fr/#/(?P<id>\d+)
 | 
			
		||||
                            )
 | 
			
		||||
 | 
			
		||||
                    '''
 | 
			
		||||
    _VIDEO_INFO_TEMPLATE = 'http://service.canal-plus.com/video/rest/getVideosLiees/%s/%s?format=json'
 | 
			
		||||
    _SITE_ID_MAP = {
 | 
			
		||||
        'canalplus.fr': 'cplus',
 | 
			
		||||
        'piwiplus.fr': 'teletoon',
 | 
			
		||||
        'd8.tv': 'd8',
 | 
			
		||||
        'itele.fr': 'itele',
 | 
			
		||||
        'canalplus': 'cplus',
 | 
			
		||||
        'piwiplus': 'teletoon',
 | 
			
		||||
        'd8': 'd8',
 | 
			
		||||
        'd17': 'd17',
 | 
			
		||||
        'itele': 'itele',
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    _TESTS = [{
 | 
			
		||||
        'url': 'http://www.canalplus.fr/c-emissions/pid1830-c-zapping.html?vid=1263092',
 | 
			
		||||
        'md5': '12164a6f14ff6df8bd628e8ba9b10b78',
 | 
			
		||||
        'url': 'http://www.canalplus.fr/c-emissions/pid1830-c-zapping.html?vid=1192814',
 | 
			
		||||
        'md5': '41f438a4904f7664b91b4ed0dec969dc',
 | 
			
		||||
        'info_dict': {
 | 
			
		||||
            'id': '1263092',
 | 
			
		||||
            'id': '1192814',
 | 
			
		||||
            'ext': 'mp4',
 | 
			
		||||
            'title': 'Le Zapping - 13/05/15',
 | 
			
		||||
            'description': 'md5:09738c0d06be4b5d06a0940edb0da73f',
 | 
			
		||||
            'upload_date': '20150513',
 | 
			
		||||
            'title': "L'Année du Zapping 2014 - L'Année du Zapping 2014",
 | 
			
		||||
            'description': "Toute l'année 2014 dans un Zapping exceptionnel !",
 | 
			
		||||
            'upload_date': '20150105',
 | 
			
		||||
        },
 | 
			
		||||
    }, {
 | 
			
		||||
        'url': 'http://www.piwiplus.fr/videos-piwi/pid1405-le-labyrinthe-boing-super-ranger.html?vid=1108190',
 | 
			
		||||
@@ -46,35 +60,45 @@ class CanalplusIE(InfoExtractor):
 | 
			
		||||
        },
 | 
			
		||||
        'skip': 'Only works from France',
 | 
			
		||||
    }, {
 | 
			
		||||
        'url': 'http://www.d8.tv/d8-docs-mags/pid6589-d8-campagne-intime.html',
 | 
			
		||||
        'url': 'http://www.d8.tv/d8-docs-mags/pid5198-d8-en-quete-d-actualite.html?vid=1390231',
 | 
			
		||||
        'info_dict': {
 | 
			
		||||
            'id': '966289',
 | 
			
		||||
            'ext': 'flv',
 | 
			
		||||
            'title': 'Campagne intime - Documentaire exceptionnel',
 | 
			
		||||
            'description': 'md5:d2643b799fb190846ae09c61e59a859f',
 | 
			
		||||
            'upload_date': '20131108',
 | 
			
		||||
        },
 | 
			
		||||
        'skip': 'videos get deleted after a while',
 | 
			
		||||
    }, {
 | 
			
		||||
        'url': 'http://www.itele.fr/france/video/aubervilliers-un-lycee-en-colere-111559',
 | 
			
		||||
        'md5': '38b8f7934def74f0d6f3ba6c036a5f82',
 | 
			
		||||
        'info_dict': {
 | 
			
		||||
            'id': '1213714',
 | 
			
		||||
            'id': '1390231',
 | 
			
		||||
            'ext': 'mp4',
 | 
			
		||||
            'title': 'Aubervilliers : un lycée en colère - Le 11/02/2015 à 06h45',
 | 
			
		||||
            'description': 'md5:8216206ec53426ea6321321f3b3c16db',
 | 
			
		||||
            'upload_date': '20150211',
 | 
			
		||||
            'title': "Vacances pas chères : prix discount ou grosses dépenses ? - En quête d'actualité",
 | 
			
		||||
            'description': 'md5:edb6cf1cb4a1e807b5dd089e1ac8bfc6',
 | 
			
		||||
            'upload_date': '20160512',
 | 
			
		||||
        },
 | 
			
		||||
        'params': {
 | 
			
		||||
            'skip_download': True,
 | 
			
		||||
        },
 | 
			
		||||
    }, {
 | 
			
		||||
        'url': 'http://www.itele.fr/chroniques/invite-bruce-toussaint/thierry-solere-nicolas-sarkozy-officialisera-sa-candidature-a-la-primaire-quand-il-le-voudra-167224',
 | 
			
		||||
        'info_dict': {
 | 
			
		||||
            'id': '1398334',
 | 
			
		||||
            'ext': 'mp4',
 | 
			
		||||
            'title': "L'invité de Bruce Toussaint du 07/06/2016 - ",
 | 
			
		||||
            'description': 'md5:40ac7c9ad0feaeb6f605bad986f61324',
 | 
			
		||||
            'upload_date': '20160607',
 | 
			
		||||
        },
 | 
			
		||||
        'params': {
 | 
			
		||||
            'skip_download': True,
 | 
			
		||||
        },
 | 
			
		||||
    }, {
 | 
			
		||||
        'url': 'http://m.canalplus.fr/?vid=1398231',
 | 
			
		||||
        'only_matching': True,
 | 
			
		||||
    }, {
 | 
			
		||||
        'url': 'http://www.d17.tv/emissions/pid8303-lolywood.html?vid=1397061',
 | 
			
		||||
        'only_matching': True,
 | 
			
		||||
    }]
 | 
			
		||||
 | 
			
		||||
    def _real_extract(self, url):
 | 
			
		||||
        mobj = re.match(self._VALID_URL, url)
 | 
			
		||||
        video_id = mobj.groupdict().get('id')
 | 
			
		||||
        video_id = mobj.groupdict().get('id') or mobj.groupdict().get('vid')
 | 
			
		||||
 | 
			
		||||
        site_id = self._SITE_ID_MAP[mobj.group('site') or 'canal']
 | 
			
		||||
        site_id = self._SITE_ID_MAP[compat_urllib_parse_urlparse(url).netloc.rsplit('.', 2)[-2]]
 | 
			
		||||
 | 
			
		||||
        # Beware, some subclasses do not define an id group
 | 
			
		||||
        display_id = url_basename(mobj.group('path'))
 | 
			
		||||
        display_id = mobj.group('display_id') or video_id
 | 
			
		||||
 | 
			
		||||
        if video_id is None:
 | 
			
		||||
            webpage = self._download_webpage(url, display_id)
 | 
			
		||||
 
 | 
			
		||||
@@ -4,65 +4,66 @@ from __future__ import unicode_literals
 | 
			
		||||
import re
 | 
			
		||||
 | 
			
		||||
from .common import InfoExtractor
 | 
			
		||||
from ..utils import js_to_json
 | 
			
		||||
from ..utils import (
 | 
			
		||||
    js_to_json,
 | 
			
		||||
    smuggle_url,
 | 
			
		||||
)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class CBCIE(InfoExtractor):
 | 
			
		||||
    _VALID_URL = r'https?://(?:www\.)?cbc\.ca/(?:[^/]+/)+(?P<id>[^/?#]+)'
 | 
			
		||||
    _VALID_URL = r'https?://(?:www\.)?cbc\.ca/(?!player/)(?:[^/]+/)+(?P<id>[^/?#]+)'
 | 
			
		||||
    _TESTS = [{
 | 
			
		||||
        # with mediaId
 | 
			
		||||
        'url': 'http://www.cbc.ca/22minutes/videos/clips-season-23/don-cherry-play-offs',
 | 
			
		||||
        'md5': '97e24d09672fc4cf56256d6faa6c25bc',
 | 
			
		||||
        'info_dict': {
 | 
			
		||||
            'id': '2682904050',
 | 
			
		||||
            'ext': 'flv',
 | 
			
		||||
            'ext': 'mp4',
 | 
			
		||||
            'title': 'Don Cherry – All-Stars',
 | 
			
		||||
            'description': 'Don Cherry has a bee in his bonnet about AHL player John Scott because that guy’s got heart.',
 | 
			
		||||
            'timestamp': 1454475540,
 | 
			
		||||
            'timestamp': 1454463000,
 | 
			
		||||
            'upload_date': '20160203',
 | 
			
		||||
        },
 | 
			
		||||
        'params': {
 | 
			
		||||
            # rtmp download
 | 
			
		||||
            'skip_download': True,
 | 
			
		||||
            'uploader': 'CBCC-NEW',
 | 
			
		||||
        },
 | 
			
		||||
    }, {
 | 
			
		||||
        # with clipId
 | 
			
		||||
        'url': 'http://www.cbc.ca/archives/entry/1978-robin-williams-freestyles-on-90-minutes-live',
 | 
			
		||||
        'md5': '0274a90b51a9b4971fe005c63f592f12',
 | 
			
		||||
        'info_dict': {
 | 
			
		||||
            'id': '2487345465',
 | 
			
		||||
            'ext': 'flv',
 | 
			
		||||
            'ext': 'mp4',
 | 
			
		||||
            'title': 'Robin Williams freestyles on 90 Minutes Live',
 | 
			
		||||
            'description': 'Wacky American comedian Robin Williams shows off his infamous "freestyle" comedic talents while being interviewed on CBC\'s 90 Minutes Live.',
 | 
			
		||||
            'upload_date': '19700101',
 | 
			
		||||
            'upload_date': '19780210',
 | 
			
		||||
            'uploader': 'CBCC-NEW',
 | 
			
		||||
        },
 | 
			
		||||
        'params': {
 | 
			
		||||
            # rtmp download
 | 
			
		||||
            'skip_download': True,
 | 
			
		||||
            'timestamp': 255977160,
 | 
			
		||||
        },
 | 
			
		||||
    }, {
 | 
			
		||||
        # multiple iframes
 | 
			
		||||
        'url': 'http://www.cbc.ca/natureofthings/blog/birds-eye-view-from-vancouvers-burrard-street-bridge-how-we-got-the-shot',
 | 
			
		||||
        'playlist': [{
 | 
			
		||||
            'md5': '377572d0b49c4ce0c9ad77470e0b96b4',
 | 
			
		||||
            'info_dict': {
 | 
			
		||||
                'id': '2680832926',
 | 
			
		||||
                'ext': 'flv',
 | 
			
		||||
                'ext': 'mp4',
 | 
			
		||||
                'title': 'An Eagle\'s-Eye View Off Burrard Bridge',
 | 
			
		||||
                'description': 'Hercules the eagle flies from Vancouver\'s Burrard Bridge down to a nearby park with a mini-camera strapped to his back.',
 | 
			
		||||
                'upload_date': '19700101',
 | 
			
		||||
                'upload_date': '20160201',
 | 
			
		||||
                'timestamp': 1454342820,
 | 
			
		||||
                'uploader': 'CBCC-NEW',
 | 
			
		||||
            },
 | 
			
		||||
        }, {
 | 
			
		||||
            'md5': '415a0e3f586113894174dfb31aa5bb1a',
 | 
			
		||||
            'info_dict': {
 | 
			
		||||
                'id': '2658915080',
 | 
			
		||||
                'ext': 'flv',
 | 
			
		||||
                'ext': 'mp4',
 | 
			
		||||
                'title': 'Fly like an eagle!',
 | 
			
		||||
                'description': 'Eagle equipped with a mini camera flies from the world\'s tallest tower',
 | 
			
		||||
                'upload_date': '19700101',
 | 
			
		||||
                'upload_date': '20150315',
 | 
			
		||||
                'timestamp': 1426443984,
 | 
			
		||||
                'uploader': 'CBCC-NEW',
 | 
			
		||||
            },
 | 
			
		||||
        }],
 | 
			
		||||
        'params': {
 | 
			
		||||
            # rtmp download
 | 
			
		||||
            'skip_download': True,
 | 
			
		||||
        },
 | 
			
		||||
    }]
 | 
			
		||||
 | 
			
		||||
    @classmethod
 | 
			
		||||
@@ -91,24 +92,54 @@ class CBCIE(InfoExtractor):
 | 
			
		||||
 | 
			
		||||
class CBCPlayerIE(InfoExtractor):
 | 
			
		||||
    _VALID_URL = r'(?:cbcplayer:|https?://(?:www\.)?cbc\.ca/(?:player/play/|i/caffeine/syndicate/\?mediaId=))(?P<id>\d+)'
 | 
			
		||||
    _TEST = {
 | 
			
		||||
    _TESTS = [{
 | 
			
		||||
        'url': 'http://www.cbc.ca/player/play/2683190193',
 | 
			
		||||
        'md5': '64d25f841ddf4ddb28a235338af32e2c',
 | 
			
		||||
        'info_dict': {
 | 
			
		||||
            'id': '2683190193',
 | 
			
		||||
            'ext': 'flv',
 | 
			
		||||
            'ext': 'mp4',
 | 
			
		||||
            'title': 'Gerry Runs a Sweat Shop',
 | 
			
		||||
            'description': 'md5:b457e1c01e8ff408d9d801c1c2cd29b0',
 | 
			
		||||
            'timestamp': 1455067800,
 | 
			
		||||
            'timestamp': 1455071400,
 | 
			
		||||
            'upload_date': '20160210',
 | 
			
		||||
            'uploader': 'CBCC-NEW',
 | 
			
		||||
        },
 | 
			
		||||
        'params': {
 | 
			
		||||
            # rtmp download
 | 
			
		||||
            'skip_download': True,
 | 
			
		||||
    }, {
 | 
			
		||||
        # Redirected from http://www.cbc.ca/player/AudioMobile/All%20in%20a%20Weekend%20Montreal/ID/2657632011/
 | 
			
		||||
        'url': 'http://www.cbc.ca/player/play/2657631896',
 | 
			
		||||
        'md5': 'e5e708c34ae6fca156aafe17c43e8b75',
 | 
			
		||||
        'info_dict': {
 | 
			
		||||
            'id': '2657631896',
 | 
			
		||||
            'ext': 'mp3',
 | 
			
		||||
            'title': 'CBC Montreal is organizing its first ever community hackathon!',
 | 
			
		||||
            'description': 'The modern technology we tend to depend on so heavily, is never without it\'s share of hiccups and headaches. Next weekend - CBC Montreal will be getting members of the public for its first Hackathon.',
 | 
			
		||||
            'timestamp': 1425704400,
 | 
			
		||||
            'upload_date': '20150307',
 | 
			
		||||
            'uploader': 'CBCC-NEW',
 | 
			
		||||
        },
 | 
			
		||||
    }
 | 
			
		||||
    }, {
 | 
			
		||||
        # available only when we add `formats=MPEG4,FLV,MP3` to theplatform url
 | 
			
		||||
        'url': 'http://www.cbc.ca/player/play/2164402062',
 | 
			
		||||
        'md5': '17a61eb813539abea40618d6323a7f82',
 | 
			
		||||
        'info_dict': {
 | 
			
		||||
            'id': '2164402062',
 | 
			
		||||
            'ext': 'flv',
 | 
			
		||||
            'title': 'Cancer survivor four times over',
 | 
			
		||||
            'description': 'Tim Mayer has beaten three different forms of cancer four times in five years.',
 | 
			
		||||
            'timestamp': 1320410746,
 | 
			
		||||
            'upload_date': '20111104',
 | 
			
		||||
            'uploader': 'CBCC-NEW',
 | 
			
		||||
        },
 | 
			
		||||
    }]
 | 
			
		||||
 | 
			
		||||
    def _real_extract(self, url):
 | 
			
		||||
        video_id = self._match_id(url)
 | 
			
		||||
        return self.url_result(
 | 
			
		||||
            'http://feed.theplatform.com/f/ExhSPC/vms_5akSXx4Ng_Zn?byGuid=%s' % video_id,
 | 
			
		||||
            'ThePlatformFeed', video_id)
 | 
			
		||||
        return {
 | 
			
		||||
            '_type': 'url_transparent',
 | 
			
		||||
            'ie_key': 'ThePlatform',
 | 
			
		||||
            'url': smuggle_url(
 | 
			
		||||
                'http://link.theplatform.com/s/ExhSPC/media/guid/2655402169/%s?mbr=true&formats=MPEG4,FLV,MP3' % video_id, {
 | 
			
		||||
                    'force_smil_url': True
 | 
			
		||||
                }),
 | 
			
		||||
            'id': video_id,
 | 
			
		||||
        }
 | 
			
		||||
 
 | 
			
		||||
@@ -1,5 +1,7 @@
 | 
			
		||||
from __future__ import unicode_literals
 | 
			
		||||
 | 
			
		||||
import re
 | 
			
		||||
 | 
			
		||||
from .theplatform import ThePlatformIE
 | 
			
		||||
from ..utils import (
 | 
			
		||||
    xpath_text,
 | 
			
		||||
@@ -21,7 +23,7 @@ class CBSBaseIE(ThePlatformIE):
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class CBSIE(CBSBaseIE):
 | 
			
		||||
    _VALID_URL = r'https?://(?:www\.)?(?:cbs\.com/shows/[^/]+/(?:video|artist)|colbertlateshow\.com/(?:video|podcasts))/[^/]+/(?P<id>[^/]+)'
 | 
			
		||||
    _VALID_URL = r'(?:cbs:(?P<content_id>\w+)|https?://(?:www\.)?(?:cbs\.com/shows/[^/]+/(?:video|artist)|colbertlateshow\.com/(?:video|podcasts))/[^/]+/(?P<display_id>[^/]+))'
 | 
			
		||||
 | 
			
		||||
    _TESTS = [{
 | 
			
		||||
        'url': 'http://www.cbs.com/shows/garth-brooks/video/_u7W953k6la293J7EPTd9oHkSPs6Xn6_/connect-chat-feat-garth-brooks/',
 | 
			
		||||
@@ -66,11 +68,12 @@ class CBSIE(CBSBaseIE):
 | 
			
		||||
    TP_RELEASE_URL_TEMPLATE = 'http://link.theplatform.com/s/dJ5BDC/%s?mbr=true'
 | 
			
		||||
 | 
			
		||||
    def _real_extract(self, url):
 | 
			
		||||
        display_id = self._match_id(url)
 | 
			
		||||
        webpage = self._download_webpage(url, display_id)
 | 
			
		||||
        content_id = self._search_regex(
 | 
			
		||||
            [r"video\.settings\.content_id\s*=\s*'([^']+)';", r"cbsplayer\.contentId\s*=\s*'([^']+)';"],
 | 
			
		||||
            webpage, 'content id')
 | 
			
		||||
        content_id, display_id = re.match(self._VALID_URL, url).groups()
 | 
			
		||||
        if not content_id:
 | 
			
		||||
            webpage = self._download_webpage(url, display_id)
 | 
			
		||||
            content_id = self._search_regex(
 | 
			
		||||
                [r"video\.settings\.content_id\s*=\s*'([^']+)';", r"cbsplayer\.contentId\s*=\s*'([^']+)';"],
 | 
			
		||||
                webpage, 'content id')
 | 
			
		||||
        items_data = self._download_xml(
 | 
			
		||||
            'http://can.cbs.com/thunder/player/videoPlayerService.php',
 | 
			
		||||
            content_id, query={'partner': 'cbs', 'contentId': content_id})
 | 
			
		||||
 
 | 
			
		||||
							
								
								
									
										84
									
								
								youtube_dl/extractor/cbslocal.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										84
									
								
								youtube_dl/extractor/cbslocal.py
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,84 @@
 | 
			
		||||
# coding: utf-8
 | 
			
		||||
from __future__ import unicode_literals
 | 
			
		||||
 | 
			
		||||
import calendar
 | 
			
		||||
import datetime
 | 
			
		||||
 | 
			
		||||
from .anvato import AnvatoIE
 | 
			
		||||
from .sendtonews import SendtoNewsIE
 | 
			
		||||
from ..compat import compat_urlparse
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class CBSLocalIE(AnvatoIE):
 | 
			
		||||
    _VALID_URL = r'https?://[a-z]+\.cbslocal\.com/\d+/\d+/\d+/(?P<id>[0-9a-z-]+)'
 | 
			
		||||
 | 
			
		||||
    _TESTS = [{
 | 
			
		||||
        # Anvato backend
 | 
			
		||||
        'url': 'http://losangeles.cbslocal.com/2016/05/16/safety-advocates-say-fatal-car-seat-failures-are-public-health-crisis',
 | 
			
		||||
        'md5': 'f0ee3081e3843f575fccef901199b212',
 | 
			
		||||
        'info_dict': {
 | 
			
		||||
            'id': '3401037',
 | 
			
		||||
            'ext': 'mp4',
 | 
			
		||||
            'title': 'Safety Advocates Say Fatal Car Seat Failures Are \'Public Health Crisis\'',
 | 
			
		||||
            'description': 'Collapsing seats have been the focus of scrutiny for decades, though experts say remarkably little has been done to address the issue. Randy Paige reports.',
 | 
			
		||||
            'thumbnail': 're:^https?://.*',
 | 
			
		||||
            'timestamp': 1463440500,
 | 
			
		||||
            'upload_date': '20160516',
 | 
			
		||||
            'subtitles': {
 | 
			
		||||
                'en': 'mincount:5',
 | 
			
		||||
            },
 | 
			
		||||
            'categories': [
 | 
			
		||||
                'Stations\\Spoken Word\\KCBSTV',
 | 
			
		||||
                'Syndication\\MSN',
 | 
			
		||||
                'Syndication\\NDN',
 | 
			
		||||
                'Syndication\\AOL',
 | 
			
		||||
                'Syndication\\Yahoo',
 | 
			
		||||
                'Syndication\\Tribune',
 | 
			
		||||
                'Syndication\\Curb.tv',
 | 
			
		||||
                'Content\\News'
 | 
			
		||||
            ],
 | 
			
		||||
        },
 | 
			
		||||
    }, {
 | 
			
		||||
        # SendtoNews embed
 | 
			
		||||
        'url': 'http://cleveland.cbslocal.com/2016/05/16/indians-score-season-high-15-runs-in-blowout-win-over-reds-rapid-reaction/',
 | 
			
		||||
        'info_dict': {
 | 
			
		||||
            'id': 'GxfCe0Zo7D-175909-5588',
 | 
			
		||||
            'ext': 'mp4',
 | 
			
		||||
            'title': 'Recap: CLE 15, CIN 6',
 | 
			
		||||
            'description': '5/16/16: Indians\' bats explode for 15 runs in a win',
 | 
			
		||||
            'upload_date': '20160516',
 | 
			
		||||
            'timestamp': 1463433840,
 | 
			
		||||
            'duration': 49,
 | 
			
		||||
        },
 | 
			
		||||
        'params': {
 | 
			
		||||
            # m3u8 download
 | 
			
		||||
            'skip_download': True,
 | 
			
		||||
        },
 | 
			
		||||
    }]
 | 
			
		||||
 | 
			
		||||
    def _real_extract(self, url):
 | 
			
		||||
        display_id = self._match_id(url)
 | 
			
		||||
        webpage = self._download_webpage(url, display_id)
 | 
			
		||||
 | 
			
		||||
        sendtonews_url = SendtoNewsIE._extract_url(webpage)
 | 
			
		||||
        if sendtonews_url:
 | 
			
		||||
            info_dict = {
 | 
			
		||||
                '_type': 'url_transparent',
 | 
			
		||||
                'url': compat_urlparse.urljoin(url, sendtonews_url),
 | 
			
		||||
            }
 | 
			
		||||
        else:
 | 
			
		||||
            info_dict = self._extract_anvato_videos(webpage, display_id)
 | 
			
		||||
 | 
			
		||||
        time_str = self._html_search_regex(
 | 
			
		||||
            r'class="entry-date">([^<]+)<', webpage, 'released date', fatal=False)
 | 
			
		||||
        timestamp = None
 | 
			
		||||
        if time_str:
 | 
			
		||||
            timestamp = calendar.timegm(datetime.datetime.strptime(
 | 
			
		||||
                time_str, '%b %d, %Y %I:%M %p').timetuple())
 | 
			
		||||
 | 
			
		||||
        info_dict.update({
 | 
			
		||||
            'display_id': display_id,
 | 
			
		||||
            'timestamp': timestamp,
 | 
			
		||||
        })
 | 
			
		||||
 | 
			
		||||
        return info_dict
 | 
			
		||||
@@ -33,19 +33,33 @@ class CeskaTelevizeIE(InfoExtractor):
 | 
			
		||||
            'skip_download': True,
 | 
			
		||||
        },
 | 
			
		||||
    }, {
 | 
			
		||||
        'url': 'http://www.ceskatelevize.cz/ivysilani/10532695142-prvni-republika/bonus/14716-zpevacka-z-duparny-bobina',
 | 
			
		||||
        'url': 'http://www.ceskatelevize.cz/ivysilani/10441294653-hyde-park-civilizace/215411058090502/bonus/20641-bonus-01-en',
 | 
			
		||||
        'info_dict': {
 | 
			
		||||
            'id': '61924494876844374',
 | 
			
		||||
            'id': '61924494877028507',
 | 
			
		||||
            'ext': 'mp4',
 | 
			
		||||
            'title': 'První republika: Zpěvačka z Dupárny Bobina',
 | 
			
		||||
            'description': 'Sága mapující atmosféru první republiky od r. 1918 do r. 1945.',
 | 
			
		||||
            'title': 'Hyde Park Civilizace: Bonus 01 - En',
 | 
			
		||||
            'description': 'English Subtittles',
 | 
			
		||||
            'thumbnail': 're:^https?://.*\.jpg',
 | 
			
		||||
            'duration': 88.4,
 | 
			
		||||
            'duration': 81.3,
 | 
			
		||||
        },
 | 
			
		||||
        'params': {
 | 
			
		||||
            # m3u8 download
 | 
			
		||||
            'skip_download': True,
 | 
			
		||||
        },
 | 
			
		||||
    }, {
 | 
			
		||||
        # live stream
 | 
			
		||||
        'url': 'http://www.ceskatelevize.cz/ivysilani/zive/ct4/',
 | 
			
		||||
        'info_dict': {
 | 
			
		||||
            'id': 402,
 | 
			
		||||
            'ext': 'mp4',
 | 
			
		||||
            'title': 're:^ČT Sport \d{4}-\d{2}-\d{2} \d{2}:\d{2}$',
 | 
			
		||||
            'is_live': True,
 | 
			
		||||
        },
 | 
			
		||||
        'params': {
 | 
			
		||||
            # m3u8 download
 | 
			
		||||
            'skip_download': True,
 | 
			
		||||
        },
 | 
			
		||||
        'skip': 'Georestricted to Czech Republic',
 | 
			
		||||
    }, {
 | 
			
		||||
        # video with 18+ caution trailer
 | 
			
		||||
        'url': 'http://www.ceskatelevize.cz/porady/10520528904-queer/215562210900007-bogotart/',
 | 
			
		||||
@@ -118,19 +132,21 @@ class CeskaTelevizeIE(InfoExtractor):
 | 
			
		||||
        req = sanitized_Request(compat_urllib_parse_unquote(playlist_url))
 | 
			
		||||
        req.add_header('Referer', url)
 | 
			
		||||
 | 
			
		||||
        playlist_title = self._og_search_title(webpage)
 | 
			
		||||
        playlist_description = self._og_search_description(webpage)
 | 
			
		||||
        playlist_title = self._og_search_title(webpage, default=None)
 | 
			
		||||
        playlist_description = self._og_search_description(webpage, default=None)
 | 
			
		||||
 | 
			
		||||
        playlist = self._download_json(req, playlist_id)['playlist']
 | 
			
		||||
        playlist_len = len(playlist)
 | 
			
		||||
 | 
			
		||||
        entries = []
 | 
			
		||||
        for item in playlist:
 | 
			
		||||
            is_live = item.get('type') == 'LIVE'
 | 
			
		||||
            formats = []
 | 
			
		||||
            for format_id, stream_url in item['streamUrls'].items():
 | 
			
		||||
                formats.extend(self._extract_m3u8_formats(
 | 
			
		||||
                    stream_url, playlist_id, 'mp4',
 | 
			
		||||
                    entry_protocol='m3u8_native', fatal=False))
 | 
			
		||||
                    entry_protocol='m3u8' if is_live else 'm3u8_native',
 | 
			
		||||
                    fatal=False))
 | 
			
		||||
            self._sort_formats(formats)
 | 
			
		||||
 | 
			
		||||
            item_id = item.get('id') or item['assetId']
 | 
			
		||||
@@ -145,14 +161,22 @@ class CeskaTelevizeIE(InfoExtractor):
 | 
			
		||||
                if subs:
 | 
			
		||||
                    subtitles = self.extract_subtitles(episode_id, subs)
 | 
			
		||||
 | 
			
		||||
            if playlist_len == 1:
 | 
			
		||||
                final_title = playlist_title or title
 | 
			
		||||
                if is_live:
 | 
			
		||||
                    final_title = self._live_title(final_title)
 | 
			
		||||
            else:
 | 
			
		||||
                final_title = '%s (%s)' % (playlist_title, title)
 | 
			
		||||
 | 
			
		||||
            entries.append({
 | 
			
		||||
                'id': item_id,
 | 
			
		||||
                'title': playlist_title if playlist_len == 1 else '%s (%s)' % (playlist_title, title),
 | 
			
		||||
                'title': final_title,
 | 
			
		||||
                'description': playlist_description if playlist_len == 1 else None,
 | 
			
		||||
                'thumbnail': thumbnail,
 | 
			
		||||
                'duration': duration,
 | 
			
		||||
                'formats': formats,
 | 
			
		||||
                'subtitles': subtitles,
 | 
			
		||||
                'is_live': is_live,
 | 
			
		||||
            })
 | 
			
		||||
 | 
			
		||||
        return self.playlist_result(entries, playlist_id, playlist_title, playlist_description)
 | 
			
		||||
 
 | 
			
		||||
@@ -20,54 +20,64 @@ class Channel9IE(InfoExtractor):
 | 
			
		||||
    '''
 | 
			
		||||
    IE_DESC = 'Channel 9'
 | 
			
		||||
    IE_NAME = 'channel9'
 | 
			
		||||
    _VALID_URL = r'https?://(?:www\.)?channel9\.msdn\.com/(?P<contentpath>.+)/?'
 | 
			
		||||
    _VALID_URL = r'https?://(?:www\.)?channel9\.msdn\.com/(?P<contentpath>.+?)(?P<rss>/RSS)?/?(?:[?#&]|$)'
 | 
			
		||||
 | 
			
		||||
    _TESTS = [
 | 
			
		||||
        {
 | 
			
		||||
            'url': 'http://channel9.msdn.com/Events/TechEd/Australia/2013/KOS002',
 | 
			
		||||
            'md5': 'bbd75296ba47916b754e73c3a4bbdf10',
 | 
			
		||||
            'info_dict': {
 | 
			
		||||
                'id': 'Events/TechEd/Australia/2013/KOS002',
 | 
			
		||||
                'ext': 'mp4',
 | 
			
		||||
                'title': 'Developer Kick-Off Session: Stuff We Love',
 | 
			
		||||
                'description': 'md5:c08d72240b7c87fcecafe2692f80e35f',
 | 
			
		||||
                'duration': 4576,
 | 
			
		||||
                'thumbnail': 're:http://.*\.jpg',
 | 
			
		||||
                'session_code': 'KOS002',
 | 
			
		||||
                'session_day': 'Day 1',
 | 
			
		||||
                'session_room': 'Arena 1A',
 | 
			
		||||
                'session_speakers': ['Ed Blankenship', 'Andrew Coates', 'Brady Gaster', 'Patrick Klug', 'Mads Kristensen'],
 | 
			
		||||
            },
 | 
			
		||||
    _TESTS = [{
 | 
			
		||||
        'url': 'http://channel9.msdn.com/Events/TechEd/Australia/2013/KOS002',
 | 
			
		||||
        'md5': 'bbd75296ba47916b754e73c3a4bbdf10',
 | 
			
		||||
        'info_dict': {
 | 
			
		||||
            'id': 'Events/TechEd/Australia/2013/KOS002',
 | 
			
		||||
            'ext': 'mp4',
 | 
			
		||||
            'title': 'Developer Kick-Off Session: Stuff We Love',
 | 
			
		||||
            'description': 'md5:c08d72240b7c87fcecafe2692f80e35f',
 | 
			
		||||
            'duration': 4576,
 | 
			
		||||
            'thumbnail': 're:http://.*\.jpg',
 | 
			
		||||
            'session_code': 'KOS002',
 | 
			
		||||
            'session_day': 'Day 1',
 | 
			
		||||
            'session_room': 'Arena 1A',
 | 
			
		||||
            'session_speakers': ['Ed Blankenship', 'Andrew Coates', 'Brady Gaster', 'Patrick Klug',
 | 
			
		||||
                                 'Mads Kristensen'],
 | 
			
		||||
        },
 | 
			
		||||
        {
 | 
			
		||||
            'url': 'http://channel9.msdn.com/posts/Self-service-BI-with-Power-BI-nuclear-testing',
 | 
			
		||||
            'md5': 'b43ee4529d111bc37ba7ee4f34813e68',
 | 
			
		||||
            'info_dict': {
 | 
			
		||||
                'id': 'posts/Self-service-BI-with-Power-BI-nuclear-testing',
 | 
			
		||||
                'ext': 'mp4',
 | 
			
		||||
                'title': 'Self-service BI with Power BI - nuclear testing',
 | 
			
		||||
                'description': 'md5:d1e6ecaafa7fb52a2cacdf9599829f5b',
 | 
			
		||||
                'duration': 1540,
 | 
			
		||||
                'thumbnail': 're:http://.*\.jpg',
 | 
			
		||||
                'authors': ['Mike Wilmot'],
 | 
			
		||||
            },
 | 
			
		||||
    }, {
 | 
			
		||||
        'url': 'http://channel9.msdn.com/posts/Self-service-BI-with-Power-BI-nuclear-testing',
 | 
			
		||||
        'md5': 'b43ee4529d111bc37ba7ee4f34813e68',
 | 
			
		||||
        'info_dict': {
 | 
			
		||||
            'id': 'posts/Self-service-BI-with-Power-BI-nuclear-testing',
 | 
			
		||||
            'ext': 'mp4',
 | 
			
		||||
            'title': 'Self-service BI with Power BI - nuclear testing',
 | 
			
		||||
            'description': 'md5:d1e6ecaafa7fb52a2cacdf9599829f5b',
 | 
			
		||||
            'duration': 1540,
 | 
			
		||||
            'thumbnail': 're:http://.*\.jpg',
 | 
			
		||||
            'authors': ['Mike Wilmot'],
 | 
			
		||||
        },
 | 
			
		||||
        {
 | 
			
		||||
            # low quality mp4 is best
 | 
			
		||||
            'url': 'https://channel9.msdn.com/Events/CPP/CppCon-2015/Ranges-for-the-Standard-Library',
 | 
			
		||||
            'info_dict': {
 | 
			
		||||
                'id': 'Events/CPP/CppCon-2015/Ranges-for-the-Standard-Library',
 | 
			
		||||
                'ext': 'mp4',
 | 
			
		||||
                'title': 'Ranges for the Standard Library',
 | 
			
		||||
                'description': 'md5:2e6b4917677af3728c5f6d63784c4c5d',
 | 
			
		||||
                'duration': 5646,
 | 
			
		||||
                'thumbnail': 're:http://.*\.jpg',
 | 
			
		||||
            },
 | 
			
		||||
            'params': {
 | 
			
		||||
                'skip_download': True,
 | 
			
		||||
            },
 | 
			
		||||
        }
 | 
			
		||||
    ]
 | 
			
		||||
    }, {
 | 
			
		||||
        # low quality mp4 is best
 | 
			
		||||
        'url': 'https://channel9.msdn.com/Events/CPP/CppCon-2015/Ranges-for-the-Standard-Library',
 | 
			
		||||
        'info_dict': {
 | 
			
		||||
            'id': 'Events/CPP/CppCon-2015/Ranges-for-the-Standard-Library',
 | 
			
		||||
            'ext': 'mp4',
 | 
			
		||||
            'title': 'Ranges for the Standard Library',
 | 
			
		||||
            'description': 'md5:2e6b4917677af3728c5f6d63784c4c5d',
 | 
			
		||||
            'duration': 5646,
 | 
			
		||||
            'thumbnail': 're:http://.*\.jpg',
 | 
			
		||||
        },
 | 
			
		||||
        'params': {
 | 
			
		||||
            'skip_download': True,
 | 
			
		||||
        },
 | 
			
		||||
    }, {
 | 
			
		||||
        'url': 'https://channel9.msdn.com/Niners/Splendid22/Queue/76acff796e8f411184b008028e0d492b/RSS',
 | 
			
		||||
        'info_dict': {
 | 
			
		||||
            'id': 'Niners/Splendid22/Queue/76acff796e8f411184b008028e0d492b',
 | 
			
		||||
            'title': 'Channel 9',
 | 
			
		||||
        },
 | 
			
		||||
        'playlist_count': 2,
 | 
			
		||||
    }, {
 | 
			
		||||
        'url': 'https://channel9.msdn.com/Events/DEVintersection/DEVintersection-2016/RSS',
 | 
			
		||||
        'only_matching': True,
 | 
			
		||||
    }, {
 | 
			
		||||
        'url': 'https://channel9.msdn.com/Events/Speakers/scott-hanselman/RSS?UrlSafeName=scott-hanselman',
 | 
			
		||||
        'only_matching': True,
 | 
			
		||||
    }]
 | 
			
		||||
 | 
			
		||||
    _RSS_URL = 'http://channel9.msdn.com/%s/RSS'
 | 
			
		||||
 | 
			
		||||
@@ -254,22 +264,30 @@ class Channel9IE(InfoExtractor):
 | 
			
		||||
 | 
			
		||||
        return self.playlist_result(contents)
 | 
			
		||||
 | 
			
		||||
    def _extract_list(self, content_path):
 | 
			
		||||
        rss = self._download_xml(self._RSS_URL % content_path, content_path, 'Downloading RSS')
 | 
			
		||||
    def _extract_list(self, video_id, rss_url=None):
 | 
			
		||||
        if not rss_url:
 | 
			
		||||
            rss_url = self._RSS_URL % video_id
 | 
			
		||||
        rss = self._download_xml(rss_url, video_id, 'Downloading RSS')
 | 
			
		||||
        entries = [self.url_result(session_url.text, 'Channel9')
 | 
			
		||||
                   for session_url in rss.findall('./channel/item/link')]
 | 
			
		||||
        title_text = rss.find('./channel/title').text
 | 
			
		||||
        return self.playlist_result(entries, content_path, title_text)
 | 
			
		||||
        return self.playlist_result(entries, video_id, title_text)
 | 
			
		||||
 | 
			
		||||
    def _real_extract(self, url):
 | 
			
		||||
        mobj = re.match(self._VALID_URL, url)
 | 
			
		||||
        content_path = mobj.group('contentpath')
 | 
			
		||||
        rss = mobj.group('rss')
 | 
			
		||||
 | 
			
		||||
        webpage = self._download_webpage(url, content_path, 'Downloading web page')
 | 
			
		||||
        if rss:
 | 
			
		||||
            return self._extract_list(content_path, url)
 | 
			
		||||
 | 
			
		||||
        page_type_m = re.search(r'<meta name="WT.entryid" content="(?P<pagetype>[^:]+)[^"]+"/>', webpage)
 | 
			
		||||
        if page_type_m is not None:
 | 
			
		||||
            page_type = page_type_m.group('pagetype')
 | 
			
		||||
        webpage = self._download_webpage(
 | 
			
		||||
            url, content_path, 'Downloading web page')
 | 
			
		||||
 | 
			
		||||
        page_type = self._search_regex(
 | 
			
		||||
            r'<meta[^>]+name=(["\'])WT\.entryid\1[^>]+content=(["\'])(?P<pagetype>[^:]+).+?\2',
 | 
			
		||||
            webpage, 'page type', default=None, group='pagetype')
 | 
			
		||||
        if page_type:
 | 
			
		||||
            if page_type == 'Entry':      # Any 'item'-like page, may contain downloadable content
 | 
			
		||||
                return self._extract_entry_item(webpage, content_path)
 | 
			
		||||
            elif page_type == 'Session':  # Event session page, may contain downloadable content
 | 
			
		||||
@@ -278,6 +296,5 @@ class Channel9IE(InfoExtractor):
 | 
			
		||||
                return self._extract_list(content_path)
 | 
			
		||||
            else:
 | 
			
		||||
                raise ExtractorError('Unexpected WT.entryid %s' % page_type, expected=True)
 | 
			
		||||
 | 
			
		||||
        else:  # Assuming list
 | 
			
		||||
            return self._extract_list(content_path)
 | 
			
		||||
 
 | 
			
		||||
@@ -1,119 +0,0 @@
 | 
			
		||||
# encoding: utf-8
 | 
			
		||||
from __future__ import unicode_literals
 | 
			
		||||
 | 
			
		||||
import re
 | 
			
		||||
 | 
			
		||||
from .common import InfoExtractor
 | 
			
		||||
from ..utils import ExtractorError
 | 
			
		||||
from .screenwavemedia import ScreenwaveMediaIE
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class CinemassacreIE(InfoExtractor):
 | 
			
		||||
    _VALID_URL = 'https?://(?:www\.)?cinemassacre\.com/(?P<date_y>[0-9]{4})/(?P<date_m>[0-9]{2})/(?P<date_d>[0-9]{2})/(?P<display_id>[^?#/]+)'
 | 
			
		||||
    _TESTS = [
 | 
			
		||||
        {
 | 
			
		||||
            'url': 'http://cinemassacre.com/2012/11/10/avgn-the-movie-trailer/',
 | 
			
		||||
            'md5': 'fde81fbafaee331785f58cd6c0d46190',
 | 
			
		||||
            'info_dict': {
 | 
			
		||||
                'id': 'Cinemassacre-19911',
 | 
			
		||||
                'ext': 'mp4',
 | 
			
		||||
                'upload_date': '20121110',
 | 
			
		||||
                'title': '“Angry Video Game Nerd: The Movie” – Trailer',
 | 
			
		||||
                'description': 'md5:fb87405fcb42a331742a0dce2708560b',
 | 
			
		||||
            },
 | 
			
		||||
            'params': {
 | 
			
		||||
                # m3u8 download
 | 
			
		||||
                'skip_download': True,
 | 
			
		||||
            },
 | 
			
		||||
        },
 | 
			
		||||
        {
 | 
			
		||||
            'url': 'http://cinemassacre.com/2013/10/02/the-mummys-hand-1940',
 | 
			
		||||
            'md5': 'd72f10cd39eac4215048f62ab477a511',
 | 
			
		||||
            'info_dict': {
 | 
			
		||||
                'id': 'Cinemassacre-521be8ef82b16',
 | 
			
		||||
                'ext': 'mp4',
 | 
			
		||||
                'upload_date': '20131002',
 | 
			
		||||
                'title': 'The Mummy’s Hand (1940)',
 | 
			
		||||
            },
 | 
			
		||||
            'params': {
 | 
			
		||||
                # m3u8 download
 | 
			
		||||
                'skip_download': True,
 | 
			
		||||
            },
 | 
			
		||||
        },
 | 
			
		||||
        {
 | 
			
		||||
            # Youtube embedded video
 | 
			
		||||
            'url': 'http://cinemassacre.com/2006/12/07/chronologically-confused-about-bad-movie-and-video-game-sequel-titles/',
 | 
			
		||||
            'md5': 'ec9838a5520ef5409b3e4e42fcb0a3b9',
 | 
			
		||||
            'info_dict': {
 | 
			
		||||
                'id': 'OEVzPCY2T-g',
 | 
			
		||||
                'ext': 'webm',
 | 
			
		||||
                'title': 'AVGN: Chronologically Confused about Bad Movie and Video Game Sequel Titles',
 | 
			
		||||
                'upload_date': '20061207',
 | 
			
		||||
                'uploader': 'Cinemassacre',
 | 
			
		||||
                'uploader_id': 'JamesNintendoNerd',
 | 
			
		||||
                'description': 'md5:784734696c2b8b7f4b8625cc799e07f6',
 | 
			
		||||
            }
 | 
			
		||||
        },
 | 
			
		||||
        {
 | 
			
		||||
            # Youtube embedded video
 | 
			
		||||
            'url': 'http://cinemassacre.com/2006/09/01/mckids/',
 | 
			
		||||
            'md5': '7393c4e0f54602ad110c793eb7a6513a',
 | 
			
		||||
            'info_dict': {
 | 
			
		||||
                'id': 'FnxsNhuikpo',
 | 
			
		||||
                'ext': 'webm',
 | 
			
		||||
                'upload_date': '20060901',
 | 
			
		||||
                'uploader': 'Cinemassacre Extra',
 | 
			
		||||
                'description': 'md5:de9b751efa9e45fbaafd9c8a1123ed53',
 | 
			
		||||
                'uploader_id': 'Cinemassacre',
 | 
			
		||||
                'title': 'AVGN: McKids',
 | 
			
		||||
            }
 | 
			
		||||
        },
 | 
			
		||||
        {
 | 
			
		||||
            'url': 'http://cinemassacre.com/2015/05/25/mario-kart-64-nintendo-64-james-mike-mondays/',
 | 
			
		||||
            'md5': '1376908e49572389e7b06251a53cdd08',
 | 
			
		||||
            'info_dict': {
 | 
			
		||||
                'id': 'Cinemassacre-555779690c440',
 | 
			
		||||
                'ext': 'mp4',
 | 
			
		||||
                'description': 'Let’s Play Mario Kart 64 !! Mario Kart 64 is a classic go-kart racing game released for the Nintendo 64 (N64). Today James & Mike do 4 player Battle Mode with Kyle and Bootsy!',
 | 
			
		||||
                'title': 'Mario Kart 64 (Nintendo 64) James & Mike Mondays',
 | 
			
		||||
                'upload_date': '20150525',
 | 
			
		||||
            },
 | 
			
		||||
            'params': {
 | 
			
		||||
                # m3u8 download
 | 
			
		||||
                'skip_download': True,
 | 
			
		||||
            },
 | 
			
		||||
        }
 | 
			
		||||
    ]
 | 
			
		||||
 | 
			
		||||
    def _real_extract(self, url):
 | 
			
		||||
        mobj = re.match(self._VALID_URL, url)
 | 
			
		||||
        display_id = mobj.group('display_id')
 | 
			
		||||
        video_date = mobj.group('date_y') + mobj.group('date_m') + mobj.group('date_d')
 | 
			
		||||
 | 
			
		||||
        webpage = self._download_webpage(url, display_id)
 | 
			
		||||
 | 
			
		||||
        playerdata_url = self._search_regex(
 | 
			
		||||
            [
 | 
			
		||||
                ScreenwaveMediaIE.EMBED_PATTERN,
 | 
			
		||||
                r'<iframe[^>]+src="(?P<url>(?:https?:)?//(?:[^.]+\.)?youtube\.com/.+?)"',
 | 
			
		||||
            ],
 | 
			
		||||
            webpage, 'player data URL', default=None, group='url')
 | 
			
		||||
        if not playerdata_url:
 | 
			
		||||
            raise ExtractorError('Unable to find player data')
 | 
			
		||||
 | 
			
		||||
        video_title = self._html_search_regex(
 | 
			
		||||
            r'<title>(?P<title>.+?)\|', webpage, 'title')
 | 
			
		||||
        video_description = self._html_search_regex(
 | 
			
		||||
            r'<div class="entry-content">(?P<description>.+?)</div>',
 | 
			
		||||
            webpage, 'description', flags=re.DOTALL, fatal=False)
 | 
			
		||||
        video_thumbnail = self._og_search_thumbnail(webpage)
 | 
			
		||||
 | 
			
		||||
        return {
 | 
			
		||||
            '_type': 'url_transparent',
 | 
			
		||||
            'display_id': display_id,
 | 
			
		||||
            'title': video_title,
 | 
			
		||||
            'description': video_description,
 | 
			
		||||
            'upload_date': video_date,
 | 
			
		||||
            'thumbnail': video_thumbnail,
 | 
			
		||||
            'url': playerdata_url,
 | 
			
		||||
        }
 | 
			
		||||
@@ -1,101 +0,0 @@
 | 
			
		||||
from __future__ import unicode_literals
 | 
			
		||||
 | 
			
		||||
import json
 | 
			
		||||
import re
 | 
			
		||||
 | 
			
		||||
from .common import InfoExtractor
 | 
			
		||||
from ..utils import int_or_none
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class CollegeHumorIE(InfoExtractor):
 | 
			
		||||
    _VALID_URL = r'^(?:https?://)?(?:www\.)?collegehumor\.com/(video|embed|e)/(?P<videoid>[0-9]+)/?(?P<shorttitle>.*)$'
 | 
			
		||||
 | 
			
		||||
    _TESTS = [
 | 
			
		||||
        {
 | 
			
		||||
            'url': 'http://www.collegehumor.com/video/6902724/comic-con-cosplay-catastrophe',
 | 
			
		||||
            'md5': 'dcc0f5c1c8be98dc33889a191f4c26bd',
 | 
			
		||||
            'info_dict': {
 | 
			
		||||
                'id': '6902724',
 | 
			
		||||
                'ext': 'mp4',
 | 
			
		||||
                'title': 'Comic-Con Cosplay Catastrophe',
 | 
			
		||||
                'description': "Fans get creative this year at San Diego.  Too creative.  And yes, that's really Joss Whedon.",
 | 
			
		||||
                'age_limit': 13,
 | 
			
		||||
                'duration': 187,
 | 
			
		||||
            },
 | 
			
		||||
        }, {
 | 
			
		||||
            'url': 'http://www.collegehumor.com/video/3505939/font-conference',
 | 
			
		||||
            'md5': '72fa701d8ef38664a4dbb9e2ab721816',
 | 
			
		||||
            'info_dict': {
 | 
			
		||||
                'id': '3505939',
 | 
			
		||||
                'ext': 'mp4',
 | 
			
		||||
                'title': 'Font Conference',
 | 
			
		||||
                'description': "This video wasn't long enough, so we made it double-spaced.",
 | 
			
		||||
                'age_limit': 10,
 | 
			
		||||
                'duration': 179,
 | 
			
		||||
            },
 | 
			
		||||
        }, {
 | 
			
		||||
            # embedded youtube video
 | 
			
		||||
            'url': 'http://www.collegehumor.com/embed/6950306',
 | 
			
		||||
            'info_dict': {
 | 
			
		||||
                'id': 'Z-bao9fg6Yc',
 | 
			
		||||
                'ext': 'mp4',
 | 
			
		||||
                'title': 'Young Americans Think President John F. Kennedy Died THIS MORNING IN A CAR ACCIDENT!!!',
 | 
			
		||||
                'uploader': 'Mark Dice',
 | 
			
		||||
                'uploader_id': 'MarkDice',
 | 
			
		||||
                'description': 'md5:62c3dab9351fac7bb44b53b69511d87f',
 | 
			
		||||
                'upload_date': '20140127',
 | 
			
		||||
            },
 | 
			
		||||
            'params': {
 | 
			
		||||
                'skip_download': True,
 | 
			
		||||
            },
 | 
			
		||||
            'add_ie': ['Youtube'],
 | 
			
		||||
        },
 | 
			
		||||
    ]
 | 
			
		||||
 | 
			
		||||
    def _real_extract(self, url):
 | 
			
		||||
        mobj = re.match(self._VALID_URL, url)
 | 
			
		||||
        video_id = mobj.group('videoid')
 | 
			
		||||
 | 
			
		||||
        jsonUrl = 'http://www.collegehumor.com/moogaloop/video/' + video_id + '.json'
 | 
			
		||||
        data = json.loads(self._download_webpage(
 | 
			
		||||
            jsonUrl, video_id, 'Downloading info JSON'))
 | 
			
		||||
        vdata = data['video']
 | 
			
		||||
        if vdata.get('youtubeId') is not None:
 | 
			
		||||
            return {
 | 
			
		||||
                '_type': 'url',
 | 
			
		||||
                'url': vdata['youtubeId'],
 | 
			
		||||
                'ie_key': 'Youtube',
 | 
			
		||||
            }
 | 
			
		||||
 | 
			
		||||
        AGE_LIMITS = {'nc17': 18, 'r': 18, 'pg13': 13, 'pg': 10, 'g': 0}
 | 
			
		||||
        rating = vdata.get('rating')
 | 
			
		||||
        if rating:
 | 
			
		||||
            age_limit = AGE_LIMITS.get(rating.lower())
 | 
			
		||||
        else:
 | 
			
		||||
            age_limit = None  # None = No idea
 | 
			
		||||
 | 
			
		||||
        PREFS = {'high_quality': 2, 'low_quality': 0}
 | 
			
		||||
        formats = []
 | 
			
		||||
        for format_key in ('mp4', 'webm'):
 | 
			
		||||
            for qname, qurl in vdata.get(format_key, {}).items():
 | 
			
		||||
                formats.append({
 | 
			
		||||
                    'format_id': format_key + '_' + qname,
 | 
			
		||||
                    'url': qurl,
 | 
			
		||||
                    'format': format_key,
 | 
			
		||||
                    'preference': PREFS.get(qname),
 | 
			
		||||
                })
 | 
			
		||||
        self._sort_formats(formats)
 | 
			
		||||
 | 
			
		||||
        duration = int_or_none(vdata.get('duration'), 1000)
 | 
			
		||||
        like_count = int_or_none(vdata.get('likes'))
 | 
			
		||||
 | 
			
		||||
        return {
 | 
			
		||||
            'id': video_id,
 | 
			
		||||
            'title': vdata['title'],
 | 
			
		||||
            'description': vdata.get('description'),
 | 
			
		||||
            'thumbnail': vdata.get('thumbnail'),
 | 
			
		||||
            'formats': formats,
 | 
			
		||||
            'age_limit': age_limit,
 | 
			
		||||
            'duration': duration,
 | 
			
		||||
            'like_count': like_count,
 | 
			
		||||
        }
 | 
			
		||||
@@ -44,10 +44,10 @@ class ComedyCentralShowsIE(MTVServicesInfoExtractor):
 | 
			
		||||
    #                     or: http://www.colbertnation.com/the-colbert-report-collections/422008/festival-of-lights/79524
 | 
			
		||||
    _VALID_URL = r'''(?x)^(:(?P<shortname>tds|thedailyshow)
 | 
			
		||||
                      |https?://(:www\.)?
 | 
			
		||||
                          (?P<showname>thedailyshow|thecolbertreport)\.(?:cc\.)?com/
 | 
			
		||||
                          (?P<showname>thedailyshow|thecolbertreport|tosh)\.(?:cc\.)?com/
 | 
			
		||||
                         ((?:full-)?episodes/(?:[0-9a-z]{6}/)?(?P<episode>.*)|
 | 
			
		||||
                          (?P<clip>
 | 
			
		||||
                              (?:(?:guests/[^/]+|videos|video-playlists|special-editions|news-team/[^/]+)/[^/]+/(?P<videotitle>[^/?#]+))
 | 
			
		||||
                              (?:(?:guests/[^/]+|videos|video-(?:clips|playlists)|special-editions|news-team/[^/]+)/[^/]+/(?P<videotitle>[^/?#]+))
 | 
			
		||||
                              |(the-colbert-report-(videos|collections)/(?P<clipID>[0-9]+)/[^/]*/(?P<cntitle>.*?))
 | 
			
		||||
                              |(watch/(?P<date>[^/]*)/(?P<tdstitle>.*))
 | 
			
		||||
                          )|
 | 
			
		||||
@@ -129,6 +129,9 @@ class ComedyCentralShowsIE(MTVServicesInfoExtractor):
 | 
			
		||||
    }, {
 | 
			
		||||
        'url': 'http://thedailyshow.cc.com/news-team/michael-che/7wnfel/we-need-to-talk-about-israel',
 | 
			
		||||
        'only_matching': True,
 | 
			
		||||
    }, {
 | 
			
		||||
        'url': 'http://tosh.cc.com/video-clips/68g93d/twitter-users-share-summer-plans',
 | 
			
		||||
        'only_matching': True,
 | 
			
		||||
    }]
 | 
			
		||||
 | 
			
		||||
    _available_formats = ['3500', '2200', '1700', '1200', '750', '400']
 | 
			
		||||
 
 | 
			
		||||
@@ -45,6 +45,7 @@ from ..utils import (
 | 
			
		||||
    unescapeHTML,
 | 
			
		||||
    unified_strdate,
 | 
			
		||||
    url_basename,
 | 
			
		||||
    xpath_element,
 | 
			
		||||
    xpath_text,
 | 
			
		||||
    xpath_with_ns,
 | 
			
		||||
    determine_protocol,
 | 
			
		||||
@@ -163,7 +164,7 @@ class InfoExtractor(object):
 | 
			
		||||
    description:    Full video description.
 | 
			
		||||
    uploader:       Full name of the video uploader.
 | 
			
		||||
    license:        License name the video is licensed under.
 | 
			
		||||
    creator:        The main artist who created the video.
 | 
			
		||||
    creator:        The creator of the video.
 | 
			
		||||
    release_date:   The date (YYYYMMDD) when the video was released.
 | 
			
		||||
    timestamp:      UNIX timestamp of the moment the video became available.
 | 
			
		||||
    upload_date:    Video upload date (YYYYMMDD).
 | 
			
		||||
@@ -987,7 +988,7 @@ class InfoExtractor(object):
 | 
			
		||||
 | 
			
		||||
    def _extract_f4m_formats(self, manifest_url, video_id, preference=None, f4m_id=None,
 | 
			
		||||
                             transform_source=lambda s: fix_xml_ampersands(s).strip(),
 | 
			
		||||
                             fatal=True):
 | 
			
		||||
                             fatal=True, m3u8_id=None):
 | 
			
		||||
        manifest = self._download_xml(
 | 
			
		||||
            manifest_url, video_id, 'Downloading f4m manifest',
 | 
			
		||||
            'Unable to download f4m manifest',
 | 
			
		||||
@@ -1001,11 +1002,11 @@ class InfoExtractor(object):
 | 
			
		||||
 | 
			
		||||
        return self._parse_f4m_formats(
 | 
			
		||||
            manifest, manifest_url, video_id, preference=preference, f4m_id=f4m_id,
 | 
			
		||||
            transform_source=transform_source, fatal=fatal)
 | 
			
		||||
            transform_source=transform_source, fatal=fatal, m3u8_id=m3u8_id)
 | 
			
		||||
 | 
			
		||||
    def _parse_f4m_formats(self, manifest, manifest_url, video_id, preference=None, f4m_id=None,
 | 
			
		||||
                           transform_source=lambda s: fix_xml_ampersands(s).strip(),
 | 
			
		||||
                           fatal=True):
 | 
			
		||||
                           fatal=True, m3u8_id=None):
 | 
			
		||||
        # currently youtube-dl cannot decode the playerVerificationChallenge as Akamai uses Adobe Alchemy
 | 
			
		||||
        akamai_pv = manifest.find('{http://ns.adobe.com/f4m/1.0}pv-2.0')
 | 
			
		||||
        if akamai_pv is not None and ';' in akamai_pv.text:
 | 
			
		||||
@@ -1029,9 +1030,26 @@ class InfoExtractor(object):
 | 
			
		||||
            'base URL', default=None)
 | 
			
		||||
        if base_url:
 | 
			
		||||
            base_url = base_url.strip()
 | 
			
		||||
 | 
			
		||||
        bootstrap_info = xpath_element(
 | 
			
		||||
            manifest, ['{http://ns.adobe.com/f4m/1.0}bootstrapInfo', '{http://ns.adobe.com/f4m/2.0}bootstrapInfo'],
 | 
			
		||||
            'bootstrap info', default=None)
 | 
			
		||||
 | 
			
		||||
        for i, media_el in enumerate(media_nodes):
 | 
			
		||||
            if manifest_version == '2.0':
 | 
			
		||||
                media_url = media_el.attrib.get('href') or media_el.attrib.get('url')
 | 
			
		||||
            tbr = int_or_none(media_el.attrib.get('bitrate'))
 | 
			
		||||
            width = int_or_none(media_el.attrib.get('width'))
 | 
			
		||||
            height = int_or_none(media_el.attrib.get('height'))
 | 
			
		||||
            format_id = '-'.join(filter(None, [f4m_id, compat_str(i if tbr is None else tbr)]))
 | 
			
		||||
            # If <bootstrapInfo> is present, the specified f4m is a
 | 
			
		||||
            # stream-level manifest, and only set-level manifests may refer to
 | 
			
		||||
            # external resources.  See section 11.4 and section 4 of F4M spec
 | 
			
		||||
            if bootstrap_info is None:
 | 
			
		||||
                media_url = None
 | 
			
		||||
                # @href is introduced in 2.0, see section 11.6 of F4M spec
 | 
			
		||||
                if manifest_version == '2.0':
 | 
			
		||||
                    media_url = media_el.attrib.get('href')
 | 
			
		||||
                if media_url is None:
 | 
			
		||||
                    media_url = media_el.attrib.get('url')
 | 
			
		||||
                if not media_url:
 | 
			
		||||
                    continue
 | 
			
		||||
                manifest_url = (
 | 
			
		||||
@@ -1041,29 +1059,43 @@ class InfoExtractor(object):
 | 
			
		||||
                # since bitrates in parent manifest (this one) and media_url manifest
 | 
			
		||||
                # may differ leading to inability to resolve the format by requested
 | 
			
		||||
                # bitrate in f4m downloader
 | 
			
		||||
                if determine_ext(manifest_url) == 'f4m':
 | 
			
		||||
                    formats.extend(self._extract_f4m_formats(
 | 
			
		||||
                ext = determine_ext(manifest_url)
 | 
			
		||||
                if ext == 'f4m':
 | 
			
		||||
                    f4m_formats = self._extract_f4m_formats(
 | 
			
		||||
                        manifest_url, video_id, preference=preference, f4m_id=f4m_id,
 | 
			
		||||
                        transform_source=transform_source, fatal=fatal))
 | 
			
		||||
                        transform_source=transform_source, fatal=fatal)
 | 
			
		||||
                    # Sometimes stream-level manifest contains single media entry that
 | 
			
		||||
                    # does not contain any quality metadata (e.g. http://matchtv.ru/#live-player).
 | 
			
		||||
                    # At the same time parent's media entry in set-level manifest may
 | 
			
		||||
                    # contain it. We will copy it from parent in such cases.
 | 
			
		||||
                    if len(f4m_formats) == 1:
 | 
			
		||||
                        f = f4m_formats[0]
 | 
			
		||||
                        f.update({
 | 
			
		||||
                            'tbr': f.get('tbr') or tbr,
 | 
			
		||||
                            'width': f.get('width') or width,
 | 
			
		||||
                            'height': f.get('height') or height,
 | 
			
		||||
                            'format_id': f.get('format_id') if not tbr else format_id,
 | 
			
		||||
                        })
 | 
			
		||||
                    formats.extend(f4m_formats)
 | 
			
		||||
                    continue
 | 
			
		||||
                elif ext == 'm3u8':
 | 
			
		||||
                    formats.extend(self._extract_m3u8_formats(
 | 
			
		||||
                        manifest_url, video_id, 'mp4', preference=preference,
 | 
			
		||||
                        m3u8_id=m3u8_id, fatal=fatal))
 | 
			
		||||
                    continue
 | 
			
		||||
            tbr = int_or_none(media_el.attrib.get('bitrate'))
 | 
			
		||||
            formats.append({
 | 
			
		||||
                'format_id': '-'.join(filter(None, [f4m_id, compat_str(i if tbr is None else tbr)])),
 | 
			
		||||
                'format_id': format_id,
 | 
			
		||||
                'url': manifest_url,
 | 
			
		||||
                'ext': 'flv',
 | 
			
		||||
                'ext': 'flv' if bootstrap_info is not None else None,
 | 
			
		||||
                'tbr': tbr,
 | 
			
		||||
                'width': int_or_none(media_el.attrib.get('width')),
 | 
			
		||||
                'height': int_or_none(media_el.attrib.get('height')),
 | 
			
		||||
                'width': width,
 | 
			
		||||
                'height': height,
 | 
			
		||||
                'preference': preference,
 | 
			
		||||
            })
 | 
			
		||||
        return formats
 | 
			
		||||
 | 
			
		||||
    def _extract_m3u8_formats(self, m3u8_url, video_id, ext=None,
 | 
			
		||||
                              entry_protocol='m3u8', preference=None,
 | 
			
		||||
                              m3u8_id=None, note=None, errnote=None,
 | 
			
		||||
                              fatal=True, live=False):
 | 
			
		||||
 | 
			
		||||
        formats = [{
 | 
			
		||||
    def _m3u8_meta_format(self, m3u8_url, ext=None, preference=None, m3u8_id=None):
 | 
			
		||||
        return {
 | 
			
		||||
            'format_id': '-'.join(filter(None, [m3u8_id, 'meta'])),
 | 
			
		||||
            'url': m3u8_url,
 | 
			
		||||
            'ext': ext,
 | 
			
		||||
@@ -1071,7 +1103,14 @@ class InfoExtractor(object):
 | 
			
		||||
            'preference': preference - 1 if preference else -1,
 | 
			
		||||
            'resolution': 'multiple',
 | 
			
		||||
            'format_note': 'Quality selection URL',
 | 
			
		||||
        }]
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
    def _extract_m3u8_formats(self, m3u8_url, video_id, ext=None,
 | 
			
		||||
                              entry_protocol='m3u8', preference=None,
 | 
			
		||||
                              m3u8_id=None, note=None, errnote=None,
 | 
			
		||||
                              fatal=True, live=False):
 | 
			
		||||
 | 
			
		||||
        formats = [self._m3u8_meta_format(m3u8_url, ext, preference, m3u8_id)]
 | 
			
		||||
 | 
			
		||||
        format_url = lambda u: (
 | 
			
		||||
            u
 | 
			
		||||
@@ -1138,12 +1177,15 @@ class InfoExtractor(object):
 | 
			
		||||
                format_id = []
 | 
			
		||||
                if m3u8_id:
 | 
			
		||||
                    format_id.append(m3u8_id)
 | 
			
		||||
                last_media_name = last_media.get('NAME') if last_media and last_media.get('TYPE') != 'SUBTITLES' else None
 | 
			
		||||
                last_media_name = last_media.get('NAME') if last_media and last_media.get('TYPE') not in ('SUBTITLES', 'CLOSED-CAPTIONS') else None
 | 
			
		||||
                # Despite specification does not mention NAME attribute for
 | 
			
		||||
                # EXT-X-STREAM-INF it still sometimes may be present
 | 
			
		||||
                stream_name = last_info.get('NAME') or last_media_name
 | 
			
		||||
                # Bandwidth of live streams may differ over time thus making
 | 
			
		||||
                # format_id unpredictable. So it's better to keep provided
 | 
			
		||||
                # format_id intact.
 | 
			
		||||
                if not live:
 | 
			
		||||
                    format_id.append(last_media_name if last_media_name else '%d' % (tbr if tbr else len(formats)))
 | 
			
		||||
                    format_id.append(stream_name if stream_name else '%d' % (tbr if tbr else len(formats)))
 | 
			
		||||
                f = {
 | 
			
		||||
                    'format_id': '-'.join(format_id),
 | 
			
		||||
                    'url': format_url(line.strip()),
 | 
			
		||||
@@ -1275,21 +1317,21 @@ class InfoExtractor(object):
 | 
			
		||||
        m3u8_count = 0
 | 
			
		||||
 | 
			
		||||
        srcs = []
 | 
			
		||||
        videos = smil.findall(self._xpath_ns('.//video', namespace))
 | 
			
		||||
        for video in videos:
 | 
			
		||||
            src = video.get('src')
 | 
			
		||||
        media = smil.findall(self._xpath_ns('.//video', namespace)) + smil.findall(self._xpath_ns('.//audio', namespace))
 | 
			
		||||
        for medium in media:
 | 
			
		||||
            src = medium.get('src')
 | 
			
		||||
            if not src or src in srcs:
 | 
			
		||||
                continue
 | 
			
		||||
            srcs.append(src)
 | 
			
		||||
 | 
			
		||||
            bitrate = float_or_none(video.get('system-bitrate') or video.get('systemBitrate'), 1000)
 | 
			
		||||
            filesize = int_or_none(video.get('size') or video.get('fileSize'))
 | 
			
		||||
            width = int_or_none(video.get('width'))
 | 
			
		||||
            height = int_or_none(video.get('height'))
 | 
			
		||||
            proto = video.get('proto')
 | 
			
		||||
            ext = video.get('ext')
 | 
			
		||||
            bitrate = float_or_none(medium.get('system-bitrate') or medium.get('systemBitrate'), 1000)
 | 
			
		||||
            filesize = int_or_none(medium.get('size') or medium.get('fileSize'))
 | 
			
		||||
            width = int_or_none(medium.get('width'))
 | 
			
		||||
            height = int_or_none(medium.get('height'))
 | 
			
		||||
            proto = medium.get('proto')
 | 
			
		||||
            ext = medium.get('ext')
 | 
			
		||||
            src_ext = determine_ext(src)
 | 
			
		||||
            streamer = video.get('streamer') or base
 | 
			
		||||
            streamer = medium.get('streamer') or base
 | 
			
		||||
 | 
			
		||||
            if proto == 'rtmp' or streamer.startswith('rtmp'):
 | 
			
		||||
                rtmp_count += 1
 | 
			
		||||
 
 | 
			
		||||
							
								
								
									
										143
									
								
								youtube_dl/extractor/coub.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										143
									
								
								youtube_dl/extractor/coub.py
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,143 @@
 | 
			
		||||
# coding: utf-8
 | 
			
		||||
from __future__ import unicode_literals
 | 
			
		||||
 | 
			
		||||
from .common import InfoExtractor
 | 
			
		||||
from ..utils import (
 | 
			
		||||
    ExtractorError,
 | 
			
		||||
    float_or_none,
 | 
			
		||||
    int_or_none,
 | 
			
		||||
    parse_iso8601,
 | 
			
		||||
    qualities,
 | 
			
		||||
)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class CoubIE(InfoExtractor):
    """Extractor for coub.com looping clips.

    Accepts regular view/embed/coubs pages, the legacy fb-player.swf embed
    (coubID/coubid query parameter) and the internal coub:<id> scheme.
    """

    _VALID_URL = r'(?:coub:|https?://(?:coub\.com/(?:view|embed|coubs)/|c-cdn\.coub\.com/fb-player\.swf\?.*\bcoub(?:ID|id)=))(?P<id>[\da-z]+)'

    _TESTS = [{
        'url': 'http://coub.com/view/5u5n1',
        'info_dict': {
            'id': '5u5n1',
            'ext': 'mp4',
            'title': 'The Matrix Moonwalk',
            'thumbnail': 're:^https?://.*\.jpg$',
            'duration': 4.6,
            'timestamp': 1428527772,
            'upload_date': '20150408',
            'uploader': 'Артём Лоскутников',
            'uploader_id': 'artyom.loskutnikov',
            'view_count': int,
            'like_count': int,
            'repost_count': int,
            'comment_count': int,
            'age_limit': 0,
        },
    }, {
        'url': 'http://c-cdn.coub.com/fb-player.swf?bot_type=vk&coubID=7w5a4',
        'only_matching': True,
    }, {
        'url': 'coub:5u5n1',
        'only_matching': True,
    }, {
        # longer video id
        'url': 'http://coub.com/view/237d5l5h',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        video_id = self._match_id(url)

        coub = self._download_json(
            'http://coub.com/api/v2/coubs/%s.json' % video_id, video_id)

        # The API reports failures in an 'error' field rather than via an
        # HTTP error status.
        if coub.get('error'):
            raise ExtractorError(
                '%s said: %s' % (self.IE_NAME, coub['error']), expected=True)

        title = coub['title']
        file_versions = coub['file_versions']

        QUALITIES = ('low', 'med', 'high')

        MOBILE = 'mobile'
        IPHONE = 'iphone'
        HTML5 = 'html5'

        SOURCE_PREFERENCE = (MOBILE, IPHONE, HTML5)

        quality_key = qualities(QUALITIES)
        preference_key = qualities(SOURCE_PREFERENCE)

        formats = []

        # html5 variants are grouped by kind ('video'/'audio'), then by
        # quality; entries with an unexpected shape or no URL are skipped.
        html5_versions = file_versions.get(HTML5, {})
        for kind in ('video', 'audio'):
            variants = html5_versions.get(kind)
            if not isinstance(variants, dict):
                continue
            for quality, variant in variants.items():
                if not isinstance(variant, dict):
                    continue
                variant_url = variant.get('url')
                if not variant_url:
                    continue
                formats.append({
                    'url': variant_url,
                    'format_id': '%s-%s-%s' % (HTML5, kind, quality),
                    'filesize': int_or_none(variant.get('size')),
                    'vcodec': 'none' if kind == 'audio' else None,
                    'quality': quality_key(quality),
                    'preference': preference_key(HTML5),
                })

        # Single-URL fallbacks: an iPhone stream and an audio-only mobile
        # stream.
        iphone_url = file_versions.get(IPHONE, {}).get('url')
        if iphone_url:
            formats.append({
                'url': iphone_url,
                'format_id': IPHONE,
                'preference': preference_key(IPHONE),
            })

        mobile_url = file_versions.get(MOBILE, {}).get('audio_url')
        if mobile_url:
            formats.append({
                'url': mobile_url,
                'format_id': '%s-audio' % MOBILE,
                'preference': preference_key(MOBILE),
            })

        self._sort_formats(formats)

        channel = coub.get('channel', {})

        # The restriction flag may live under either of two keys; None means
        # the API gave no verdict at all.
        age_restricted = coub.get('age_restricted', coub.get('age_restricted_by_admin'))
        if age_restricted is None:
            age_limit = None
        else:
            age_limit = 18 if age_restricted is True else 0

        return {
            'id': video_id,
            'title': title,
            'thumbnail': coub.get('picture'),
            'duration': float_or_none(coub.get('duration')),
            'timestamp': parse_iso8601(coub.get('published_at') or coub.get('created_at')),
            'uploader': channel.get('title'),
            'uploader_id': channel.get('permalink'),
            'view_count': int_or_none(coub.get('views_count') or coub.get('views_increase_count')),
            'like_count': int_or_none(coub.get('likes_count')),
            'repost_count': int_or_none(coub.get('recoubs_count')),
            'comment_count': int_or_none(coub.get('comments_count')),
            'age_limit': age_limit,
            'formats': formats,
        }
 | 
			
		||||
							
								
								
									
										61
									
								
								youtube_dl/extractor/dailymail.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										61
									
								
								youtube_dl/extractor/dailymail.py
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,61 @@
 | 
			
		||||
# coding: utf-8
 | 
			
		||||
from __future__ import unicode_literals
 | 
			
		||||
 | 
			
		||||
from .common import InfoExtractor
 | 
			
		||||
from ..utils import (
 | 
			
		||||
    int_or_none,
 | 
			
		||||
    determine_protocol,
 | 
			
		||||
)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class DailyMailIE(InfoExtractor):
    """Extractor for video pages on dailymail.co.uk."""

    _VALID_URL = r'https?://(?:www\.)?dailymail\.co\.uk/video/[^/]+/video-(?P<id>[0-9]+)'
    _TEST = {
        'url': 'http://www.dailymail.co.uk/video/sciencetech/video-1288527/Turn-video-impressionist-masterpiece.html',
        'md5': '2f639d446394f53f3a33658b518b6615',
        'info_dict': {
            'id': '1288527',
            'ext': 'mp4',
            'title': 'Turn any video into an impressionist masterpiece',
            'description': 'md5:88ddbcb504367987b2708bb38677c9d2',
        }
    }

    def _real_extract(self, url):
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        # Player configuration is embedded as JSON inside a data-opts
        # attribute on the page.
        video_data = self._parse_json(self._search_regex(
            r"data-opts='({.+?})'", webpage, 'video data'), video_id)
        title = video_data['title']

        # Prefer the sources URL advertised by the page; otherwise fall back
        # to the API endpoint derived from the video id.
        sources_url = video_data.get('sources', {}).get('url') or 'http://www.dailymail.co.uk/api/player/%s/video-sources.json' % video_id
        video_sources = self._download_json(sources_url, video_id)

        formats = []
        for rendition in video_sources['renditions']:
            rendition_url = rendition.get('url')
            if not rendition_url:
                continue
            tbr = int_or_none(rendition.get('encodingRate'), 1000)
            container = rendition.get('videoContainer')
            # Renditions in an M2TS container are served as HLS playlists.
            is_hls = container == 'M2TS'
            if is_hls:
                protocol = 'm3u8_native'
                format_id = 'hls'
            else:
                protocol = determine_protocol({'url': rendition_url})
                format_id = protocol
            if tbr:
                format_id += '-%d' % tbr
            formats.append({
                'format_id': format_id,
                'url': rendition_url,
                'width': int_or_none(rendition.get('frameWidth')),
                'height': int_or_none(rendition.get('frameHeight')),
                'tbr': tbr,
                'vcodec': rendition.get('videoCodec'),
                'container': container,
                'protocol': protocol,
                'ext': 'mp4' if is_hls else None,
            })
        self._sort_formats(formats)

        return {
            'id': video_id,
            'title': title,
            'description': video_data.get('descr'),
            'thumbnail': video_data.get('poster') or video_data.get('thumbnail'),
            'formats': formats,
        }
 | 
			
		||||
@@ -71,7 +71,7 @@ class DiscoveryIE(InfoExtractor):
 | 
			
		||||
        entries = []
 | 
			
		||||
 | 
			
		||||
        for idx, video_info in enumerate(info['playlist']):
 | 
			
		||||
            subtitles = []
 | 
			
		||||
            subtitles = {}
 | 
			
		||||
            caption_url = video_info.get('captionsUrl')
 | 
			
		||||
            if caption_url:
 | 
			
		||||
                subtitles = {
 | 
			
		||||
 
 | 
			
		||||
@@ -2,13 +2,16 @@
 | 
			
		||||
from __future__ import unicode_literals
 | 
			
		||||
 | 
			
		||||
from .common import InfoExtractor
 | 
			
		||||
from ..utils import int_or_none
 | 
			
		||||
from ..utils import (
 | 
			
		||||
    int_or_none,
 | 
			
		||||
    unified_strdate,
 | 
			
		||||
)
 | 
			
		||||
from ..compat import compat_urlparse
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class DWIE(InfoExtractor):
 | 
			
		||||
    IE_NAME = 'dw'
 | 
			
		||||
    _VALID_URL = r'https?://(?:www\.)?dw\.com/(?:[^/]+/)+av-(?P<id>\d+)'
 | 
			
		||||
    _VALID_URL = r'https?://(?:www\.)?dw\.com/(?:[^/]+/)+(?:av|e)-(?P<id>\d+)'
 | 
			
		||||
    _TESTS = [{
 | 
			
		||||
        # video
 | 
			
		||||
        'url': 'http://www.dw.com/en/intelligent-light/av-19112290',
 | 
			
		||||
@@ -31,6 +34,18 @@ class DWIE(InfoExtractor):
 | 
			
		||||
            'description': 'md5:bc9ca6e4e063361e21c920c53af12405',
 | 
			
		||||
            'upload_date': '20160311',
 | 
			
		||||
        }
 | 
			
		||||
    }, {
 | 
			
		||||
        # DW documentaries, only last for one or two weeks
 | 
			
		||||
        'url': 'http://www.dw.com/en/documentaries-welcome-to-the-90s-2016-05-21/e-19220158-9798',
 | 
			
		||||
        'md5': '56b6214ef463bfb9a3b71aeb886f3cf1',
 | 
			
		||||
        'info_dict': {
 | 
			
		||||
            'id': '19274438',
 | 
			
		||||
            'ext': 'mp4',
 | 
			
		||||
            'title': 'Welcome to the 90s – Hip Hop',
 | 
			
		||||
            'description': 'Welcome to the 90s - The Golden Decade of Hip Hop',
 | 
			
		||||
            'upload_date': '20160521',
 | 
			
		||||
        },
 | 
			
		||||
        'skip': 'Video removed',
 | 
			
		||||
    }]
 | 
			
		||||
 | 
			
		||||
    def _real_extract(self, url):
 | 
			
		||||
@@ -38,6 +53,7 @@ class DWIE(InfoExtractor):
 | 
			
		||||
        webpage = self._download_webpage(url, media_id)
 | 
			
		||||
        hidden_inputs = self._hidden_inputs(webpage)
 | 
			
		||||
        title = hidden_inputs['media_title']
 | 
			
		||||
        media_id = hidden_inputs.get('media_id') or media_id
 | 
			
		||||
 | 
			
		||||
        if hidden_inputs.get('player_type') == 'video' and hidden_inputs.get('stream_file') == '1':
 | 
			
		||||
            formats = self._extract_smil_formats(
 | 
			
		||||
@@ -49,13 +65,20 @@ class DWIE(InfoExtractor):
 | 
			
		||||
        else:
 | 
			
		||||
            formats = [{'url': hidden_inputs['file_name']}]
 | 
			
		||||
 | 
			
		||||
        upload_date = hidden_inputs.get('display_date')
 | 
			
		||||
        if not upload_date:
 | 
			
		||||
            upload_date = self._html_search_regex(
 | 
			
		||||
                r'<span[^>]+class="date">([0-9.]+)\s*\|', webpage,
 | 
			
		||||
                'upload date', default=None)
 | 
			
		||||
            upload_date = unified_strdate(upload_date)
 | 
			
		||||
 | 
			
		||||
        return {
 | 
			
		||||
            'id': media_id,
 | 
			
		||||
            'title': title,
 | 
			
		||||
            'description': self._og_search_description(webpage),
 | 
			
		||||
            'thumbnail': hidden_inputs.get('preview_image'),
 | 
			
		||||
            'duration': int_or_none(hidden_inputs.get('file_duration')),
 | 
			
		||||
            'upload_date': hidden_inputs.get('display_date'),
 | 
			
		||||
            'upload_date': upload_date,
 | 
			
		||||
            'formats': formats,
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
 
 | 
			
		||||
@@ -11,8 +11,8 @@ from ..utils import (
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class EpornerIE(InfoExtractor):
 | 
			
		||||
    _VALID_URL = r'https?://(?:www\.)?eporner\.com/hd-porn/(?P<id>\d+)/(?P<display_id>[\w-]+)'
 | 
			
		||||
    _TEST = {
 | 
			
		||||
    _VALID_URL = r'https?://(?:www\.)?eporner\.com/hd-porn/(?P<id>\w+)/(?P<display_id>[\w-]+)'
 | 
			
		||||
    _TESTS = [{
 | 
			
		||||
        'url': 'http://www.eporner.com/hd-porn/95008/Infamous-Tiffany-Teen-Strip-Tease-Video/',
 | 
			
		||||
        'md5': '39d486f046212d8e1b911c52ab4691f8',
 | 
			
		||||
        'info_dict': {
 | 
			
		||||
@@ -23,8 +23,12 @@ class EpornerIE(InfoExtractor):
 | 
			
		||||
            'duration': 1838,
 | 
			
		||||
            'view_count': int,
 | 
			
		||||
            'age_limit': 18,
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
        },
 | 
			
		||||
    }, {
 | 
			
		||||
        # New (May 2016) URL layout
 | 
			
		||||
        'url': 'http://www.eporner.com/hd-porn/3YRUtzMcWn0/Star-Wars-XXX-Parody/',
 | 
			
		||||
        'only_matching': True,
 | 
			
		||||
    }]
 | 
			
		||||
 | 
			
		||||
    def _real_extract(self, url):
 | 
			
		||||
        mobj = re.match(self._VALID_URL, url)
 | 
			
		||||
 
 | 
			
		||||
@@ -8,6 +8,7 @@ class ESPNIE(InfoExtractor):
 | 
			
		||||
    _VALID_URL = r'https?://espn\.go\.com/(?:[^/]+/)*(?P<id>[^/]+)'
 | 
			
		||||
    _TESTS = [{
 | 
			
		||||
        'url': 'http://espn.go.com/video/clip?id=10365079',
 | 
			
		||||
        'md5': '60e5d097a523e767d06479335d1bdc58',
 | 
			
		||||
        'info_dict': {
 | 
			
		||||
            'id': 'FkYWtmazr6Ed8xmvILvKLWjd4QvYZpzG',
 | 
			
		||||
            'ext': 'mp4',
 | 
			
		||||
@@ -15,21 +16,22 @@ class ESPNIE(InfoExtractor):
 | 
			
		||||
            'description': None,
 | 
			
		||||
        },
 | 
			
		||||
        'params': {
 | 
			
		||||
            # m3u8 download
 | 
			
		||||
            'skip_download': True,
 | 
			
		||||
        },
 | 
			
		||||
        'add_ie': ['OoyalaExternal'],
 | 
			
		||||
    }, {
 | 
			
		||||
        # intl video, from http://www.espnfc.us/video/mls-highlights/150/video/2743663/must-see-moments-best-of-the-mls-season
 | 
			
		||||
        'url': 'http://espn.go.com/video/clip?id=2743663',
 | 
			
		||||
        'md5': 'f4ac89b59afc7e2d7dbb049523df6768',
 | 
			
		||||
        'info_dict': {
 | 
			
		||||
            'id': '50NDFkeTqRHB0nXBOK-RGdSG5YQPuxHg',
 | 
			
		||||
            'ext': 'mp4',
 | 
			
		||||
            'title': 'Must-See Moments: Best of the MLS season',
 | 
			
		||||
        },
 | 
			
		||||
        'params': {
 | 
			
		||||
            # m3u8 download
 | 
			
		||||
            'skip_download': True,
 | 
			
		||||
        },
 | 
			
		||||
        'add_ie': ['OoyalaExternal'],
 | 
			
		||||
    }, {
 | 
			
		||||
        'url': 'https://espn.go.com/video/iframe/twitter/?cms=espn&id=10365079',
 | 
			
		||||
        'only_matching': True,
 | 
			
		||||
 
 | 
			
		||||
@@ -3,6 +3,10 @@ from __future__ import unicode_literals
 | 
			
		||||
 | 
			
		||||
from .abc import ABCIE
 | 
			
		||||
from .abc7news import Abc7NewsIE
 | 
			
		||||
from .abcnews import (
 | 
			
		||||
    AbcNewsIE,
 | 
			
		||||
    AbcNewsVideoIE,
 | 
			
		||||
)
 | 
			
		||||
from .academicearth import AcademicEarthCourseIE
 | 
			
		||||
from .acast import (
 | 
			
		||||
    ACastIE,
 | 
			
		||||
@@ -17,6 +21,7 @@ from .adobetv import (
 | 
			
		||||
)
 | 
			
		||||
from .adultswim import AdultSwimIE
 | 
			
		||||
from .aenetworks import AENetworksIE
 | 
			
		||||
from .afreecatv import AfreecaTVIE
 | 
			
		||||
from .aftonbladet import AftonbladetIE
 | 
			
		||||
from .airmozilla import AirMozillaIE
 | 
			
		||||
from .aljazeera import AlJazeeraIE
 | 
			
		||||
@@ -52,6 +57,7 @@ from .arte import (
 | 
			
		||||
    ArteTVDDCIE,
 | 
			
		||||
    ArteTVMagazineIE,
 | 
			
		||||
    ArteTVEmbedIE,
 | 
			
		||||
    ArteTVPlaylistIE,
 | 
			
		||||
)
 | 
			
		||||
from .atresplayer import AtresPlayerIE
 | 
			
		||||
from .atttechchannel import ATTTechChannelIE
 | 
			
		||||
@@ -75,6 +81,7 @@ from .bigflix import BigflixIE
 | 
			
		||||
from .bild import BildIE
 | 
			
		||||
from .bilibili import BiliBiliIE
 | 
			
		||||
from .biobiochiletv import BioBioChileTVIE
 | 
			
		||||
from .biqle import BIQLEIE
 | 
			
		||||
from .bleacherreport import (
 | 
			
		||||
    BleacherReportIE,
 | 
			
		||||
    BleacherReportCMSIE,
 | 
			
		||||
@@ -106,6 +113,7 @@ from .cbc import (
 | 
			
		||||
    CBCPlayerIE,
 | 
			
		||||
)
 | 
			
		||||
from .cbs import CBSIE
 | 
			
		||||
from .cbslocal import CBSLocalIE
 | 
			
		||||
from .cbsinteractive import CBSInteractiveIE
 | 
			
		||||
from .cbsnews import (
 | 
			
		||||
    CBSNewsIE,
 | 
			
		||||
@@ -123,7 +131,6 @@ from .chirbit import (
 | 
			
		||||
    ChirbitProfileIE,
 | 
			
		||||
)
 | 
			
		||||
from .cinchcast import CinchcastIE
 | 
			
		||||
from .cinemassacre import CinemassacreIE
 | 
			
		||||
from .cliprs import ClipRsIE
 | 
			
		||||
from .clipfish import ClipfishIE
 | 
			
		||||
from .cliphunter import CliphunterIE
 | 
			
		||||
@@ -138,7 +145,7 @@ from .cnn import (
 | 
			
		||||
    CNNBlogsIE,
 | 
			
		||||
    CNNArticleIE,
 | 
			
		||||
)
 | 
			
		||||
from .collegehumor import CollegeHumorIE
 | 
			
		||||
from .coub import CoubIE
 | 
			
		||||
from .collegerama import CollegeRamaIE
 | 
			
		||||
from .comedycentral import ComedyCentralIE, ComedyCentralShowsIE
 | 
			
		||||
from .comcarcoff import ComCarCoffIE
 | 
			
		||||
@@ -157,6 +164,7 @@ from .cspan import CSpanIE
 | 
			
		||||
from .ctsnews import CtsNewsIE
 | 
			
		||||
from .cultureunplugged import CultureUnpluggedIE
 | 
			
		||||
from .cwtv import CWTVIE
 | 
			
		||||
from .dailymail import DailyMailIE
 | 
			
		||||
from .dailymotion import (
 | 
			
		||||
    DailymotionIE,
 | 
			
		||||
    DailymotionPlaylistIE,
 | 
			
		||||
@@ -226,6 +234,7 @@ from .everyonesmixtape import EveryonesMixtapeIE
 | 
			
		||||
from .exfm import ExfmIE
 | 
			
		||||
from .expotv import ExpoTVIE
 | 
			
		||||
from .extremetube import ExtremeTubeIE
 | 
			
		||||
from .eyedotv import EyedoTVIE
 | 
			
		||||
from .facebook import FacebookIE
 | 
			
		||||
from .faz import FazIE
 | 
			
		||||
from .fc2 import FC2IE
 | 
			
		||||
@@ -238,6 +247,7 @@ from .fktv import FKTVIE
 | 
			
		||||
from .flickr import FlickrIE
 | 
			
		||||
from .folketinget import FolketingetIE
 | 
			
		||||
from .footyroom import FootyRoomIE
 | 
			
		||||
from .formula1 import Formula1IE
 | 
			
		||||
from .fourtube import FourTubeIE
 | 
			
		||||
from .fox import FOXIE
 | 
			
		||||
from .foxgay import FoxgayIE
 | 
			
		||||
@@ -282,6 +292,7 @@ from .globo import (
 | 
			
		||||
    GloboArticleIE,
 | 
			
		||||
)
 | 
			
		||||
from .godtube import GodTubeIE
 | 
			
		||||
from .godtv import GodTVIE
 | 
			
		||||
from .goldenmoustache import GoldenMoustacheIE
 | 
			
		||||
from .golem import GolemIE
 | 
			
		||||
from .googledrive import GoogleDriveIE
 | 
			
		||||
@@ -365,6 +376,7 @@ from .kuwo import (
 | 
			
		||||
)
 | 
			
		||||
from .la7 import LA7IE
 | 
			
		||||
from .laola1tv import Laola1TvIE
 | 
			
		||||
from .learnr import LearnrIE
 | 
			
		||||
from .lecture2go import Lecture2GoIE
 | 
			
		||||
from .lemonde import LemondeIE
 | 
			
		||||
from .leeco import (
 | 
			
		||||
@@ -372,6 +384,7 @@ from .leeco import (
 | 
			
		||||
    LePlaylistIE,
 | 
			
		||||
    LetvCloudIE,
 | 
			
		||||
)
 | 
			
		||||
from .libraryofcongress import LibraryOfCongressIE
 | 
			
		||||
from .libsyn import LibsynIE
 | 
			
		||||
from .lifenews import (
 | 
			
		||||
    LifeNewsIE,
 | 
			
		||||
@@ -382,6 +395,7 @@ from .limelight import (
 | 
			
		||||
    LimelightChannelIE,
 | 
			
		||||
    LimelightChannelListIE,
 | 
			
		||||
)
 | 
			
		||||
from .litv import LiTVIE
 | 
			
		||||
from .liveleak import LiveLeakIE
 | 
			
		||||
from .livestream import (
 | 
			
		||||
    LivestreamIE,
 | 
			
		||||
@@ -389,6 +403,7 @@ from .livestream import (
 | 
			
		||||
    LivestreamShortenerIE,
 | 
			
		||||
)
 | 
			
		||||
from .lnkgo import LnkGoIE
 | 
			
		||||
from .localnews8 import LocalNews8IE
 | 
			
		||||
from .lovehomeporn import LoveHomePornIE
 | 
			
		||||
from .lrt import LRTIE
 | 
			
		||||
from .lynda import (
 | 
			
		||||
@@ -406,6 +421,10 @@ from .metacafe import MetacafeIE
 | 
			
		||||
from .metacritic import MetacriticIE
 | 
			
		||||
from .mgoon import MgoonIE
 | 
			
		||||
from .mgtv import MGTVIE
 | 
			
		||||
from .microsoftvirtualacademy import (
 | 
			
		||||
    MicrosoftVirtualAcademyIE,
 | 
			
		||||
    MicrosoftVirtualAcademyCourseIE,
 | 
			
		||||
)
 | 
			
		||||
from .minhateca import MinhatecaIE
 | 
			
		||||
from .ministrygrid import MinistryGridIE
 | 
			
		||||
from .minoto import MinotoIE
 | 
			
		||||
@@ -560,7 +579,10 @@ from .parliamentliveuk import ParliamentLiveUKIE
 | 
			
		||||
from .patreon import PatreonIE
 | 
			
		||||
from .pbs import PBSIE
 | 
			
		||||
from .people import PeopleIE
 | 
			
		||||
from .periscope import PeriscopeIE
 | 
			
		||||
from .periscope import (
 | 
			
		||||
    PeriscopeIE,
 | 
			
		||||
    PeriscopeUserIE,
 | 
			
		||||
)
 | 
			
		||||
from .philharmoniedeparis import PhilharmonieDeParisIE
 | 
			
		||||
from .phoenix import PhoenixIE
 | 
			
		||||
from .photobucket import PhotobucketIE
 | 
			
		||||
@@ -601,6 +623,10 @@ from .qqmusic import (
 | 
			
		||||
    QQMusicPlaylistIE,
 | 
			
		||||
)
 | 
			
		||||
from .r7 import R7IE
 | 
			
		||||
from .radiocanada import (
 | 
			
		||||
    RadioCanadaIE,
 | 
			
		||||
    RadioCanadaAudioVideoIE,
 | 
			
		||||
)
 | 
			
		||||
from .radiode import RadioDeIE
 | 
			
		||||
from .radiojavan import RadioJavanIE
 | 
			
		||||
from .radiobremen import RadioBremenIE
 | 
			
		||||
@@ -614,8 +640,12 @@ from .rds import RDSIE
 | 
			
		||||
from .redtube import RedTubeIE
 | 
			
		||||
from .regiotv import RegioTVIE
 | 
			
		||||
from .restudy import RestudyIE
 | 
			
		||||
from .reuters import ReutersIE
 | 
			
		||||
from .reverbnation import ReverbNationIE
 | 
			
		||||
from .revision3 import Revision3IE
 | 
			
		||||
from .revision3 import (
 | 
			
		||||
    Revision3EmbedIE,
 | 
			
		||||
    Revision3IE,
 | 
			
		||||
)
 | 
			
		||||
from .rice import RICEIE
 | 
			
		||||
from .ringtv import RingTVIE
 | 
			
		||||
from .ro220 import Ro220IE
 | 
			
		||||
@@ -654,7 +684,9 @@ from .screencast import ScreencastIE
 | 
			
		||||
from .screencastomatic import ScreencastOMaticIE
 | 
			
		||||
from .screenjunkies import ScreenJunkiesIE
 | 
			
		||||
from .screenwavemedia import ScreenwaveMediaIE, TeamFourIE
 | 
			
		||||
from .seeker import SeekerIE
 | 
			
		||||
from .senateisvp import SenateISVPIE
 | 
			
		||||
from .sendtonews import SendtoNewsIE
 | 
			
		||||
from .servingsys import ServingSysIE
 | 
			
		||||
from .sexu import SexuIE
 | 
			
		||||
from .shahid import ShahidIE
 | 
			
		||||
@@ -757,6 +789,7 @@ from .thesixtyone import TheSixtyOneIE
 | 
			
		||||
from .thestar import TheStarIE
 | 
			
		||||
from .thisamericanlife import ThisAmericanLifeIE
 | 
			
		||||
from .thisav import ThisAVIE
 | 
			
		||||
from .threeqsdn import ThreeQSDNIE
 | 
			
		||||
from .tinypic import TinyPicIE
 | 
			
		||||
from .tlc import TlcDeIE
 | 
			
		||||
from .tmz import (
 | 
			
		||||
@@ -809,7 +842,10 @@ from .tvc import (
 | 
			
		||||
)
 | 
			
		||||
from .tvigle import TvigleIE
 | 
			
		||||
from .tvland import TVLandIE
 | 
			
		||||
from .tvp import TvpIE, TvpSeriesIE
 | 
			
		||||
from .tvp import (
 | 
			
		||||
    TVPIE,
 | 
			
		||||
    TVPSeriesIE,
 | 
			
		||||
)
 | 
			
		||||
from .tvplay import TVPlayIE
 | 
			
		||||
from .tweakers import TweakersIE
 | 
			
		||||
from .twentyfourvideo import TwentyFourVideoIE
 | 
			
		||||
@@ -824,7 +860,6 @@ from .twitch import (
 | 
			
		||||
    TwitchVodIE,
 | 
			
		||||
    TwitchProfileIE,
 | 
			
		||||
    TwitchPastBroadcastsIE,
 | 
			
		||||
    TwitchBookmarksIE,
 | 
			
		||||
    TwitchStreamIE,
 | 
			
		||||
)
 | 
			
		||||
from .twitter import (
 | 
			
		||||
@@ -842,14 +877,20 @@ from .unistra import UnistraIE
 | 
			
		||||
from .urort import UrortIE
 | 
			
		||||
from .usatoday import USATodayIE
 | 
			
		||||
from .ustream import UstreamIE, UstreamChannelIE
 | 
			
		||||
from .ustudio import UstudioIE
 | 
			
		||||
from .ustudio import (
 | 
			
		||||
    UstudioIE,
 | 
			
		||||
    UstudioEmbedIE,
 | 
			
		||||
)
 | 
			
		||||
from .varzesh3 import Varzesh3IE
 | 
			
		||||
from .vbox7 import Vbox7IE
 | 
			
		||||
from .veehd import VeeHDIE
 | 
			
		||||
from .veoh import VeohIE
 | 
			
		||||
from .vessel import VesselIE
 | 
			
		||||
from .vesti import VestiIE
 | 
			
		||||
from .vevo import VevoIE
 | 
			
		||||
from .vevo import (
 | 
			
		||||
    VevoIE,
 | 
			
		||||
    VevoPlaylistIE,
 | 
			
		||||
)
 | 
			
		||||
from .vgtv import (
 | 
			
		||||
    BTArticleIE,
 | 
			
		||||
    BTVestlendingenIE,
 | 
			
		||||
@@ -871,6 +912,7 @@ from .videomore import (
 | 
			
		||||
)
 | 
			
		||||
from .videopremium import VideoPremiumIE
 | 
			
		||||
from .videott import VideoTtIE
 | 
			
		||||
from .vidio import VidioIE
 | 
			
		||||
from .vidme import (
 | 
			
		||||
    VidmeIE,
 | 
			
		||||
    VidmeUserIE,
 | 
			
		||||
@@ -916,21 +958,21 @@ from .vporn import VpornIE
 | 
			
		||||
from .vrt import VRTIE
 | 
			
		||||
from .vube import VubeIE
 | 
			
		||||
from .vuclip import VuClipIE
 | 
			
		||||
from .vulture import VultureIE
 | 
			
		||||
from .walla import WallaIE
 | 
			
		||||
from .washingtonpost import WashingtonPostIE
 | 
			
		||||
from .washingtonpost import (
 | 
			
		||||
    WashingtonPostIE,
 | 
			
		||||
    WashingtonPostArticleIE,
 | 
			
		||||
)
 | 
			
		||||
from .wat import WatIE
 | 
			
		||||
from .watchindianporn import WatchIndianPornIE
 | 
			
		||||
from .wdr import (
 | 
			
		||||
    WDRIE,
 | 
			
		||||
    WDRMobileIE,
 | 
			
		||||
    WDRMausIE,
 | 
			
		||||
)
 | 
			
		||||
from .webofstories import (
 | 
			
		||||
    WebOfStoriesIE,
 | 
			
		||||
    WebOfStoriesPlaylistIE,
 | 
			
		||||
)
 | 
			
		||||
from .weibo import WeiboIE
 | 
			
		||||
from .weiqitv import WeiqiTVIE
 | 
			
		||||
from .wimp import WimpIE
 | 
			
		||||
from .wistia import WistiaIE
 | 
			
		||||
@@ -971,7 +1013,10 @@ from .yesjapan import YesJapanIE
 | 
			
		||||
from .yinyuetai import YinYueTaiIE
 | 
			
		||||
from .ynet import YnetIE
 | 
			
		||||
from .youjizz import YouJizzIE
 | 
			
		||||
from .youku import YoukuIE
 | 
			
		||||
from .youku import (
 | 
			
		||||
    YoukuIE,
 | 
			
		||||
    YoukuShowIE,
 | 
			
		||||
)
 | 
			
		||||
from .youporn import YouPornIE
 | 
			
		||||
from .yourupload import YourUploadIE
 | 
			
		||||
from .youtube import (
 | 
			
		||||
 
 | 
			
		||||
							
								
								
									
										64
									
								
								youtube_dl/extractor/eyedotv.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										64
									
								
								youtube_dl/extractor/eyedotv.py
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,64 @@
 | 
			
		||||
# coding: utf-8
 | 
			
		||||
from __future__ import unicode_literals
 | 
			
		||||
 | 
			
		||||
from .common import InfoExtractor
 | 
			
		||||
from ..utils import (
 | 
			
		||||
    xpath_text,
 | 
			
		||||
    parse_duration,
 | 
			
		||||
    ExtractorError,
 | 
			
		||||
)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class EyedoTVIE(InfoExtractor):
    """Extractor for eyedo.tv live streams and their replays."""

    _VALID_URL = r'https?://(?:www\.)?eyedo\.tv/[^/]+/(?:#!/)?Live/Detail/(?P<id>[0-9]+)'
    _TEST = {
        'url': 'https://www.eyedo.tv/en-US/#!/Live/Detail/16301',
        'md5': 'ba14f17995cdfc20c36ba40e21bf73f7',
        'info_dict': {
            'id': '16301',
            'ext': 'mp4',
            'title': 'Journée du conseil scientifique de l\'Afnic 2015',
            'description': 'md5:4abe07293b2f73efc6e1c37028d58c98',
            'uploader': 'Afnic Live',
            'uploader_id': '8023',
        }
    }
    # Wowza origin used for both live and replay playlists.
    _ROOT_URL = 'http://live.eyedo.net:1935/'

    def _real_extract(self, url):
        video_id = self._match_id(url)
        video_data = self._download_xml('http://eyedo.tv/api/live/GetLive/%s' % video_id, video_id)

        def _add_ns(path):
            # Fields in the API response live in this data-contract namespace.
            return self._xpath_ns(path, 'http://schemas.datacontract.org/2004/07/EyeDo.Core.Implementation.Web.ViewModels.Api')

        title = xpath_text(video_data, _add_ns('Titre'), 'title', True)
        # Bug fix: this mandatory lookup was labelled 'title' (copy-paste from
        # the line above), so a missing StateLiveCode element raised an error
        # blaming the title field. The fatal flag and behavior are unchanged.
        state_live_code = xpath_text(video_data, _add_ns('StateLiveCode'), 'state live code', True)
        if state_live_code == 'avenir':
            raise ExtractorError(
                '%s said: We\'re sorry, but this video is not yet available.' % self.IE_NAME,
                expected=True)

        is_live = state_live_code == 'live'
        # URL construction mirrors http://eyedo.tv/Content/Html5/Scripts/html5view.js
        if is_live:
            # NOTE(review): 'Cdn' is looked up without _add_ns, unlike every
            # other field here — confirm the element really is un-namespaced
            # before "fixing" it.
            if xpath_text(video_data, 'Cdn') == 'true':
                m3u8_url = 'http://rrr.sz.xlcdn.com/?account=eyedo&file=A%s&type=live&service=wowza&protocol=http&output=playlist.m3u8' % video_id
            else:
                m3u8_url = self._ROOT_URL + 'w/%s/eyedo_720p/playlist.m3u8' % video_id
        else:
            m3u8_url = self._ROOT_URL + 'replay-w/%s/mp4:%s.mp4/playlist.m3u8' % (video_id, video_id)

        return {
            'id': video_id,
            'title': title,
            'formats': self._extract_m3u8_formats(
                m3u8_url, video_id, 'mp4', 'm3u8' if is_live else 'm3u8_native'),
            'description': xpath_text(video_data, _add_ns('Description')),
            'duration': parse_duration(xpath_text(video_data, _add_ns('Duration'))),
            'uploader': xpath_text(video_data, _add_ns('Createur')),
            'uploader_id': xpath_text(video_data, _add_ns('CreateurId')),
            'chapter': xpath_text(video_data, _add_ns('ChapitreTitre')),
            'chapter_id': xpath_text(video_data, _add_ns('ChapitreId')),
        }
 | 
			
		||||
@@ -1,20 +1,19 @@
 | 
			
		||||
# coding: utf-8
 | 
			
		||||
from __future__ import unicode_literals
 | 
			
		||||
 | 
			
		||||
import re
 | 
			
		||||
 | 
			
		||||
from .common import InfoExtractor
 | 
			
		||||
from ..compat import compat_urlparse
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class FczenitIE(InfoExtractor):
 | 
			
		||||
    _VALID_URL = r'https?://(?:www\.)?fc-zenit\.ru/video/gl(?P<id>[0-9]+)'
 | 
			
		||||
    _VALID_URL = r'https?://(?:www\.)?fc-zenit\.ru/video/(?P<id>[0-9]+)'
 | 
			
		||||
    _TEST = {
 | 
			
		||||
        'url': 'http://fc-zenit.ru/video/gl6785/',
 | 
			
		||||
        'md5': '458bacc24549173fe5a5aa29174a5606',
 | 
			
		||||
        'url': 'http://fc-zenit.ru/video/41044/',
 | 
			
		||||
        'md5': '0e3fab421b455e970fa1aa3891e57df0',
 | 
			
		||||
        'info_dict': {
 | 
			
		||||
            'id': '6785',
 | 
			
		||||
            'id': '41044',
 | 
			
		||||
            'ext': 'mp4',
 | 
			
		||||
            'title': '«Зенит-ТВ»: как Олег Шатов играл против «Урала»',
 | 
			
		||||
            'title': 'Так пишется история: казанский разгром ЦСКА на «Зенит-ТВ»',
 | 
			
		||||
        },
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
@@ -22,15 +21,23 @@ class FczenitIE(InfoExtractor):
 | 
			
		||||
        video_id = self._match_id(url)
 | 
			
		||||
        webpage = self._download_webpage(url, video_id)
 | 
			
		||||
 | 
			
		||||
        video_title = self._html_search_regex(r'<div class=\"photoalbum__title\">([^<]+)', webpage, 'title')
 | 
			
		||||
        video_title = self._html_search_regex(
 | 
			
		||||
            r'<[^>]+class=\"photoalbum__title\">([^<]+)', webpage, 'title')
 | 
			
		||||
 | 
			
		||||
        bitrates_raw = self._html_search_regex(r'bitrates:.*\n(.*)\]', webpage, 'video URL')
 | 
			
		||||
        bitrates = re.findall(r'url:.?\'(.+?)\'.*?bitrate:.?([0-9]{3}?)', bitrates_raw)
 | 
			
		||||
        video_items = self._parse_json(self._search_regex(
 | 
			
		||||
            r'arrPath\s*=\s*JSON\.parse\(\'(.+)\'\)', webpage, 'video items'),
 | 
			
		||||
            video_id)
 | 
			
		||||
 | 
			
		||||
        def merge_dicts(*dicts):
 | 
			
		||||
            ret = {}
 | 
			
		||||
            for a_dict in dicts:
 | 
			
		||||
                ret.update(a_dict)
 | 
			
		||||
            return ret
 | 
			
		||||
 | 
			
		||||
        formats = [{
 | 
			
		||||
            'url': furl,
 | 
			
		||||
            'tbr': tbr,
 | 
			
		||||
        } for furl, tbr in bitrates]
 | 
			
		||||
            'url': compat_urlparse.urljoin(url, video_url),
 | 
			
		||||
            'tbr': int(tbr),
 | 
			
		||||
        } for tbr, video_url in merge_dicts(*video_items).items()]
 | 
			
		||||
 | 
			
		||||
        self._sort_formats(formats)
 | 
			
		||||
 | 
			
		||||
 
 | 
			
		||||
@@ -24,13 +24,28 @@ class FlickrIE(InfoExtractor):
 | 
			
		||||
            'upload_date': '20110423',
 | 
			
		||||
            'uploader_id': '10922353@N03',
 | 
			
		||||
            'uploader': 'Forest Wander',
 | 
			
		||||
            'uploader_url': 'https://www.flickr.com/photos/forestwander-nature-pictures/',
 | 
			
		||||
            'comment_count': int,
 | 
			
		||||
            'view_count': int,
 | 
			
		||||
            'tags': list,
 | 
			
		||||
            'license': 'Attribution-ShareAlike',
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    _API_BASE_URL = 'https://api.flickr.com/services/rest?'
 | 
			
		||||
    # https://help.yahoo.com/kb/flickr/SLN25525.html
 | 
			
		||||
    _LICENSES = {
 | 
			
		||||
        '0': 'All Rights Reserved',
 | 
			
		||||
        '1': 'Attribution-NonCommercial-ShareAlike',
 | 
			
		||||
        '2': 'Attribution-NonCommercial',
 | 
			
		||||
        '3': 'Attribution-NonCommercial-NoDerivs',
 | 
			
		||||
        '4': 'Attribution',
 | 
			
		||||
        '5': 'Attribution-ShareAlike',
 | 
			
		||||
        '6': 'Attribution-NoDerivs',
 | 
			
		||||
        '7': 'No known copyright restrictions',
 | 
			
		||||
        '8': 'United States government work',
 | 
			
		||||
        '9': 'Public Domain Dedication (CC0)',
 | 
			
		||||
        '10': 'Public Domain Work',
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    def _call_api(self, method, video_id, api_key, note, secret=None):
 | 
			
		||||
        query = {
 | 
			
		||||
@@ -75,6 +90,9 @@ class FlickrIE(InfoExtractor):
 | 
			
		||||
            self._sort_formats(formats)
 | 
			
		||||
 | 
			
		||||
            owner = video_info.get('owner', {})
 | 
			
		||||
            uploader_id = owner.get('nsid')
 | 
			
		||||
            uploader_path = owner.get('path_alias') or uploader_id
 | 
			
		||||
            uploader_url = 'https://www.flickr.com/photos/%s/' % uploader_path if uploader_path else None
 | 
			
		||||
 | 
			
		||||
            return {
 | 
			
		||||
                'id': video_id,
 | 
			
		||||
@@ -83,11 +101,13 @@ class FlickrIE(InfoExtractor):
 | 
			
		||||
                'formats': formats,
 | 
			
		||||
                'timestamp': int_or_none(video_info.get('dateuploaded')),
 | 
			
		||||
                'duration': int_or_none(video_info.get('video', {}).get('duration')),
 | 
			
		||||
                'uploader_id': owner.get('nsid'),
 | 
			
		||||
                'uploader_id': uploader_id,
 | 
			
		||||
                'uploader': owner.get('realname'),
 | 
			
		||||
                'uploader_url': uploader_url,
 | 
			
		||||
                'comment_count': int_or_none(video_info.get('comments', {}).get('_content')),
 | 
			
		||||
                'view_count': int_or_none(video_info.get('views')),
 | 
			
		||||
                'tags': [tag.get('_content') for tag in video_info.get('tags', {}).get('tag', [])]
 | 
			
		||||
                'tags': [tag.get('_content') for tag in video_info.get('tags', {}).get('tag', [])],
 | 
			
		||||
                'license': self._LICENSES.get(video_info.get('license')),
 | 
			
		||||
            }
 | 
			
		||||
        else:
 | 
			
		||||
            raise ExtractorError('not a video', expected=True)
 | 
			
		||||
 
 | 
			
		||||
							
								
								
									
										26
									
								
								youtube_dl/extractor/formula1.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										26
									
								
								youtube_dl/extractor/formula1.py
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,26 @@
 | 
			
		||||
# coding: utf-8
 | 
			
		||||
from __future__ import unicode_literals
 | 
			
		||||
 | 
			
		||||
from .common import InfoExtractor
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class Formula1IE(InfoExtractor):
 | 
			
		||||
    _VALID_URL = r'https?://(?:www\.)?formula1\.com/content/fom-website/en/video/\d{4}/\d{1,2}/(?P<id>.+?)\.html'
 | 
			
		||||
    _TEST = {
 | 
			
		||||
        'url': 'http://www.formula1.com/content/fom-website/en/video/2016/5/Race_highlights_-_Spain_2016.html',
 | 
			
		||||
        'md5': '8c79e54be72078b26b89e0e111c0502b',
 | 
			
		||||
        'info_dict': {
 | 
			
		||||
            'id': 'JvYXJpMzE6pArfHWm5ARp5AiUmD-gibV',
 | 
			
		||||
            'ext': 'flv',
 | 
			
		||||
            'title': 'Race highlights - Spain 2016',
 | 
			
		||||
        },
 | 
			
		||||
        'add_ie': ['Ooyala'],
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    def _real_extract(self, url):
 | 
			
		||||
        display_id = self._match_id(url)
 | 
			
		||||
        webpage = self._download_webpage(url, display_id)
 | 
			
		||||
        ooyala_embed_code = self._search_regex(
 | 
			
		||||
            r'data-videoid="([^"]+)"', webpage, 'ooyala embed code')
 | 
			
		||||
        return self.url_result(
 | 
			
		||||
            'ooyala:%s' % ooyala_embed_code, 'Ooyala', ooyala_embed_code)
 | 
			
		||||
@@ -61,6 +61,9 @@ from .jwplatform import JWPlatformIE
 | 
			
		||||
from .digiteka import DigitekaIE
 | 
			
		||||
from .instagram import InstagramIE
 | 
			
		||||
from .liveleak import LiveLeakIE
 | 
			
		||||
from .threeqsdn import ThreeQSDNIE
 | 
			
		||||
from .theplatform import ThePlatformIE
 | 
			
		||||
from .vessel import VesselIE
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class GenericIE(InfoExtractor):
 | 
			
		||||
@@ -624,13 +627,13 @@ class GenericIE(InfoExtractor):
 | 
			
		||||
        },
 | 
			
		||||
        # MTVSercices embed
 | 
			
		||||
        {
 | 
			
		||||
            'url': 'http://www.gametrailers.com/news-post/76093/north-america-europe-is-getting-that-mario-kart-8-mercedes-dlc-too',
 | 
			
		||||
            'md5': '35727f82f58c76d996fc188f9755b0d5',
 | 
			
		||||
            'url': 'http://www.vulture.com/2016/06/new-key-peele-sketches-released.html',
 | 
			
		||||
            'md5': 'ca1aef97695ef2c1d6973256a57e5252',
 | 
			
		||||
            'info_dict': {
 | 
			
		||||
                'id': '0306a69b-8adf-4fb5-aace-75f8e8cbfca9',
 | 
			
		||||
                'id': '769f7ec0-0692-4d62-9b45-0d88074bffc1',
 | 
			
		||||
                'ext': 'mp4',
 | 
			
		||||
                'title': 'Review',
 | 
			
		||||
                'description': 'Mario\'s life in the fast lane has never looked so good.',
 | 
			
		||||
                'title': 'Key and Peele|October 10, 2012|2|203|Liam Neesons - Uncensored',
 | 
			
		||||
                'description': 'Two valets share their love for movie star Liam Neesons.',
 | 
			
		||||
            },
 | 
			
		||||
        },
 | 
			
		||||
        # YouTube embed via <data-embed-url="">
 | 
			
		||||
@@ -716,15 +719,18 @@ class GenericIE(InfoExtractor):
 | 
			
		||||
        },
 | 
			
		||||
        # Wistia embed
 | 
			
		||||
        {
 | 
			
		||||
            'url': 'http://education-portal.com/academy/lesson/north-american-exploration-failed-colonies-of-spain-france-england.html#lesson',
 | 
			
		||||
            'md5': '8788b683c777a5cf25621eaf286d0c23',
 | 
			
		||||
            'url': 'http://study.com/academy/lesson/north-american-exploration-failed-colonies-of-spain-france-england.html#lesson',
 | 
			
		||||
            'md5': '1953f3a698ab51cfc948ed3992a0b7ff',
 | 
			
		||||
            'info_dict': {
 | 
			
		||||
                'id': '1cfaf6b7ea',
 | 
			
		||||
                'id': '6e2wtrbdaf',
 | 
			
		||||
                'ext': 'mov',
 | 
			
		||||
                'title': 'md5:51364a8d3d009997ba99656004b5e20d',
 | 
			
		||||
                'duration': 643.0,
 | 
			
		||||
                'filesize': 182808282,
 | 
			
		||||
                'uploader': 'education-portal.com',
 | 
			
		||||
                'title': 'paywall_north-american-exploration-failed-colonies-of-spain-france-england',
 | 
			
		||||
                'description': 'a Paywall Videos video from Remilon',
 | 
			
		||||
                'duration': 644.072,
 | 
			
		||||
                'uploader': 'study.com',
 | 
			
		||||
                'timestamp': 1459678540,
 | 
			
		||||
                'upload_date': '20160403',
 | 
			
		||||
                'filesize': 24687186,
 | 
			
		||||
            },
 | 
			
		||||
        },
 | 
			
		||||
        {
 | 
			
		||||
@@ -733,14 +739,30 @@ class GenericIE(InfoExtractor):
 | 
			
		||||
            'info_dict': {
 | 
			
		||||
                'id': 'uxjb0lwrcz',
 | 
			
		||||
                'ext': 'mp4',
 | 
			
		||||
                'title': 'Conversation about Hexagonal Rails Part 1 - ThoughtWorks',
 | 
			
		||||
                'title': 'Conversation about Hexagonal Rails Part 1',
 | 
			
		||||
                'description': 'a Martin Fowler video from ThoughtWorks',
 | 
			
		||||
                'duration': 1715.0,
 | 
			
		||||
                'uploader': 'thoughtworks.wistia.com',
 | 
			
		||||
                'upload_date': '20140603',
 | 
			
		||||
                'timestamp': 1401832161,
 | 
			
		||||
                'upload_date': '20140603',
 | 
			
		||||
            },
 | 
			
		||||
        },
 | 
			
		||||
        # Wistia standard embed (async)
 | 
			
		||||
        {
 | 
			
		||||
            'url': 'https://www.getdrip.com/university/brennan-dunn-drip-workshop/',
 | 
			
		||||
            'info_dict': {
 | 
			
		||||
                'id': '807fafadvk',
 | 
			
		||||
                'ext': 'mp4',
 | 
			
		||||
                'title': 'Drip Brennan Dunn Workshop',
 | 
			
		||||
                'description': 'a JV Webinars video from getdrip-1',
 | 
			
		||||
                'duration': 4986.95,
 | 
			
		||||
                'timestamp': 1463607249,
 | 
			
		||||
                'upload_date': '20160518',
 | 
			
		||||
            },
 | 
			
		||||
            'params': {
 | 
			
		||||
                'skip_download': True,
 | 
			
		||||
            }
 | 
			
		||||
        },
 | 
			
		||||
        # Soundcloud embed
 | 
			
		||||
        {
 | 
			
		||||
            'url': 'http://nakedsecurity.sophos.com/2014/10/29/sscc-171-are-you-sure-that-1234-is-a-bad-password-podcast/',
 | 
			
		||||
@@ -763,6 +785,19 @@ class GenericIE(InfoExtractor):
 | 
			
		||||
                'title': 'Rosetta #CometLanding webcast HL 10',
 | 
			
		||||
            }
 | 
			
		||||
        },
 | 
			
		||||
        # Another Livestream embed, without 'new.' in URL
 | 
			
		||||
        {
 | 
			
		||||
            'url': 'https://www.freespeech.org/',
 | 
			
		||||
            'info_dict': {
 | 
			
		||||
                'id': '123537347',
 | 
			
		||||
                'ext': 'mp4',
 | 
			
		||||
                'title': 're:^FSTV [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
 | 
			
		||||
            },
 | 
			
		||||
            'params': {
 | 
			
		||||
                # Live stream
 | 
			
		||||
                'skip_download': True,
 | 
			
		||||
            },
 | 
			
		||||
        },
 | 
			
		||||
        # LazyYT
 | 
			
		||||
        {
 | 
			
		||||
            'url': 'http://discourse.ubuntu.com/t/unity-8-desktop-mode-windows-on-mir/1986',
 | 
			
		||||
@@ -847,18 +882,6 @@ class GenericIE(InfoExtractor):
 | 
			
		||||
                'title': 'EP3S5 - Bon Appétit - Baqueira Mi Corazon !',
 | 
			
		||||
            }
 | 
			
		||||
        },
 | 
			
		||||
        # Kaltura embed
 | 
			
		||||
        {
 | 
			
		||||
            'url': 'http://www.monumentalnetwork.com/videos/john-carlson-postgame-2-25-15',
 | 
			
		||||
            'info_dict': {
 | 
			
		||||
                'id': '1_eergr3h1',
 | 
			
		||||
                'ext': 'mp4',
 | 
			
		||||
                'upload_date': '20150226',
 | 
			
		||||
                'uploader_id': 'MonumentalSports-Kaltura@perfectsensedigital.com',
 | 
			
		||||
                'timestamp': int,
 | 
			
		||||
                'title': 'John Carlson Postgame 2/25/15',
 | 
			
		||||
            },
 | 
			
		||||
        },
 | 
			
		||||
        # Kaltura embed (different embed code)
 | 
			
		||||
        {
 | 
			
		||||
            'url': 'http://www.premierchristianradio.com/Shows/Saturday/Unbelievable/Conference-Videos/Os-Guinness-Is-It-Fools-Talk-Unbelievable-Conference-2014',
 | 
			
		||||
@@ -884,6 +907,19 @@ class GenericIE(InfoExtractor):
 | 
			
		||||
                'uploader_id': 'echojecka',
 | 
			
		||||
            },
 | 
			
		||||
        },
 | 
			
		||||
        # Kaltura embed with single quotes
 | 
			
		||||
        {
 | 
			
		||||
            'url': 'http://fod.infobase.com/p_ViewPlaylist.aspx?AssignmentID=NUN8ZY',
 | 
			
		||||
            'info_dict': {
 | 
			
		||||
                'id': '0_izeg5utt',
 | 
			
		||||
                'ext': 'mp4',
 | 
			
		||||
                'title': '35871',
 | 
			
		||||
                'timestamp': 1355743100,
 | 
			
		||||
                'upload_date': '20121217',
 | 
			
		||||
                'uploader_id': 'batchUser',
 | 
			
		||||
            },
 | 
			
		||||
            'add_ie': ['Kaltura'],
 | 
			
		||||
        },
 | 
			
		||||
        # Eagle.Platform embed (generic URL)
 | 
			
		||||
        {
 | 
			
		||||
            'url': 'http://lenta.ru/news/2015/03/06/navalny/',
 | 
			
		||||
@@ -996,16 +1032,31 @@ class GenericIE(InfoExtractor):
 | 
			
		||||
                'timestamp': 1389118457,
 | 
			
		||||
            },
 | 
			
		||||
        },
 | 
			
		||||
        # NBC News embed
 | 
			
		||||
        {
 | 
			
		||||
            'url': 'http://www.vulture.com/2016/06/letterman-couldnt-care-less-about-late-night.html',
 | 
			
		||||
            'md5': '1aa589c675898ae6d37a17913cf68d66',
 | 
			
		||||
            'info_dict': {
 | 
			
		||||
                'id': '701714499682',
 | 
			
		||||
                'ext': 'mp4',
 | 
			
		||||
                'title': 'PREVIEW: On Assignment: David Letterman',
 | 
			
		||||
                'description': 'A preview of Tom Brokaw\'s interview with David Letterman as part of the On Assignment series powered by Dateline. Airs Sunday June 12 at 7/6c.',
 | 
			
		||||
            },
 | 
			
		||||
        },
 | 
			
		||||
        # UDN embed
 | 
			
		||||
        {
 | 
			
		||||
            'url': 'http://www.udn.com/news/story/7314/822787',
 | 
			
		||||
            'url': 'https://video.udn.com/news/300346',
 | 
			
		||||
            'md5': 'fd2060e988c326991037b9aff9df21a6',
 | 
			
		||||
            'info_dict': {
 | 
			
		||||
                'id': '300346',
 | 
			
		||||
                'ext': 'mp4',
 | 
			
		||||
                'title': '中一中男師變性 全校師生力挺',
 | 
			
		||||
                'thumbnail': 're:^https?://.*\.jpg$',
 | 
			
		||||
            }
 | 
			
		||||
            },
 | 
			
		||||
            'params': {
 | 
			
		||||
                # m3u8 download
 | 
			
		||||
                'skip_download': True,
 | 
			
		||||
            },
 | 
			
		||||
        },
 | 
			
		||||
        # Ooyala embed
 | 
			
		||||
        {
 | 
			
		||||
@@ -1173,6 +1224,16 @@ class GenericIE(InfoExtractor):
 | 
			
		||||
                'uploader': 'Lake8737',
 | 
			
		||||
            }
 | 
			
		||||
        },
 | 
			
		||||
        # Duplicated embedded video URLs
 | 
			
		||||
        {
 | 
			
		||||
            'url': 'http://www.hudl.com/athlete/2538180/highlights/149298443',
 | 
			
		||||
            'info_dict': {
 | 
			
		||||
                'id': '149298443_480_16c25b74_2',
 | 
			
		||||
                'ext': 'mp4',
 | 
			
		||||
                'title': 'vs. Blue Orange Spring Game',
 | 
			
		||||
                'uploader': 'www.hudl.com',
 | 
			
		||||
            },
 | 
			
		||||
        },
 | 
			
		||||
    ]
 | 
			
		||||
 | 
			
		||||
    def report_following_redirect(self, new_url):
 | 
			
		||||
@@ -1427,7 +1488,8 @@ class GenericIE(InfoExtractor):
 | 
			
		||||
        #   Site Name | Video Title
 | 
			
		||||
        #   Video Title - Tagline | Site Name
 | 
			
		||||
        # and so on and so forth; it's just not practical
 | 
			
		||||
        video_title = self._html_search_regex(
 | 
			
		||||
        video_title = self._og_search_title(
 | 
			
		||||
            webpage, default=None) or self._html_search_regex(
 | 
			
		||||
            r'(?s)<title>(.*?)</title>', webpage, 'video title',
 | 
			
		||||
            default='video')
 | 
			
		||||
 | 
			
		||||
@@ -1445,6 +1507,9 @@ class GenericIE(InfoExtractor):
 | 
			
		||||
        video_uploader = self._search_regex(
 | 
			
		||||
            r'^(?:https?://)?([^/]*)/.*', url, 'video uploader')
 | 
			
		||||
 | 
			
		||||
        video_description = self._og_search_description(webpage, default=None)
 | 
			
		||||
        video_thumbnail = self._og_search_thumbnail(webpage, default=None)
 | 
			
		||||
 | 
			
		||||
        # Helper method
 | 
			
		||||
        def _playlist_from_matches(matches, getter=None, ie=None):
 | 
			
		||||
            urlrs = orderedSet(
 | 
			
		||||
@@ -1475,6 +1540,16 @@ class GenericIE(InfoExtractor):
 | 
			
		||||
        if bc_urls:
 | 
			
		||||
            return _playlist_from_matches(bc_urls, ie='BrightcoveNew')
 | 
			
		||||
 | 
			
		||||
        # Look for ThePlatform embeds
 | 
			
		||||
        tp_urls = ThePlatformIE._extract_urls(webpage)
 | 
			
		||||
        if tp_urls:
 | 
			
		||||
            return _playlist_from_matches(tp_urls, ie='ThePlatform')
 | 
			
		||||
 | 
			
		||||
        # Look for Vessel embeds
 | 
			
		||||
        vessel_urls = VesselIE._extract_urls(webpage)
 | 
			
		||||
        if vessel_urls:
 | 
			
		||||
            return _playlist_from_matches(vessel_urls, ie=VesselIE.ie_key())
 | 
			
		||||
 | 
			
		||||
        # Look for embedded rtl.nl player
 | 
			
		||||
        matches = re.findall(
 | 
			
		||||
            r'<iframe[^>]+?src="((?:https?:)?//(?:www\.)?rtl\.nl/system/videoplayer/[^"]+(?:video_)?embed[^"]+)"',
 | 
			
		||||
@@ -1543,21 +1618,26 @@ class GenericIE(InfoExtractor):
 | 
			
		||||
                'url': embed_url,
 | 
			
		||||
                'ie_key': 'Wistia',
 | 
			
		||||
                'uploader': video_uploader,
 | 
			
		||||
                'title': video_title,
 | 
			
		||||
                'id': video_id,
 | 
			
		||||
            }
 | 
			
		||||
 | 
			
		||||
        match = re.search(r'(?:id=["\']wistia_|data-wistia-?id=["\']|Wistia\.embed\(["\'])(?P<id>[^"\']+)', webpage)
 | 
			
		||||
        if match:
 | 
			
		||||
            return {
 | 
			
		||||
                '_type': 'url_transparent',
 | 
			
		||||
                'url': 'http://fast.wistia.net/embed/iframe/{0:}'.format(match.group('id')),
 | 
			
		||||
                'url': 'wistia:%s' % match.group('id'),
 | 
			
		||||
                'ie_key': 'Wistia',
 | 
			
		||||
                'uploader': video_uploader,
 | 
			
		||||
                'title': video_title,
 | 
			
		||||
                'id': match.group('id')
 | 
			
		||||
            }
 | 
			
		||||
 | 
			
		||||
        match = re.search(
 | 
			
		||||
            r'''(?sx)
 | 
			
		||||
                <script[^>]+src=(["'])(?:https?:)?//fast\.wistia\.com/assets/external/E-v1\.js\1[^>]*>.*?
 | 
			
		||||
                <div[^>]+class=(["']).*?\bwistia_async_(?P<id>[a-z0-9]+)\b.*?\2
 | 
			
		||||
            ''', webpage)
 | 
			
		||||
        if match:
 | 
			
		||||
            return self.url_result(self._proto_relative_url(
 | 
			
		||||
                'wistia:%s' % match.group('id')), 'Wistia')
 | 
			
		||||
 | 
			
		||||
        # Look for SVT player
 | 
			
		||||
        svt_url = SVTIE._extract_url(webpage)
 | 
			
		||||
        if svt_url:
 | 
			
		||||
@@ -1777,14 +1857,6 @@ class GenericIE(InfoExtractor):
 | 
			
		||||
            url = unescapeHTML(mobj.group('url'))
 | 
			
		||||
            return self.url_result(url)
 | 
			
		||||
 | 
			
		||||
        # Look for embedded vulture.com player
 | 
			
		||||
        mobj = re.search(
 | 
			
		||||
            r'<iframe src="(?P<url>https?://video\.vulture\.com/[^"]+)"',
 | 
			
		||||
            webpage)
 | 
			
		||||
        if mobj is not None:
 | 
			
		||||
            url = unescapeHTML(mobj.group('url'))
 | 
			
		||||
            return self.url_result(url, ie='Vulture')
 | 
			
		||||
 | 
			
		||||
        # Look for embedded mtvservices player
 | 
			
		||||
        mtvservices_url = MTVServicesEmbeddedIE._extract_url(webpage)
 | 
			
		||||
        if mtvservices_url:
 | 
			
		||||
@@ -1833,7 +1905,7 @@ class GenericIE(InfoExtractor):
 | 
			
		||||
            return self.url_result(self._proto_relative_url(mobj.group('url'), scheme='http:'), 'CondeNast')
 | 
			
		||||
 | 
			
		||||
        mobj = re.search(
 | 
			
		||||
            r'<iframe[^>]+src="(?P<url>https?://new\.livestream\.com/[^"]+/player[^"]+)"',
 | 
			
		||||
            r'<iframe[^>]+src="(?P<url>https?://(?:new\.)?livestream\.com/[^"]+/player[^"]+)"',
 | 
			
		||||
            webpage)
 | 
			
		||||
        if mobj is not None:
 | 
			
		||||
            return self.url_result(mobj.group('url'), 'Livestream')
 | 
			
		||||
@@ -1845,7 +1917,7 @@ class GenericIE(InfoExtractor):
 | 
			
		||||
            return self.url_result(mobj.group('url'), 'Zapiks')
 | 
			
		||||
 | 
			
		||||
        # Look for Kaltura embeds
 | 
			
		||||
        mobj = (re.search(r"(?s)kWidget\.(?:thumb)?[Ee]mbed\(\{.*?'wid'\s*:\s*'_?(?P<partner_id>[^']+)',.*?'entry_?[Ii]d'\s*:\s*'(?P<id>[^']+)',", webpage) or
 | 
			
		||||
        mobj = (re.search(r"(?s)kWidget\.(?:thumb)?[Ee]mbed\(\{.*?(?P<q1>['\"])wid(?P=q1)\s*:\s*(?P<q2>['\"])_?(?P<partner_id>[^'\"]+)(?P=q2),.*?(?P<q3>['\"])entry_?[Ii]d(?P=q3)\s*:\s*(?P<q4>['\"])(?P<id>[^'\"]+)(?P=q4),", webpage) or
 | 
			
		||||
                re.search(r'(?s)(?P<q1>["\'])(?:https?:)?//cdnapi(?:sec)?\.kaltura\.com/.*?(?:p|partner_id)/(?P<partner_id>\d+).*?(?P=q1).*?entry_?[Ii]d\s*:\s*(?P<q2>["\'])(?P<id>.+?)(?P=q2)', webpage))
 | 
			
		||||
        if mobj is not None:
 | 
			
		||||
            return self.url_result(smuggle_url(
 | 
			
		||||
@@ -1897,6 +1969,12 @@ class GenericIE(InfoExtractor):
 | 
			
		||||
        if nbc_sports_url:
 | 
			
		||||
            return self.url_result(nbc_sports_url, 'NBCSportsVPlayer')
 | 
			
		||||
 | 
			
		||||
        # Look for NBC News embeds
 | 
			
		||||
        nbc_news_embed_url = re.search(
 | 
			
		||||
            r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//www\.nbcnews\.com/widget/video-embed/[^"\']+)\1', webpage)
 | 
			
		||||
        if nbc_news_embed_url:
 | 
			
		||||
            return self.url_result(nbc_news_embed_url.group('url'), 'NBCNews')
 | 
			
		||||
 | 
			
		||||
        # Look for Google Drive embeds
 | 
			
		||||
        google_drive_url = GoogleDriveIE._extract_url(webpage)
 | 
			
		||||
        if google_drive_url:
 | 
			
		||||
@@ -1983,6 +2061,19 @@ class GenericIE(InfoExtractor):
 | 
			
		||||
        if liveleak_url:
 | 
			
		||||
            return self.url_result(liveleak_url, 'LiveLeak')
 | 
			
		||||
 | 
			
		||||
        # Look for 3Q SDN embeds
 | 
			
		||||
        threeqsdn_url = ThreeQSDNIE._extract_url(webpage)
 | 
			
		||||
        if threeqsdn_url:
 | 
			
		||||
            return {
 | 
			
		||||
                '_type': 'url_transparent',
 | 
			
		||||
                'ie_key': ThreeQSDNIE.ie_key(),
 | 
			
		||||
                'url': self._proto_relative_url(threeqsdn_url),
 | 
			
		||||
                'title': video_title,
 | 
			
		||||
                'description': video_description,
 | 
			
		||||
                'thumbnail': video_thumbnail,
 | 
			
		||||
                'uploader': video_uploader,
 | 
			
		||||
            }
 | 
			
		||||
 | 
			
		||||
        def check_video(vurl):
 | 
			
		||||
            if YoutubeIE.suitable(vurl):
 | 
			
		||||
                return True
 | 
			
		||||
@@ -2063,7 +2154,7 @@ class GenericIE(InfoExtractor):
 | 
			
		||||
            raise UnsupportedError(url)
 | 
			
		||||
 | 
			
		||||
        entries = []
 | 
			
		||||
        for video_url in found:
 | 
			
		||||
        for video_url in orderedSet(found):
 | 
			
		||||
            video_url = unescapeHTML(video_url)
 | 
			
		||||
            video_url = video_url.replace('\\/', '/')
 | 
			
		||||
            video_url = compat_urlparse.urljoin(url, video_url)
 | 
			
		||||
 
 | 
			
		||||
							
								
								
									
										66
									
								
								youtube_dl/extractor/godtv.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										66
									
								
								youtube_dl/extractor/godtv.py
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,66 @@
 | 
			
		||||
from __future__ import unicode_literals
 | 
			
		||||
 | 
			
		||||
from .common import InfoExtractor
 | 
			
		||||
from .ooyala import OoyalaIE
 | 
			
		||||
from ..utils import js_to_json
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class GodTVIE(InfoExtractor):
 | 
			
		||||
    _VALID_URL = r'https?://(?:www\.)?god\.tv(?:/[^/]+)*/(?P<id>[^/?#&]+)'
 | 
			
		||||
    _TESTS = [{
 | 
			
		||||
        'url': 'http://god.tv/jesus-image/video/jesus-conference-2016/randy-needham',
 | 
			
		||||
        'info_dict': {
 | 
			
		||||
            'id': 'lpd3g2MzE6D1g8zFAKz8AGpxWcpu6o_3',
 | 
			
		||||
            'ext': 'mp4',
 | 
			
		||||
            'title': 'Randy Needham',
 | 
			
		||||
            'duration': 3615.08,
 | 
			
		||||
        },
 | 
			
		||||
        'params': {
 | 
			
		||||
            'skip_download': True,
 | 
			
		||||
        }
 | 
			
		||||
    }, {
 | 
			
		||||
        'url': 'http://god.tv/playlist/bible-study',
 | 
			
		||||
        'info_dict': {
 | 
			
		||||
            'id': 'bible-study',
 | 
			
		||||
        },
 | 
			
		||||
        'playlist_mincount': 37,
 | 
			
		||||
    }, {
 | 
			
		||||
        'url': 'http://god.tv/node/15097',
 | 
			
		||||
        'only_matching': True,
 | 
			
		||||
    }, {
 | 
			
		||||
        'url': 'http://god.tv/live/africa',
 | 
			
		||||
        'only_matching': True,
 | 
			
		||||
    }, {
 | 
			
		||||
        'url': 'http://god.tv/liveevents',
 | 
			
		||||
        'only_matching': True,
 | 
			
		||||
    }]
 | 
			
		||||
 | 
			
		||||
    def _real_extract(self, url):
 | 
			
		||||
        display_id = self._match_id(url)
 | 
			
		||||
 | 
			
		||||
        webpage = self._download_webpage(url, display_id)
 | 
			
		||||
 | 
			
		||||
        settings = self._parse_json(
 | 
			
		||||
            self._search_regex(
 | 
			
		||||
                r'jQuery\.extend\(Drupal\.settings\s*,\s*({.+?})\);',
 | 
			
		||||
                webpage, 'settings', default='{}'),
 | 
			
		||||
            display_id, transform_source=js_to_json, fatal=False)
 | 
			
		||||
 | 
			
		||||
        ooyala_id = None
 | 
			
		||||
 | 
			
		||||
        if settings:
 | 
			
		||||
            playlist = settings.get('playlist')
 | 
			
		||||
            if playlist and isinstance(playlist, list):
 | 
			
		||||
                entries = [
 | 
			
		||||
                    OoyalaIE._build_url_result(video['content_id'])
 | 
			
		||||
                    for video in playlist if video.get('content_id')]
 | 
			
		||||
                if entries:
 | 
			
		||||
                    return self.playlist_result(entries, display_id)
 | 
			
		||||
            ooyala_id = settings.get('ooyala', {}).get('content_id')
 | 
			
		||||
 | 
			
		||||
        if not ooyala_id:
 | 
			
		||||
            ooyala_id = self._search_regex(
 | 
			
		||||
                r'["\']content_id["\']\s*:\s*(["\'])(?P<id>[\w-]+)\1',
 | 
			
		||||
                webpage, 'ooyala id', group='id')
 | 
			
		||||
 | 
			
		||||
        return OoyalaIE._build_url_result(ooyala_id)
 | 
			
		||||
@@ -4,7 +4,7 @@ from .common import InfoExtractor
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class GrouponIE(InfoExtractor):
 | 
			
		||||
    _VALID_URL = r'https?://www\.groupon\.com/deals/(?P<id>[^?#]+)'
 | 
			
		||||
    _VALID_URL = r'https?://(?:www\.)?groupon\.com/deals/(?P<id>[^/?#&]+)'
 | 
			
		||||
 | 
			
		||||
    _TEST = {
 | 
			
		||||
        'url': 'https://www.groupon.com/deals/bikram-yoga-huntington-beach-2#ooid=tubGNycTo_9Uxg82uESj4i61EYX8nyuf',
 | 
			
		||||
@@ -14,17 +14,27 @@ class GrouponIE(InfoExtractor):
 | 
			
		||||
            'description': 'Studio kept at 105 degrees and 40% humidity with anti-microbial and anti-slip Flotex flooring; certified instructors',
 | 
			
		||||
        },
 | 
			
		||||
        'playlist': [{
 | 
			
		||||
            'md5': '42428ce8a00585f9bc36e49226eae7a1',
 | 
			
		||||
            'info_dict': {
 | 
			
		||||
                'id': 'tubGNycTo_9Uxg82uESj4i61EYX8nyuf',
 | 
			
		||||
                'ext': 'flv',
 | 
			
		||||
                'title': 'Bikram Yoga Huntington Beach | Orange County',
 | 
			
		||||
                'id': 'fk6OhWpXgIQ',
 | 
			
		||||
                'ext': 'mp4',
 | 
			
		||||
                'title': 'Bikram Yoga Huntington Beach | Orange County !tubGNycTo@9Uxg82uESj4i61EYX8nyuf',
 | 
			
		||||
                'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
 | 
			
		||||
                'duration': 44.961,
 | 
			
		||||
                'duration': 45,
 | 
			
		||||
                'upload_date': '20160405',
 | 
			
		||||
                'uploader_id': 'groupon',
 | 
			
		||||
                'uploader': 'Groupon',
 | 
			
		||||
            },
 | 
			
		||||
            'add_ie': ['Youtube'],
 | 
			
		||||
        }],
 | 
			
		||||
        'params': {
 | 
			
		||||
            'skip_download': 'HDS',
 | 
			
		||||
        }
 | 
			
		||||
            'skip_download': True,
 | 
			
		||||
        },
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    _PROVIDERS = {
 | 
			
		||||
        'ooyala': ('ooyala:%s', 'Ooyala'),
 | 
			
		||||
        'youtube': ('%s', 'Youtube'),
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    def _real_extract(self, url):
 | 
			
		||||
@@ -36,12 +46,17 @@ class GrouponIE(InfoExtractor):
 | 
			
		||||
        videos = payload['carousel'].get('dealVideos', [])
 | 
			
		||||
        entries = []
 | 
			
		||||
        for v in videos:
 | 
			
		||||
            if v.get('provider') != 'OOYALA':
 | 
			
		||||
            provider = v.get('provider')
 | 
			
		||||
            video_id = v.get('media') or v.get('id') or v.get('baseURL')
 | 
			
		||||
            if not provider or not video_id:
 | 
			
		||||
                continue
 | 
			
		||||
            url_pattern, ie_key = self._PROVIDERS.get(provider.lower())
 | 
			
		||||
            if not url_pattern:
 | 
			
		||||
                self.report_warning(
 | 
			
		||||
                    '%s: Unsupported video provider %s, skipping video' %
 | 
			
		||||
                    (playlist_id, v.get('provider')))
 | 
			
		||||
                    (playlist_id, provider))
 | 
			
		||||
                continue
 | 
			
		||||
            entries.append(self.url_result('ooyala:%s' % v['media']))
 | 
			
		||||
            entries.append(self.url_result(url_pattern % video_id, ie_key))
 | 
			
		||||
 | 
			
		||||
        return {
 | 
			
		||||
            '_type': 'playlist',
 | 
			
		||||
 
 | 
			
		||||
@@ -7,6 +7,7 @@ from .common import InfoExtractor
 | 
			
		||||
from ..compat import compat_urlparse
 | 
			
		||||
from ..utils import (
 | 
			
		||||
    HEADRequest,
 | 
			
		||||
    KNOWN_EXTENSIONS,
 | 
			
		||||
    sanitized_Request,
 | 
			
		||||
    str_to_int,
 | 
			
		||||
    urlencode_postdata,
 | 
			
		||||
@@ -17,7 +18,7 @@ from ..utils import (
 | 
			
		||||
class HearThisAtIE(InfoExtractor):
 | 
			
		||||
    _VALID_URL = r'https?://(?:www\.)?hearthis\.at/(?P<artist>[^/]+)/(?P<title>[A-Za-z0-9\-]+)/?$'
 | 
			
		||||
    _PLAYLIST_URL = 'https://hearthis.at/playlist.php'
 | 
			
		||||
    _TEST = {
 | 
			
		||||
    _TESTS = [{
 | 
			
		||||
        'url': 'https://hearthis.at/moofi/dr-kreep',
 | 
			
		||||
        'md5': 'ab6ec33c8fed6556029337c7885eb4e0',
 | 
			
		||||
        'info_dict': {
 | 
			
		||||
@@ -26,7 +27,7 @@ class HearThisAtIE(InfoExtractor):
 | 
			
		||||
            'title': 'Moofi - Dr. Kreep',
 | 
			
		||||
            'thumbnail': 're:^https?://.*\.jpg$',
 | 
			
		||||
            'timestamp': 1421564134,
 | 
			
		||||
            'description': 'Creepy Patch. Mutable Instruments Braids Vowel + Formant Mode.',
 | 
			
		||||
            'description': 'Listen to Dr. Kreep by Moofi on hearthis.at - Modular, Eurorack, Mutable Intruments Braids, Valhalla-DSP',
 | 
			
		||||
            'upload_date': '20150118',
 | 
			
		||||
            'comment_count': int,
 | 
			
		||||
            'view_count': int,
 | 
			
		||||
@@ -34,7 +35,25 @@ class HearThisAtIE(InfoExtractor):
 | 
			
		||||
            'duration': 71,
 | 
			
		||||
            'categories': ['Experimental'],
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
    }, {
 | 
			
		||||
        # 'download' link redirects to the original webpage
 | 
			
		||||
        'url': 'https://hearthis.at/twitchsf/dj-jim-hopkins-totally-bitchin-80s-dance-mix/',
 | 
			
		||||
        'md5': '5980ceb7c461605d30f1f039df160c6e',
 | 
			
		||||
        'info_dict': {
 | 
			
		||||
            'id': '811296',
 | 
			
		||||
            'ext': 'mp3',
 | 
			
		||||
            'title': 'TwitchSF - DJ Jim Hopkins -  Totally Bitchin\' 80\'s Dance Mix!',
 | 
			
		||||
            'description': 'Listen to DJ Jim Hopkins -  Totally Bitchin\' 80\'s Dance Mix! by TwitchSF on hearthis.at - Dance',
 | 
			
		||||
            'upload_date': '20160328',
 | 
			
		||||
            'timestamp': 1459186146,
 | 
			
		||||
            'thumbnail': 're:^https?://.*\.jpg$',
 | 
			
		||||
            'comment_count': int,
 | 
			
		||||
            'view_count': int,
 | 
			
		||||
            'like_count': int,
 | 
			
		||||
            'duration': 4360,
 | 
			
		||||
            'categories': ['Dance'],
 | 
			
		||||
        },
 | 
			
		||||
    }]
 | 
			
		||||
 | 
			
		||||
    def _real_extract(self, url):
 | 
			
		||||
        m = re.match(self._VALID_URL, url)
 | 
			
		||||
@@ -90,13 +109,14 @@ class HearThisAtIE(InfoExtractor):
 | 
			
		||||
            ext_handle = self._request_webpage(
 | 
			
		||||
                ext_req, display_id, note='Determining extension')
 | 
			
		||||
            ext = urlhandle_detect_ext(ext_handle)
 | 
			
		||||
            formats.append({
 | 
			
		||||
                'format_id': 'download',
 | 
			
		||||
                'vcodec': 'none',
 | 
			
		||||
                'ext': ext,
 | 
			
		||||
                'url': download_url,
 | 
			
		||||
                'preference': 2,  # Usually better quality
 | 
			
		||||
            })
 | 
			
		||||
            if ext in KNOWN_EXTENSIONS:
 | 
			
		||||
                formats.append({
 | 
			
		||||
                    'format_id': 'download',
 | 
			
		||||
                    'vcodec': 'none',
 | 
			
		||||
                    'ext': ext,
 | 
			
		||||
                    'url': download_url,
 | 
			
		||||
                    'preference': 2,  # Usually better quality
 | 
			
		||||
                })
 | 
			
		||||
        self._sort_formats(formats)
 | 
			
		||||
 | 
			
		||||
        return {
 | 
			
		||||
 
 | 
			
		||||
@@ -8,7 +8,7 @@ class HowcastIE(InfoExtractor):
 | 
			
		||||
    _VALID_URL = r'https?://(?:www\.)?howcast\.com/videos/(?P<id>\d+)'
 | 
			
		||||
    _TEST = {
 | 
			
		||||
        'url': 'http://www.howcast.com/videos/390161-How-to-Tie-a-Square-Knot-Properly',
 | 
			
		||||
        'md5': '8b743df908c42f60cf6496586c7f12c3',
 | 
			
		||||
        'md5': '7d45932269a288149483144f01b99789',
 | 
			
		||||
        'info_dict': {
 | 
			
		||||
            'id': '390161',
 | 
			
		||||
            'ext': 'mp4',
 | 
			
		||||
@@ -19,9 +19,9 @@ class HowcastIE(InfoExtractor):
 | 
			
		||||
            'duration': 56.823,
 | 
			
		||||
        },
 | 
			
		||||
        'params': {
 | 
			
		||||
            # m3u8 download
 | 
			
		||||
            'skip_download': True,
 | 
			
		||||
        },
 | 
			
		||||
        'add_ie': ['Ooyala'],
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    def _real_extract(self, url):
 | 
			
		||||
 
 | 
			
		||||
@@ -1,10 +1,10 @@
 | 
			
		||||
from __future__ import unicode_literals
 | 
			
		||||
 | 
			
		||||
import re
 | 
			
		||||
import json
 | 
			
		||||
 | 
			
		||||
from .common import InfoExtractor
 | 
			
		||||
from ..utils import (
 | 
			
		||||
    mimetype2ext,
 | 
			
		||||
    qualities,
 | 
			
		||||
)
 | 
			
		||||
 | 
			
		||||
@@ -12,9 +12,9 @@ from ..utils import (
 | 
			
		||||
class ImdbIE(InfoExtractor):
 | 
			
		||||
    IE_NAME = 'imdb'
 | 
			
		||||
    IE_DESC = 'Internet Movie Database trailers'
 | 
			
		||||
    _VALID_URL = r'https?://(?:www|m)\.imdb\.com/video/imdb/vi(?P<id>\d+)'
 | 
			
		||||
    _VALID_URL = r'https?://(?:www|m)\.imdb\.com/video/[^/]+/vi(?P<id>\d+)'
 | 
			
		||||
 | 
			
		||||
    _TEST = {
 | 
			
		||||
    _TESTS = [{
 | 
			
		||||
        'url': 'http://www.imdb.com/video/imdb/vi2524815897',
 | 
			
		||||
        'info_dict': {
 | 
			
		||||
            'id': '2524815897',
 | 
			
		||||
@@ -22,7 +22,10 @@ class ImdbIE(InfoExtractor):
 | 
			
		||||
            'title': 'Ice Age: Continental Drift Trailer (No. 2) - IMDb',
 | 
			
		||||
            'description': 'md5:9061c2219254e5d14e03c25c98e96a81',
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
    }, {
 | 
			
		||||
        'url': 'http://www.imdb.com/video/_/vi2524815897',
 | 
			
		||||
        'only_matching': True,
 | 
			
		||||
    }]
 | 
			
		||||
 | 
			
		||||
    def _real_extract(self, url):
 | 
			
		||||
        video_id = self._match_id(url)
 | 
			
		||||
@@ -48,13 +51,27 @@ class ImdbIE(InfoExtractor):
 | 
			
		||||
            json_data = self._search_regex(
 | 
			
		||||
                r'<script[^>]+class="imdb-player-data"[^>]*?>(.*?)</script>',
 | 
			
		||||
                format_page, 'json data', flags=re.DOTALL)
 | 
			
		||||
            info = json.loads(json_data)
 | 
			
		||||
            format_info = info['videoPlayerObject']['video']
 | 
			
		||||
            f_id = format_info['ffname']
 | 
			
		||||
            info = self._parse_json(json_data, video_id, fatal=False)
 | 
			
		||||
            if not info:
 | 
			
		||||
                continue
 | 
			
		||||
            format_info = info.get('videoPlayerObject', {}).get('video', {})
 | 
			
		||||
            if not format_info:
 | 
			
		||||
                continue
 | 
			
		||||
            video_info_list = format_info.get('videoInfoList')
 | 
			
		||||
            if not video_info_list or not isinstance(video_info_list, list):
 | 
			
		||||
                continue
 | 
			
		||||
            video_info = video_info_list[0]
 | 
			
		||||
            if not video_info or not isinstance(video_info, dict):
 | 
			
		||||
                continue
 | 
			
		||||
            video_url = video_info.get('videoUrl')
 | 
			
		||||
            if not video_url:
 | 
			
		||||
                continue
 | 
			
		||||
            format_id = format_info.get('ffname')
 | 
			
		||||
            formats.append({
 | 
			
		||||
                'format_id': f_id,
 | 
			
		||||
                'url': format_info['videoInfoList'][0]['videoUrl'],
 | 
			
		||||
                'quality': quality(f_id),
 | 
			
		||||
                'format_id': format_id,
 | 
			
		||||
                'url': video_url,
 | 
			
		||||
                'ext': mimetype2ext(video_info.get('videoMimeType')),
 | 
			
		||||
                'quality': quality(format_id),
 | 
			
		||||
            })
 | 
			
		||||
        self._sort_formats(formats)
 | 
			
		||||
 | 
			
		||||
 
 | 
			
		||||
@@ -505,7 +505,10 @@ class IqiyiIE(InfoExtractor):
 | 
			
		||||
            'enc': md5_text(enc_key + tail),
 | 
			
		||||
            'qyid': _uuid,
 | 
			
		||||
            'tn': random.random(),
 | 
			
		||||
            'um': 0,
 | 
			
		||||
            # In iQiyi's flash player, um is set to 1 if there's a logged user
 | 
			
		||||
            # Some 1080P formats are only available with a logged user.
 | 
			
		||||
            # Here force um=1 to trick the iQiyi server
 | 
			
		||||
            'um': 1,
 | 
			
		||||
            'authkey': md5_text(md5_text('') + tail),
 | 
			
		||||
            'k_tag': 1,
 | 
			
		||||
        }
 | 
			
		||||
 
 | 
			
		||||
@@ -5,33 +5,50 @@ import re
 | 
			
		||||
 | 
			
		||||
from .common import InfoExtractor
 | 
			
		||||
from ..utils import (
 | 
			
		||||
    determine_ext,
 | 
			
		||||
    float_or_none,
 | 
			
		||||
    int_or_none,
 | 
			
		||||
)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class JWPlatformBaseIE(InfoExtractor):
 | 
			
		||||
    def _parse_jwplayer_data(self, jwplayer_data, video_id, require_title=True):
 | 
			
		||||
    def _parse_jwplayer_data(self, jwplayer_data, video_id, require_title=True, m3u8_id=None, rtmp_params=None):
 | 
			
		||||
        video_data = jwplayer_data['playlist'][0]
 | 
			
		||||
 | 
			
		||||
        formats = []
 | 
			
		||||
        for source in video_data['sources']:
 | 
			
		||||
            source_url = self._proto_relative_url(source['file'])
 | 
			
		||||
            source_type = source.get('type') or ''
 | 
			
		||||
            if source_type in ('application/vnd.apple.mpegurl', 'hls'):
 | 
			
		||||
            if source_type in ('application/vnd.apple.mpegurl', 'hls') or determine_ext(source_url) == 'm3u8':
 | 
			
		||||
                formats.extend(self._extract_m3u8_formats(
 | 
			
		||||
                    source_url, video_id, 'mp4', 'm3u8_native', fatal=False))
 | 
			
		||||
                    source_url, video_id, 'mp4', 'm3u8_native', m3u8_id=m3u8_id, fatal=False))
 | 
			
		||||
            elif source_type.startswith('audio'):
 | 
			
		||||
                formats.append({
 | 
			
		||||
                    'url': source_url,
 | 
			
		||||
                    'vcodec': 'none',
 | 
			
		||||
                })
 | 
			
		||||
            else:
 | 
			
		||||
                formats.append({
 | 
			
		||||
                a_format = {
 | 
			
		||||
                    'url': source_url,
 | 
			
		||||
                    'width': int_or_none(source.get('width')),
 | 
			
		||||
                    'height': int_or_none(source.get('height')),
 | 
			
		||||
                })
 | 
			
		||||
                }
 | 
			
		||||
                if source_url.startswith('rtmp'):
 | 
			
		||||
                    a_format['ext'] = 'flv',
 | 
			
		||||
 | 
			
		||||
                    # See com/longtailvideo/jwplayer/media/RTMPMediaProvider.as
 | 
			
		||||
                    # of jwplayer.flash.swf
 | 
			
		||||
                    rtmp_url_parts = re.split(
 | 
			
		||||
                        r'((?:mp4|mp3|flv):)', source_url, 1)
 | 
			
		||||
                    if len(rtmp_url_parts) == 3:
 | 
			
		||||
                        rtmp_url, prefix, play_path = rtmp_url_parts
 | 
			
		||||
                        a_format.update({
 | 
			
		||||
                            'url': rtmp_url,
 | 
			
		||||
                            'play_path': prefix + play_path,
 | 
			
		||||
                        })
 | 
			
		||||
                    if rtmp_params:
 | 
			
		||||
                        a_format.update(rtmp_params)
 | 
			
		||||
                formats.append(a_format)
 | 
			
		||||
        self._sort_formats(formats)
 | 
			
		||||
 | 
			
		||||
        subtitles = {}
 | 
			
		||||
 
 | 
			
		||||
@@ -283,6 +283,8 @@ class KuwoCategoryIE(InfoExtractor):
 | 
			
		||||
        category_desc = remove_start(
 | 
			
		||||
            get_element_by_id('intro', webpage).strip(),
 | 
			
		||||
            '%s简介:' % category_name)
 | 
			
		||||
        if category_desc == '暂无':
 | 
			
		||||
            category_desc = None
 | 
			
		||||
 | 
			
		||||
        jsonm = self._parse_json(self._html_search_regex(
 | 
			
		||||
            r'var\s+jsonm\s*=\s*([^;]+);', webpage, 'category songs'), category_id)
 | 
			
		||||
 
 | 
			
		||||
							
								
								
									
										33
									
								
								youtube_dl/extractor/learnr.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										33
									
								
								youtube_dl/extractor/learnr.py
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,33 @@
 | 
			
		||||
# coding: utf-8
 | 
			
		||||
from __future__ import unicode_literals
 | 
			
		||||
 | 
			
		||||
from .common import InfoExtractor
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class LearnrIE(InfoExtractor):
 | 
			
		||||
    _VALID_URL = r'https?://(?:www\.)?learnr\.pro/view/video/(?P<id>[0-9]+)'
 | 
			
		||||
    _TEST = {
 | 
			
		||||
        'url': 'http://www.learnr.pro/view/video/51624-web-development-tutorial-for-beginners-1-how-to-build-webpages-with-html-css-javascript',
 | 
			
		||||
        'md5': '3719fdf0a68397f49899e82c308a89de',
 | 
			
		||||
        'info_dict': {
 | 
			
		||||
            'id': '51624',
 | 
			
		||||
            'ext': 'mp4',
 | 
			
		||||
            'title': 'Web Development Tutorial for Beginners (#1) - How to build webpages with HTML, CSS, Javascript',
 | 
			
		||||
            'description': 'md5:b36dbfa92350176cdf12b4d388485503',
 | 
			
		||||
            'uploader': 'LearnCode.academy',
 | 
			
		||||
            'uploader_id': 'learncodeacademy',
 | 
			
		||||
            'upload_date': '20131021',
 | 
			
		||||
        },
 | 
			
		||||
        'add_ie': ['Youtube'],
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    def _real_extract(self, url):
 | 
			
		||||
        video_id = self._match_id(url)
 | 
			
		||||
        webpage = self._download_webpage(url, video_id)
 | 
			
		||||
 | 
			
		||||
        return {
 | 
			
		||||
            '_type': 'url_transparent',
 | 
			
		||||
            'url': self._search_regex(
 | 
			
		||||
                r"videoId\s*:\s*'([^']+)'", webpage, 'youtube id'),
 | 
			
		||||
            'id': video_id,
 | 
			
		||||
        }
 | 
			
		||||
							
								
								
									
										143
									
								
								youtube_dl/extractor/libraryofcongress.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										143
									
								
								youtube_dl/extractor/libraryofcongress.py
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,143 @@
 | 
			
		||||
# coding: utf-8
 | 
			
		||||
from __future__ import unicode_literals
 | 
			
		||||
 | 
			
		||||
import re
 | 
			
		||||
 | 
			
		||||
from .common import InfoExtractor
 | 
			
		||||
 | 
			
		||||
from ..utils import (
 | 
			
		||||
    determine_ext,
 | 
			
		||||
    float_or_none,
 | 
			
		||||
    int_or_none,
 | 
			
		||||
    parse_filesize,
 | 
			
		||||
)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class LibraryOfCongressIE(InfoExtractor):
 | 
			
		||||
    IE_NAME = 'loc'
 | 
			
		||||
    IE_DESC = 'Library of Congress'
 | 
			
		||||
    _VALID_URL = r'https?://(?:www\.)?loc\.gov/(?:item/|today/cyberlc/feature_wdesc\.php\?.*\brec=)(?P<id>[0-9]+)'
 | 
			
		||||
    _TESTS = [{
 | 
			
		||||
        # embedded via <div class="media-player"
 | 
			
		||||
        'url': 'http://loc.gov/item/90716351/',
 | 
			
		||||
        'md5': '353917ff7f0255aa6d4b80a034833de8',
 | 
			
		||||
        'info_dict': {
 | 
			
		||||
            'id': '90716351',
 | 
			
		||||
            'ext': 'mp4',
 | 
			
		||||
            'title': "Pa's trip to Mars",
 | 
			
		||||
            'thumbnail': 're:^https?://.*\.jpg$',
 | 
			
		||||
            'duration': 0,
 | 
			
		||||
            'view_count': int,
 | 
			
		||||
        },
 | 
			
		||||
    }, {
 | 
			
		||||
        # webcast embedded via mediaObjectId
 | 
			
		||||
        'url': 'https://www.loc.gov/today/cyberlc/feature_wdesc.php?rec=5578',
 | 
			
		||||
        'info_dict': {
 | 
			
		||||
            'id': '5578',
 | 
			
		||||
            'ext': 'mp4',
 | 
			
		||||
            'title': 'Help! Preservation Training Needs Here, There & Everywhere',
 | 
			
		||||
            'duration': 3765,
 | 
			
		||||
            'view_count': int,
 | 
			
		||||
            'subtitles': 'mincount:1',
 | 
			
		||||
        },
 | 
			
		||||
        'params': {
 | 
			
		||||
            'skip_download': True,
 | 
			
		||||
        },
 | 
			
		||||
    }, {
 | 
			
		||||
        # with direct download links
 | 
			
		||||
        'url': 'https://www.loc.gov/item/78710669/',
 | 
			
		||||
        'info_dict': {
 | 
			
		||||
            'id': '78710669',
 | 
			
		||||
            'ext': 'mp4',
 | 
			
		||||
            'title': 'La vie et la passion de Jesus-Christ',
 | 
			
		||||
            'duration': 0,
 | 
			
		||||
            'view_count': int,
 | 
			
		||||
            'formats': 'mincount:4',
 | 
			
		||||
        },
 | 
			
		||||
        'params': {
 | 
			
		||||
            'skip_download': True,
 | 
			
		||||
        },
 | 
			
		||||
    }]
 | 
			
		||||
 | 
			
		||||
    def _real_extract(self, url):
 | 
			
		||||
        video_id = self._match_id(url)
 | 
			
		||||
        webpage = self._download_webpage(url, video_id)
 | 
			
		||||
 | 
			
		||||
        media_id = self._search_regex(
 | 
			
		||||
            (r'id=(["\'])media-player-(?P<id>.+?)\1',
 | 
			
		||||
             r'<video[^>]+id=(["\'])uuid-(?P<id>.+?)\1',
 | 
			
		||||
             r'<video[^>]+data-uuid=(["\'])(?P<id>.+?)\1',
 | 
			
		||||
             r'mediaObjectId\s*:\s*(["\'])(?P<id>.+?)\1'),
 | 
			
		||||
            webpage, 'media id', group='id')
 | 
			
		||||
 | 
			
		||||
        data = self._download_json(
 | 
			
		||||
            'https://media.loc.gov/services/v1/media?id=%s&context=json' % media_id,
 | 
			
		||||
            video_id)['mediaObject']
 | 
			
		||||
 | 
			
		||||
        derivative = data['derivatives'][0]
 | 
			
		||||
        media_url = derivative['derivativeUrl']
 | 
			
		||||
 | 
			
		||||
        title = derivative.get('shortName') or data.get('shortName') or self._og_search_title(
 | 
			
		||||
            webpage)
 | 
			
		||||
 | 
			
		||||
        # Following algorithm was extracted from setAVSource js function
 | 
			
		||||
        # found in webpage
 | 
			
		||||
        media_url = media_url.replace('rtmp', 'https')
 | 
			
		||||
 | 
			
		||||
        is_video = data.get('mediaType', 'v').lower() == 'v'
 | 
			
		||||
        ext = determine_ext(media_url)
 | 
			
		||||
        if ext not in ('mp4', 'mp3'):
 | 
			
		||||
            media_url += '.mp4' if is_video else '.mp3'
 | 
			
		||||
 | 
			
		||||
        if 'vod/mp4:' in media_url:
 | 
			
		||||
            formats = [{
 | 
			
		||||
                'url': media_url.replace('vod/mp4:', 'hls-vod/media/') + '.m3u8',
 | 
			
		||||
                'format_id': 'hls',
 | 
			
		||||
                'ext': 'mp4',
 | 
			
		||||
                'protocol': 'm3u8_native',
 | 
			
		||||
                'quality': 1,
 | 
			
		||||
            }]
 | 
			
		||||
        elif 'vod/mp3:' in media_url:
 | 
			
		||||
            formats = [{
 | 
			
		||||
                'url': media_url.replace('vod/mp3:', ''),
 | 
			
		||||
                'vcodec': 'none',
 | 
			
		||||
            }]
 | 
			
		||||
 | 
			
		||||
        download_urls = set()
 | 
			
		||||
        for m in re.finditer(
 | 
			
		||||
                r'<option[^>]+value=(["\'])(?P<url>.+?)\1[^>]+data-file-download=[^>]+>\s*(?P<id>.+?)(?:(?: |\s+)\((?P<size>.+?)\))?\s*<', webpage):
 | 
			
		||||
            format_id = m.group('id').lower()
 | 
			
		||||
            if format_id == 'gif':
 | 
			
		||||
                continue
 | 
			
		||||
            download_url = m.group('url')
 | 
			
		||||
            if download_url in download_urls:
 | 
			
		||||
                continue
 | 
			
		||||
            download_urls.add(download_url)
 | 
			
		||||
            formats.append({
 | 
			
		||||
                'url': download_url,
 | 
			
		||||
                'format_id': format_id,
 | 
			
		||||
                'filesize_approx': parse_filesize(m.group('size')),
 | 
			
		||||
            })
 | 
			
		||||
 | 
			
		||||
        self._sort_formats(formats)
 | 
			
		||||
 | 
			
		||||
        duration = float_or_none(data.get('duration'))
 | 
			
		||||
        view_count = int_or_none(data.get('viewCount'))
 | 
			
		||||
 | 
			
		||||
        subtitles = {}
 | 
			
		||||
        cc_url = data.get('ccUrl')
 | 
			
		||||
        if cc_url:
 | 
			
		||||
            subtitles.setdefault('en', []).append({
 | 
			
		||||
                'url': cc_url,
 | 
			
		||||
                'ext': 'ttml',
 | 
			
		||||
            })
 | 
			
		||||
 | 
			
		||||
        return {
 | 
			
		||||
            'id': video_id,
 | 
			
		||||
            'title': title,
 | 
			
		||||
            'thumbnail': self._og_search_thumbnail(webpage, default=None),
 | 
			
		||||
            'duration': duration,
 | 
			
		||||
            'view_count': view_count,
 | 
			
		||||
            'formats': formats,
 | 
			
		||||
            'subtitles': subtitles,
 | 
			
		||||
        }
 | 
			
		||||
@@ -7,48 +7,53 @@ from .common import InfoExtractor
 | 
			
		||||
from ..compat import compat_urlparse
 | 
			
		||||
from ..utils import (
 | 
			
		||||
    determine_ext,
 | 
			
		||||
    int_or_none,
 | 
			
		||||
    remove_end,
 | 
			
		||||
    unified_strdate,
 | 
			
		||||
    ExtractorError,
 | 
			
		||||
    int_or_none,
 | 
			
		||||
    parse_iso8601,
 | 
			
		||||
    remove_end,
 | 
			
		||||
)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class LifeNewsIE(InfoExtractor):
 | 
			
		||||
    IE_NAME = 'lifenews'
 | 
			
		||||
    IE_DESC = 'LIFE | NEWS'
 | 
			
		||||
    _VALID_URL = r'https?://lifenews\.ru/(?:mobile/)?(?P<section>news|video)/(?P<id>\d+)'
 | 
			
		||||
    IE_NAME = 'life'
 | 
			
		||||
    IE_DESC = 'Life.ru'
 | 
			
		||||
    _VALID_URL = r'https?://life\.ru/t/[^/]+/(?P<id>\d+)'
 | 
			
		||||
 | 
			
		||||
    _TESTS = [{
 | 
			
		||||
        # single video embedded via video/source
 | 
			
		||||
        'url': 'http://lifenews.ru/news/98736',
 | 
			
		||||
        'url': 'https://life.ru/t/новости/98736',
 | 
			
		||||
        'md5': '77c95eaefaca216e32a76a343ad89d23',
 | 
			
		||||
        'info_dict': {
 | 
			
		||||
            'id': '98736',
 | 
			
		||||
            'ext': 'mp4',
 | 
			
		||||
            'title': 'Мужчина нашел дома архив оборонного завода',
 | 
			
		||||
            'description': 'md5:3b06b1b39b5e2bea548e403d99b8bf26',
 | 
			
		||||
            'timestamp': 1344154740,
 | 
			
		||||
            'upload_date': '20120805',
 | 
			
		||||
            'view_count': int,
 | 
			
		||||
        }
 | 
			
		||||
    }, {
 | 
			
		||||
        # single video embedded via iframe
 | 
			
		||||
        'url': 'http://lifenews.ru/news/152125',
 | 
			
		||||
        'url': 'https://life.ru/t/новости/152125',
 | 
			
		||||
        'md5': '77d19a6f0886cd76bdbf44b4d971a273',
 | 
			
		||||
        'info_dict': {
 | 
			
		||||
            'id': '152125',
 | 
			
		||||
            'ext': 'mp4',
 | 
			
		||||
            'title': 'В Сети появилось видео захвата «Правым сектором» колхозных полей ',
 | 
			
		||||
            'description': 'Жители двух поселков Днепропетровской области не простили радикалам угрозу лишения плодородных земель и пошли в лобовую. ',
 | 
			
		||||
            'timestamp': 1427961840,
 | 
			
		||||
            'upload_date': '20150402',
 | 
			
		||||
            'view_count': int,
 | 
			
		||||
        }
 | 
			
		||||
    }, {
 | 
			
		||||
        # two videos embedded via iframe
 | 
			
		||||
        'url': 'http://lifenews.ru/news/153461',
 | 
			
		||||
        'url': 'https://life.ru/t/новости/153461',
 | 
			
		||||
        'info_dict': {
 | 
			
		||||
            'id': '153461',
 | 
			
		||||
            'title': 'В Москве спасли потерявшегося медвежонка, который спрятался на дереве',
 | 
			
		||||
            'description': 'Маленький хищник не смог найти дорогу домой и обрел временное убежище на тополе недалеко от жилого массива, пока его не нашла соседская собака.',
 | 
			
		||||
            'upload_date': '20150505',
 | 
			
		||||
            'timestamp': 1430825520,
 | 
			
		||||
            'view_count': int,
 | 
			
		||||
        },
 | 
			
		||||
        'playlist': [{
 | 
			
		||||
            'md5': '9b6ef8bc0ffa25aebc8bdb40d89ab795',
 | 
			
		||||
@@ -57,6 +62,7 @@ class LifeNewsIE(InfoExtractor):
 | 
			
		||||
                'ext': 'mp4',
 | 
			
		||||
                'title': 'В Москве спасли потерявшегося медвежонка, который спрятался на дереве (Видео 1)',
 | 
			
		||||
                'description': 'Маленький хищник не смог найти дорогу домой и обрел временное убежище на тополе недалеко от жилого массива, пока его не нашла соседская собака.',
 | 
			
		||||
                'timestamp': 1430825520,
 | 
			
		||||
                'upload_date': '20150505',
 | 
			
		||||
            },
 | 
			
		||||
        }, {
 | 
			
		||||
@@ -66,22 +72,25 @@ class LifeNewsIE(InfoExtractor):
 | 
			
		||||
                'ext': 'mp4',
 | 
			
		||||
                'title': 'В Москве спасли потерявшегося медвежонка, который спрятался на дереве (Видео 2)',
 | 
			
		||||
                'description': 'Маленький хищник не смог найти дорогу домой и обрел временное убежище на тополе недалеко от жилого массива, пока его не нашла соседская собака.',
 | 
			
		||||
                'timestamp': 1430825520,
 | 
			
		||||
                'upload_date': '20150505',
 | 
			
		||||
            },
 | 
			
		||||
        }],
 | 
			
		||||
    }, {
 | 
			
		||||
        'url': 'http://lifenews.ru/video/13035',
 | 
			
		||||
        'url': 'https://life.ru/t/новости/213035',
 | 
			
		||||
        'only_matching': True,
 | 
			
		||||
    }, {
 | 
			
		||||
        'url': 'https://life.ru/t/%D0%BD%D0%BE%D0%B2%D0%BE%D1%81%D1%82%D0%B8/153461',
 | 
			
		||||
        'only_matching': True,
 | 
			
		||||
    }, {
 | 
			
		||||
        'url': 'https://life.ru/t/новости/411489/manuel_vals_nazval_frantsiiu_tsieliu_nomier_odin_dlia_ighil',
 | 
			
		||||
        'only_matching': True,
 | 
			
		||||
    }]
 | 
			
		||||
 | 
			
		||||
    def _real_extract(self, url):
 | 
			
		||||
        mobj = re.match(self._VALID_URL, url)
 | 
			
		||||
        video_id = mobj.group('id')
 | 
			
		||||
        section = mobj.group('section')
 | 
			
		||||
        video_id = self._match_id(url)
 | 
			
		||||
 | 
			
		||||
        webpage = self._download_webpage(
 | 
			
		||||
            'http://lifenews.ru/%s/%s' % (section, video_id),
 | 
			
		||||
            video_id, 'Downloading page')
 | 
			
		||||
        webpage = self._download_webpage(url, video_id)
 | 
			
		||||
 | 
			
		||||
        video_urls = re.findall(
 | 
			
		||||
            r'<video[^>]+><source[^>]+src=["\'](.+?)["\']', webpage)
 | 
			
		||||
@@ -95,26 +104,22 @@ class LifeNewsIE(InfoExtractor):
 | 
			
		||||
 | 
			
		||||
        title = remove_end(
 | 
			
		||||
            self._og_search_title(webpage),
 | 
			
		||||
            ' - Первый по срочным новостям — LIFE | NEWS')
 | 
			
		||||
            ' - Life.ru')
 | 
			
		||||
 | 
			
		||||
        description = self._og_search_description(webpage)
 | 
			
		||||
 | 
			
		||||
        view_count = self._html_search_regex(
 | 
			
		||||
            r'<div class=\'views\'>\s*(\d+)\s*</div>', webpage, 'view count', fatal=False)
 | 
			
		||||
        comment_count = self._html_search_regex(
 | 
			
		||||
            r'=\'commentCount\'[^>]*>\s*(\d+)\s*<',
 | 
			
		||||
            webpage, 'comment count', fatal=False)
 | 
			
		||||
            r'<div[^>]+class=(["\']).*?\bhits-count\b.*?\1[^>]*>\s*(?P<value>\d+)\s*</div>',
 | 
			
		||||
            webpage, 'view count', fatal=False, group='value')
 | 
			
		||||
 | 
			
		||||
        upload_date = self._html_search_regex(
 | 
			
		||||
            r'<time[^>]*datetime=\'([^\']+)\'', webpage, 'upload date', fatal=False)
 | 
			
		||||
        if upload_date is not None:
 | 
			
		||||
            upload_date = unified_strdate(upload_date)
 | 
			
		||||
        timestamp = parse_iso8601(self._search_regex(
 | 
			
		||||
            r'<time[^>]+datetime=(["\'])(?P<value>.+?)\1',
 | 
			
		||||
            webpage, 'upload date', fatal=False, group='value'))
 | 
			
		||||
 | 
			
		||||
        common_info = {
 | 
			
		||||
            'description': description,
 | 
			
		||||
            'view_count': int_or_none(view_count),
 | 
			
		||||
            'comment_count': int_or_none(comment_count),
 | 
			
		||||
            'upload_date': upload_date,
 | 
			
		||||
            'timestamp': timestamp,
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
        def make_entry(video_id, video_url, index=None):
 | 
			
		||||
@@ -183,7 +188,8 @@ class LifeEmbedIE(InfoExtractor):
 | 
			
		||||
            ext = determine_ext(video_url)
 | 
			
		||||
            if ext == 'm3u8':
 | 
			
		||||
                formats.extend(self._extract_m3u8_formats(
 | 
			
		||||
                    video_url, video_id, 'mp4', m3u8_id='m3u8'))
 | 
			
		||||
                    video_url, video_id, 'mp4',
 | 
			
		||||
                    entry_protocol='m3u8_native', m3u8_id='m3u8'))
 | 
			
		||||
            else:
 | 
			
		||||
                formats.append({
 | 
			
		||||
                    'url': video_url,
 | 
			
		||||
 
 | 
			
		||||
							
								
								
									
										137
									
								
								youtube_dl/extractor/litv.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										137
									
								
								youtube_dl/extractor/litv.py
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,137 @@
 | 
			
		||||
# coding: utf-8
 | 
			
		||||
from __future__ import unicode_literals
 | 
			
		||||
 | 
			
		||||
import json
 | 
			
		||||
import re
 | 
			
		||||
 | 
			
		||||
from .common import InfoExtractor
 | 
			
		||||
from ..utils import (
 | 
			
		||||
    ExtractorError,
 | 
			
		||||
    int_or_none,
 | 
			
		||||
    smuggle_url,
 | 
			
		||||
    unsmuggle_url,
 | 
			
		||||
)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class LiTVIE(InfoExtractor):
 | 
			
		||||
    _VALID_URL = r'https?://www\.litv\.tv/vod/[^/]+/content\.do\?.*?\bid=(?P<id>[^&]+)'
 | 
			
		||||
 | 
			
		||||
    _URL_TEMPLATE = 'https://www.litv.tv/vod/%s/content.do?id=%s'
 | 
			
		||||
 | 
			
		||||
    _TESTS = [{
 | 
			
		||||
        'url': 'https://www.litv.tv/vod/drama/content.do?brc_id=root&id=VOD00041610&isUHEnabled=true&autoPlay=1',
 | 
			
		||||
        'info_dict': {
 | 
			
		||||
            'id': 'VOD00041606',
 | 
			
		||||
            'title': '花千骨',
 | 
			
		||||
        },
 | 
			
		||||
        'playlist_count': 50,
 | 
			
		||||
    }, {
 | 
			
		||||
        'url': 'https://www.litv.tv/vod/drama/content.do?brc_id=root&id=VOD00041610&isUHEnabled=true&autoPlay=1',
 | 
			
		||||
        'info_dict': {
 | 
			
		||||
            'id': 'VOD00041610',
 | 
			
		||||
            'ext': 'mp4',
 | 
			
		||||
            'title': '花千骨第1集',
 | 
			
		||||
            'thumbnail': 're:https?://.*\.jpg$',
 | 
			
		||||
            'description': 'md5:c7017aa144c87467c4fb2909c4b05d6f',
 | 
			
		||||
            'episode_number': 1,
 | 
			
		||||
        },
 | 
			
		||||
        'params': {
 | 
			
		||||
            'noplaylist': True,
 | 
			
		||||
            'skip_download': True,  # m3u8 download
 | 
			
		||||
        },
 | 
			
		||||
        'skip': 'Georestricted to Taiwan',
 | 
			
		||||
    }]
 | 
			
		||||
 | 
			
		||||
    def _extract_playlist(self, season_list, video_id, vod_data, view_data, prompt=True):
 | 
			
		||||
        episode_title = view_data['title']
 | 
			
		||||
        content_id = season_list['contentId']
 | 
			
		||||
 | 
			
		||||
        if prompt:
 | 
			
		||||
            self.to_screen('Downloading playlist %s - add --no-playlist to just download video %s' % (content_id, video_id))
 | 
			
		||||
 | 
			
		||||
        all_episodes = [
 | 
			
		||||
            self.url_result(smuggle_url(
 | 
			
		||||
                self._URL_TEMPLATE % (view_data['contentType'], episode['contentId']),
 | 
			
		||||
                {'force_noplaylist': True}))  # To prevent infinite recursion
 | 
			
		||||
            for episode in season_list['episode']]
 | 
			
		||||
 | 
			
		||||
        return self.playlist_result(all_episodes, content_id, episode_title)
 | 
			
		||||
 | 
			
		||||
    def _real_extract(self, url):
 | 
			
		||||
        url, data = unsmuggle_url(url, {})
 | 
			
		||||
 | 
			
		||||
        video_id = self._match_id(url)
 | 
			
		||||
 | 
			
		||||
        noplaylist = self._downloader.params.get('noplaylist')
 | 
			
		||||
        noplaylist_prompt = True
 | 
			
		||||
        if 'force_noplaylist' in data:
 | 
			
		||||
            noplaylist = data['force_noplaylist']
 | 
			
		||||
            noplaylist_prompt = False
 | 
			
		||||
 | 
			
		||||
        webpage = self._download_webpage(url, video_id)
 | 
			
		||||
 | 
			
		||||
        view_data = dict(map(lambda t: (t[0], t[2]), re.findall(
 | 
			
		||||
            r'viewData\.([a-zA-Z]+)\s*=\s*(["\'])([^"\']+)\2',
 | 
			
		||||
            webpage)))
 | 
			
		||||
 | 
			
		||||
        vod_data = self._parse_json(self._search_regex(
 | 
			
		||||
            'var\s+vod\s*=\s*([^;]+)', webpage, 'VOD data', default='{}'),
 | 
			
		||||
            video_id)
 | 
			
		||||
 | 
			
		||||
        season_list = list(vod_data.get('seasonList', {}).values())
 | 
			
		||||
        if season_list:
 | 
			
		||||
            if not noplaylist:
 | 
			
		||||
                return self._extract_playlist(
 | 
			
		||||
                    season_list[0], video_id, vod_data, view_data,
 | 
			
		||||
                    prompt=noplaylist_prompt)
 | 
			
		||||
 | 
			
		||||
            if noplaylist_prompt:
 | 
			
		||||
                self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
 | 
			
		||||
 | 
			
		||||
        # In browsers `getMainUrl` request is always issued. Usually this
 | 
			
		||||
        # endpoint gives the same result as the data embedded in the webpage.
 | 
			
		||||
        # If georestricted, there are no embedded data, so an extra request is
 | 
			
		||||
        # necessary to get the error code
 | 
			
		||||
        video_data = self._parse_json(self._search_regex(
 | 
			
		||||
            r'uiHlsUrl\s*=\s*testBackendData\(([^;]+)\);',
 | 
			
		||||
            webpage, 'video data', default='{}'), video_id)
 | 
			
		||||
        if not video_data:
 | 
			
		||||
            payload = {
 | 
			
		||||
                'assetId': view_data['assetId'],
 | 
			
		||||
                'watchDevices': vod_data['watchDevices'],
 | 
			
		||||
                'contentType': view_data['contentType'],
 | 
			
		||||
            }
 | 
			
		||||
            video_data = self._download_json(
 | 
			
		||||
                'https://www.litv.tv/vod/getMainUrl', video_id,
 | 
			
		||||
                data=json.dumps(payload).encode('utf-8'),
 | 
			
		||||
                headers={'Content-Type': 'application/json'})
 | 
			
		||||
 | 
			
		||||
        if not video_data.get('fullpath'):
 | 
			
		||||
            error_msg = video_data.get('errorMessage')
 | 
			
		||||
            if error_msg == 'vod.error.outsideregionerror':
 | 
			
		||||
                self.raise_geo_restricted('This video is available in Taiwan only')
 | 
			
		||||
            if error_msg:
 | 
			
		||||
                raise ExtractorError('%s said: %s' % (self.IE_NAME, error_msg), expected=True)
 | 
			
		||||
            raise ExtractorError('Unexpected result from %s' % self.IE_NAME)
 | 
			
		||||
 | 
			
		||||
        formats = self._extract_m3u8_formats(
 | 
			
		||||
            video_data['fullpath'], video_id, ext='mp4', m3u8_id='hls')
 | 
			
		||||
        for a_format in formats:
 | 
			
		||||
            # LiTV HLS segments doesn't like compressions
 | 
			
		||||
            a_format.setdefault('http_headers', {})['Youtubedl-no-compression'] = True
 | 
			
		||||
 | 
			
		||||
        title = view_data['title'] + view_data.get('secondaryMark', '')
 | 
			
		||||
        description = view_data.get('description')
 | 
			
		||||
        thumbnail = view_data.get('imageFile')
 | 
			
		||||
        categories = [item['name'] for item in vod_data.get('category', [])]
 | 
			
		||||
        episode = int_or_none(view_data.get('episode'))
 | 
			
		||||
 | 
			
		||||
        return {
 | 
			
		||||
            'id': video_id,
 | 
			
		||||
            'formats': formats,
 | 
			
		||||
            'title': title,
 | 
			
		||||
            'description': description,
 | 
			
		||||
            'thumbnail': thumbnail,
 | 
			
		||||
            'categories': categories,
 | 
			
		||||
            'episode_number': episode,
 | 
			
		||||
        }
 | 
			
		||||
@@ -17,7 +17,8 @@ class LiveLeakIE(InfoExtractor):
 | 
			
		||||
            'ext': 'flv',
 | 
			
		||||
            'description': 'extremely bad day for this guy..!',
 | 
			
		||||
            'uploader': 'ljfriel2',
 | 
			
		||||
            'title': 'Most unlucky car accident'
 | 
			
		||||
            'title': 'Most unlucky car accident',
 | 
			
		||||
            'thumbnail': 're:^https?://.*\.jpg$'
 | 
			
		||||
        }
 | 
			
		||||
    }, {
 | 
			
		||||
        'url': 'http://www.liveleak.com/view?i=f93_1390833151',
 | 
			
		||||
@@ -28,6 +29,7 @@ class LiveLeakIE(InfoExtractor):
 | 
			
		||||
            'description': 'German Television Channel NDR does an exclusive interview with Edward Snowden.\r\nUploaded on LiveLeak cause German Television thinks the rest of the world isn\'t intereseted in Edward Snowden.',
 | 
			
		||||
            'uploader': 'ARD_Stinkt',
 | 
			
		||||
            'title': 'German Television does first Edward Snowden Interview (ENGLISH)',
 | 
			
		||||
            'thumbnail': 're:^https?://.*\.jpg$'
 | 
			
		||||
        }
 | 
			
		||||
    }, {
 | 
			
		||||
        'url': 'http://www.liveleak.com/view?i=4f7_1392687779',
 | 
			
		||||
@@ -49,7 +51,8 @@ class LiveLeakIE(InfoExtractor):
 | 
			
		||||
            'ext': 'mp4',
 | 
			
		||||
            'description': 'Happened on 27.7.2014. \r\nAt 0:53 you can see people still swimming at near beach.',
 | 
			
		||||
            'uploader': 'bony333',
 | 
			
		||||
            'title': 'Crazy Hungarian tourist films close call waterspout in Croatia'
 | 
			
		||||
            'title': 'Crazy Hungarian tourist films close call waterspout in Croatia',
 | 
			
		||||
            'thumbnail': 're:^https?://.*\.jpg$'
 | 
			
		||||
        }
 | 
			
		||||
    }]
 | 
			
		||||
 | 
			
		||||
@@ -72,6 +75,7 @@ class LiveLeakIE(InfoExtractor):
 | 
			
		||||
        age_limit = int_or_none(self._search_regex(
 | 
			
		||||
            r'you confirm that you are ([0-9]+) years and over.',
 | 
			
		||||
            webpage, 'age limit', default=None))
 | 
			
		||||
        video_thumbnail = self._og_search_thumbnail(webpage)
 | 
			
		||||
 | 
			
		||||
        sources_raw = self._search_regex(
 | 
			
		||||
            r'(?s)sources:\s*(\[.*?\]),', webpage, 'video URLs', default=None)
 | 
			
		||||
@@ -124,4 +128,5 @@ class LiveLeakIE(InfoExtractor):
 | 
			
		||||
            'uploader': video_uploader,
 | 
			
		||||
            'formats': formats,
 | 
			
		||||
            'age_limit': age_limit,
 | 
			
		||||
            'thumbnail': video_thumbnail,
 | 
			
		||||
        }
 | 
			
		||||
 
 | 
			
		||||
@@ -150,7 +150,7 @@ class LivestreamIE(InfoExtractor):
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
    def _extract_stream_info(self, stream_info):
 | 
			
		||||
        broadcast_id = stream_info['broadcast_id']
 | 
			
		||||
        broadcast_id = compat_str(stream_info['broadcast_id'])
 | 
			
		||||
        is_live = stream_info.get('is_live')
 | 
			
		||||
 | 
			
		||||
        formats = []
 | 
			
		||||
@@ -203,9 +203,10 @@ class LivestreamIE(InfoExtractor):
 | 
			
		||||
            if not videos_info:
 | 
			
		||||
                break
 | 
			
		||||
            for v in videos_info:
 | 
			
		||||
                v_id = compat_str(v['id'])
 | 
			
		||||
                entries.append(self.url_result(
 | 
			
		||||
                    'http://livestream.com/accounts/%s/events/%s/videos/%s' % (account_id, event_id, v['id']),
 | 
			
		||||
                    'Livestream', v['id'], v['caption']))
 | 
			
		||||
                    'http://livestream.com/accounts/%s/events/%s/videos/%s' % (account_id, event_id, v_id),
 | 
			
		||||
                    'Livestream', v_id, v.get('caption')))
 | 
			
		||||
            last_video = videos_info[-1]['id']
 | 
			
		||||
        return self.playlist_result(entries, event_id, event_data['full_name'])
 | 
			
		||||
 | 
			
		||||
 
 | 
			
		||||
							
								
								
									
										47
									
								
								youtube_dl/extractor/localnews8.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										47
									
								
								youtube_dl/extractor/localnews8.py
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,47 @@
 | 
			
		||||
# coding: utf-8
 | 
			
		||||
from __future__ import unicode_literals
 | 
			
		||||
 | 
			
		||||
import re
 | 
			
		||||
 | 
			
		||||
from .common import InfoExtractor
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class LocalNews8IE(InfoExtractor):
 | 
			
		||||
    _VALID_URL = r'https?://(?:www\.)?localnews8\.com/(?:[^/]+/)*(?P<display_id>[^/]+)/(?P<id>[0-9]+)'
 | 
			
		||||
    _TEST = {
 | 
			
		||||
        'url': 'http://www.localnews8.com/news/rexburg-business-turns-carbon-fiber-scraps-into-wedding-rings/35183304',
 | 
			
		||||
        'md5': 'be4d48aea61aa2bde7be2ee47691ad20',
 | 
			
		||||
        'info_dict': {
 | 
			
		||||
            'id': '35183304',
 | 
			
		||||
            'display_id': 'rexburg-business-turns-carbon-fiber-scraps-into-wedding-rings',
 | 
			
		||||
            'ext': 'mp4',
 | 
			
		||||
            'title': 'Rexburg business turns carbon fiber scraps into wedding ring',
 | 
			
		||||
            'description': 'The process was first invented by Lamborghini and less than a dozen companies around the world use it.',
 | 
			
		||||
            'duration': 153,
 | 
			
		||||
            'timestamp': 1441844822,
 | 
			
		||||
            'upload_date': '20150910',
 | 
			
		||||
            'uploader_id': 'api',
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    def _real_extract(self, url):
 | 
			
		||||
        mobj = re.match(self._VALID_URL, url)
 | 
			
		||||
        video_id = mobj.group('id')
 | 
			
		||||
        display_id = mobj.group('display_id')
 | 
			
		||||
 | 
			
		||||
        webpage = self._download_webpage(url, display_id)
 | 
			
		||||
 | 
			
		||||
        partner_id = self._search_regex(
 | 
			
		||||
            r'partnerId\s*[:=]\s*(["\'])(?P<id>\d+)\1',
 | 
			
		||||
            webpage, 'partner id', group='id')
 | 
			
		||||
        kaltura_id = self._search_regex(
 | 
			
		||||
            r'videoIdString\s*[:=]\s*(["\'])kaltura:(?P<id>[0-9a-z_]+)\1',
 | 
			
		||||
            webpage, 'videl id', group='id')
 | 
			
		||||
 | 
			
		||||
        return {
 | 
			
		||||
            '_type': 'url_transparent',
 | 
			
		||||
            'url': 'kaltura:%s:%s' % (partner_id, kaltura_id),
 | 
			
		||||
            'ie_key': 'Kaltura',
 | 
			
		||||
            'id': video_id,
 | 
			
		||||
            'display_id': display_id,
 | 
			
		||||
        }
 | 
			
		||||
@@ -1,93 +1,94 @@
 | 
			
		||||
from __future__ import unicode_literals
 | 
			
		||||
 | 
			
		||||
import re
 | 
			
		||||
import json
 | 
			
		||||
 | 
			
		||||
from .common import InfoExtractor
 | 
			
		||||
from ..compat import compat_str
 | 
			
		||||
from ..compat import (
 | 
			
		||||
    compat_HTTPError,
 | 
			
		||||
    compat_str,
 | 
			
		||||
    compat_urlparse,
 | 
			
		||||
)
 | 
			
		||||
from ..utils import (
 | 
			
		||||
    ExtractorError,
 | 
			
		||||
    clean_html,
 | 
			
		||||
    int_or_none,
 | 
			
		||||
    sanitized_Request,
 | 
			
		||||
    urlencode_postdata,
 | 
			
		||||
)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class LyndaBaseIE(InfoExtractor):
 | 
			
		||||
    _LOGIN_URL = 'https://www.lynda.com/login/login.aspx'
 | 
			
		||||
    _SIGNIN_URL = 'https://www.lynda.com/signin'
 | 
			
		||||
    _PASSWORD_URL = 'https://www.lynda.com/signin/password'
 | 
			
		||||
    _USER_URL = 'https://www.lynda.com/signin/user'
 | 
			
		||||
    _ACCOUNT_CREDENTIALS_HINT = 'Use --username and --password options to provide lynda.com account credentials.'
 | 
			
		||||
    _NETRC_MACHINE = 'lynda'
 | 
			
		||||
 | 
			
		||||
    def _real_initialize(self):
 | 
			
		||||
        self._login()
 | 
			
		||||
 | 
			
		||||
    @staticmethod
 | 
			
		||||
    def _check_error(json_string, key_or_keys):
 | 
			
		||||
        keys = [key_or_keys] if isinstance(key_or_keys, compat_str) else key_or_keys
 | 
			
		||||
        for key in keys:
 | 
			
		||||
            error = json_string.get(key)
 | 
			
		||||
            if error:
 | 
			
		||||
                raise ExtractorError('Unable to login: %s' % error, expected=True)
 | 
			
		||||
 | 
			
		||||
    def _login_step(self, form_html, fallback_action_url, extra_form_data, note, referrer_url):
 | 
			
		||||
        action_url = self._search_regex(
 | 
			
		||||
            r'<form[^>]+action=(["\'])(?P<url>.+?)\1', form_html,
 | 
			
		||||
            'post url', default=fallback_action_url, group='url')
 | 
			
		||||
 | 
			
		||||
        if not action_url.startswith('http'):
 | 
			
		||||
            action_url = compat_urlparse.urljoin(self._SIGNIN_URL, action_url)
 | 
			
		||||
 | 
			
		||||
        form_data = self._hidden_inputs(form_html)
 | 
			
		||||
        form_data.update(extra_form_data)
 | 
			
		||||
 | 
			
		||||
        try:
 | 
			
		||||
            response = self._download_json(
 | 
			
		||||
                action_url, None, note,
 | 
			
		||||
                data=urlencode_postdata(form_data),
 | 
			
		||||
                headers={
 | 
			
		||||
                    'Referer': referrer_url,
 | 
			
		||||
                    'X-Requested-With': 'XMLHttpRequest',
 | 
			
		||||
                })
 | 
			
		||||
        except ExtractorError as e:
 | 
			
		||||
            if isinstance(e.cause, compat_HTTPError) and e.cause.code == 500:
 | 
			
		||||
                response = self._parse_json(e.cause.read().decode('utf-8'), None)
 | 
			
		||||
                self._check_error(response, ('email', 'password'))
 | 
			
		||||
            raise
 | 
			
		||||
 | 
			
		||||
        self._check_error(response, 'ErrorMessage')
 | 
			
		||||
 | 
			
		||||
        return response, action_url
 | 
			
		||||
 | 
			
		||||
    def _login(self):
 | 
			
		||||
        username, password = self._get_login_info()
 | 
			
		||||
        if username is None:
 | 
			
		||||
            return
 | 
			
		||||
 | 
			
		||||
        login_form = {
 | 
			
		||||
            'username': username,
 | 
			
		||||
            'password': password,
 | 
			
		||||
            'remember': 'false',
 | 
			
		||||
            'stayPut': 'false'
 | 
			
		||||
        }
 | 
			
		||||
        request = sanitized_Request(
 | 
			
		||||
            self._LOGIN_URL, urlencode_postdata(login_form))
 | 
			
		||||
        login_page = self._download_webpage(
 | 
			
		||||
            request, None, 'Logging in as %s' % username)
 | 
			
		||||
        # Step 1: download signin page
 | 
			
		||||
        signin_page = self._download_webpage(
 | 
			
		||||
            self._SIGNIN_URL, None, 'Downloading signin page')
 | 
			
		||||
 | 
			
		||||
        # Not (yet) logged in
 | 
			
		||||
        m = re.search(r'loginResultJson\s*=\s*\'(?P<json>[^\']+)\';', login_page)
 | 
			
		||||
        if m is not None:
 | 
			
		||||
            response = m.group('json')
 | 
			
		||||
            response_json = json.loads(response)
 | 
			
		||||
            state = response_json['state']
 | 
			
		||||
 | 
			
		||||
            if state == 'notlogged':
 | 
			
		||||
                raise ExtractorError(
 | 
			
		||||
                    'Unable to login, incorrect username and/or password',
 | 
			
		||||
                    expected=True)
 | 
			
		||||
 | 
			
		||||
            # This is when we get popup:
 | 
			
		||||
            # > You're already logged in to lynda.com on two devices.
 | 
			
		||||
            # > If you log in here, we'll log you out of another device.
 | 
			
		||||
            # So, we need to confirm this.
 | 
			
		||||
            if state == 'conflicted':
 | 
			
		||||
                confirm_form = {
 | 
			
		||||
                    'username': '',
 | 
			
		||||
                    'password': '',
 | 
			
		||||
                    'resolve': 'true',
 | 
			
		||||
                    'remember': 'false',
 | 
			
		||||
                    'stayPut': 'false',
 | 
			
		||||
                }
 | 
			
		||||
                request = sanitized_Request(
 | 
			
		||||
                    self._LOGIN_URL, urlencode_postdata(confirm_form))
 | 
			
		||||
                login_page = self._download_webpage(
 | 
			
		||||
                    request, None,
 | 
			
		||||
                    'Confirming log in and log out from another device')
 | 
			
		||||
 | 
			
		||||
        if all(not re.search(p, login_page) for p in ('isLoggedIn\s*:\s*true', r'logout\.aspx', r'>Log out<')):
 | 
			
		||||
            if 'login error' in login_page:
 | 
			
		||||
                mobj = re.search(
 | 
			
		||||
                    r'(?s)<h1[^>]+class="topmost">(?P<title>[^<]+)</h1>\s*<div>(?P<description>.+?)</div>',
 | 
			
		||||
                    login_page)
 | 
			
		||||
                if mobj:
 | 
			
		||||
                    raise ExtractorError(
 | 
			
		||||
                        'lynda returned error: %s - %s'
 | 
			
		||||
                        % (mobj.group('title'), clean_html(mobj.group('description'))),
 | 
			
		||||
                        expected=True)
 | 
			
		||||
            raise ExtractorError('Unable to log in')
 | 
			
		||||
 | 
			
		||||
    def _logout(self):
 | 
			
		||||
        username, _ = self._get_login_info()
 | 
			
		||||
        if username is None:
 | 
			
		||||
        # Already logged in
 | 
			
		||||
        if any(re.search(p, signin_page) for p in (
 | 
			
		||||
                'isLoggedIn\s*:\s*true', r'logout\.aspx', r'>Log out<')):
 | 
			
		||||
            return
 | 
			
		||||
 | 
			
		||||
        self._download_webpage(
 | 
			
		||||
            'http://www.lynda.com/ajax/logout.aspx', None,
 | 
			
		||||
            'Logging out', 'Unable to log out', fatal=False)
 | 
			
		||||
        # Step 2: submit email
 | 
			
		||||
        signin_form = self._search_regex(
 | 
			
		||||
            r'(?s)(<form[^>]+data-form-name=["\']signin["\'][^>]*>.+?</form>)',
 | 
			
		||||
            signin_page, 'signin form')
 | 
			
		||||
        signin_page, signin_url = self._login_step(
 | 
			
		||||
            signin_form, self._PASSWORD_URL, {'email': username},
 | 
			
		||||
            'Submitting email', self._SIGNIN_URL)
 | 
			
		||||
 | 
			
		||||
        # Step 3: submit password
 | 
			
		||||
        password_form = signin_page['body']
 | 
			
		||||
        self._login_step(
 | 
			
		||||
            password_form, self._USER_URL, {'email': username, 'password': password},
 | 
			
		||||
            'Submitting password', signin_url)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class LyndaIE(LyndaBaseIE):
 | 
			
		||||
@@ -212,8 +213,6 @@ class LyndaCourseIE(LyndaBaseIE):
 | 
			
		||||
            'http://www.lynda.com/ajax/player?courseId=%s&type=course' % course_id,
 | 
			
		||||
            course_id, 'Downloading course JSON')
 | 
			
		||||
 | 
			
		||||
        self._logout()
 | 
			
		||||
 | 
			
		||||
        if course.get('Status') == 'NotFound':
 | 
			
		||||
            raise ExtractorError(
 | 
			
		||||
                'Course %s does not exist' % course_id, expected=True)
 | 
			
		||||
@@ -246,5 +245,6 @@ class LyndaCourseIE(LyndaBaseIE):
 | 
			
		||||
                % unaccessible_videos + self._ACCOUNT_CREDENTIALS_HINT)
 | 
			
		||||
 | 
			
		||||
        course_title = course.get('Title')
 | 
			
		||||
        course_description = course.get('Description')
 | 
			
		||||
 | 
			
		||||
        return self.playlist_result(entries, course_id, course_title)
 | 
			
		||||
        return self.playlist_result(entries, course_id, course_title, course_description)
 | 
			
		||||
 
 | 
			
		||||
@@ -11,7 +11,7 @@ class MGTVIE(InfoExtractor):
 | 
			
		||||
 | 
			
		||||
    _TEST = {
 | 
			
		||||
        'url': 'http://www.mgtv.com/v/1/290525/f/3116640.html',
 | 
			
		||||
        'md5': '',
 | 
			
		||||
        'md5': '1bdadcf760a0b90946ca68ee9a2db41a',
 | 
			
		||||
        'info_dict': {
 | 
			
		||||
            'id': '3116640',
 | 
			
		||||
            'ext': 'mp4',
 | 
			
		||||
@@ -20,15 +20,6 @@ class MGTVIE(InfoExtractor):
 | 
			
		||||
            'duration': 7461,
 | 
			
		||||
            'thumbnail': 're:^https?://.*\.jpg$',
 | 
			
		||||
        },
 | 
			
		||||
        'params': {
 | 
			
		||||
            'skip_download': True,  # m3u8 download
 | 
			
		||||
        },
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    _FORMAT_MAP = {
 | 
			
		||||
        '标清': ('Standard', 0),
 | 
			
		||||
        '高清': ('High', 1),
 | 
			
		||||
        '超清': ('SuperHigh', 2),
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    def _real_extract(self, url):
 | 
			
		||||
@@ -40,17 +31,27 @@ class MGTVIE(InfoExtractor):
 | 
			
		||||
 | 
			
		||||
        formats = []
 | 
			
		||||
        for idx, stream in enumerate(api_data['stream']):
 | 
			
		||||
            format_name = stream.get('name')
 | 
			
		||||
            format_id, preference = self._FORMAT_MAP.get(format_name, (None, None))
 | 
			
		||||
            format_info = self._download_json(
 | 
			
		||||
                stream['url'], video_id,
 | 
			
		||||
                note='Download video info for format %s' % format_id or '#%d' % idx)
 | 
			
		||||
            formats.append({
 | 
			
		||||
                'format_id': format_id,
 | 
			
		||||
                'url': format_info['info'],
 | 
			
		||||
                'ext': 'mp4',  # These are m3u8 playlists
 | 
			
		||||
                'preference': preference,
 | 
			
		||||
            })
 | 
			
		||||
            stream_url = stream.get('url')
 | 
			
		||||
            if not stream_url:
 | 
			
		||||
                continue
 | 
			
		||||
            tbr = int_or_none(self._search_regex(
 | 
			
		||||
                r'(\d+)\.mp4', stream_url, 'tbr', default=None))
 | 
			
		||||
 | 
			
		||||
            def extract_format(stream_url, format_id, idx, query={}):
 | 
			
		||||
                format_info = self._download_json(
 | 
			
		||||
                    stream_url, video_id,
 | 
			
		||||
                    note='Download video info for format %s' % format_id or '#%d' % idx, query=query)
 | 
			
		||||
                return {
 | 
			
		||||
                    'format_id': format_id,
 | 
			
		||||
                    'url': format_info['info'],
 | 
			
		||||
                    'ext': 'mp4',
 | 
			
		||||
                    'tbr': tbr,
 | 
			
		||||
                }
 | 
			
		||||
 | 
			
		||||
            formats.append(extract_format(
 | 
			
		||||
                stream_url, 'hls-%d' % tbr if tbr else None, idx * 2))
 | 
			
		||||
            formats.append(extract_format(stream_url.replace(
 | 
			
		||||
                '/playlist.m3u8', ''), 'http-%d' % tbr if tbr else None, idx * 2 + 1, {'pno': 1031}))
 | 
			
		||||
        self._sort_formats(formats)
 | 
			
		||||
 | 
			
		||||
        return {
 | 
			
		||||
 
 | 
			
		||||
							
								
								
									
										192
									
								
								youtube_dl/extractor/microsoftvirtualacademy.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										192
									
								
								youtube_dl/extractor/microsoftvirtualacademy.py
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,192 @@
 | 
			
		||||
from __future__ import unicode_literals
 | 
			
		||||
 | 
			
		||||
import re
 | 
			
		||||
 | 
			
		||||
from .common import InfoExtractor
 | 
			
		||||
from ..compat import (
 | 
			
		||||
    compat_xpath,
 | 
			
		||||
)
 | 
			
		||||
from ..utils import (
 | 
			
		||||
    int_or_none,
 | 
			
		||||
    parse_duration,
 | 
			
		||||
    smuggle_url,
 | 
			
		||||
    unsmuggle_url,
 | 
			
		||||
    xpath_text,
 | 
			
		||||
)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class MicrosoftVirtualAcademyBaseIE(InfoExtractor):
 | 
			
		||||
    def _extract_base_url(self, course_id, display_id):
 | 
			
		||||
        return self._download_json(
 | 
			
		||||
            'https://api-mlxprod.microsoft.com/services/products/anonymous/%s' % course_id,
 | 
			
		||||
            display_id, 'Downloading course base URL')
 | 
			
		||||
 | 
			
		||||
    def _extract_chapter_and_title(self, title):
 | 
			
		||||
        if not title:
 | 
			
		||||
            return None, None
 | 
			
		||||
        m = re.search(r'(?P<chapter>\d+)\s*\|\s*(?P<title>.+)', title)
 | 
			
		||||
        return (int(m.group('chapter')), m.group('title')) if m else (None, title)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class MicrosoftVirtualAcademyIE(MicrosoftVirtualAcademyBaseIE):
    """Extract a single Microsoft Virtual Academy video.

    Matches course-page URLs that carry a ``?l=<video id>`` query as well as
    the internal ``mva:<course_id>:<video_id>`` scheme (used by
    MicrosoftVirtualAcademyCourseIE to reference individual videos).
    """
    IE_NAME = 'mva'
    IE_DESC = 'Microsoft Virtual Academy videos'
    _VALID_URL = r'(?:%s:|https?://(?:mva\.microsoft|(?:www\.)?microsoftvirtualacademy)\.com/[^/]+/training-courses/[^/?#&]+-)(?P<course_id>\d+)(?::|\?l=)(?P<id>[\da-zA-Z]+_\d+)' % IE_NAME

    _TESTS = [{
        'url': 'https://mva.microsoft.com/en-US/training-courses/microsoft-azure-fundamentals-virtual-machines-11788?l=gfVXISmEB_6804984382',
        'md5': '7826c44fc31678b12ad8db11f6b5abb9',
        'info_dict': {
            'id': 'gfVXISmEB_6804984382',
            'ext': 'mp4',
            'title': 'Course Introduction',
            'formats': 'mincount:3',
            'subtitles': {
                'en': [{
                    'ext': 'ttml',
                }],
            },
        }
    }, {
        'url': 'mva:11788:gfVXISmEB_6804984382',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        # The course extractor smuggles 'base_url' so it is not re-downloaded
        # for every video in a playlist.
        url, smuggled_data = unsmuggle_url(url, {})

        mobj = re.match(self._VALID_URL, url)
        course_id = mobj.group('course_id')
        video_id = mobj.group('id')

        base_url = smuggled_data.get('base_url') or self._extract_base_url(course_id, video_id)

        # Per-video settings document listing media sources and subtitle
        # resources.
        settings = self._download_xml(
            '%s/content/content_%s/videosettings.xml?v=1' % (base_url, video_id),
            video_id, 'Downloading video settings XML')

        # The chapter number (if any) is discarded; only the bare title is
        # kept for a single video.
        _, title = self._extract_chapter_and_title(xpath_text(
            settings, './/Title', 'title', fatal=True))

        formats = []

        for sources in settings.findall(compat_xpath('.//MediaSources')):
            # Smooth Streaming sources are skipped; only the direct HTTP
            # sources below are used.
            if sources.get('videoType') == 'smoothstreaming':
                continue
            for source in sources.findall(compat_xpath('./MediaSource')):
                video_url = source.text
                if not video_url or not video_url.startswith('http'):
                    continue
                video_mode = source.get('videoMode')
                # videoMode looks like '720p'/'720P'; derive the height from it.
                height = int_or_none(self._search_regex(
                    r'^(\d+)[pP]$', video_mode or '', 'height', default=None))
                codec = source.get('codec')
                acodec, vcodec = [None] * 2
                if codec:
                    # 'codec' is either 'acodec,vcodec' or a lone vcodec.
                    codecs = codec.split(',')
                    if len(codecs) == 2:
                        acodec, vcodec = codecs
                    elif len(codecs) == 1:
                        vcodec = codecs[0]
                formats.append({
                    'url': video_url,
                    'format_id': video_mode,
                    'height': height,
                    'acodec': acodec,
                    'vcodec': vcodec,
                })
        self._sort_formats(formats)

        subtitles = {}
        for source in settings.findall(compat_xpath('.//MarkerResourceSource')):
            subtitle_url = source.text
            if not subtitle_url:
                continue
            # All subtitle tracks are registered under 'en'; the settings XML
            # exposes no language information here.
            subtitles.setdefault('en', []).append({
                'url': '%s/%s' % (base_url, subtitle_url),
                'ext': source.get('type'),
            })

        return {
            'id': video_id,
            'title': title,
            'subtitles': subtitles,
            'formats': formats
        }
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class MicrosoftVirtualAcademyCourseIE(MicrosoftVirtualAcademyBaseIE):
    """Extract a whole Microsoft Virtual Academy course as a playlist.

    Each video item is emitted as a transparent ``mva:<course_id>:<item_id>``
    URL handled by MicrosoftVirtualAcademyIE, with the course base URL
    smuggled along so it is only resolved once.
    """
    IE_NAME = 'mva:course'
    IE_DESC = 'Microsoft Virtual Academy courses'
    _VALID_URL = r'(?:%s:|https?://(?:mva\.microsoft|(?:www\.)?microsoftvirtualacademy)\.com/[^/]+/training-courses/(?P<display_id>[^/?#&]+)-)(?P<id>\d+)' % IE_NAME

    _TESTS = [{
        'url': 'https://mva.microsoft.com/en-US/training-courses/microsoft-azure-fundamentals-virtual-machines-11788',
        'info_dict': {
            'id': '11788',
            'title': 'Microsoft Azure Fundamentals: Virtual Machines',
        },
        'playlist_count': 36,
    }, {
        # with emphasized chapters
        'url': 'https://mva.microsoft.com/en-US/training-courses/developing-windows-10-games-with-construct-2-16335',
        'info_dict': {
            'id': '16335',
            'title': 'Developing Windows 10 Games with Construct 2',
        },
        'playlist_count': 10,
    }, {
        'url': 'https://www.microsoftvirtualacademy.com/en-US/training-courses/microsoft-azure-fundamentals-virtual-machines-11788',
        'only_matching': True,
    }, {
        'url': 'mva:course:11788',
        'only_matching': True,
    }]

    @classmethod
    def suitable(cls, url):
        # Course URLs are a prefix of single-video URLs, so defer to the
        # single-video extractor whenever it matches.
        return False if MicrosoftVirtualAcademyIE.suitable(url) else super(
            MicrosoftVirtualAcademyCourseIE, cls).suitable(url)

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        course_id = mobj.group('id')
        display_id = mobj.group('display_id')

        base_url = self._extract_base_url(course_id, display_id)

        # IMS manifest describing the course structure (chapters and items).
        manifest = self._download_json(
            '%s/imsmanifestlite.json' % base_url,
            display_id, 'Downloading course manifest JSON')['manifest']

        organization = manifest['organizations']['organization'][0]

        entries = []
        for chapter in organization['item']:
            # Chapter titles look like '<number> | <title>'.
            chapter_number, chapter_title = self._extract_chapter_and_title(chapter.get('title'))
            chapter_id = chapter.get('@identifier')
            for item in chapter.get('item', []):
                item_id = item.get('@identifier')
                if not item_id:
                    continue
                metadata = item.get('resource', {}).get('metadata') or {}
                # Courses also contain non-video items (e.g. assessments);
                # only actual videos are kept.
                if metadata.get('learningresourcetype') != 'Video':
                    continue
                _, title = self._extract_chapter_and_title(item.get('title'))
                duration = parse_duration(metadata.get('duration'))
                description = metadata.get('description')
                entries.append({
                    '_type': 'url_transparent',
                    # Smuggle base_url so MicrosoftVirtualAcademyIE can skip
                    # the base-URL lookup for every entry.
                    'url': smuggle_url(
                        'mva:%s:%s' % (course_id, item_id), {'base_url': base_url}),
                    'title': title,
                    'description': description,
                    'duration': duration,
                    'chapter': chapter_title,
                    'chapter_number': chapter_number,
                    'chapter_id': chapter_id,
                })

        title = organization.get('title') or manifest.get('metadata', {}).get('title')

        return self.playlist_result(entries, course_id, title)
 | 
			
		||||
@@ -67,6 +67,23 @@ class NBCIE(InfoExtractor):
 | 
			
		||||
            # This video has expired but with an escaped embedURL
 | 
			
		||||
            'url': 'http://www.nbc.com/parenthood/episode-guide/season-5/just-like-at-home/515',
 | 
			
		||||
            'only_matching': True,
 | 
			
		||||
        },
 | 
			
		||||
        {
 | 
			
		||||
            # HLS streams requires the 'hdnea3' cookie
 | 
			
		||||
            'url': 'http://www.nbc.com/Kings/video/goliath/n1806',
 | 
			
		||||
            'info_dict': {
 | 
			
		||||
                'id': 'n1806',
 | 
			
		||||
                'ext': 'mp4',
 | 
			
		||||
                'title': 'Goliath',
 | 
			
		||||
                'description': 'When an unknown soldier saves the life of the King\'s son in battle, he\'s thrust into the limelight and politics of the kingdom.',
 | 
			
		||||
                'timestamp': 1237100400,
 | 
			
		||||
                'upload_date': '20090315',
 | 
			
		||||
                'uploader': 'NBCU-COM',
 | 
			
		||||
            },
 | 
			
		||||
            'params': {
 | 
			
		||||
                'skip_download': True,
 | 
			
		||||
            },
 | 
			
		||||
            'skip': 'Only works from US',
 | 
			
		||||
        }
 | 
			
		||||
    ]
 | 
			
		||||
 | 
			
		||||
@@ -249,6 +266,11 @@ class NBCNewsIE(ThePlatformIE):
 | 
			
		||||
            'url': 'http://www.nbcnews.com/watch/dateline/full-episode--deadly-betrayal-386250819952',
 | 
			
		||||
            'only_matching': True,
 | 
			
		||||
        },
 | 
			
		||||
        {
 | 
			
		||||
            # From http://www.vulture.com/2016/06/letterman-couldnt-care-less-about-late-night.html
 | 
			
		||||
            'url': 'http://www.nbcnews.com/widget/video-embed/701714499682',
 | 
			
		||||
            'only_matching': True,
 | 
			
		||||
        },
 | 
			
		||||
    ]
 | 
			
		||||
 | 
			
		||||
    def _real_extract(self, url):
 | 
			
		||||
@@ -272,18 +294,17 @@ class NBCNewsIE(ThePlatformIE):
 | 
			
		||||
            webpage = self._download_webpage(url, display_id)
 | 
			
		||||
            info = None
 | 
			
		||||
            bootstrap_json = self._search_regex(
 | 
			
		||||
                r'(?m)var\s+(?:bootstrapJson|playlistData)\s*=\s*({.+});?\s*$',
 | 
			
		||||
                [r'(?m)(?:var\s+(?:bootstrapJson|playlistData)|NEWS\.videoObj)\s*=\s*({.+});?\s*$',
 | 
			
		||||
                 r'videoObj\s*:\s*({.+})', r'data-video="([^"]+)"'],
 | 
			
		||||
                webpage, 'bootstrap json', default=None)
 | 
			
		||||
            if bootstrap_json:
 | 
			
		||||
                bootstrap = self._parse_json(bootstrap_json, display_id)
 | 
			
		||||
            bootstrap = self._parse_json(
 | 
			
		||||
                bootstrap_json, display_id, transform_source=unescapeHTML)
 | 
			
		||||
            if 'results' in bootstrap:
 | 
			
		||||
                info = bootstrap['results'][0]['video']
 | 
			
		||||
            elif 'video' in bootstrap:
 | 
			
		||||
                info = bootstrap['video']
 | 
			
		||||
            else:
 | 
			
		||||
                player_instance_json = self._search_regex(
 | 
			
		||||
                    r'videoObj\s*:\s*({.+})', webpage, 'player instance', default=None)
 | 
			
		||||
                if not player_instance_json:
 | 
			
		||||
                    player_instance_json = self._html_search_regex(
 | 
			
		||||
                        r'data-video="([^"]+)"', webpage, 'video json')
 | 
			
		||||
                info = self._parse_json(player_instance_json, display_id)
 | 
			
		||||
                info = bootstrap
 | 
			
		||||
            video_id = info['mpxId']
 | 
			
		||||
            title = info['title']
 | 
			
		||||
 | 
			
		||||
 
 | 
			
		||||
@@ -1,19 +1,18 @@
 | 
			
		||||
from __future__ import unicode_literals
 | 
			
		||||
 | 
			
		||||
import re
 | 
			
		||||
 | 
			
		||||
from .common import InfoExtractor
 | 
			
		||||
from ..utils import (
 | 
			
		||||
    month_by_name,
 | 
			
		||||
    int_or_none,
 | 
			
		||||
    remove_end,
 | 
			
		||||
    unified_strdate,
 | 
			
		||||
)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class NDTVIE(InfoExtractor):
 | 
			
		||||
    _VALID_URL = r'^https?://(?:www\.)?ndtv\.com/video/player/[^/]*/[^/]*/(?P<id>[a-z0-9]+)'
 | 
			
		||||
    _VALID_URL = r'https?://(?:www\.)?ndtv\.com/video/(?:[^/]+/)+[^/?^&]+-(?P<id>\d+)'
 | 
			
		||||
 | 
			
		||||
    _TEST = {
 | 
			
		||||
        'url': 'http://www.ndtv.com/video/player/news/ndtv-exclusive-don-t-need-character-certificate-from-rahul-gandhi-says-arvind-kejriwal/300710',
 | 
			
		||||
        'url': 'http://www.ndtv.com/video/news/news/ndtv-exclusive-don-t-need-character-certificate-from-rahul-gandhi-says-arvind-kejriwal-300710',
 | 
			
		||||
        'md5': '39f992dbe5fb531c395d8bbedb1e5e88',
 | 
			
		||||
        'info_dict': {
 | 
			
		||||
            'id': '300710',
 | 
			
		||||
@@ -22,7 +21,7 @@ class NDTVIE(InfoExtractor):
 | 
			
		||||
            'description': 'md5:ab2d4b4a6056c5cb4caa6d729deabf02',
 | 
			
		||||
            'upload_date': '20131208',
 | 
			
		||||
            'duration': 1327,
 | 
			
		||||
            'thumbnail': 'http://i.ndtvimg.com/video/images/vod/medium/2013-12/big_300710_1386518307.jpg',
 | 
			
		||||
            'thumbnail': 're:https?://.*\.jpg',
 | 
			
		||||
        },
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
@@ -30,36 +29,19 @@ class NDTVIE(InfoExtractor):
 | 
			
		||||
        video_id = self._match_id(url)
 | 
			
		||||
        webpage = self._download_webpage(url, video_id)
 | 
			
		||||
 | 
			
		||||
        title = remove_end(self._og_search_title(webpage), ' - NDTV')
 | 
			
		||||
 | 
			
		||||
        filename = self._search_regex(
 | 
			
		||||
            r"__filename='([^']+)'", webpage, 'video filename')
 | 
			
		||||
        video_url = ('http://bitcast-b.bitgravity.com/ndtvod/23372/ndtv/%s' %
 | 
			
		||||
                     filename)
 | 
			
		||||
        video_url = 'http://bitcast-b.bitgravity.com/ndtvod/23372/ndtv/%s' % filename
 | 
			
		||||
 | 
			
		||||
        duration = int_or_none(self._search_regex(
 | 
			
		||||
            r"__duration='([^']+)'", webpage, 'duration', fatal=False))
 | 
			
		||||
 | 
			
		||||
        date_m = re.search(r'''(?x)
 | 
			
		||||
            <p\s+class="vod_dateline">\s*
 | 
			
		||||
                Published\s+On:\s*
 | 
			
		||||
                (?P<monthname>[A-Za-z]+)\s+(?P<day>[0-9]+),\s*(?P<year>[0-9]+)
 | 
			
		||||
            ''', webpage)
 | 
			
		||||
        upload_date = None
 | 
			
		||||
        upload_date = unified_strdate(self._html_search_meta(
 | 
			
		||||
            'publish-date', webpage, 'upload date', fatal=False))
 | 
			
		||||
 | 
			
		||||
        if date_m is not None:
 | 
			
		||||
            month = month_by_name(date_m.group('monthname'))
 | 
			
		||||
            if month is not None:
 | 
			
		||||
                upload_date = '%s%02d%02d' % (
 | 
			
		||||
                    date_m.group('year'), month, int(date_m.group('day')))
 | 
			
		||||
 | 
			
		||||
        description = self._og_search_description(webpage)
 | 
			
		||||
        READ_MORE = ' (Read more)'
 | 
			
		||||
        if description.endswith(READ_MORE):
 | 
			
		||||
            description = description[:-len(READ_MORE)]
 | 
			
		||||
 | 
			
		||||
        title = self._og_search_title(webpage)
 | 
			
		||||
        TITLE_SUFFIX = ' - NDTV'
 | 
			
		||||
        if title.endswith(TITLE_SUFFIX):
 | 
			
		||||
            title = title[:-len(TITLE_SUFFIX)]
 | 
			
		||||
        description = remove_end(self._og_search_description(webpage), ' (Read more)')
 | 
			
		||||
 | 
			
		||||
        return {
 | 
			
		||||
            'id': video_id,
 | 
			
		||||
 
 | 
			
		||||
@@ -2,8 +2,12 @@ from __future__ import unicode_literals
 | 
			
		||||
 | 
			
		||||
from .common import InfoExtractor
 | 
			
		||||
from ..utils import (
 | 
			
		||||
    sanitized_Request,
 | 
			
		||||
    clean_html,
 | 
			
		||||
    determine_ext,
 | 
			
		||||
    int_or_none,
 | 
			
		||||
    qualities,
 | 
			
		||||
    urlencode_postdata,
 | 
			
		||||
    xpath_text,
 | 
			
		||||
)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@@ -16,12 +20,12 @@ class NFBIE(InfoExtractor):
 | 
			
		||||
        'url': 'https://www.nfb.ca/film/qallunaat_why_white_people_are_funny',
 | 
			
		||||
        'info_dict': {
 | 
			
		||||
            'id': 'qallunaat_why_white_people_are_funny',
 | 
			
		||||
            'ext': 'mp4',
 | 
			
		||||
            'ext': 'flv',
 | 
			
		||||
            'title': 'Qallunaat! Why White People Are Funny ',
 | 
			
		||||
            'description': 'md5:836d8aff55e087d04d9f6df554d4e038',
 | 
			
		||||
            'description': 'md5:6b8e32dde3abf91e58857b174916620c',
 | 
			
		||||
            'duration': 3128,
 | 
			
		||||
            'creator': 'Mark Sandiford',
 | 
			
		||||
            'uploader': 'Mark Sandiford',
 | 
			
		||||
            'uploader_id': 'mark-sandiford',
 | 
			
		||||
        },
 | 
			
		||||
        'params': {
 | 
			
		||||
            # rtmp download
 | 
			
		||||
@@ -31,65 +35,78 @@ class NFBIE(InfoExtractor):
 | 
			
		||||
 | 
			
		||||
    def _real_extract(self, url):
 | 
			
		||||
        video_id = self._match_id(url)
 | 
			
		||||
        page = self._download_webpage(
 | 
			
		||||
            'https://www.nfb.ca/film/%s' % video_id, video_id,
 | 
			
		||||
            'Downloading film page')
 | 
			
		||||
 | 
			
		||||
        uploader_id = self._html_search_regex(r'<a class="director-link" href="/explore-all-directors/([^/]+)/"',
 | 
			
		||||
                                              page, 'director id', fatal=False)
 | 
			
		||||
        uploader = self._html_search_regex(r'<em class="director-name" itemprop="name">([^<]+)</em>',
 | 
			
		||||
                                           page, 'director name', fatal=False)
 | 
			
		||||
 | 
			
		||||
        request = sanitized_Request(
 | 
			
		||||
        config = self._download_xml(
 | 
			
		||||
            'https://www.nfb.ca/film/%s/player_config' % video_id,
 | 
			
		||||
            urlencode_postdata({'getConfig': 'true'}))
 | 
			
		||||
        request.add_header('Content-Type', 'application/x-www-form-urlencoded')
 | 
			
		||||
        request.add_header('X-NFB-Referer', 'http://www.nfb.ca/medias/flash/NFBVideoPlayer.swf')
 | 
			
		||||
            video_id, 'Downloading player config XML',
 | 
			
		||||
            data=urlencode_postdata({'getConfig': 'true'}),
 | 
			
		||||
            headers={
 | 
			
		||||
                'Content-Type': 'application/x-www-form-urlencoded',
 | 
			
		||||
                'X-NFB-Referer': 'http://www.nfb.ca/medias/flash/NFBVideoPlayer.swf'
 | 
			
		||||
            })
 | 
			
		||||
 | 
			
		||||
        config = self._download_xml(request, video_id, 'Downloading player config XML')
 | 
			
		||||
 | 
			
		||||
        title = None
 | 
			
		||||
        description = None
 | 
			
		||||
        thumbnail = None
 | 
			
		||||
        duration = None
 | 
			
		||||
        formats = []
 | 
			
		||||
 | 
			
		||||
        def extract_thumbnail(media):
 | 
			
		||||
            thumbnails = {}
 | 
			
		||||
            for asset in media.findall('assets/asset'):
 | 
			
		||||
                thumbnails[asset.get('quality')] = asset.find('default/url').text
 | 
			
		||||
            if not thumbnails:
 | 
			
		||||
                return None
 | 
			
		||||
            if 'high' in thumbnails:
 | 
			
		||||
                return thumbnails['high']
 | 
			
		||||
            return list(thumbnails.values())[0]
 | 
			
		||||
        title, description, thumbnail, duration, uploader, author = [None] * 6
 | 
			
		||||
        thumbnails, formats = [[]] * 2
 | 
			
		||||
        subtitles = {}
 | 
			
		||||
 | 
			
		||||
        for media in config.findall('./player/stream/media'):
 | 
			
		||||
            if media.get('type') == 'posterImage':
 | 
			
		||||
                thumbnail = extract_thumbnail(media)
 | 
			
		||||
            elif media.get('type') == 'video':
 | 
			
		||||
                duration = int(media.get('duration'))
 | 
			
		||||
                title = media.find('title').text
 | 
			
		||||
                description = media.find('description').text
 | 
			
		||||
                # It seems assets always go from lower to better quality, so no need to sort
 | 
			
		||||
                quality_key = qualities(('low', 'high'))
 | 
			
		||||
                thumbnails = []
 | 
			
		||||
                for asset in media.findall('assets/asset'):
 | 
			
		||||
                    for x in asset:
 | 
			
		||||
                    asset_url = xpath_text(asset, 'default/url', default=None)
 | 
			
		||||
                    if not asset_url:
 | 
			
		||||
                        continue
 | 
			
		||||
                    quality = asset.get('quality')
 | 
			
		||||
                    thumbnails.append({
 | 
			
		||||
                        'url': asset_url,
 | 
			
		||||
                        'id': quality,
 | 
			
		||||
                        'preference': quality_key(quality),
 | 
			
		||||
                    })
 | 
			
		||||
            elif media.get('type') == 'video':
 | 
			
		||||
                title = xpath_text(media, 'title', fatal=True)
 | 
			
		||||
                for asset in media.findall('assets/asset'):
 | 
			
		||||
                    quality = asset.get('quality')
 | 
			
		||||
                    height = int_or_none(self._search_regex(
 | 
			
		||||
                        r'^(\d+)[pP]$', quality or '', 'height', default=None))
 | 
			
		||||
                    for node in asset:
 | 
			
		||||
                        streamer = xpath_text(node, 'streamerURI', default=None)
 | 
			
		||||
                        if not streamer:
 | 
			
		||||
                            continue
 | 
			
		||||
                        play_path = xpath_text(node, 'url', default=None)
 | 
			
		||||
                        if not play_path:
 | 
			
		||||
                            continue
 | 
			
		||||
                        formats.append({
 | 
			
		||||
                            'url': x.find('streamerURI').text,
 | 
			
		||||
                            'app': x.find('streamerURI').text.split('/', 3)[3],
 | 
			
		||||
                            'play_path': x.find('url').text,
 | 
			
		||||
                            'url': streamer,
 | 
			
		||||
                            'app': streamer.split('/', 3)[3],
 | 
			
		||||
                            'play_path': play_path,
 | 
			
		||||
                            'rtmp_live': False,
 | 
			
		||||
                            'ext': 'mp4',
 | 
			
		||||
                            'format_id': '%s-%s' % (x.tag, asset.get('quality')),
 | 
			
		||||
                            'ext': 'flv',
 | 
			
		||||
                            'format_id': '%s-%s' % (node.tag, quality) if quality else node.tag,
 | 
			
		||||
                            'height': height,
 | 
			
		||||
                        })
 | 
			
		||||
                self._sort_formats(formats)
 | 
			
		||||
                description = clean_html(xpath_text(media, 'description'))
 | 
			
		||||
                uploader = xpath_text(media, 'author')
 | 
			
		||||
                duration = int_or_none(media.get('duration'))
 | 
			
		||||
                for subtitle in media.findall('./subtitles/subtitle'):
 | 
			
		||||
                    subtitle_url = xpath_text(subtitle, 'url', default=None)
 | 
			
		||||
                    if not subtitle_url:
 | 
			
		||||
                        continue
 | 
			
		||||
                    lang = xpath_text(subtitle, 'lang', default='en')
 | 
			
		||||
                    subtitles.setdefault(lang, []).append({
 | 
			
		||||
                        'url': subtitle_url,
 | 
			
		||||
                        'ext': (subtitle.get('format') or determine_ext(subtitle_url)).lower(),
 | 
			
		||||
                    })
 | 
			
		||||
 | 
			
		||||
        return {
 | 
			
		||||
            'id': video_id,
 | 
			
		||||
            'title': title,
 | 
			
		||||
            'description': description,
 | 
			
		||||
            'thumbnail': thumbnail,
 | 
			
		||||
            'thumbnails': thumbnails,
 | 
			
		||||
            'duration': duration,
 | 
			
		||||
            'creator': uploader,
 | 
			
		||||
            'uploader': uploader,
 | 
			
		||||
            'uploader_id': uploader_id,
 | 
			
		||||
            'formats': formats,
 | 
			
		||||
            'subtitles': subtitles,
 | 
			
		||||
        }
 | 
			
		||||
 
 | 
			
		||||
@@ -4,91 +4,219 @@ from __future__ import unicode_literals
 | 
			
		||||
import re
 | 
			
		||||
 | 
			
		||||
from .common import InfoExtractor
 | 
			
		||||
from ..compat import (
 | 
			
		||||
    compat_urlparse,
 | 
			
		||||
    compat_urllib_parse_unquote,
 | 
			
		||||
)
 | 
			
		||||
from ..compat import compat_urllib_parse_unquote
 | 
			
		||||
from ..utils import (
 | 
			
		||||
    determine_ext,
 | 
			
		||||
    ExtractorError,
 | 
			
		||||
    float_or_none,
 | 
			
		||||
    int_or_none,
 | 
			
		||||
    parse_age_limit,
 | 
			
		||||
    parse_duration,
 | 
			
		||||
    unified_strdate,
 | 
			
		||||
)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class NRKIE(InfoExtractor):
 | 
			
		||||
    _VALID_URL = r'(?:nrk:|https?://(?:www\.)?nrk\.no/video/PS\*)(?P<id>\d+)'
 | 
			
		||||
 | 
			
		||||
    _TESTS = [
 | 
			
		||||
        {
 | 
			
		||||
            'url': 'http://www.nrk.no/video/PS*150533',
 | 
			
		||||
            # MD5 is unstable
 | 
			
		||||
            'info_dict': {
 | 
			
		||||
                'id': '150533',
 | 
			
		||||
                'ext': 'flv',
 | 
			
		||||
                'title': 'Dompap og andre fugler i Piip-Show',
 | 
			
		||||
                'description': 'md5:d9261ba34c43b61c812cb6b0269a5c8f',
 | 
			
		||||
                'duration': 263,
 | 
			
		||||
            }
 | 
			
		||||
        },
 | 
			
		||||
        {
 | 
			
		||||
            'url': 'http://www.nrk.no/video/PS*154915',
 | 
			
		||||
            # MD5 is unstable
 | 
			
		||||
            'info_dict': {
 | 
			
		||||
                'id': '154915',
 | 
			
		||||
                'ext': 'flv',
 | 
			
		||||
                'title': 'Slik høres internett ut når du er blind',
 | 
			
		||||
                'description': 'md5:a621f5cc1bd75c8d5104cb048c6b8568',
 | 
			
		||||
                'duration': 20,
 | 
			
		||||
            }
 | 
			
		||||
        },
 | 
			
		||||
    ]
 | 
			
		||||
class NRKBaseIE(InfoExtractor):
 | 
			
		||||
    def _extract_formats(self, manifest_url, video_id, fatal=True):
 | 
			
		||||
        formats = []
 | 
			
		||||
        formats.extend(self._extract_f4m_formats(
 | 
			
		||||
            manifest_url + '?hdcore=3.5.0&plugin=aasp-3.5.0.151.81',
 | 
			
		||||
            video_id, f4m_id='hds', fatal=fatal))
 | 
			
		||||
        formats.extend(self._extract_m3u8_formats(manifest_url.replace(
 | 
			
		||||
            'akamaihd.net/z/', 'akamaihd.net/i/').replace('/manifest.f4m', '/master.m3u8'),
 | 
			
		||||
            video_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=fatal))
 | 
			
		||||
        return formats
 | 
			
		||||
 | 
			
		||||
    def _real_extract(self, url):
 | 
			
		||||
        video_id = self._match_id(url)
 | 
			
		||||
 | 
			
		||||
        data = self._download_json(
 | 
			
		||||
            'http://v8.psapi.nrk.no/mediaelement/%s' % video_id,
 | 
			
		||||
            video_id, 'Downloading media JSON')
 | 
			
		||||
            'http://%s/mediaelement/%s' % (self._API_HOST, video_id),
 | 
			
		||||
            video_id, 'Downloading mediaelement JSON')
 | 
			
		||||
 | 
			
		||||
        media_url = data.get('mediaUrl')
 | 
			
		||||
        title = data.get('fullTitle') or data.get('mainTitle') or data['title']
 | 
			
		||||
        video_id = data.get('id') or video_id
 | 
			
		||||
 | 
			
		||||
        if not media_url:
 | 
			
		||||
            if data['usageRights']['isGeoBlocked']:
 | 
			
		||||
        entries = []
 | 
			
		||||
 | 
			
		||||
        media_assets = data.get('mediaAssets')
 | 
			
		||||
        if media_assets and isinstance(media_assets, list):
 | 
			
		||||
            def video_id_and_title(idx):
 | 
			
		||||
                return ((video_id, title) if len(media_assets) == 1
 | 
			
		||||
                        else ('%s-%d' % (video_id, idx), '%s (Part %d)' % (title, idx)))
 | 
			
		||||
            for num, asset in enumerate(media_assets, 1):
 | 
			
		||||
                asset_url = asset.get('url')
 | 
			
		||||
                if not asset_url:
 | 
			
		||||
                    continue
 | 
			
		||||
                formats = self._extract_formats(asset_url, video_id, fatal=False)
 | 
			
		||||
                if not formats:
 | 
			
		||||
                    continue
 | 
			
		||||
                self._sort_formats(formats)
 | 
			
		||||
                entry_id, entry_title = video_id_and_title(num)
 | 
			
		||||
                duration = parse_duration(asset.get('duration'))
 | 
			
		||||
                subtitles = {}
 | 
			
		||||
                for subtitle in ('webVtt', 'timedText'):
 | 
			
		||||
                    subtitle_url = asset.get('%sSubtitlesUrl' % subtitle)
 | 
			
		||||
                    if subtitle_url:
 | 
			
		||||
                        subtitles.setdefault('no', []).append({
 | 
			
		||||
                            'url': compat_urllib_parse_unquote(subtitle_url)
 | 
			
		||||
                        })
 | 
			
		||||
                entries.append({
 | 
			
		||||
                    'id': asset.get('carrierId') or entry_id,
 | 
			
		||||
                    'title': entry_title,
 | 
			
		||||
                    'duration': duration,
 | 
			
		||||
                    'subtitles': subtitles,
 | 
			
		||||
                    'formats': formats,
 | 
			
		||||
                })
 | 
			
		||||
 | 
			
		||||
        if not entries:
 | 
			
		||||
            media_url = data.get('mediaUrl')
 | 
			
		||||
            if media_url:
 | 
			
		||||
                formats = self._extract_formats(media_url, video_id)
 | 
			
		||||
                self._sort_formats(formats)
 | 
			
		||||
                duration = parse_duration(data.get('duration'))
 | 
			
		||||
                entries = [{
 | 
			
		||||
                    'id': video_id,
 | 
			
		||||
                    'title': title,
 | 
			
		||||
                    'duration': duration,
 | 
			
		||||
                    'formats': formats,
 | 
			
		||||
                }]
 | 
			
		||||
 | 
			
		||||
        if not entries:
 | 
			
		||||
            if data.get('usageRights', {}).get('isGeoBlocked'):
 | 
			
		||||
                raise ExtractorError(
 | 
			
		||||
                    'NRK har ikke rettigheter til å vise dette programmet utenfor Norge',
 | 
			
		||||
                    expected=True)
 | 
			
		||||
 | 
			
		||||
        if determine_ext(media_url) == 'f4m':
 | 
			
		||||
            formats = self._extract_f4m_formats(
 | 
			
		||||
                media_url + '?hdcore=3.5.0&plugin=aasp-3.5.0.151.81', video_id, f4m_id='hds')
 | 
			
		||||
            self._sort_formats(formats)
 | 
			
		||||
        else:
 | 
			
		||||
            formats = [{
 | 
			
		||||
                'url': media_url,
 | 
			
		||||
                'ext': 'flv',
 | 
			
		||||
            }]
 | 
			
		||||
 | 
			
		||||
        duration = parse_duration(data.get('duration'))
 | 
			
		||||
        conviva = data.get('convivaStatistics') or {}
 | 
			
		||||
        series = conviva.get('seriesName') or data.get('seriesTitle')
 | 
			
		||||
        episode = conviva.get('episodeName') or data.get('episodeNumberOrDate')
 | 
			
		||||
 | 
			
		||||
        thumbnails = None
 | 
			
		||||
        images = data.get('images')
 | 
			
		||||
        if images:
 | 
			
		||||
            thumbnails = images['webImages']
 | 
			
		||||
            thumbnails.sort(key=lambda image: image['pixelWidth'])
 | 
			
		||||
            thumbnail = thumbnails[-1]['imageUrl']
 | 
			
		||||
        else:
 | 
			
		||||
            thumbnail = None
 | 
			
		||||
        if images and isinstance(images, dict):
 | 
			
		||||
            web_images = images.get('webImages')
 | 
			
		||||
            if isinstance(web_images, list):
 | 
			
		||||
                thumbnails = [{
 | 
			
		||||
                    'url': image['imageUrl'],
 | 
			
		||||
                    'width': int_or_none(image.get('width')),
 | 
			
		||||
                    'height': int_or_none(image.get('height')),
 | 
			
		||||
                } for image in web_images if image.get('imageUrl')]
 | 
			
		||||
 | 
			
		||||
        return {
 | 
			
		||||
            'id': video_id,
 | 
			
		||||
            'title': data['title'],
 | 
			
		||||
            'description': data['description'],
 | 
			
		||||
            'duration': duration,
 | 
			
		||||
            'thumbnail': thumbnail,
 | 
			
		||||
            'formats': formats,
 | 
			
		||||
        description = data.get('description')
 | 
			
		||||
 | 
			
		||||
        common_info = {
 | 
			
		||||
            'description': description,
 | 
			
		||||
            'series': series,
 | 
			
		||||
            'episode': episode,
 | 
			
		||||
            'age_limit': parse_age_limit(data.get('legalAge')),
 | 
			
		||||
            'thumbnails': thumbnails,
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
        vcodec = 'none' if data.get('mediaType') == 'Audio' else None
 | 
			
		||||
 | 
			
		||||
        # TODO: extract chapters when https://github.com/rg3/youtube-dl/pull/9409 is merged
 | 
			
		||||
 | 
			
		||||
        for entry in entries:
 | 
			
		||||
            entry.update(common_info)
 | 
			
		||||
            for f in entry['formats']:
 | 
			
		||||
                f['vcodec'] = vcodec
 | 
			
		||||
 | 
			
		||||
        return self.playlist_result(entries, video_id, title, description)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class NRKIE(NRKBaseIE):
 | 
			
		||||
    _VALID_URL = r'(?:nrk:|https?://(?:www\.)?nrk\.no/video/PS\*)(?P<id>\d+)'
 | 
			
		||||
    _API_HOST = 'v8.psapi.nrk.no'
 | 
			
		||||
    _TESTS = [{
 | 
			
		||||
        # video
 | 
			
		||||
        'url': 'http://www.nrk.no/video/PS*150533',
 | 
			
		||||
        'md5': '2f7f6eeb2aacdd99885f355428715cfa',
 | 
			
		||||
        'info_dict': {
 | 
			
		||||
            'id': '150533',
 | 
			
		||||
            'ext': 'mp4',
 | 
			
		||||
            'title': 'Dompap og andre fugler i Piip-Show',
 | 
			
		||||
            'description': 'md5:d9261ba34c43b61c812cb6b0269a5c8f',
 | 
			
		||||
            'duration': 263,
 | 
			
		||||
        }
 | 
			
		||||
    }, {
 | 
			
		||||
        # audio
 | 
			
		||||
        'url': 'http://www.nrk.no/video/PS*154915',
 | 
			
		||||
        # MD5 is unstable
 | 
			
		||||
        'info_dict': {
 | 
			
		||||
            'id': '154915',
 | 
			
		||||
            'ext': 'flv',
 | 
			
		||||
            'title': 'Slik høres internett ut når du er blind',
 | 
			
		||||
            'description': 'md5:a621f5cc1bd75c8d5104cb048c6b8568',
 | 
			
		||||
            'duration': 20,
 | 
			
		||||
        }
 | 
			
		||||
    }]
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class NRKTVIE(NRKBaseIE):
 | 
			
		||||
    IE_DESC = 'NRK TV and NRK Radio'
 | 
			
		||||
    _VALID_URL = r'https?://(?:tv|radio)\.nrk(?:super)?\.no/(?:serie/[^/]+|program)/(?P<id>[a-zA-Z]{4}\d{8})(?:/\d{2}-\d{2}-\d{4})?(?:#del=(?P<part_id>\d+))?'
 | 
			
		||||
    _API_HOST = 'psapi-we.nrk.no'
 | 
			
		||||
 | 
			
		||||
    _TESTS = [{
 | 
			
		||||
        'url': 'https://tv.nrk.no/serie/20-spoersmaal-tv/MUHH48000314/23-05-2014',
 | 
			
		||||
        'md5': '4e9ca6629f09e588ed240fb11619922a',
 | 
			
		||||
        'info_dict': {
 | 
			
		||||
            'id': 'MUHH48000314AA',
 | 
			
		||||
            'ext': 'mp4',
 | 
			
		||||
            'title': '20 spørsmål 23.05.2014',
 | 
			
		||||
            'description': 'md5:bdea103bc35494c143c6a9acdd84887a',
 | 
			
		||||
            'duration': 1741.52,
 | 
			
		||||
        },
 | 
			
		||||
    }, {
 | 
			
		||||
        'url': 'https://tv.nrk.no/program/mdfp15000514',
 | 
			
		||||
        'md5': '43d0be26663d380603a9cf0c24366531',
 | 
			
		||||
        'info_dict': {
 | 
			
		||||
            'id': 'MDFP15000514CA',
 | 
			
		||||
            'ext': 'mp4',
 | 
			
		||||
            'title': 'Grunnlovsjubiléet - Stor ståhei for ingenting 24.05.2014',
 | 
			
		||||
            'description': 'md5:89290c5ccde1b3a24bb8050ab67fe1db',
 | 
			
		||||
            'duration': 4605.08,
 | 
			
		||||
        },
 | 
			
		||||
    }, {
 | 
			
		||||
        # single playlist video
 | 
			
		||||
        'url': 'https://tv.nrk.no/serie/tour-de-ski/MSPO40010515/06-01-2015#del=2',
 | 
			
		||||
        'md5': 'adbd1dbd813edaf532b0a253780719c2',
 | 
			
		||||
        'info_dict': {
 | 
			
		||||
            'id': 'MSPO40010515-part2',
 | 
			
		||||
            'ext': 'flv',
 | 
			
		||||
            'title': 'Tour de Ski: Sprint fri teknikk, kvinner og menn 06.01.2015 (del 2:2)',
 | 
			
		||||
            'description': 'md5:238b67b97a4ac7d7b4bf0edf8cc57d26',
 | 
			
		||||
        },
 | 
			
		||||
        'skip': 'Only works from Norway',
 | 
			
		||||
    }, {
 | 
			
		||||
        'url': 'https://tv.nrk.no/serie/tour-de-ski/MSPO40010515/06-01-2015',
 | 
			
		||||
        'playlist': [{
 | 
			
		||||
            'md5': '9480285eff92d64f06e02a5367970a7a',
 | 
			
		||||
            'info_dict': {
 | 
			
		||||
                'id': 'MSPO40010515-part1',
 | 
			
		||||
                'ext': 'flv',
 | 
			
		||||
                'title': 'Tour de Ski: Sprint fri teknikk, kvinner og menn 06.01.2015 (del 1:2)',
 | 
			
		||||
                'description': 'md5:238b67b97a4ac7d7b4bf0edf8cc57d26',
 | 
			
		||||
            },
 | 
			
		||||
        }, {
 | 
			
		||||
            'md5': 'adbd1dbd813edaf532b0a253780719c2',
 | 
			
		||||
            'info_dict': {
 | 
			
		||||
                'id': 'MSPO40010515-part2',
 | 
			
		||||
                'ext': 'flv',
 | 
			
		||||
                'title': 'Tour de Ski: Sprint fri teknikk, kvinner og menn 06.01.2015 (del 2:2)',
 | 
			
		||||
                'description': 'md5:238b67b97a4ac7d7b4bf0edf8cc57d26',
 | 
			
		||||
            },
 | 
			
		||||
        }],
 | 
			
		||||
        'info_dict': {
 | 
			
		||||
            'id': 'MSPO40010515',
 | 
			
		||||
            'title': 'Tour de Ski: Sprint fri teknikk, kvinner og menn',
 | 
			
		||||
            'description': 'md5:238b67b97a4ac7d7b4bf0edf8cc57d26',
 | 
			
		||||
            'duration': 6947.52,
 | 
			
		||||
        },
 | 
			
		||||
        'skip': 'Only works from Norway',
 | 
			
		||||
    }, {
 | 
			
		||||
        'url': 'https://radio.nrk.no/serie/dagsnytt/NPUB21019315/12-07-2015#',
 | 
			
		||||
        'only_matching': True,
 | 
			
		||||
    }]
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class NRKPlaylistIE(InfoExtractor):
 | 
			
		||||
    _VALID_URL = r'https?://(?:www\.)?nrk\.no/(?!video|skole)(?:[^/]+/)+(?P<id>[^/]+)'
 | 
			
		||||
@@ -159,179 +287,3 @@ class NRKSkoleIE(InfoExtractor):
 | 
			
		||||
 | 
			
		||||
        nrk_id = self._search_regex(r'data-nrk-id=["\'](\d+)', webpage, 'nrk id')
 | 
			
		||||
        return self.url_result('nrk:%s' % nrk_id)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class NRKTVIE(InfoExtractor):
 | 
			
		||||
    IE_DESC = 'NRK TV and NRK Radio'
 | 
			
		||||
    _VALID_URL = r'(?P<baseurl>https?://(?:tv|radio)\.nrk(?:super)?\.no/)(?:serie/[^/]+|program)/(?P<id>[a-zA-Z]{4}\d{8})(?:/\d{2}-\d{2}-\d{4})?(?:#del=(?P<part_id>\d+))?'
 | 
			
		||||
 | 
			
		||||
    _TESTS = [
 | 
			
		||||
        {
 | 
			
		||||
            'url': 'https://tv.nrk.no/serie/20-spoersmaal-tv/MUHH48000314/23-05-2014',
 | 
			
		||||
            'info_dict': {
 | 
			
		||||
                'id': 'MUHH48000314',
 | 
			
		||||
                'ext': 'mp4',
 | 
			
		||||
                'title': '20 spørsmål',
 | 
			
		||||
                'description': 'md5:bdea103bc35494c143c6a9acdd84887a',
 | 
			
		||||
                'upload_date': '20140523',
 | 
			
		||||
                'duration': 1741.52,
 | 
			
		||||
            },
 | 
			
		||||
            'params': {
 | 
			
		||||
                # m3u8 download
 | 
			
		||||
                'skip_download': True,
 | 
			
		||||
            },
 | 
			
		||||
        },
 | 
			
		||||
        {
 | 
			
		||||
            'url': 'https://tv.nrk.no/program/mdfp15000514',
 | 
			
		||||
            'info_dict': {
 | 
			
		||||
                'id': 'mdfp15000514',
 | 
			
		||||
                'ext': 'mp4',
 | 
			
		||||
                'title': 'Grunnlovsjubiléet - Stor ståhei for ingenting',
 | 
			
		||||
                'description': 'md5:654c12511f035aed1e42bdf5db3b206a',
 | 
			
		||||
                'upload_date': '20140524',
 | 
			
		||||
                'duration': 4605.08,
 | 
			
		||||
            },
 | 
			
		||||
            'params': {
 | 
			
		||||
                # m3u8 download
 | 
			
		||||
                'skip_download': True,
 | 
			
		||||
            },
 | 
			
		||||
        },
 | 
			
		||||
        {
 | 
			
		||||
            # single playlist video
 | 
			
		||||
            'url': 'https://tv.nrk.no/serie/tour-de-ski/MSPO40010515/06-01-2015#del=2',
 | 
			
		||||
            'md5': 'adbd1dbd813edaf532b0a253780719c2',
 | 
			
		||||
            'info_dict': {
 | 
			
		||||
                'id': 'MSPO40010515-part2',
 | 
			
		||||
                'ext': 'flv',
 | 
			
		||||
                'title': 'Tour de Ski: Sprint fri teknikk, kvinner og menn 06.01.2015 (del 2:2)',
 | 
			
		||||
                'description': 'md5:238b67b97a4ac7d7b4bf0edf8cc57d26',
 | 
			
		||||
                'upload_date': '20150106',
 | 
			
		||||
            },
 | 
			
		||||
            'skip': 'Only works from Norway',
 | 
			
		||||
        },
 | 
			
		||||
        {
 | 
			
		||||
            'url': 'https://tv.nrk.no/serie/tour-de-ski/MSPO40010515/06-01-2015',
 | 
			
		||||
            'playlist': [
 | 
			
		||||
                {
 | 
			
		||||
                    'md5': '9480285eff92d64f06e02a5367970a7a',
 | 
			
		||||
                    'info_dict': {
 | 
			
		||||
                        'id': 'MSPO40010515-part1',
 | 
			
		||||
                        'ext': 'flv',
 | 
			
		||||
                        'title': 'Tour de Ski: Sprint fri teknikk, kvinner og menn 06.01.2015 (del 1:2)',
 | 
			
		||||
                        'description': 'md5:238b67b97a4ac7d7b4bf0edf8cc57d26',
 | 
			
		||||
                        'upload_date': '20150106',
 | 
			
		||||
                    },
 | 
			
		||||
                },
 | 
			
		||||
                {
 | 
			
		||||
                    'md5': 'adbd1dbd813edaf532b0a253780719c2',
 | 
			
		||||
                    'info_dict': {
 | 
			
		||||
                        'id': 'MSPO40010515-part2',
 | 
			
		||||
                        'ext': 'flv',
 | 
			
		||||
                        'title': 'Tour de Ski: Sprint fri teknikk, kvinner og menn 06.01.2015 (del 2:2)',
 | 
			
		||||
                        'description': 'md5:238b67b97a4ac7d7b4bf0edf8cc57d26',
 | 
			
		||||
                        'upload_date': '20150106',
 | 
			
		||||
                    },
 | 
			
		||||
                },
 | 
			
		||||
            ],
 | 
			
		||||
            'info_dict': {
 | 
			
		||||
                'id': 'MSPO40010515',
 | 
			
		||||
                'title': 'Tour de Ski: Sprint fri teknikk, kvinner og menn',
 | 
			
		||||
                'description': 'md5:238b67b97a4ac7d7b4bf0edf8cc57d26',
 | 
			
		||||
                'upload_date': '20150106',
 | 
			
		||||
                'duration': 6947.5199999999995,
 | 
			
		||||
            },
 | 
			
		||||
            'skip': 'Only works from Norway',
 | 
			
		||||
        },
 | 
			
		||||
        {
 | 
			
		||||
            'url': 'https://radio.nrk.no/serie/dagsnytt/NPUB21019315/12-07-2015#',
 | 
			
		||||
            'only_matching': True,
 | 
			
		||||
        }
 | 
			
		||||
    ]
 | 
			
		||||
 | 
			
		||||
    def _extract_f4m(self, manifest_url, video_id):
 | 
			
		||||
        return self._extract_f4m_formats(
 | 
			
		||||
            manifest_url + '?hdcore=3.1.1&plugin=aasp-3.1.1.69.124', video_id, f4m_id='hds')
 | 
			
		||||
 | 
			
		||||
    def _real_extract(self, url):
 | 
			
		||||
        mobj = re.match(self._VALID_URL, url)
 | 
			
		||||
        video_id = mobj.group('id')
 | 
			
		||||
        part_id = mobj.group('part_id')
 | 
			
		||||
        base_url = mobj.group('baseurl')
 | 
			
		||||
 | 
			
		||||
        webpage = self._download_webpage(url, video_id)
 | 
			
		||||
 | 
			
		||||
        title = self._html_search_meta(
 | 
			
		||||
            'title', webpage, 'title')
 | 
			
		||||
        description = self._html_search_meta(
 | 
			
		||||
            'description', webpage, 'description')
 | 
			
		||||
 | 
			
		||||
        thumbnail = self._html_search_regex(
 | 
			
		||||
            r'data-posterimage="([^"]+)"',
 | 
			
		||||
            webpage, 'thumbnail', fatal=False)
 | 
			
		||||
        upload_date = unified_strdate(self._html_search_meta(
 | 
			
		||||
            'rightsfrom', webpage, 'upload date', fatal=False))
 | 
			
		||||
        duration = float_or_none(self._html_search_regex(
 | 
			
		||||
            r'data-duration="([^"]+)"',
 | 
			
		||||
            webpage, 'duration', fatal=False))
 | 
			
		||||
 | 
			
		||||
        # playlist
 | 
			
		||||
        parts = re.findall(
 | 
			
		||||
            r'<a href="#del=(\d+)"[^>]+data-argument="([^"]+)">([^<]+)</a>', webpage)
 | 
			
		||||
        if parts:
 | 
			
		||||
            entries = []
 | 
			
		||||
            for current_part_id, stream_url, part_title in parts:
 | 
			
		||||
                if part_id and current_part_id != part_id:
 | 
			
		||||
                    continue
 | 
			
		||||
                video_part_id = '%s-part%s' % (video_id, current_part_id)
 | 
			
		||||
                formats = self._extract_f4m(stream_url, video_part_id)
 | 
			
		||||
                entries.append({
 | 
			
		||||
                    'id': video_part_id,
 | 
			
		||||
                    'title': part_title,
 | 
			
		||||
                    'description': description,
 | 
			
		||||
                    'thumbnail': thumbnail,
 | 
			
		||||
                    'upload_date': upload_date,
 | 
			
		||||
                    'formats': formats,
 | 
			
		||||
                })
 | 
			
		||||
            if part_id:
 | 
			
		||||
                if entries:
 | 
			
		||||
                    return entries[0]
 | 
			
		||||
            else:
 | 
			
		||||
                playlist = self.playlist_result(entries, video_id, title, description)
 | 
			
		||||
                playlist.update({
 | 
			
		||||
                    'thumbnail': thumbnail,
 | 
			
		||||
                    'upload_date': upload_date,
 | 
			
		||||
                    'duration': duration,
 | 
			
		||||
                })
 | 
			
		||||
                return playlist
 | 
			
		||||
 | 
			
		||||
        formats = []
 | 
			
		||||
 | 
			
		||||
        f4m_url = re.search(r'data-media="([^"]+)"', webpage)
 | 
			
		||||
        if f4m_url:
 | 
			
		||||
            formats.extend(self._extract_f4m(f4m_url.group(1), video_id))
 | 
			
		||||
 | 
			
		||||
        m3u8_url = re.search(r'data-hls-media="([^"]+)"', webpage)
 | 
			
		||||
        if m3u8_url:
 | 
			
		||||
            formats.extend(self._extract_m3u8_formats(m3u8_url.group(1), video_id, 'mp4', m3u8_id='hls'))
 | 
			
		||||
        self._sort_formats(formats)
 | 
			
		||||
 | 
			
		||||
        subtitles_url = self._html_search_regex(
 | 
			
		||||
            r'data-subtitlesurl\s*=\s*(["\'])(?P<url>.+?)\1',
 | 
			
		||||
            webpage, 'subtitle URL', default=None, group='url')
 | 
			
		||||
        subtitles = {}
 | 
			
		||||
        if subtitles_url:
 | 
			
		||||
            subtitles['no'] = [{
 | 
			
		||||
                'ext': 'ttml',
 | 
			
		||||
                'url': compat_urlparse.urljoin(base_url, subtitles_url),
 | 
			
		||||
            }]
 | 
			
		||||
 | 
			
		||||
        return {
 | 
			
		||||
            'id': video_id,
 | 
			
		||||
            'title': title,
 | 
			
		||||
            'description': description,
 | 
			
		||||
            'thumbnail': thumbnail,
 | 
			
		||||
            'upload_date': upload_date,
 | 
			
		||||
            'duration': duration,
 | 
			
		||||
            'formats': formats,
 | 
			
		||||
            'subtitles': subtitles,
 | 
			
		||||
        }
 | 
			
		||||
 
 | 
			
		||||
@@ -8,6 +8,7 @@ from ..utils import (
 | 
			
		||||
    float_or_none,
 | 
			
		||||
    ExtractorError,
 | 
			
		||||
    unsmuggle_url,
 | 
			
		||||
    determine_ext,
 | 
			
		||||
)
 | 
			
		||||
from ..compat import compat_urllib_parse_urlencode
 | 
			
		||||
 | 
			
		||||
@@ -15,71 +16,80 @@ from ..compat import compat_urllib_parse_urlencode
 | 
			
		||||
class OoyalaBaseIE(InfoExtractor):
 | 
			
		||||
    _PLAYER_BASE = 'http://player.ooyala.com/'
 | 
			
		||||
    _CONTENT_TREE_BASE = _PLAYER_BASE + 'player_api/v1/content_tree/'
 | 
			
		||||
    _AUTHORIZATION_URL_TEMPLATE = _PLAYER_BASE + 'sas/player_api/v1/authorization/embed_code/%s/%s?'
 | 
			
		||||
    _AUTHORIZATION_URL_TEMPLATE = _PLAYER_BASE + 'sas/player_api/v2/authorization/embed_code/%s/%s?'
 | 
			
		||||
 | 
			
		||||
    def _extract(self, content_tree_url, video_id, domain='example.org'):
 | 
			
		||||
        content_tree = self._download_json(content_tree_url, video_id)['content_tree']
 | 
			
		||||
        metadata = content_tree[list(content_tree)[0]]
 | 
			
		||||
        embed_code = metadata['embed_code']
 | 
			
		||||
        pcode = metadata.get('asset_pcode') or embed_code
 | 
			
		||||
        video_info = {
 | 
			
		||||
            'id': embed_code,
 | 
			
		||||
            'title': metadata['title'],
 | 
			
		||||
            'description': metadata.get('description'),
 | 
			
		||||
            'thumbnail': metadata.get('thumbnail_image') or metadata.get('promo_image'),
 | 
			
		||||
            'duration': float_or_none(metadata.get('duration'), 1000),
 | 
			
		||||
        }
 | 
			
		||||
        title = metadata['title']
 | 
			
		||||
 | 
			
		||||
        auth_data = self._download_json(
 | 
			
		||||
            self._AUTHORIZATION_URL_TEMPLATE % (pcode, embed_code) +
 | 
			
		||||
            compat_urllib_parse_urlencode({
 | 
			
		||||
                'domain': domain,
 | 
			
		||||
                'supportedFormats': 'mp4,rtmp,m3u8,hds',
 | 
			
		||||
            }), video_id)
 | 
			
		||||
 | 
			
		||||
        cur_auth_data = auth_data['authorization_data'][embed_code]
 | 
			
		||||
 | 
			
		||||
        urls = []
 | 
			
		||||
        formats = []
 | 
			
		||||
        for supported_format in ('mp4', 'm3u8', 'hds', 'rtmp'):
 | 
			
		||||
            auth_data = self._download_json(
 | 
			
		||||
                self._AUTHORIZATION_URL_TEMPLATE % (pcode, embed_code) +
 | 
			
		||||
                compat_urllib_parse_urlencode({
 | 
			
		||||
                    'domain': domain,
 | 
			
		||||
                    'supportedFormats': supported_format
 | 
			
		||||
                }),
 | 
			
		||||
                video_id, 'Downloading %s JSON' % supported_format)
 | 
			
		||||
 | 
			
		||||
            cur_auth_data = auth_data['authorization_data'][embed_code]
 | 
			
		||||
 | 
			
		||||
            if cur_auth_data['authorized']:
 | 
			
		||||
                for stream in cur_auth_data['streams']:
 | 
			
		||||
                    url = base64.b64decode(
 | 
			
		||||
                        stream['url']['data'].encode('ascii')).decode('utf-8')
 | 
			
		||||
                    if url in urls:
 | 
			
		||||
                        continue
 | 
			
		||||
                    urls.append(url)
 | 
			
		||||
                    delivery_type = stream['delivery_type']
 | 
			
		||||
                    if delivery_type == 'hls' or '.m3u8' in url:
 | 
			
		||||
                        formats.extend(self._extract_m3u8_formats(
 | 
			
		||||
                            url, embed_code, 'mp4', 'm3u8_native',
 | 
			
		||||
                            m3u8_id='hls', fatal=False))
 | 
			
		||||
                    elif delivery_type == 'hds' or '.f4m' in url:
 | 
			
		||||
                        formats.extend(self._extract_f4m_formats(
 | 
			
		||||
                            url + '?hdcore=3.7.0', embed_code, f4m_id='hds', fatal=False))
 | 
			
		||||
                    elif '.smil' in url:
 | 
			
		||||
                        formats.extend(self._extract_smil_formats(
 | 
			
		||||
                            url, embed_code, fatal=False))
 | 
			
		||||
                    else:
 | 
			
		||||
                        formats.append({
 | 
			
		||||
                            'url': url,
 | 
			
		||||
                            'ext': stream.get('delivery_type'),
 | 
			
		||||
                            'vcodec': stream.get('video_codec'),
 | 
			
		||||
                            'format_id': delivery_type,
 | 
			
		||||
                            'width': int_or_none(stream.get('width')),
 | 
			
		||||
                            'height': int_or_none(stream.get('height')),
 | 
			
		||||
                            'abr': int_or_none(stream.get('audio_bitrate')),
 | 
			
		||||
                            'vbr': int_or_none(stream.get('video_bitrate')),
 | 
			
		||||
                            'fps': float_or_none(stream.get('framerate')),
 | 
			
		||||
                        })
 | 
			
		||||
            else:
 | 
			
		||||
                raise ExtractorError('%s said: %s' % (
 | 
			
		||||
                    self.IE_NAME, cur_auth_data['message']), expected=True)
 | 
			
		||||
        if cur_auth_data['authorized']:
 | 
			
		||||
            for stream in cur_auth_data['streams']:
 | 
			
		||||
                s_url = base64.b64decode(
 | 
			
		||||
                    stream['url']['data'].encode('ascii')).decode('utf-8')
 | 
			
		||||
                if s_url in urls:
 | 
			
		||||
                    continue
 | 
			
		||||
                urls.append(s_url)
 | 
			
		||||
                ext = determine_ext(s_url, None)
 | 
			
		||||
                delivery_type = stream['delivery_type']
 | 
			
		||||
                if delivery_type == 'hls' or ext == 'm3u8':
 | 
			
		||||
                    formats.extend(self._extract_m3u8_formats(
 | 
			
		||||
                        s_url, embed_code, 'mp4', 'm3u8_native',
 | 
			
		||||
                        m3u8_id='hls', fatal=False))
 | 
			
		||||
                elif delivery_type == 'hds' or ext == 'f4m':
 | 
			
		||||
                    formats.extend(self._extract_f4m_formats(
 | 
			
		||||
                        s_url + '?hdcore=3.7.0', embed_code, f4m_id='hds', fatal=False))
 | 
			
		||||
                elif ext == 'smil':
 | 
			
		||||
                    formats.extend(self._extract_smil_formats(
 | 
			
		||||
                        s_url, embed_code, fatal=False))
 | 
			
		||||
                else:
 | 
			
		||||
                    formats.append({
 | 
			
		||||
                        'url': s_url,
 | 
			
		||||
                        'ext': ext or stream.get('delivery_type'),
 | 
			
		||||
                        'vcodec': stream.get('video_codec'),
 | 
			
		||||
                        'format_id': delivery_type,
 | 
			
		||||
                        'width': int_or_none(stream.get('width')),
 | 
			
		||||
                        'height': int_or_none(stream.get('height')),
 | 
			
		||||
                        'abr': int_or_none(stream.get('audio_bitrate')),
 | 
			
		||||
                        'vbr': int_or_none(stream.get('video_bitrate')),
 | 
			
		||||
                        'fps': float_or_none(stream.get('framerate')),
 | 
			
		||||
                    })
 | 
			
		||||
        else:
 | 
			
		||||
            raise ExtractorError('%s said: %s' % (
 | 
			
		||||
                self.IE_NAME, cur_auth_data['message']), expected=True)
 | 
			
		||||
        self._sort_formats(formats)
 | 
			
		||||
 | 
			
		||||
        video_info['formats'] = formats
 | 
			
		||||
        return video_info
 | 
			
		||||
        subtitles = {}
 | 
			
		||||
        for lang, sub in metadata.get('closed_captions_vtt', {}).get('captions', {}).items():
 | 
			
		||||
            sub_url = sub.get('url')
 | 
			
		||||
            if not sub_url:
 | 
			
		||||
                continue
 | 
			
		||||
            subtitles[lang] = [{
 | 
			
		||||
                'url': sub_url,
 | 
			
		||||
            }]
 | 
			
		||||
 | 
			
		||||
        return {
 | 
			
		||||
            'id': embed_code,
 | 
			
		||||
            'title': title,
 | 
			
		||||
            'description': metadata.get('description'),
 | 
			
		||||
            'thumbnail': metadata.get('thumbnail_image') or metadata.get('promo_image'),
 | 
			
		||||
            'duration': float_or_none(metadata.get('duration'), 1000),
 | 
			
		||||
            'subtitles': subtitles,
 | 
			
		||||
            'formats': formats,
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class OoyalaIE(OoyalaBaseIE):
 | 
			
		||||
 
 | 
			
		||||
@@ -14,7 +14,7 @@ from ..utils import (
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class OpenloadIE(InfoExtractor):
 | 
			
		||||
    _VALID_URL = r'https://openload.(?:co|io)/(?:f|embed)/(?P<id>[a-zA-Z0-9-]+)'
 | 
			
		||||
    _VALID_URL = r'https://openload.(?:co|io)/(?:f|embed)/(?P<id>[a-zA-Z0-9-_]+)'
 | 
			
		||||
 | 
			
		||||
    _TESTS = [{
 | 
			
		||||
        'url': 'https://openload.co/f/kUEfGclsU9o',
 | 
			
		||||
@@ -31,6 +31,9 @@ class OpenloadIE(InfoExtractor):
 | 
			
		||||
    }, {
 | 
			
		||||
        'url': 'https://openload.io/f/ZAn6oz-VZGE/',
 | 
			
		||||
        'only_matching': True,
 | 
			
		||||
    }, {
 | 
			
		||||
        'url': 'https://openload.co/f/_-ztPaZtMhM/',
 | 
			
		||||
        'only_matching': True,
 | 
			
		||||
    }, {
 | 
			
		||||
        # unavailable via https://openload.co/f/Sxz5sADo82g/, different layout
 | 
			
		||||
        # for title and ext
 | 
			
		||||
@@ -100,7 +103,7 @@ class OpenloadIE(InfoExtractor):
 | 
			
		||||
            raise ExtractorError('File not found', expected=True)
 | 
			
		||||
 | 
			
		||||
        code = self._search_regex(
 | 
			
		||||
            r'<video[^>]+>\s*<script[^>]+>([^<]+)</script>',
 | 
			
		||||
            r'</video>\s*</div>\s*<script[^>]+>[^>]+</script>\s*<script[^>]+>([^<]+)</script>',
 | 
			
		||||
            webpage, 'JS code')
 | 
			
		||||
 | 
			
		||||
        decoded = self.openload_decode(code)
 | 
			
		||||
 
 | 
			
		||||
@@ -12,8 +12,8 @@ from ..utils import (
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class OraTVIE(InfoExtractor):
 | 
			
		||||
    _VALID_URL = r'https?://(?:www\.)?ora\.tv/([^/]+/)*(?P<id>[^/\?#]+)'
 | 
			
		||||
    _TEST = {
 | 
			
		||||
    _VALID_URL = r'https?://(?:www\.)?(?:ora\.tv|unsafespeech\.com)/([^/]+/)*(?P<id>[^/\?#]+)'
 | 
			
		||||
    _TESTS = [{
 | 
			
		||||
        'url': 'https://www.ora.tv/larrykingnow/2015/12/16/vine-youtube-stars-zach-king-king-bach-on-their-viral-videos-0_36jupg6090pq',
 | 
			
		||||
        'md5': 'fa33717591c631ec93b04b0e330df786',
 | 
			
		||||
        'info_dict': {
 | 
			
		||||
@@ -22,7 +22,10 @@ class OraTVIE(InfoExtractor):
 | 
			
		||||
            'title': 'Vine & YouTube Stars Zach King & King Bach On Their Viral Videos!',
 | 
			
		||||
            'description': 'md5:ebbc5b1424dd5dba7be7538148287ac1',
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
    }, {
 | 
			
		||||
        'url': 'http://www.unsafespeech.com/video/2016/5/10/student-self-censorship-and-the-thought-police-on-university-campuses-0_6622bnkppw4d',
 | 
			
		||||
        'only_matching': True,
 | 
			
		||||
    }]
 | 
			
		||||
 | 
			
		||||
    def _real_extract(self, url):
 | 
			
		||||
        display_id = self._match_id(url)
 | 
			
		||||
 
 | 
			
		||||
@@ -2,11 +2,15 @@
 | 
			
		||||
from __future__ import unicode_literals
 | 
			
		||||
 | 
			
		||||
from .common import InfoExtractor
 | 
			
		||||
from ..utils import parse_iso8601
 | 
			
		||||
from ..utils import (
 | 
			
		||||
    parse_iso8601,
 | 
			
		||||
    unescapeHTML,
 | 
			
		||||
)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class PeriscopeIE(InfoExtractor):
 | 
			
		||||
    IE_DESC = 'Periscope'
 | 
			
		||||
    IE_NAME = 'periscope'
 | 
			
		||||
    _VALID_URL = r'https?://(?:www\.)?periscope\.tv/[^/]+/(?P<id>[^/?#]+)'
 | 
			
		||||
    # Alive example URLs can be found here http://onperiscope.com/
 | 
			
		||||
    _TESTS = [{
 | 
			
		||||
@@ -41,8 +45,11 @@ class PeriscopeIE(InfoExtractor):
 | 
			
		||||
        broadcast = broadcast_data['broadcast']
 | 
			
		||||
        status = broadcast['status']
 | 
			
		||||
 | 
			
		||||
        uploader = broadcast.get('user_display_name') or broadcast_data.get('user', {}).get('display_name')
 | 
			
		||||
        uploader_id = broadcast.get('user_id') or broadcast_data.get('user', {}).get('id')
 | 
			
		||||
        user = broadcast_data.get('user', {})
 | 
			
		||||
 | 
			
		||||
        uploader = broadcast.get('user_display_name') or user.get('display_name')
 | 
			
		||||
        uploader_id = (broadcast.get('username') or user.get('username') or
 | 
			
		||||
                       broadcast.get('user_id') or user.get('id'))
 | 
			
		||||
 | 
			
		||||
        title = '%s - %s' % (uploader, status) if uploader else status
 | 
			
		||||
        state = broadcast.get('state').lower()
 | 
			
		||||
@@ -79,3 +86,43 @@ class PeriscopeIE(InfoExtractor):
 | 
			
		||||
            'thumbnails': thumbnails,
 | 
			
		||||
            'formats': formats,
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class PeriscopeUserIE(InfoExtractor):
 | 
			
		||||
    _VALID_URL = r'https?://www\.periscope\.tv/(?P<id>[^/]+)/?$'
 | 
			
		||||
    IE_DESC = 'Periscope user videos'
 | 
			
		||||
    IE_NAME = 'periscope:user'
 | 
			
		||||
 | 
			
		||||
    _TEST = {
 | 
			
		||||
        'url': 'https://www.periscope.tv/LularoeHusbandMike/',
 | 
			
		||||
        'info_dict': {
 | 
			
		||||
            'id': 'LularoeHusbandMike',
 | 
			
		||||
            'title': 'LULAROE HUSBAND MIKE',
 | 
			
		||||
            'description': 'md5:6cf4ec8047768098da58e446e82c82f0',
 | 
			
		||||
        },
 | 
			
		||||
        # Periscope only shows videos in the last 24 hours, so it's possible to
 | 
			
		||||
        # get 0 videos
 | 
			
		||||
        'playlist_mincount': 0,
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    def _real_extract(self, url):
 | 
			
		||||
        user_id = self._match_id(url)
 | 
			
		||||
 | 
			
		||||
        webpage = self._download_webpage(url, user_id)
 | 
			
		||||
 | 
			
		||||
        data_store = self._parse_json(
 | 
			
		||||
            unescapeHTML(self._search_regex(
 | 
			
		||||
                r'data-store=(["\'])(?P<data>.+?)\1',
 | 
			
		||||
                webpage, 'data store', default='{}', group='data')),
 | 
			
		||||
            user_id)
 | 
			
		||||
 | 
			
		||||
        user = data_store.get('User', {}).get('user', {})
 | 
			
		||||
        title = user.get('display_name') or user.get('username')
 | 
			
		||||
        description = user.get('description')
 | 
			
		||||
 | 
			
		||||
        entries = [
 | 
			
		||||
            self.url_result(
 | 
			
		||||
                'https://www.periscope.tv/%s/%s' % (user_id, broadcast['id']))
 | 
			
		||||
            for broadcast in data_store.get('UserBroadcastHistory', {}).get('broadcasts', [])]
 | 
			
		||||
 | 
			
		||||
        return self.playlist_result(entries, user_id, title, description)
 | 
			
		||||
 
 | 
			
		||||
@@ -4,9 +4,8 @@ import re
 | 
			
		||||
 | 
			
		||||
from .common import InfoExtractor
 | 
			
		||||
from ..utils import (
 | 
			
		||||
    xpath_text,
 | 
			
		||||
    dict_get,
 | 
			
		||||
    float_or_none,
 | 
			
		||||
    int_or_none,
 | 
			
		||||
)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@@ -23,6 +22,19 @@ class PlaywireIE(InfoExtractor):
 | 
			
		||||
            'duration': 145.94,
 | 
			
		||||
        },
 | 
			
		||||
    }, {
 | 
			
		||||
        # m3u8 in f4m
 | 
			
		||||
        'url': 'http://config.playwire.com/21772/videos/v2/4840492/zeus.json',
 | 
			
		||||
        'info_dict': {
 | 
			
		||||
            'id': '4840492',
 | 
			
		||||
            'ext': 'mp4',
 | 
			
		||||
            'title': 'ITV EL SHOW FULL',
 | 
			
		||||
        },
 | 
			
		||||
        'params': {
 | 
			
		||||
            # m3u8 download
 | 
			
		||||
            'skip_download': True,
 | 
			
		||||
        },
 | 
			
		||||
    }, {
 | 
			
		||||
        # Multiple resolutions while bitrates missing
 | 
			
		||||
        'url': 'http://cdn.playwire.com/11625/embed/85228.html',
 | 
			
		||||
        'only_matching': True,
 | 
			
		||||
    }, {
 | 
			
		||||
@@ -48,25 +60,10 @@ class PlaywireIE(InfoExtractor):
 | 
			
		||||
        thumbnail = content.get('poster')
 | 
			
		||||
        src = content['media']['f4m']
 | 
			
		||||
 | 
			
		||||
        f4m = self._download_xml(src, video_id)
 | 
			
		||||
        base_url = xpath_text(f4m, './{http://ns.adobe.com/f4m/1.0}baseURL', 'base url', fatal=True)
 | 
			
		||||
        formats = []
 | 
			
		||||
        for media in f4m.findall('./{http://ns.adobe.com/f4m/1.0}media'):
 | 
			
		||||
            media_url = media.get('url')
 | 
			
		||||
            if not media_url:
 | 
			
		||||
                continue
 | 
			
		||||
            tbr = int_or_none(media.get('bitrate'))
 | 
			
		||||
            width = int_or_none(media.get('width'))
 | 
			
		||||
            height = int_or_none(media.get('height'))
 | 
			
		||||
            f = {
 | 
			
		||||
                'url': '%s/%s' % (base_url, media.attrib['url']),
 | 
			
		||||
                'tbr': tbr,
 | 
			
		||||
                'width': width,
 | 
			
		||||
                'height': height,
 | 
			
		||||
            }
 | 
			
		||||
            if not (tbr or width or height):
 | 
			
		||||
                f['quality'] = 1 if '-hd.' in media_url else 0
 | 
			
		||||
            formats.append(f)
 | 
			
		||||
        formats = self._extract_f4m_formats(src, video_id, m3u8_id='hls')
 | 
			
		||||
        for a_format in formats:
 | 
			
		||||
            if not dict_get(a_format, ['tbr', 'width', 'height']):
 | 
			
		||||
                a_format['quality'] = 1 if '-hd.' in a_format['url'] else 0
 | 
			
		||||
        self._sort_formats(formats)
 | 
			
		||||
 | 
			
		||||
        return {
 | 
			
		||||
 
 | 
			
		||||
							
								
								
									
										130
									
								
								youtube_dl/extractor/radiocanada.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										130
									
								
								youtube_dl/extractor/radiocanada.py
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,130 @@
 | 
			
		||||
# coding: utf-8
 | 
			
		||||
from __future__ import unicode_literals
 | 
			
		||||
 | 
			
		||||
import re
 | 
			
		||||
 | 
			
		||||
from .common import InfoExtractor
 | 
			
		||||
from ..utils import (
 | 
			
		||||
    xpath_text,
 | 
			
		||||
    find_xpath_attr,
 | 
			
		||||
    determine_ext,
 | 
			
		||||
    int_or_none,
 | 
			
		||||
    unified_strdate,
 | 
			
		||||
    xpath_element,
 | 
			
		||||
    ExtractorError,
 | 
			
		||||
)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class RadioCanadaIE(InfoExtractor):
    """Extractor for Radio-Canada media-console videos (ici.radio-canada.ca)."""
    IE_NAME = 'radiocanada'
    _VALID_URL = r'(?:radiocanada:|https?://ici\.radio-canada\.ca/widgets/mediaconsole/)(?P<app_code>[^:/]+)[:/](?P<id>[0-9]+)'
    _TEST = {
        'url': 'http://ici.radio-canada.ca/widgets/mediaconsole/medianet/7184272',
        'info_dict': {
            'id': '7184272',
            'ext': 'flv',
            'title': 'Le parcours du tireur capté sur vidéo',
            'description': 'Images des caméras de surveillance fournies par la GRC montrant le parcours du tireur d\'Ottawa',
            'upload_date': '20141023',
        },
        'params': {
            # rtmp download
            'skip_download': True,
        },
    }

    def _real_extract(self, url):
        app_code, video_id = re.match(self._VALID_URL, url).groups()

        formats = []
        # TODO: extract m3u8 and f4m formats
        # m3u8 formats can be extracted using ipad device_type return 403 error code when ffmpeg try to download segements
        # f4m formats can be extracted using flashhd device_type but they produce unplayable file
        for device_type in ('flash',):
            v_data = self._download_xml(
                'http://api.radio-canada.ca/validationMedia/v1/Validation.ashx',
                video_id, note='Downloading %s XML' % device_type, query={
                    'appCode': app_code,
                    'idMedia': video_id,
                    'connectionType': 'broadband',
                    'multibitrate': 'true',
                    'deviceType': device_type,
                    # paysJ391wsHjbOJwvCs26toz and bypasslock are used to bypass geo-restriction
                    'paysJ391wsHjbOJwvCs26toz': 'CA',
                    'bypasslock': 'NZt5K62gRqfc',
                })
            v_url = xpath_text(v_data, 'url')
            if not v_url:
                continue
            if v_url == 'null':
                # API signals errors by returning the literal string 'null'
                raise ExtractorError('%s said: %s' % (
                    self.IE_NAME, xpath_text(v_data, 'message')), expected=True)
            ext = determine_ext(v_url)
            if ext == 'm3u8':
                formats.extend(self._extract_m3u8_formats(
                    v_url, video_id, 'mp4', m3u8_id='hls', fatal=False))
            elif ext == 'f4m':
                formats.extend(self._extract_f4m_formats(v_url, video_id, f4m_id='hds', fatal=False))
            else:
                # RTMP: the validation URL embeds one bitrate; substitute each
                # advertised bitrate into it to expose every variant.
                # (removed a redundant second determine_ext(v_url) call here)
                bitrates = xpath_element(v_data, 'bitrates')
                for url_e in bitrates.findall('url'):
                    tbr = int_or_none(url_e.get('bitrate'))
                    if not tbr:
                        continue
                    formats.append({
                        'format_id': 'rtmp-%d' % tbr,
                        'url': re.sub(r'\d+\.%s' % ext, '%d.%s' % (tbr, ext), v_url),
                        'ext': 'flv',
                        'protocol': 'rtmp',
                        'width': int_or_none(url_e.get('width')),
                        'height': int_or_none(url_e.get('height')),
                        'tbr': tbr,
                    })
        self._sort_formats(formats)

        metadata = self._download_xml(
            'http://api.radio-canada.ca/metaMedia/v1/index.ashx',
            video_id, note='Downloading metadata XML', query={
                'appCode': app_code,
                'idMedia': video_id,
            })

        def get_meta(name):
            # Look up a <Meta name="..."> element; None when absent.
            el = find_xpath_attr(metadata, './/Meta', 'name', name)
            return el.text if el is not None else None

        return {
            'id': video_id,
            'title': get_meta('Title'),
            'description': get_meta('Description') or get_meta('ShortDescription'),
            'thumbnail': get_meta('imageHR') or get_meta('imageMR') or get_meta('imageBR'),
            'duration': int_or_none(get_meta('length')),
            'series': get_meta('Emission'),
            # Bug fix: previously passed the literal strings 'SrcSaison' /
            # 'SrcEpisode' to int_or_none instead of the metadata values.
            'season_number': int_or_none(get_meta('SrcSaison')),
            'episode_number': int_or_none(get_meta('SrcEpisode')),
            'upload_date': unified_strdate(get_meta('Date')),
            'formats': formats,
        }
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class RadioCanadaAudioVideoIE(InfoExtractor):
    """Resolves ici.radio-canada.ca audio-video pages to radiocanada: URLs."""
    # Bug fix: this was a bare string expression statement with no effect;
    # it was clearly intended as the IE_NAME assignment.
    IE_NAME = 'radiocanada:audiovideo'
    _VALID_URL = r'https?://ici\.radio-canada\.ca/audio-video/media-(?P<id>[0-9]+)'
    _TEST = {
        'url': 'http://ici.radio-canada.ca/audio-video/media-7527184/barack-obama-au-vietnam',
        'info_dict': {
            'id': '7527184',
            'ext': 'flv',
            'title': 'Barack Obama au Vietnam',
            'description': 'Les États-Unis lèvent l\'embargo sur la vente d\'armes qui datait de la guerre du Vietnam',
            'upload_date': '20160523',
        },
        'params': {
            # rtmp download
            'skip_download': True,
        },
    }

    def _real_extract(self, url):
        # Delegate to RadioCanadaIE via its internal URL scheme.
        return self.url_result('radiocanada:medianet:%s' % self._match_id(url))
 | 
			
		||||
@@ -1,7 +1,12 @@
 | 
			
		||||
from __future__ import unicode_literals
 | 
			
		||||
 | 
			
		||||
from .common import InfoExtractor
 | 
			
		||||
from ..utils import ExtractorError
 | 
			
		||||
from ..utils import (
 | 
			
		||||
    ExtractorError,
 | 
			
		||||
    int_or_none,
 | 
			
		||||
    str_to_int,
 | 
			
		||||
    unified_strdate,
 | 
			
		||||
)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class RedTubeIE(InfoExtractor):
 | 
			
		||||
@@ -13,6 +18,9 @@ class RedTubeIE(InfoExtractor):
 | 
			
		||||
            'id': '66418',
 | 
			
		||||
            'ext': 'mp4',
 | 
			
		||||
            'title': 'Sucked on a toilet',
 | 
			
		||||
            'upload_date': '20120831',
 | 
			
		||||
            'duration': 596,
 | 
			
		||||
            'view_count': int,
 | 
			
		||||
            'age_limit': 18,
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
@@ -24,12 +32,39 @@ class RedTubeIE(InfoExtractor):
 | 
			
		||||
        if any(s in webpage for s in ['video-deleted-info', '>This video has been removed']):
 | 
			
		||||
            raise ExtractorError('Video %s has been removed' % video_id, expected=True)
 | 
			
		||||
 | 
			
		||||
        video_url = self._html_search_regex(
 | 
			
		||||
            r'<source src="(.+?)" type="video/mp4">', webpage, 'video URL')
 | 
			
		||||
        video_title = self._html_search_regex(
 | 
			
		||||
            r'<h1 class="videoTitle[^"]*">(.+?)</h1>',
 | 
			
		||||
            webpage, 'title')
 | 
			
		||||
        video_thumbnail = self._og_search_thumbnail(webpage)
 | 
			
		||||
        title = self._html_search_regex(
 | 
			
		||||
            (r'<h1 class="videoTitle[^"]*">(?P<title>.+?)</h1>',
 | 
			
		||||
             r'videoTitle\s*:\s*(["\'])(?P<title>)\1'),
 | 
			
		||||
            webpage, 'title', group='title')
 | 
			
		||||
 | 
			
		||||
        formats = []
 | 
			
		||||
        sources = self._parse_json(
 | 
			
		||||
            self._search_regex(
 | 
			
		||||
                r'sources\s*:\s*({.+?})', webpage, 'source', default='{}'),
 | 
			
		||||
            video_id, fatal=False)
 | 
			
		||||
        if sources and isinstance(sources, dict):
 | 
			
		||||
            for format_id, format_url in sources.items():
 | 
			
		||||
                if format_url:
 | 
			
		||||
                    formats.append({
 | 
			
		||||
                        'url': format_url,
 | 
			
		||||
                        'format_id': format_id,
 | 
			
		||||
                        'height': int_or_none(format_id),
 | 
			
		||||
                    })
 | 
			
		||||
        else:
 | 
			
		||||
            video_url = self._html_search_regex(
 | 
			
		||||
                r'<source src="(.+?)" type="video/mp4">', webpage, 'video URL')
 | 
			
		||||
            formats.append({'url': video_url})
 | 
			
		||||
        self._sort_formats(formats)
 | 
			
		||||
 | 
			
		||||
        thumbnail = self._og_search_thumbnail(webpage)
 | 
			
		||||
        upload_date = unified_strdate(self._search_regex(
 | 
			
		||||
            r'<span[^>]+class="added-time"[^>]*>ADDED ([^<]+)<',
 | 
			
		||||
            webpage, 'upload date', fatal=False))
 | 
			
		||||
        duration = int_or_none(self._search_regex(
 | 
			
		||||
            r'videoDuration\s*:\s*(\d+)', webpage, 'duration', fatal=False))
 | 
			
		||||
        view_count = str_to_int(self._search_regex(
 | 
			
		||||
            r'<span[^>]*>VIEWS</span></td>\s*<td>([\d,.]+)',
 | 
			
		||||
            webpage, 'view count', fatal=False))
 | 
			
		||||
 | 
			
		||||
        # No self-labeling, but they describe themselves as
 | 
			
		||||
        # "Home of Videos Porno"
 | 
			
		||||
@@ -37,9 +72,12 @@ class RedTubeIE(InfoExtractor):
 | 
			
		||||
 | 
			
		||||
        return {
 | 
			
		||||
            'id': video_id,
 | 
			
		||||
            'url': video_url,
 | 
			
		||||
            'ext': 'mp4',
 | 
			
		||||
            'title': video_title,
 | 
			
		||||
            'thumbnail': video_thumbnail,
 | 
			
		||||
            'title': title,
 | 
			
		||||
            'thumbnail': thumbnail,
 | 
			
		||||
            'upload_date': upload_date,
 | 
			
		||||
            'duration': duration,
 | 
			
		||||
            'view_count': view_count,
 | 
			
		||||
            'age_limit': age_limit,
 | 
			
		||||
            'formats': formats,
 | 
			
		||||
        }
 | 
			
		||||
 
 | 
			
		||||
							
								
								
									
										69
									
								
								youtube_dl/extractor/reuters.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										69
									
								
								youtube_dl/extractor/reuters.py
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,69 @@
 | 
			
		||||
# coding: utf-8
 | 
			
		||||
from __future__ import unicode_literals
 | 
			
		||||
 | 
			
		||||
import re
 | 
			
		||||
 | 
			
		||||
from .common import InfoExtractor
 | 
			
		||||
from ..utils import (
 | 
			
		||||
    js_to_json,
 | 
			
		||||
    int_or_none,
 | 
			
		||||
    unescapeHTML,
 | 
			
		||||
)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class ReutersIE(InfoExtractor):
    """Extractor for reuters.com videos served through the yovideo player."""
    _VALID_URL = r'https?://(?:www\.)?reuters\.com/.*?\?.*?videoId=(?P<id>[0-9]+)'
    _TEST = {
        'url': 'http://www.reuters.com/video/2016/05/20/san-francisco-police-chief-resigns?videoId=368575562',
        'md5': '8015113643a0b12838f160b0b81cc2ee',
        'info_dict': {
            'id': '368575562',
            'ext': 'mp4',
            'title': 'San Francisco police chief resigns',
        }
    }

    def _real_extract(self, url):
        video_id = self._match_id(url)
        webpage = self._download_webpage(
            'http://www.reuters.com/assets/iframe/yovideo?videoId=%s' % video_id, video_id)
        # The player config is a JS object literal; normalize it to JSON text
        # and then pick fields out of it with regexes below.
        video_data = js_to_json(self._search_regex(
            r'(?s)Reuters\.yovideo\.drawPlayer\(({.*?})\);',
            webpage, 'video data'))

        def get_json_value(key, fatal=False):
            # Bug fix: the pattern template is now a raw string; the old
            # non-raw '"%s"\s*...' relied on invalid escape sequences, which
            # raise warnings (and eventually errors) on modern Python.
            return self._search_regex(r'"%s"\s*:\s*"([^"]+)"' % key, video_data, key, fatal=fatal)

        title = unescapeHTML(get_json_value('title', fatal=True))
        # The 'flv' URL encodes the yospace media id and feed id.
        mmid, fid = re.search(r',/(\d+)\?f=(\d+)', get_json_value('flv', fatal=True)).groups()

        mas_data = self._download_json(
            'http://mas-e.cds1.yospace.com/mas/%s/%s?trans=json' % (mmid, fid),
            video_id, transform_source=js_to_json)
        formats = []
        for f in mas_data:
            f_url = f.get('url')
            if not f_url:
                continue
            method = f.get('method')
            if method == 'hls':
                formats.extend(self._extract_m3u8_formats(
                    f_url, video_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False))
            else:
                container = f.get('container')
                # 'mobile' entries are 3gp regardless of the declared container
                ext = '3gp' if method == 'mobile' else container
                formats.append({
                    'format_id': ext,
                    'url': f_url,
                    'ext': ext,
                    'container': container if method != 'mobile' else None,
                })
        self._sort_formats(formats)

        return {
            'id': video_id,
            'title': title,
            'thumbnail': get_json_value('thumb'),
            'duration': int_or_none(get_json_value('seconds')),
            'formats': formats,
        }
 | 
			
		||||
@@ -13,8 +13,64 @@ from ..utils import (
 | 
			
		||||
)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class Revision3EmbedIE(InfoExtractor):
    """Extractor for Revision3 / Seeker Network embedded players."""
    IE_NAME = 'revision3:embed'
    _VALID_URL = r'(?:revision3:(?:(?P<playlist_type>[^:]+):)?|https?://(?:(?:(?:www|embed)\.)?(?:revision3|animalist)|(?:(?:api|embed)\.)?seekernetwork)\.com/player/embed\?videoId=)(?P<playlist_id>\d+)'
    _TEST = {
        'url': 'http://api.seekernetwork.com/player/embed?videoId=67558',
        'md5': '83bcd157cab89ad7318dd7b8c9cf1306',
        'info_dict': {
            'id': '67558',
            'ext': 'mp4',
            'title': 'The Pros & Cons Of Zoos',
            'description': 'Zoos are often depicted as a terrible place for animals to live, but is there any truth to this?',
            'uploader_id': 'dnews',
            'uploader': 'DNews',
        }
    }
    _API_KEY = 'ba9c741bce1b9d8e3defcc22193f3651b8867e62'

    def _real_extract(self, url):
        url_match = re.match(self._VALID_URL, url)
        playlist_id = url_match.group('playlist_id')
        # URLs without an explicit type are direct video-id lookups.
        playlist_type = url_match.group('playlist_type') or 'video_id'

        video_data = self._download_json(
            'http://revision3.com/api/getPlaylist.json', playlist_id, query={
                'api_key': self._API_KEY,
                'codecs': 'h264,vp8,theora',
                playlist_type: playlist_id,
            })['items'][0]

        formats = []
        media_map = video_data['media']
        for codec_name in media_map:
            for quality_key, quality_info in media_map[codec_name].items():
                if quality_key == 'hls':
                    formats.extend(self._extract_m3u8_formats(
                        quality_info['url'], playlist_id, 'mp4',
                        'm3u8_native', m3u8_id='hls', fatal=False))
                    continue
                formats.append({
                    'url': quality_info['url'],
                    'format_id': '%s-%s' % (codec_name, quality_key),
                    'tbr': int_or_none(quality_info.get('bitrate')),
                    'vcodec': codec_name,
                })
        self._sort_formats(formats)

        show_info = video_data.get('show', {})
        return {
            'id': playlist_id,
            'title': unescapeHTML(video_data['title']),
            'description': unescapeHTML(video_data.get('summary')),
            'uploader': show_info.get('name'),
            'uploader_id': show_info.get('slug'),
            'duration': int_or_none(video_data.get('duration')),
            'formats': formats,
        }
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class Revision3IE(InfoExtractor):
 | 
			
		||||
    _VALID_URL = r'https?://(?:www\.)?(?P<domain>(?:revision3|testtube|animalist)\.com)/(?P<id>[^/]+(?:/[^/?#]+)?)'
 | 
			
		||||
    IE_NAME = 'revision'
 | 
			
		||||
    _VALID_URL = r'https?://(?:www\.)?(?P<domain>(?:revision3|animalist)\.com)/(?P<id>[^/]+(?:/[^/?#]+)?)'
 | 
			
		||||
    _TESTS = [{
 | 
			
		||||
        'url': 'http://www.revision3.com/technobuffalo/5-google-predictions-for-2016',
 | 
			
		||||
        'md5': 'd94a72d85d0a829766de4deb8daaf7df',
 | 
			
		||||
@@ -32,52 +88,14 @@ class Revision3IE(InfoExtractor):
 | 
			
		||||
        }
 | 
			
		||||
    }, {
 | 
			
		||||
        # Show
 | 
			
		||||
        'url': 'http://testtube.com/brainstuff',
 | 
			
		||||
        'info_dict': {
 | 
			
		||||
            'id': '251',
 | 
			
		||||
            'title': 'BrainStuff',
 | 
			
		||||
            'description': 'Whether the topic is popcorn or particle physics, you can count on the HowStuffWorks team to explore-and explain-the everyday science in the world around us on BrainStuff.',
 | 
			
		||||
        },
 | 
			
		||||
        'playlist_mincount': 93,
 | 
			
		||||
    }, {
 | 
			
		||||
        'url': 'https://testtube.com/dnews/5-weird-ways-plants-can-eat-animals?utm_source=FB&utm_medium=DNews&utm_campaign=DNewsSocial',
 | 
			
		||||
        'info_dict': {
 | 
			
		||||
            'id': '58227',
 | 
			
		||||
            'display_id': 'dnews/5-weird-ways-plants-can-eat-animals',
 | 
			
		||||
            'duration': 275,
 | 
			
		||||
            'ext': 'webm',
 | 
			
		||||
            'title': '5 Weird Ways Plants Can Eat Animals',
 | 
			
		||||
            'description': 'Why have some plants evolved to eat meat?',
 | 
			
		||||
            'upload_date': '20150120',
 | 
			
		||||
            'timestamp': 1421763300,
 | 
			
		||||
            'uploader': 'DNews',
 | 
			
		||||
            'uploader_id': 'dnews',
 | 
			
		||||
        },
 | 
			
		||||
    }, {
 | 
			
		||||
        'url': 'http://testtube.com/tt-editors-picks/the-israel-palestine-conflict-explained-in-ten-min',
 | 
			
		||||
        'info_dict': {
 | 
			
		||||
            'id': '71618',
 | 
			
		||||
            'ext': 'mp4',
 | 
			
		||||
            'display_id': 'tt-editors-picks/the-israel-palestine-conflict-explained-in-ten-min',
 | 
			
		||||
            'title': 'The Israel-Palestine Conflict Explained in Ten Minutes',
 | 
			
		||||
            'description': 'If you\'d like to learn about the struggle between Israelis and Palestinians, this video is a great place to start',
 | 
			
		||||
            'uploader': 'Editors\' Picks',
 | 
			
		||||
            'uploader_id': 'tt-editors-picks',
 | 
			
		||||
            'timestamp': 1453309200,
 | 
			
		||||
            'upload_date': '20160120',
 | 
			
		||||
        },
 | 
			
		||||
        'add_ie': ['Youtube'],
 | 
			
		||||
        'url': 'http://revision3.com/variant',
 | 
			
		||||
        'only_matching': True,
 | 
			
		||||
    }, {
 | 
			
		||||
        # Tag
 | 
			
		||||
        'url': 'http://testtube.com/tech-news',
 | 
			
		||||
        'info_dict': {
 | 
			
		||||
            'id': '21018',
 | 
			
		||||
            'title': 'tech news',
 | 
			
		||||
        },
 | 
			
		||||
        'playlist_mincount': 9,
 | 
			
		||||
        'url': 'http://revision3.com/vr',
 | 
			
		||||
        'only_matching': True,
 | 
			
		||||
    }]
 | 
			
		||||
    _PAGE_DATA_TEMPLATE = 'http://www.%s/apiProxy/ddn/%s?domain=%s'
 | 
			
		||||
    _API_KEY = 'ba9c741bce1b9d8e3defcc22193f3651b8867e62'
 | 
			
		||||
 | 
			
		||||
    def _real_extract(self, url):
 | 
			
		||||
        domain, display_id = re.match(self._VALID_URL, url).groups()
 | 
			
		||||
@@ -119,33 +137,9 @@ class Revision3IE(InfoExtractor):
 | 
			
		||||
                })
 | 
			
		||||
                return info
 | 
			
		||||
 | 
			
		||||
            video_data = self._download_json(
 | 
			
		||||
                'http://revision3.com/api/getPlaylist.json?api_key=%s&codecs=h264,vp8,theora&video_id=%s' % (self._API_KEY, video_id),
 | 
			
		||||
                video_id)['items'][0]
 | 
			
		||||
 | 
			
		||||
            formats = []
 | 
			
		||||
            for vcodec, media in video_data['media'].items():
 | 
			
		||||
                for quality_id, quality in media.items():
 | 
			
		||||
                    if quality_id == 'hls':
 | 
			
		||||
                        formats.extend(self._extract_m3u8_formats(
 | 
			
		||||
                            quality['url'], video_id, 'mp4',
 | 
			
		||||
                            'm3u8_native', m3u8_id='hls', fatal=False))
 | 
			
		||||
                    else:
 | 
			
		||||
                        formats.append({
 | 
			
		||||
                            'url': quality['url'],
 | 
			
		||||
                            'format_id': '%s-%s' % (vcodec, quality_id),
 | 
			
		||||
                            'tbr': int_or_none(quality.get('bitrate')),
 | 
			
		||||
                            'vcodec': vcodec,
 | 
			
		||||
                        })
 | 
			
		||||
            self._sort_formats(formats)
 | 
			
		||||
 | 
			
		||||
            info.update({
 | 
			
		||||
                'title': unescapeHTML(video_data['title']),
 | 
			
		||||
                'description': unescapeHTML(video_data.get('summary')),
 | 
			
		||||
                'uploader': video_data.get('show', {}).get('name'),
 | 
			
		||||
                'uploader_id': video_data.get('show', {}).get('slug'),
 | 
			
		||||
                'duration': int_or_none(video_data.get('duration')),
 | 
			
		||||
                'formats': formats,
 | 
			
		||||
                '_type': 'url_transparent',
 | 
			
		||||
                'url': 'revision3:%s' % video_id,
 | 
			
		||||
            })
 | 
			
		||||
            return info
 | 
			
		||||
        else:
 | 
			
		||||
 
 | 
			
		||||
@@ -6,6 +6,9 @@ import re
 | 
			
		||||
import time
 | 
			
		||||
 | 
			
		||||
from .common import InfoExtractor
 | 
			
		||||
from ..compat import (
 | 
			
		||||
    compat_struct_unpack,
 | 
			
		||||
)
 | 
			
		||||
from ..utils import (
 | 
			
		||||
    ExtractorError,
 | 
			
		||||
    float_or_none,
 | 
			
		||||
@@ -13,7 +16,6 @@ from ..utils import (
 | 
			
		||||
    remove_start,
 | 
			
		||||
    sanitized_Request,
 | 
			
		||||
    std_headers,
 | 
			
		||||
    struct_unpack,
 | 
			
		||||
)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@@ -21,7 +23,7 @@ def _decrypt_url(png):
 | 
			
		||||
    encrypted_data = base64.b64decode(png.encode('utf-8'))
 | 
			
		||||
    text_index = encrypted_data.find(b'tEXt')
 | 
			
		||||
    text_chunk = encrypted_data[text_index - 4:]
 | 
			
		||||
    length = struct_unpack('!I', text_chunk[:4])[0]
 | 
			
		||||
    length = compat_struct_unpack('!I', text_chunk[:4])[0]
 | 
			
		||||
    # Use bytearray to get integers when iterating in both python 2.x and 3.x
 | 
			
		||||
    data = bytearray(text_chunk[8:8 + length])
 | 
			
		||||
    data = [chr(b) for b in data if b != 0]
 | 
			
		||||
@@ -62,7 +64,7 @@ def _decrypt_url(png):
 | 
			
		||||
class RTVEALaCartaIE(InfoExtractor):
 | 
			
		||||
    IE_NAME = 'rtve.es:alacarta'
 | 
			
		||||
    IE_DESC = 'RTVE a la carta'
 | 
			
		||||
    _VALID_URL = r'https?://www\.rtve\.es/(m/)?alacarta/videos/[^/]+/[^/]+/(?P<id>\d+)'
 | 
			
		||||
    _VALID_URL = r'https?://www\.rtve\.es/(m/)?(alacarta/videos|filmoteca)/[^/]+/[^/]+/(?P<id>\d+)'
 | 
			
		||||
 | 
			
		||||
    _TESTS = [{
 | 
			
		||||
        'url': 'http://www.rtve.es/alacarta/videos/balonmano/o-swiss-cup-masculina-final-espana-suecia/2491869/',
 | 
			
		||||
@@ -85,6 +87,9 @@ class RTVEALaCartaIE(InfoExtractor):
 | 
			
		||||
    }, {
 | 
			
		||||
        'url': 'http://www.rtve.es/m/alacarta/videos/cuentame-como-paso/cuentame-como-paso-t16-ultimo-minuto-nuestra-vida-capitulo-276/2969138/?media=tve',
 | 
			
		||||
        'only_matching': True,
 | 
			
		||||
    }, {
 | 
			
		||||
        'url': 'http://www.rtve.es/filmoteca/no-do/not-1-introduccion-primer-noticiario-espanol/1465256/',
 | 
			
		||||
        'only_matching': True,
 | 
			
		||||
    }]
 | 
			
		||||
 | 
			
		||||
    def _real_initialize(self):
 | 
			
		||||
 
 | 
			
		||||
							
								
								
									
										57
									
								
								youtube_dl/extractor/seeker.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										57
									
								
								youtube_dl/extractor/seeker.py
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,57 @@
 | 
			
		||||
# coding: utf-8
 | 
			
		||||
from __future__ import unicode_literals
 | 
			
		||||
 | 
			
		||||
import re
 | 
			
		||||
 | 
			
		||||
from .common import InfoExtractor
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class SeekerIE(InfoExtractor):
    """Extractor for seeker.com articles embedding Revision3/Seeker players."""
    _VALID_URL = r'https?://(?:www\.)?seeker\.com/(?P<display_id>.*)-(?P<article_id>\d+)\.html'
    _TESTS = [{
        # player.loadRevision3Item
        'url': 'http://www.seeker.com/should-trump-be-required-to-release-his-tax-returns-1833805621.html',
        'md5': '30c1dc4030cc715cf05b423d0947ac18',
        'info_dict': {
            'id': '76243',
            'ext': 'webm',
            'title': 'Should Trump Be Required To Release His Tax Returns?',
            'description': 'Donald Trump has been secretive about his "big," "beautiful" tax returns. So what can we learn if he decides to release them?',
            'uploader': 'Seeker Daily',
            'uploader_id': 'seekerdaily',
        }
    }, {
        'url': 'http://www.seeker.com/changes-expected-at-zoos-following-recent-gorilla-lion-shootings-1834116536.html',
        'playlist': [
            {
                'md5': '83bcd157cab89ad7318dd7b8c9cf1306',
                'info_dict': {
                    'id': '67558',
                    'ext': 'mp4',
                    'title': 'The Pros & Cons Of Zoos',
                    'description': 'Zoos are often depicted as a terrible place for animals to live, but is there any truth to this?',
                    'uploader': 'DNews',
                    'uploader_id': 'dnews',
                },
            }
        ],
        'info_dict': {
            'id': '1834116536',
            'title': 'After Gorilla Killing, Changes Ahead for Zoos',
            'description': 'The largest association of zoos and others are hoping to learn from recent incidents that led to the shooting deaths of a gorilla and two lions.',
        },
    }]

    def _real_extract(self, url):
        display_id, article_id = re.match(self._VALID_URL, url).groups()
        webpage = self._download_webpage(url, display_id)

        # Single-video pages call player.loadRevision3Item directly.
        loader_call = re.search(r"player\.loadRevision3Item\('([^']+)'\s*,\s*(\d+)\);", webpage)
        if loader_call:
            playlist_type, playlist_id = loader_call.groups()
            return self.url_result(
                'revision3:%s:%s' % (playlist_type, playlist_id), 'Revision3Embed', playlist_id)

        # Otherwise collect every seekernetwork embed iframe as a playlist.
        embedded_ids = re.findall(
            r'<iframe[^>]+src=[\'"](?:https?:)?//api\.seekernetwork\.com/player/embed\?videoId=(\d+)', webpage)
        entries = []
        for embedded_id in embedded_ids:
            entries.append(self.url_result(
                'revision3:video_id:%s' % embedded_id, 'Revision3Embed', embedded_id))
        return self.playlist_result(
            entries, article_id, self._og_search_title(webpage), self._og_search_description(webpage))
 | 
			
		||||
							
								
								
									
										86
									
								
								youtube_dl/extractor/sendtonews.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										86
									
								
								youtube_dl/extractor/sendtonews.py
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,86 @@
 | 
			
		||||
# coding: utf-8
 | 
			
		||||
from __future__ import unicode_literals
 | 
			
		||||
 | 
			
		||||
import re
 | 
			
		||||
 | 
			
		||||
from .jwplatform import JWPlatformBaseIE
 | 
			
		||||
from ..compat import compat_parse_qs
 | 
			
		||||
from ..utils import (
 | 
			
		||||
    ExtractorError,
 | 
			
		||||
    parse_duration,
 | 
			
		||||
)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class SendtoNewsIE(JWPlatformBaseIE):
    """Extractor for videos served from the embed.sendtonews.com player."""

    _VALID_URL = r'https?://embed\.sendtonews\.com/player/embed\.php\?(?P<query>[^#]+)'

    _TEST = {
        # From http://cleveland.cbslocal.com/2016/05/16/indians-score-season-high-15-runs-in-blowout-win-over-reds-rapid-reaction/
        'url': 'http://embed.sendtonews.com/player/embed.php?SK=GxfCe0Zo7D&MK=175909&PK=5588&autoplay=on&sound=yes',
        'info_dict': {
            'id': 'GxfCe0Zo7D-175909-5588',
            'ext': 'mp4',
            'title': 'Recap: CLE 15, CIN 6',
            'description': '5/16/16: Indians\' bats explode for 15 runs in a win',
            'duration': 49,
        },
        'params': {
            # m3u8 download
            'skip_download': True,
        },
    }

    _URL_TEMPLATE = '//embed.sendtonews.com/player/embed.php?SK=%s&MK=%s&PK=%s'

    @classmethod
    def _extract_url(cls, webpage):
        """Return the embed URL for the first sendtonews player found in webpage.

        The responsive-embed script packs the SK, MK and PK identifiers into a
        single dash-separated "SC" query parameter.
        """
        mobj = re.search(r'''(?x)<script[^>]+src=([\'"])
            (?:https?:)?//embed\.sendtonews\.com/player/responsiveembed\.php\?
                .*\bSC=(?P<SC>[0-9a-zA-Z-]+).*
            \1>''', webpage)
        if not mobj:
            return None
        sk_id, mk_id, pk_id = mobj.group('SC').split('-')
        return cls._URL_TEMPLATE % (sk_id, mk_id, pk_id)

    def _real_extract(self, url):
        params = compat_parse_qs(re.match(self._VALID_URL, url).group('query'))

        # All three identifiers are mandatory to address a clip.
        for key in ('SK', 'MK', 'PK'):
            if key not in params:
                raise ExtractorError('Invalid URL', expected=True)

        video_id = '%s-%s-%s' % (params['SK'][0], params['MK'][0], params['PK'][0])

        webpage = self._download_webpage(url, video_id)

        jwplayer_data_str = self._search_regex(
            r'jwplayer\("[^"]+"\)\.setup\((.+?)\);', webpage, 'JWPlayer data')
        # The setup argument references page-level JS variables; substitute
        # concrete values so the blob becomes parseable JSON.
        for name, js_val in (('w', '1024'), ('h', '768'), ('modeVar', '"html5"')):
            jwplayer_data_str = jwplayer_data_str.replace(':%s,' % name, ':%s,' % js_val)

        info_dict = self._parse_jwplayer_data(
            self._parse_json(jwplayer_data_str, video_id),
            video_id, require_title=False, rtmp_params={'no_resume': True})

        # Metadata lives in the surrounding embed page, not the player config.
        title = self._html_search_regex(
            r'<div[^>]+class="embedTitle">([^<]+)</div>', webpage, 'title')
        description = self._html_search_regex(
            r'<div[^>]+class="embedSubTitle">([^<]+)</div>', webpage,
            'description', fatal=False)
        duration = parse_duration(self._html_search_regex(
            r'<div[^>]+class="embedDetails">([0-9:]+)', webpage,
            'duration', fatal=False))

        info_dict.update({
            'title': title,
            'description': description,
            'duration': duration,
        })

        return info_dict
 | 
			
		||||
@@ -4,28 +4,35 @@ from __future__ import unicode_literals
 | 
			
		||||
import re
 | 
			
		||||
 | 
			
		||||
from .common import InfoExtractor
 | 
			
		||||
from ..compat import compat_urllib_parse_urlencode
 | 
			
		||||
from ..utils import sanitized_Request
 | 
			
		||||
from ..utils import (
 | 
			
		||||
    HEADRequest,
 | 
			
		||||
    ExtractorError,
 | 
			
		||||
    int_or_none,
 | 
			
		||||
    update_url_query,
 | 
			
		||||
    qualities,
 | 
			
		||||
    get_element_by_attribute,
 | 
			
		||||
    clean_html,
 | 
			
		||||
)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class SinaIE(InfoExtractor):
 | 
			
		||||
    _VALID_URL = r'''(?x)https?://(.*?\.)?video\.sina\.com\.cn/
 | 
			
		||||
                        (
 | 
			
		||||
                            (.+?/(((?P<pseudo_id>\d+).html)|(.*?(\#|(vid=)|b/)(?P<id>\d+?)($|&|\-))))
 | 
			
		||||
                            |
 | 
			
		||||
    _VALID_URL = r'''(?x)https?://(?:.*?\.)?video\.sina\.com\.cn/
 | 
			
		||||
                        (?:
 | 
			
		||||
                            (?:view/|.*\#)(?P<video_id>\d+)|
 | 
			
		||||
                            .+?/(?P<pseudo_id>[^/?#]+)(?:\.s?html)|
 | 
			
		||||
                            # This is used by external sites like Weibo
 | 
			
		||||
                            (api/sinawebApi/outplay.php/(?P<token>.+?)\.swf)
 | 
			
		||||
                            api/sinawebApi/outplay.php/(?P<token>.+?)\.swf
 | 
			
		||||
                        )
 | 
			
		||||
                  '''
 | 
			
		||||
 | 
			
		||||
    _TESTS = [
 | 
			
		||||
        {
 | 
			
		||||
            'url': 'http://video.sina.com.cn/news/vlist/zt/chczlj2013/?opsubject_id=top12#110028898',
 | 
			
		||||
            'md5': 'd65dd22ddcf44e38ce2bf58a10c3e71f',
 | 
			
		||||
            'url': 'http://video.sina.com.cn/news/spj/topvideoes20160504/?opsubject_id=top1#250576622',
 | 
			
		||||
            'md5': 'd38433e2fc886007729735650ae4b3e9',
 | 
			
		||||
            'info_dict': {
 | 
			
		||||
                'id': '110028898',
 | 
			
		||||
                'ext': 'flv',
 | 
			
		||||
                'title': '《中国新闻》 朝鲜要求巴拿马立即释放被扣船员',
 | 
			
		||||
                'id': '250576622',
 | 
			
		||||
                'ext': 'mp4',
 | 
			
		||||
                'title': '现场:克鲁兹宣布退选 特朗普将稳获提名',
 | 
			
		||||
            }
 | 
			
		||||
        },
 | 
			
		||||
        {
 | 
			
		||||
@@ -35,37 +42,74 @@ class SinaIE(InfoExtractor):
 | 
			
		||||
                'ext': 'flv',
 | 
			
		||||
                'title': '军方提高对朝情报监视级别',
 | 
			
		||||
            },
 | 
			
		||||
            'skip': 'the page does not exist or has been deleted',
 | 
			
		||||
        },
 | 
			
		||||
        {
 | 
			
		||||
            'url': 'http://video.sina.com.cn/view/250587748.html',
 | 
			
		||||
            'md5': '3d1807a25c775092aab3bc157fff49b4',
 | 
			
		||||
            'info_dict': {
 | 
			
		||||
                'id': '250587748',
 | 
			
		||||
                'ext': 'mp4',
 | 
			
		||||
                'title': '瞬间泪目:8年前汶川地震珍贵视频首曝光',
 | 
			
		||||
            },
 | 
			
		||||
        },
 | 
			
		||||
    ]
 | 
			
		||||
 | 
			
		||||
    def _extract_video(self, video_id):
        """Fetch the playback URL and thumbnail for a Sina video id."""
        query = compat_urllib_parse_urlencode({'vid': video_id})
        url_doc = self._download_xml(
            'http://v.iask.com/v_play.php?%s' % query,
            video_id, 'Downloading video url')
        # The image endpoint answers with a "key=value" string; the thumbnail
        # URL is everything after the first '='.
        image_page = self._download_webpage(
            'http://interface.video.sina.com.cn/interface/common/getVideoImage.php?%s' % query,
            video_id, 'Downloading thumbnail info')

        return {
            'id': video_id,
            'url': url_doc.find('./durl/url').text,
            'ext': 'flv',
            'title': url_doc.find('./vname').text,
            'thumbnail': image_page.split('=')[1],
        }
 | 
			
		||||
 | 
			
		||||
    def _real_extract(self, url):
 | 
			
		||||
        mobj = re.match(self._VALID_URL, url)
 | 
			
		||||
        video_id = mobj.group('id')
 | 
			
		||||
        if mobj.group('token') is not None:
 | 
			
		||||
            # The video id is in the redirected url
 | 
			
		||||
            self.to_screen('Getting video id')
 | 
			
		||||
            request = sanitized_Request(url)
 | 
			
		||||
            request.get_method = lambda: 'HEAD'
 | 
			
		||||
            (_, urlh) = self._download_webpage_handle(request, 'NA', False)
 | 
			
		||||
            return self._real_extract(urlh.geturl())
 | 
			
		||||
        elif video_id is None:
 | 
			
		||||
            pseudo_id = mobj.group('pseudo_id')
 | 
			
		||||
            webpage = self._download_webpage(url, pseudo_id)
 | 
			
		||||
            video_id = self._search_regex(r'vid:\'(\d+?)\'', webpage, 'video id')
 | 
			
		||||
 | 
			
		||||
        return self._extract_video(video_id)
 | 
			
		||||
        video_id = mobj.group('video_id')
 | 
			
		||||
        if not video_id:
 | 
			
		||||
            if mobj.group('token') is not None:
 | 
			
		||||
                # The video id is in the redirected url
 | 
			
		||||
                self.to_screen('Getting video id')
 | 
			
		||||
                request = HEADRequest(url)
 | 
			
		||||
                (_, urlh) = self._download_webpage_handle(request, 'NA', False)
 | 
			
		||||
                return self._real_extract(urlh.geturl())
 | 
			
		||||
            else:
 | 
			
		||||
                pseudo_id = mobj.group('pseudo_id')
 | 
			
		||||
                webpage = self._download_webpage(url, pseudo_id)
 | 
			
		||||
                error = get_element_by_attribute('class', 'errtitle', webpage)
 | 
			
		||||
                if error:
 | 
			
		||||
                    raise ExtractorError('%s said: %s' % (
 | 
			
		||||
                        self.IE_NAME, clean_html(error)), expected=True)
 | 
			
		||||
                video_id = self._search_regex(
 | 
			
		||||
                    r"video_id\s*:\s*'(\d+)'", webpage, 'video id')
 | 
			
		||||
 | 
			
		||||
        video_data = self._download_json(
 | 
			
		||||
            'http://s.video.sina.com.cn/video/h5play',
 | 
			
		||||
            video_id, query={'video_id': video_id})
 | 
			
		||||
        if video_data['code'] != 1:
 | 
			
		||||
            raise ExtractorError('%s said: %s' % (
 | 
			
		||||
                self.IE_NAME, video_data['message']), expected=True)
 | 
			
		||||
        else:
 | 
			
		||||
            video_data = video_data['data']
 | 
			
		||||
            title = video_data['title']
 | 
			
		||||
            description = video_data.get('description')
 | 
			
		||||
            if description:
 | 
			
		||||
                description = description.strip()
 | 
			
		||||
 | 
			
		||||
            preference = qualities(['cif', 'sd', 'hd', 'fhd', 'ffd'])
 | 
			
		||||
            formats = []
 | 
			
		||||
            for quality_id, quality in video_data.get('videos', {}).get('mp4', {}).items():
 | 
			
		||||
                file_api = quality.get('file_api')
 | 
			
		||||
                file_id = quality.get('file_id')
 | 
			
		||||
                if not file_api or not file_id:
 | 
			
		||||
                    continue
 | 
			
		||||
                formats.append({
 | 
			
		||||
                    'format_id': quality_id,
 | 
			
		||||
                    'url': update_url_query(file_api, {'vid': file_id}),
 | 
			
		||||
                    'preference': preference(quality_id),
 | 
			
		||||
                    'ext': 'mp4',
 | 
			
		||||
                })
 | 
			
		||||
            self._sort_formats(formats)
 | 
			
		||||
 | 
			
		||||
            return {
 | 
			
		||||
                'id': video_id,
 | 
			
		||||
                'title': title,
 | 
			
		||||
                'description': description,
 | 
			
		||||
                'thumbnail': video_data.get('image'),
 | 
			
		||||
                'duration': int_or_none(video_data.get('length')),
 | 
			
		||||
                'timestamp': int_or_none(video_data.get('create_time')),
 | 
			
		||||
                'formats': formats,
 | 
			
		||||
            }
 | 
			
		||||
 
 | 
			
		||||
@@ -96,20 +96,18 @@ class SpankwireIE(InfoExtractor):
 | 
			
		||||
        formats = []
 | 
			
		||||
        for height, video_url in zip(heights, video_urls):
 | 
			
		||||
            path = compat_urllib_parse_urlparse(video_url).path
 | 
			
		||||
            _, quality = path.split('/')[4].split('_')[:2]
 | 
			
		||||
            f = {
 | 
			
		||||
                'url': video_url,
 | 
			
		||||
                'height': height,
 | 
			
		||||
            }
 | 
			
		||||
            tbr = self._search_regex(r'^(\d+)[Kk]$', quality, 'tbr', default=None)
 | 
			
		||||
            if tbr:
 | 
			
		||||
                f.update({
 | 
			
		||||
                    'tbr': int(tbr),
 | 
			
		||||
                    'format_id': '%dp' % height,
 | 
			
		||||
                })
 | 
			
		||||
            m = re.search(r'/(?P<height>\d+)[pP]_(?P<tbr>\d+)[kK]', path)
 | 
			
		||||
            if m:
 | 
			
		||||
                tbr = int(m.group('tbr'))
 | 
			
		||||
                height = int(m.group('height'))
 | 
			
		||||
            else:
 | 
			
		||||
                f['format_id'] = quality
 | 
			
		||||
            formats.append(f)
 | 
			
		||||
                tbr = None
 | 
			
		||||
            formats.append({
 | 
			
		||||
                'url': video_url,
 | 
			
		||||
                'format_id': '%dp' % height,
 | 
			
		||||
                'height': height,
 | 
			
		||||
                'tbr': tbr,
 | 
			
		||||
            })
 | 
			
		||||
        self._sort_formats(formats)
 | 
			
		||||
 | 
			
		||||
        age_limit = self._rta_search(webpage)
 | 
			
		||||
 
 | 
			
		||||
@@ -11,6 +11,7 @@ class TeachingChannelIE(InfoExtractor):
 | 
			
		||||
 | 
			
		||||
    _TEST = {
 | 
			
		||||
        'url': 'https://www.teachingchannel.org/videos/teacher-teaming-evolution',
 | 
			
		||||
        'md5': '3d6361864d7cac20b57c8784da17166f',
 | 
			
		||||
        'info_dict': {
 | 
			
		||||
            'id': 'F3bnlzbToeI6pLEfRyrlfooIILUjz4nM',
 | 
			
		||||
            'ext': 'mp4',
 | 
			
		||||
@@ -19,9 +20,9 @@ class TeachingChannelIE(InfoExtractor):
 | 
			
		||||
            'duration': 422.255,
 | 
			
		||||
        },
 | 
			
		||||
        'params': {
 | 
			
		||||
            # m3u8 download
 | 
			
		||||
            'skip_download': True,
 | 
			
		||||
        },
 | 
			
		||||
        'add_ie': ['Ooyala'],
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    def _real_extract(self, url):
 | 
			
		||||
 
 | 
			
		||||
@@ -88,7 +88,7 @@ class TeamcocoIE(InfoExtractor):
 | 
			
		||||
        preload_codes = self._html_search_regex(
 | 
			
		||||
            r'(function.+)setTimeout\(function\(\)\{playlist',
 | 
			
		||||
            webpage, 'preload codes')
 | 
			
		||||
        base64_fragments = re.findall(r'"([a-zA-z0-9+/=]+)"', preload_codes)
 | 
			
		||||
        base64_fragments = re.findall(r'"([a-zA-Z0-9+/=]+)"', preload_codes)
 | 
			
		||||
        base64_fragments.remove('init')
 | 
			
		||||
 | 
			
		||||
        def _check_sequence(cur_fragments):
 | 
			
		||||
 
 | 
			
		||||
@@ -2,14 +2,16 @@
 | 
			
		||||
from __future__ import unicode_literals
 | 
			
		||||
 | 
			
		||||
from .common import InfoExtractor
 | 
			
		||||
from ..utils import remove_end
 | 
			
		||||
from ..utils import (
 | 
			
		||||
    determine_ext,
 | 
			
		||||
    remove_end,
 | 
			
		||||
)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class TelegraafIE(InfoExtractor):
 | 
			
		||||
    _VALID_URL = r'https?://(?:www\.)?telegraaf\.nl/tv/(?:[^/]+/)+(?P<id>\d+)/[^/]+\.html'
 | 
			
		||||
    _TEST = {
 | 
			
		||||
        'url': 'http://www.telegraaf.nl/tv/nieuws/binnenland/24353229/__Tikibad_ontruimd_wegens_brand__.html',
 | 
			
		||||
        'md5': '83245a9779bcc4a24454bfd53c65b6dc',
 | 
			
		||||
        'info_dict': {
 | 
			
		||||
            'id': '24353229',
 | 
			
		||||
            'ext': 'mp4',
 | 
			
		||||
@@ -18,18 +20,60 @@ class TelegraafIE(InfoExtractor):
 | 
			
		||||
            'thumbnail': 're:^https?://.*\.jpg$',
 | 
			
		||||
            'duration': 33,
 | 
			
		||||
        },
 | 
			
		||||
        'params': {
 | 
			
		||||
            # m3u8 download
 | 
			
		||||
            'skip_download': True,
 | 
			
		||||
        },
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    def _real_extract(self, url):
 | 
			
		||||
        playlist_id = self._match_id(url)
 | 
			
		||||
        video_id = self._match_id(url)
 | 
			
		||||
 | 
			
		||||
        webpage = self._download_webpage(url, playlist_id)
 | 
			
		||||
        webpage = self._download_webpage(url, video_id)
 | 
			
		||||
 | 
			
		||||
        player_url = self._html_search_regex(
 | 
			
		||||
            r'<iframe[^>]+src="([^"]+")', webpage, 'player URL')
 | 
			
		||||
        player_page = self._download_webpage(
 | 
			
		||||
            player_url, video_id, note='Download player webpage')
 | 
			
		||||
        playlist_url = self._search_regex(
 | 
			
		||||
            r"iframe\.loadPlayer\('([^']+)'", webpage, 'player')
 | 
			
		||||
            r'playlist\s*:\s*"([^"]+)"', player_page, 'playlist URL')
 | 
			
		||||
        playlist_data = self._download_json(playlist_url, video_id)
 | 
			
		||||
 | 
			
		||||
        item = playlist_data['items'][0]
 | 
			
		||||
        formats = []
 | 
			
		||||
        locations = item['locations']
 | 
			
		||||
        for location in locations.get('adaptive', []):
 | 
			
		||||
            manifest_url = location['src']
 | 
			
		||||
            ext = determine_ext(manifest_url)
 | 
			
		||||
            if ext == 'm3u8':
 | 
			
		||||
                formats.extend(self._extract_m3u8_formats(
 | 
			
		||||
                    manifest_url, video_id, ext='mp4', m3u8_id='hls'))
 | 
			
		||||
            elif ext == 'mpd':
 | 
			
		||||
                # TODO: Current DASH formats are broken - $Time$ pattern in
 | 
			
		||||
                # <SegmentTemplate> not implemented yet
 | 
			
		||||
                continue
 | 
			
		||||
            else:
 | 
			
		||||
                self.report_warning('Unknown adaptive format %s' % ext)
 | 
			
		||||
        for location in locations.get('progressive', []):
 | 
			
		||||
            formats.append({
 | 
			
		||||
                'url': location['sources'][0]['src'],
 | 
			
		||||
                'width': location.get('width'),
 | 
			
		||||
                'height': location.get('height'),
 | 
			
		||||
                'format_id': 'http-%s' % location['label'],
 | 
			
		||||
            })
 | 
			
		||||
 | 
			
		||||
        self._sort_formats(formats)
 | 
			
		||||
 | 
			
		||||
        entries = self._extract_xspf_playlist(playlist_url, playlist_id)
 | 
			
		||||
        title = remove_end(self._og_search_title(webpage), ' - VIDEO')
 | 
			
		||||
        description = self._og_search_description(webpage)
 | 
			
		||||
        duration = item.get('duration')
 | 
			
		||||
        thumbnail = item.get('poster')
 | 
			
		||||
 | 
			
		||||
        return self.playlist_result(entries, playlist_id, title, description)
 | 
			
		||||
        return {
 | 
			
		||||
            'id': video_id,
 | 
			
		||||
            'title': title,
 | 
			
		||||
            'description': description,
 | 
			
		||||
            'formats': formats,
 | 
			
		||||
            'duration': duration,
 | 
			
		||||
            'thumbnail': thumbnail,
 | 
			
		||||
        }
 | 
			
		||||
 
 | 
			
		||||
@@ -6,7 +6,7 @@ from .common import InfoExtractor
 | 
			
		||||
 | 
			
		||||
class TF1IE(InfoExtractor):
 | 
			
		||||
    """TF1 uses the wat.tv player."""
 | 
			
		||||
    _VALID_URL = r'https?://(?:(?:videos|www|lci)\.tf1|www\.tfou)\.fr/(?:[^/]+/)*(?P<id>.+?)\.html'
 | 
			
		||||
    _VALID_URL = r'https?://(?:(?:videos|www|lci)\.tf1|(?:www\.)?(?:tfou|ushuaiatv|histoire|tvbreizh))\.fr/(?:[^/]+/)*(?P<id>[^/?#.]+)'
 | 
			
		||||
    _TESTS = [{
 | 
			
		||||
        'url': 'http://videos.tf1.fr/auto-moto/citroen-grand-c4-picasso-2013-presentation-officielle-8062060.html',
 | 
			
		||||
        'info_dict': {
 | 
			
		||||
@@ -48,6 +48,6 @@ class TF1IE(InfoExtractor):
 | 
			
		||||
        video_id = self._match_id(url)
 | 
			
		||||
        webpage = self._download_webpage(url, video_id)
 | 
			
		||||
        wat_id = self._html_search_regex(
 | 
			
		||||
            r'(["\'])(?:https?:)?//www\.wat\.tv/embedframe/.*?(?P<id>\d{8})(?:#.*?)?\1',
 | 
			
		||||
            r'(["\'])(?:https?:)?//www\.wat\.tv/embedframe/.*?(?P<id>\d{8}).*?\1',
 | 
			
		||||
            webpage, 'wat id', group='id')
 | 
			
		||||
        return self.url_result('wat:%s' % wat_id, 'Wat')
 | 
			
		||||
 
 | 
			
		||||
@@ -14,11 +14,13 @@ from ..compat import (
 | 
			
		||||
    compat_urllib_parse_urlparse,
 | 
			
		||||
)
 | 
			
		||||
from ..utils import (
 | 
			
		||||
    determine_ext,
 | 
			
		||||
    ExtractorError,
 | 
			
		||||
    float_or_none,
 | 
			
		||||
    int_or_none,
 | 
			
		||||
    sanitized_Request,
 | 
			
		||||
    unsmuggle_url,
 | 
			
		||||
    update_url_query,
 | 
			
		||||
    xpath_with_ns,
 | 
			
		||||
    mimetype2ext,
 | 
			
		||||
    find_xpath_attr,
 | 
			
		||||
@@ -48,6 +50,12 @@ class ThePlatformBaseIE(OnceIE):
 | 
			
		||||
            if OnceIE.suitable(_format['url']):
 | 
			
		||||
                formats.extend(self._extract_once_formats(_format['url']))
 | 
			
		||||
            else:
 | 
			
		||||
                media_url = _format['url']
 | 
			
		||||
                if determine_ext(media_url) == 'm3u8':
 | 
			
		||||
                    hdnea2 = self._get_cookies(media_url).get('hdnea2')
 | 
			
		||||
                    if hdnea2:
 | 
			
		||||
                        _format['url'] = update_url_query(media_url, {'hdnea3': hdnea2.value})
 | 
			
		||||
 | 
			
		||||
                formats.append(_format)
 | 
			
		||||
 | 
			
		||||
        subtitles = self._parse_smil_subtitles(meta, default_ns)
 | 
			
		||||
@@ -151,6 +159,22 @@ class ThePlatformIE(ThePlatformBaseIE):
 | 
			
		||||
        'only_matching': True,
 | 
			
		||||
    }]
 | 
			
		||||
 | 
			
		||||
    @classmethod
    def _extract_urls(cls, webpage):
        """Return thePlatform player URLs embedded in webpage, if any.

        A matching og:video/twitter:player meta tag wins; otherwise every
        iframe/script embed pointing at player.theplatform.com is collected.
        Returns None when nothing matches.
        """
        meta_match = re.search(
            r'''(?x)
                    <meta\s+
                        property=(["'])(?:og:video(?::(?:secure_)?url)?|twitter:player)\1\s+
                        content=(["'])(?P<url>https?://player\.theplatform\.com/p/.+?)\2
            ''', webpage)
        if meta_match:
            return [meta_match.group('url')]

        embed_matches = re.findall(
            r'<(?:iframe|script)[^>]+src=(["\'])((?:https?:)?//player\.theplatform\.com/p/.+?)\1', webpage)
        if embed_matches:
            # Keep the URL group only; a tuple, matching list(zip(*m))[1].
            return tuple(src_url for _quote, src_url in embed_matches)
 | 
			
		||||
 | 
			
		||||
    @staticmethod
 | 
			
		||||
    def _sign_url(url, sig_key, sig_secret, life=600, include_qs=False):
 | 
			
		||||
        flags = '10' if include_qs else '00'
 | 
			
		||||
 
 | 
			
		||||
Some files were not shown because too many files have changed in this diff Show More
		Reference in New Issue
	
	Block a user