Compare commits
	
		
			524 Commits
		
	
	
		
			2013.07.24
			...
			2013.10.07
		
	
	| Author | SHA1 | Date | |
|---|---|---|---|
|   | 4481a754e4 | ||
|   | faa6ef6bc8 | ||
|   | 15870e90b0 | ||
|   | 387ae5f30b | ||
|   | 1310bf2474 | ||
|   | b24f347190 | ||
|   | ee6c9f95e1 | ||
|   | 2a69c6b879 | ||
|   | cfadd183c4 | ||
|   | e484c81f0c | ||
|   | 7e5e8306fd | ||
|   | 41e8bca4d0 | ||
|   | 8dbe9899a9 | ||
|   | f4aac741d5 | ||
|   | c1c9a79c49 | ||
|   | 226113c880 | ||
|   | 8932a66e49 | ||
|   | 79cfb46d42 | ||
|   | 00fcc17aee | ||
|   | e94b783c74 | ||
|   | 97dae9ae07 | ||
|   | c3fef636b5 | ||
|   | 46e28a84ca | ||
|   | 17ad2b3fb1 | ||
|   | 5e2a60db4a | ||
|   | cd214418f6 | ||
|   | ba2d9f213e | ||
|   | 7f8ae73a5d | ||
|   | 466880f531 | ||
|   | 9f1f6d2437 | ||
|   | 9e0f897f6b | ||
|   | c0f6aa876f | ||
|   | d93bdee9a6 | ||
|   | f13d09332d | ||
|   | 2f5865cc6d | ||
|   | deefc05b88 | ||
|   | 0d8cb1cc14 | ||
|   | a90b9fd209 | ||
|   | 829493439a | ||
|   | 3cd022f6e6 | ||
|   | abefd1f7c4 | ||
|   | c21315f273 | ||
|   | 9ab1018b1a | ||
|   | da0a5d2d6e | ||
|   | ee6adb166c | ||
|   | be8fe32c92 | ||
|   | c38b1e776d | ||
|   | 4f8bf17f23 | ||
|   | ca40186c75 | ||
|   | a8c6b24155 | ||
|   | bd8e5c7ca2 | ||
|   | 7c61bd36bb | ||
|   | c54283824c | ||
|   | 52f15da2ca | ||
|   | 44d466559e | ||
|   | 05751eb047 | ||
|   | f10503db67 | ||
|   | adfeafe9e1 | ||
|   | 4c62a16f4f | ||
|   | c0de39e6d4 | ||
|   | fa55675593 | ||
|   | d4d9920a26 | ||
|   | 47192f92d8 | ||
|   | 722076a123 | ||
|   | bb4aa62cf7 | ||
|   | 843530568f | ||
|   | 138a5454b5 | ||
|   | d279037036 | ||
|   | 46353f6783 | ||
|   | 70922df8b5 | ||
|   | 9c15e9de84 | ||
|   | 123c10608d | ||
|   | 0b7c2485b6 | ||
|   | 9abb32045a | ||
|   | f490e77e77 | ||
|   | 2dc592991a | ||
|   | 0a60edcfa9 | ||
|   | c53f9d30c8 | ||
|   | 509f398292 | ||
|   | 74bab3f0a4 | ||
|   | 8574862991 | ||
|   | 2de957c7e1 | ||
|   | 920de7a27d | ||
|   | 63efc427cd | ||
|   | ce65fb6c76 | ||
|   | 4de1994b6e | ||
|   | 592882aa9f | ||
|   | b98d6a1e19 | ||
|   | 29c7a63df8 | ||
|   | 8b25323ae2 | ||
|   | f426de8460 | ||
|   | 695dc094ab | ||
|   | e80d861064 | ||
|   | 2cdeb20135 | ||
|   | 7f74773254 | ||
|   | f2c327fd39 | ||
|   | e35e4ddc9a | ||
|   | c3c88a2664 | ||
|   | bb0eee71e7 | ||
|   | 6f56389b88 | ||
|   | 5b333c1ce6 | ||
|   | a825f33030 | ||
|   | 92f618f2e2 | ||
|   | 81ec7c7901 | ||
|   | dd5d2eb03c | ||
|   | 4ae720042c | ||
|   | c705320f48 | ||
|   | d2d8f89531 | ||
|   | bdde940e90 | ||
|   | 45f4a76dbc | ||
|   | 13dc64ce74 | ||
|   | c35f9e72ce | ||
|   | f8061589e6 | ||
|   | 0ca96d48c7 | ||
|   | 4ba146f35d | ||
|   | edf3e38ebd | ||
|   | c4417ddb61 | ||
|   | 4a2080e407 | ||
|   | 2f2ffea9ca | ||
|   | ba552f542f | ||
|   | 8379969834 | ||
|   | 95dbd2f990 | ||
|   | a7177865b1 | ||
|   | e0df6211cc | ||
|   | b00ca882a4 | ||
|   | 39baacc49f | ||
|   | 3a1d48d6de | ||
|   | 34308b30d6 | ||
|   | bc1506f8c0 | ||
|   | b61067fa4f | ||
|   | 69b227a9bc | ||
|   | 0fd49457f5 | ||
|   | 58f289d013 | ||
|   | 3d60bb96e1 | ||
|   | 38d025b3f0 | ||
|   | c40c6aaaaa | ||
|   | 1a810f0d4e | ||
|   | 63037593c0 | ||
|   | 7a878d47fa | ||
|   | bc4b900898 | ||
|   | c5e743f66f | ||
|   | 6c36d8d6fb | ||
|   | 71c82637e7 | ||
|   | 2dad310e2c | ||
|   | d0ae9e3a8d | ||
|   | a19413c311 | ||
|   | 1ef80b55dd | ||
|   | eb03f4dad3 | ||
|   | 830dd1944a | ||
|   | 1237c9a3a5 | ||
|   | 5d13df79a5 | ||
|   | 6523223a4c | ||
|   | 4a67aafb7e | ||
|   | f3f34c5b0f | ||
|   | 6ae8ee3f54 | ||
|   | e8f8e80097 | ||
|   | 4dc0ff3ecf | ||
|   | 4b6462fc1e | ||
|   | c4ece78564 | ||
|   | 0761d02b0b | ||
|   | 71c107fc57 | ||
|   | 7459e3a290 | ||
|   | f9e66fb993 | ||
|   | 6c603ccce3 | ||
|   | ef66b0c6ef | ||
|   | 22b50ecb2f | ||
|   | 5a6fecc3de | ||
|   | cdbccafed9 | ||
|   | e69ae5b9e7 | ||
|   | 92790f4e54 | ||
|   | 471a5ee908 | ||
|   | 19e1d35989 | ||
|   | 0b7f31184d | ||
|   | fad84d50fe | ||
|   | 9a1c32dc54 | ||
|   | a921f40799 | ||
|   | 74ac9bdd82 | ||
|   | 94518f2087 | ||
|   | 535f59bbcf | ||
|   | 71cedb3c0c | ||
|   | dd01d6558a | ||
|   | ce85f022d2 | ||
|   | ad94a6fe44 | ||
|   | 353ba14060 | ||
|   | 83de794223 | ||
|   | bfd5c93af9 | ||
|   | c247d87ef3 | ||
|   | 07ac9e2cc2 | ||
|   | 6bc520c207 | ||
|   | f1d20fa39f | ||
|   | e3dc22ca3a | ||
|   | d665f8d3cb | ||
|   | 055e6f3657 | ||
|   | ac4f319ba1 | ||
|   | 542cca0e8c | ||
|   | 6a2449df3b | ||
|   | 7fad1c6328 | ||
|   | d82134c339 | ||
|   | 54d39d8b2f | ||
|   | de7f3446e0 | ||
|   | f8e52269c1 | ||
|   | cf1dd0c59e | ||
|   | 22c8b52545 | ||
|   | 1f7dc42cd0 | ||
|   | aa8f2641da | ||
|   | 648d25d43d | ||
|   | df3e61003a | ||
|   | 6b361ad5ee | ||
|   | 5d8afe69f7 | ||
|   | a1ab553858 | ||
|   | 07463ea162 | ||
|   | 6d2d21f713 | ||
|   | 061b2889a9 | ||
|   | 8963d9c266 | ||
|   | 890f62e868 | ||
|   | 8f362589a5 | ||
|   | a27a2470cd | ||
|   | 72836fcee4 | ||
|   | a7130543fa | ||
|   | a490fda746 | ||
|   | 7e77275293 | ||
|   | d6e203b3dc | ||
|   | e3ea479087 | ||
|   | faab1d3836 | ||
|   | 8851a574a3 | ||
|   | 59282080c8 | ||
|   | 98f3da4040 | ||
|   | 1d213233cd | ||
|   | fd9cf73836 | ||
|   | 0638ad9999 | ||
|   | 1eb527692a | ||
|   | 09bb17e108 | ||
|   | 1cf911bc82 | ||
|   | f4b052321b | ||
|   | a636203ea5 | ||
|   | c215217e39 | ||
|   | 08e291b54d | ||
|   | 6b95b065be | ||
|   | 9363169b67 | ||
|   | 085bea4513 | ||
|   | 150f20828b | ||
|   | 08523ee20a | ||
|   | 5d5171d26a | ||
|   | 96fb5605b2 | ||
|   | 7011de0bc2 | ||
|   | c3dd69eab4 | ||
|   | 025171c476 | ||
|   | c8dbccde30 | ||
|   | 4ff7a0f1f6 | ||
|   | 9c2ade40de | ||
|   | aa32314d09 | ||
|   | 52afe99665 | ||
|   | b0446d6a33 | ||
|   | 8e4e89f1c2 | ||
|   | 6c758d79de | ||
|   | 691008087b | ||
|   | 85f03346eb | ||
|   | bdc6b3fc64 | ||
|   | 847f582290 | ||
|   | 10f5c016ec | ||
|   | 2e756879f1 | ||
|   | c7a7750d3b | ||
|   | 9193c1eede | ||
|   | b3f0e53048 | ||
|   | 3243d0f7b6 | ||
|   | 23b00bc0e4 | ||
|   | 52e1eea18b | ||
|   | ee80d66727 | ||
|   | f1fb2d12b3 | ||
|   | deb2c73212 | ||
|   | 8928491074 | ||
|   | 545434670b | ||
|   | 54fda45bac | ||
|   | c7bf7366bc | ||
|   | b7052e5087 | ||
|   | 0d75ae2ce3 | ||
|   | b5ba7b9dcf | ||
|   | 483e0ddd4d | ||
|   | 2891932bf0 | ||
|   | 591078babf | ||
|   | 9868c781a1 | ||
|   | c257baff85 | ||
|   | 878e83c5a4 | ||
|   | 0012690aae | ||
|   | 6e74bc41ca | ||
|   | cba892fa1f | ||
|   | 550bfd4cbd | ||
|   | 920ef0779b | ||
|   | 48ea9cea77 | ||
|   | ccf4b799df | ||
|   | f143d86ad2 | ||
|   | 8ae97d76ee | ||
|   | f8b362739e | ||
|   | 6d69d03bac | ||
|   | 204da0d3e3 | ||
|   | c496ca96e7 | ||
|   | 67b22dd036 | ||
|   | ce6a696e4d | ||
|   | a5caba1eb0 | ||
|   | cd9c100963 | ||
|   | edde6c56ac | ||
|   | b7f89fe692 | ||
|   | ae3531adf9 | ||
|   | 8cf5ee7831 | ||
|   | aa3e950764 | ||
|   | 1301a0dd42 | ||
|   | af8bd6a82d | ||
|   | 6d38616e67 | ||
|   | 4f5f18acb9 | ||
|   | 3e223834d9 | ||
|   | a1bb0f8773 | ||
|   | 0e283428f7 | ||
|   | 2eabb80254 | ||
|   | 44586389e4 | ||
|   | 06a401c845 | ||
|   | 273f603efb | ||
|   | 1619e22f40 | ||
|   | 88a79ce6a6 | ||
|   | acebc9cd6b | ||
|   | 443c12a703 | ||
|   | 7f3c4f4f65 | ||
|   | 0bc56fa66a | ||
|   | 1a582dd49d | ||
|   | c5b921b597 | ||
|   | e86ea47c02 | ||
|   | aa5a63a5b5 | ||
|   | 2a7b4da9b2 | ||
|   | 069d098f84 | ||
|   | b3889f7023 | ||
|   | 65883c8dbd | ||
|   | 341ca8d74c | ||
|   | 99859d436c | ||
|   | 1b01e2b085 | ||
|   | 976fc7d137 | ||
|   | c3b7b29c23 | ||
|   | 627a91a9a8 | ||
|   | 6dc6302599 | ||
|   | 7a20e2e1f8 | ||
|   | 90648143c3 | ||
|   | 5c6658d4dd | ||
|   | 9585f890f8 | ||
|   | 0838239e8e | ||
|   | 36399e8576 | ||
|   | 9460db832c | ||
|   | d68730a56e | ||
|   | f2aeefe29c | ||
|   | 39c6f507df | ||
|   | d2d1eb5b0a | ||
|   | 8ae7be3ef4 | ||
|   | 306170518f | ||
|   | aa6a10c44a | ||
|   | 9af73dc4fc | ||
|   | fc483bb6af | ||
|   | 53b0f3e4e2 | ||
|   | 4353cf51a0 | ||
|   | ce34e9ce5e | ||
|   | d4051a8e05 | ||
|   | df3df7fb64 | ||
|   | 9e9c164052 | ||
|   | 066090dd3f | ||
|   | 614d9c19c1 | ||
|   | bd2dee6c67 | ||
|   | 74e6672beb | ||
|   | 02bcf0d389 | ||
|   | 18b4e04f1c | ||
|   | 10204dc898 | ||
|   | 1865ed31b9 | ||
|   | 3669cdba10 | ||
|   | 939fbd26ac | ||
|   | b4e60dac23 | ||
|   | e6ddb4e7af | ||
|   | 83390b83d9 | ||
|   | ff2424595a | ||
|   | adeb9c73d6 | ||
|   | cd0abcc0bb | ||
|   | 4a55479fa9 | ||
|   | f527115b5f | ||
|   | 75e1b46add | ||
|   | 05a2926c5c | ||
|   | 7070b83687 | ||
|   | 8d212e604a | ||
|   | 063fcc9676 | ||
|   | 8403612258 | ||
|   | 25b51c7816 | ||
|   | 9779b63bb6 | ||
|   | d81aef3adf | ||
|   | 5af7e056a7 | ||
|   | 45ed795cb0 | ||
|   | 683e98a8a4 | ||
|   | e0cfeb2ea7 | ||
|   | 75340ee383 | ||
|   | 668de34c6b | ||
|   | a91b954bb4 | ||
|   | a3f62b8255 | ||
|   | 37b6d5f684 | ||
|   | b7a6838407 | ||
|   | cde846b3d3 | ||
|   | 6c3e6e88d3 | ||
|   | 739674cd77 | ||
|   | 4b2d7cae11 | ||
|   | 7fea7156cb | ||
|   | 3093468977 | ||
|   | 79cb25776f | ||
|   | 87f78946a5 | ||
|   | 211fbc1328 | ||
|   | 836a086ce9 | ||
|   | 90d3989b99 | ||
|   | d741e55a42 | ||
|   | 17d3aaaf16 | ||
|   | ea55b2a4ca | ||
|   | 3f0537dd4a | ||
|   | 943f7f7a39 | ||
|   | 12e895fc5a | ||
|   | bda2c49d75 | ||
|   | 01b32990da | ||
|   | dbda1b5147 | ||
|   | ddf3bd328b | ||
|   | b9c37b92cf | ||
|   | 5a27ecdd2e | ||
|   | f9c3c90ca8 | ||
|   | 6daccbe317 | ||
|   | 71ea844c0e | ||
|   | 3a7256697e | ||
|   | d1ba998274 | ||
|   | 718ced8d8c | ||
|   | e1842025d0 | ||
|   | 2b9213cdc1 | ||
|   | e3a88568b0 | ||
|   | 0577177e3e | ||
|   | 298f833b16 | ||
|   | 97b3656c2e | ||
|   | f3bcebb1d2 | ||
|   | 0f399e6e5e | ||
|   | 5b075e27cb | ||
|   | 8a9d86a2a7 | ||
|   | d80a064eff | ||
|   | d468a09789 | ||
|   | 9f4ab73d7f | ||
|   | 02cf62e240 | ||
|   | d55de6eec2 | ||
|   | 69df680b97 | ||
|   | 447591e1ae | ||
|   | 33eb0ce4c4 | ||
|   | 505c28aac9 | ||
|   | 67fb0c5495 | ||
|   | 4efba05c56 | ||
|   | 8377574c9c | ||
|   | 0f90943e45 | ||
|   | 526e638c8a | ||
|   | 372297e713 | ||
|   | 356e067390 | ||
|   | e2f48f9643 | ||
|   | b513a251f8 | ||
|   | 953e32b2c1 | ||
|   | 5898e28272 | ||
|   | 67dfbc0cb9 | ||
|   | 36cb11f068 | ||
|   | 7a4c6cc92f | ||
|   | 7edcb8f39c | ||
|   | d5b00ee6e0 | ||
|   | 461cead4f7 | ||
|   | b5a6d40818 | ||
|   | 968b5e0112 | ||
|   | 39b782b390 | ||
|   | 577664c8e8 | ||
|   | bba12cec89 | ||
|   | 70c4c03cb8 | ||
|   | f5791ed136 | ||
|   | 4ec929dc9b | ||
|   | fbf189a6ee | ||
|   | 09825cb5c0 | ||
|   | ed27d35674 | ||
|   | fd5539eb41 | ||
|   | 04bca64bde | ||
|   | 03cc7c20c1 | ||
|   | 4075311d94 | ||
|   | 6624a2b07d | ||
|   | 6d3a7d03e1 | ||
|   | 95fdc7d69c | ||
|   | 86fe61c8f9 | ||
|   | 9bb6d2f21d | ||
|   | e3f4593e76 | ||
|   | 1d043b93cf | ||
|   | b15d4f624f | ||
|   | 4aa16a50f5 | ||
|   | bbcbf4d459 | ||
|   | 930ad9eecc | ||
|   | b072a9defd | ||
|   | 75952c6e3d | ||
|   | 05afc96b73 | ||
|   | fa80026915 | ||
|   | 2bc3de0f28 | ||
|   | 99c7bc94af | ||
|   | 152c8f349d | ||
|   | d75654c15e | ||
|   | 0725f584e1 | ||
|   | 8cda9241d1 | ||
|   | a3124ba49f | ||
|   | 579e2691fe | ||
|   | 63f05de10b | ||
|   | caeefc29eb | ||
|   | a3c736def2 | ||
|   | 58261235f0 | ||
|   | da70877a1b | ||
|   | 5c468ca8a8 | ||
|   | aedd6bb97d | ||
|   | 733d9cacb8 | ||
|   | 42f2805e48 | ||
|   | 0ffcb7c6fc | ||
|   | 27669bd11d | ||
|   | 6625f82940 | ||
|   | d0866f0bb4 | ||
|   | 09eeb75130 | ||
|   | 0a99956f71 | ||
|   | 12ef6aefa8 | ||
|   | e93aa81aa6 | ||
|   | 755eb0320e | ||
|   | 43ba5456b1 | ||
|   | 6804038d06 | ||
|   | 2f799533ae | ||
|   | 88ae5991cd | ||
|   | 5d51a883c2 | ||
|   | c4a91be726 | ||
|   | 56c7366547 | 
							
								
								
									
										10
									
								
								.gitignore
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										10
									
								
								.gitignore
									
									
									
									
										vendored
									
									
								
							| @@ -17,4 +17,12 @@ youtube-dl.tar.gz | ||||
| .coverage | ||||
| cover/ | ||||
| updates_key.pem | ||||
| *.egg-info | ||||
| *.egg-info | ||||
| *.srt | ||||
| *.sbv | ||||
| *.vtt | ||||
| *.flv | ||||
| *.mp4 | ||||
| *.part | ||||
| test/testdata | ||||
| .tox | ||||
|   | ||||
							
								
								
									
										37
									
								
								README.md
									
									
									
									
									
								
							
							
						
						
									
										37
									
								
								README.md
									
									
									
									
									
								
							| @@ -19,7 +19,8 @@ which means you can modify it, redistribute it or use it however you like. | ||||
|     -U, --update               update this program to latest version. Make sure | ||||
|                                that you have sufficient permissions (run with | ||||
|                                sudo if needed) | ||||
|     -i, --ignore-errors        continue on download errors | ||||
|     -i, --ignore-errors        continue on download errors, for example to | ||||
|                                skip unavailable videos in a playlist | ||||
|     --dump-user-agent          display the current browser identification | ||||
|     --user-agent UA            specify a custom user agent | ||||
|     --referer REF              specify a custom referer, use if the video access | ||||
| @@ -29,6 +30,11 @@ which means you can modify it, redistribute it or use it however you like. | ||||
|     --extractor-descriptions   Output descriptions of all supported extractors | ||||
|     --proxy URL                Use the specified HTTP/HTTPS proxy | ||||
|     --no-check-certificate     Suppress HTTPS certificate validation. | ||||
|     --cache-dir None           Location in the filesystem where youtube-dl can | ||||
|                                store downloaded information permanently. By | ||||
|                                default $XDG_CACHE_HOME/youtube-dl or ~/.cache | ||||
|                                /youtube-dl . | ||||
|     --no-cache-dir             Disable filesystem caching | ||||
|  | ||||
| ## Video Selection: | ||||
|     --playlist-start NUMBER    playlist video to start at (default is 1) | ||||
| @@ -45,6 +51,10 @@ which means you can modify it, redistribute it or use it however you like. | ||||
|     --date DATE                download only videos uploaded in this date | ||||
|     --datebefore DATE          download only videos uploaded before this date | ||||
|     --dateafter DATE           download only videos uploaded after this date | ||||
|     --no-playlist              download only the currently playing video | ||||
|     --age-limit YEARS          download only videos suitable for the given age | ||||
|     --download-archive FILE    Download only videos not present in the archive | ||||
|                                file. Record all downloaded videos in it. | ||||
|  | ||||
| ## Download Options: | ||||
|     -r, --rate-limit LIMIT     maximum download rate (e.g. 50k or 44.6m) | ||||
| @@ -113,25 +123,26 @@ which means you can modify it, redistribute it or use it however you like. | ||||
|  | ||||
| ## Video Format Options: | ||||
|     -f, --format FORMAT        video format code, specify the order of | ||||
|                                preference using slashes: "-f 22/17/18" | ||||
|                                preference using slashes: "-f 22/17/18". "-f mp4" | ||||
|                                and "-f flv" are also supported | ||||
|     --all-formats              download all available video formats | ||||
|     --prefer-free-formats      prefer free video formats unless a specific one | ||||
|                                is requested | ||||
|     --max-quality FORMAT       highest quality format to download | ||||
|     -F, --list-formats         list all available formats (currently youtube | ||||
|                                only) | ||||
|     --write-sub                write subtitle file (currently youtube only) | ||||
|     --write-auto-sub           write automatic subtitle file (currently youtube | ||||
|                                only) | ||||
|     --only-sub                 [deprecated] alias of --skip-download | ||||
|  | ||||
| ## Subtitle Options: | ||||
|     --write-sub                write subtitle file | ||||
|     --write-auto-sub           write automatic subtitle file (youtube only) | ||||
|     --all-subs                 downloads all the available subtitles of the | ||||
|                                video (currently youtube only) | ||||
|                                video | ||||
|     --list-subs                lists all available subtitles for the video | ||||
|                                (currently youtube only) | ||||
|     --sub-format FORMAT        subtitle format [srt/sbv/vtt] (default=srt) | ||||
|                                (currently youtube only) | ||||
|     --sub-lang LANG            language of the subtitles to download (optional) | ||||
|                                use IETF language tags like 'en' | ||||
|     --sub-format FORMAT        subtitle format (default=srt) ([sbv/vtt] youtube | ||||
|                                only) | ||||
|     --sub-lang LANGS           languages of the subtitles to download (optional) | ||||
|                                separated by commas, use IETF language tags like | ||||
|                                'en,pt' | ||||
|  | ||||
| ## Authentication Options: | ||||
|     -u, --username USERNAME    account username | ||||
| @@ -153,6 +164,8 @@ which means you can modify it, redistribute it or use it however you like. | ||||
|                                processing; the video is erased by default | ||||
|     --no-post-overwrites       do not overwrite post-processed files; the post- | ||||
|                                processed files are overwritten by default | ||||
|     --embed-subs               embed subtitles in the video (only for mp4 | ||||
|                                videos) | ||||
|  | ||||
| # CONFIGURATION | ||||
|  | ||||
|   | ||||
| @@ -4,8 +4,12 @@ __youtube-dl() | ||||
|     COMPREPLY=() | ||||
|     cur="${COMP_WORDS[COMP_CWORD]}" | ||||
|     opts="{{flags}}" | ||||
|     keywords=":ytfavorites :ytrecommended :ytsubscriptions :ytwatchlater" | ||||
|  | ||||
|     if [[ ${cur} == * ]] ; then | ||||
|     if [[ ${cur} =~ : ]]; then | ||||
|         COMPREPLY=( $(compgen -W "${keywords}" -- ${cur}) ) | ||||
|         return 0 | ||||
|     elif [[ ${cur} == * ]] ; then | ||||
|         COMPREPLY=( $(compgen -W "${opts}" -- ${cur}) ) | ||||
|         return 0 | ||||
|     fi | ||||
|   | ||||
							
								
								
									
										405
									
								
								devscripts/buildserver.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										405
									
								
								devscripts/buildserver.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,405 @@ | ||||
| #!/usr/bin/python3 | ||||
|  | ||||
| from http.server import HTTPServer, BaseHTTPRequestHandler | ||||
| from socketserver import ThreadingMixIn | ||||
| import argparse | ||||
| import ctypes | ||||
| import functools | ||||
| import sys | ||||
| import threading | ||||
| import traceback | ||||
| import os.path | ||||
|  | ||||
|  | ||||
| class BuildHTTPServer(ThreadingMixIn, HTTPServer): | ||||
|     allow_reuse_address = True | ||||
|  | ||||
|  | ||||
| advapi32 = ctypes.windll.advapi32 | ||||
|  | ||||
| SC_MANAGER_ALL_ACCESS = 0xf003f | ||||
| SC_MANAGER_CREATE_SERVICE = 0x02 | ||||
| SERVICE_WIN32_OWN_PROCESS = 0x10 | ||||
| SERVICE_AUTO_START = 0x2 | ||||
| SERVICE_ERROR_NORMAL = 0x1 | ||||
| DELETE = 0x00010000 | ||||
| SERVICE_STATUS_START_PENDING = 0x00000002 | ||||
| SERVICE_STATUS_RUNNING = 0x00000004 | ||||
| SERVICE_ACCEPT_STOP = 0x1 | ||||
|  | ||||
| SVCNAME = 'youtubedl_builder' | ||||
|  | ||||
| LPTSTR = ctypes.c_wchar_p | ||||
| START_CALLBACK = ctypes.WINFUNCTYPE(None, ctypes.c_int, ctypes.POINTER(LPTSTR)) | ||||
|  | ||||
|  | ||||
| class SERVICE_TABLE_ENTRY(ctypes.Structure): | ||||
|     _fields_ = [ | ||||
|         ('lpServiceName', LPTSTR), | ||||
|         ('lpServiceProc', START_CALLBACK) | ||||
|     ] | ||||
|  | ||||
|  | ||||
| HandlerEx = ctypes.WINFUNCTYPE( | ||||
|     ctypes.c_int,     # return | ||||
|     ctypes.c_int,     # dwControl | ||||
|     ctypes.c_int,     # dwEventType | ||||
|     ctypes.c_void_p,  # lpEventData, | ||||
|     ctypes.c_void_p,  # lpContext, | ||||
| ) | ||||
|  | ||||
|  | ||||
| def _ctypes_array(c_type, py_array): | ||||
|     ar = (c_type * len(py_array))() | ||||
|     ar[:] = py_array | ||||
|     return ar | ||||
|  | ||||
|  | ||||
| def win_OpenSCManager(): | ||||
|     res = advapi32.OpenSCManagerW(None, None, SC_MANAGER_ALL_ACCESS) | ||||
|     if not res: | ||||
|         raise Exception('Opening service manager failed - ' | ||||
|                         'are you running this as administrator?') | ||||
|     return res | ||||
|  | ||||
|  | ||||
| def win_install_service(service_name, cmdline): | ||||
|     manager = win_OpenSCManager() | ||||
|     try: | ||||
|         h = advapi32.CreateServiceW( | ||||
|             manager, service_name, None, | ||||
|             SC_MANAGER_CREATE_SERVICE, SERVICE_WIN32_OWN_PROCESS, | ||||
|             SERVICE_AUTO_START, SERVICE_ERROR_NORMAL, | ||||
|             cmdline, None, None, None, None, None) | ||||
|         if not h: | ||||
|             raise OSError('Service creation failed: %s' % ctypes.FormatError()) | ||||
|  | ||||
|         advapi32.CloseServiceHandle(h) | ||||
|     finally: | ||||
|         advapi32.CloseServiceHandle(manager) | ||||
|  | ||||
|  | ||||
| def win_uninstall_service(service_name): | ||||
|     manager = win_OpenSCManager() | ||||
|     try: | ||||
|         h = advapi32.OpenServiceW(manager, service_name, DELETE) | ||||
|         if not h: | ||||
|             raise OSError('Could not find service %s: %s' % ( | ||||
|                 service_name, ctypes.FormatError())) | ||||
|  | ||||
|         try: | ||||
|             if not advapi32.DeleteService(h): | ||||
|                 raise OSError('Deletion failed: %s' % ctypes.FormatError()) | ||||
|         finally: | ||||
|             advapi32.CloseServiceHandle(h) | ||||
|     finally: | ||||
|         advapi32.CloseServiceHandle(manager) | ||||
|  | ||||
|  | ||||
| def win_service_report_event(service_name, msg, is_error=True): | ||||
|     with open('C:/sshkeys/log', 'a', encoding='utf-8') as f: | ||||
|         f.write(msg + '\n') | ||||
|  | ||||
|     event_log = advapi32.RegisterEventSourceW(None, service_name) | ||||
|     if not event_log: | ||||
|         raise OSError('Could not report event: %s' % ctypes.FormatError()) | ||||
|  | ||||
|     try: | ||||
|         type_id = 0x0001 if is_error else 0x0004 | ||||
|         event_id = 0xc0000000 if is_error else 0x40000000 | ||||
|         lines = _ctypes_array(LPTSTR, [msg]) | ||||
|  | ||||
|         if not advapi32.ReportEventW( | ||||
|                 event_log, type_id, 0, event_id, None, len(lines), 0, | ||||
|                 lines, None): | ||||
|             raise OSError('Event reporting failed: %s' % ctypes.FormatError()) | ||||
|     finally: | ||||
|         advapi32.DeregisterEventSource(event_log) | ||||
|  | ||||
|  | ||||
| def win_service_handler(stop_event, *args): | ||||
|     try: | ||||
|         raise ValueError('Handler called with args ' + repr(args)) | ||||
|         TODO | ||||
|     except Exception as e: | ||||
|         tb = traceback.format_exc() | ||||
|         msg = str(e) + '\n' + tb | ||||
|         win_service_report_event(service_name, msg, is_error=True) | ||||
|         raise | ||||
|  | ||||
|  | ||||
| def win_service_set_status(handle, status_code): | ||||
|     svcStatus = SERVICE_STATUS() | ||||
|     svcStatus.dwServiceType = SERVICE_WIN32_OWN_PROCESS | ||||
|     svcStatus.dwCurrentState = status_code | ||||
|     svcStatus.dwControlsAccepted = SERVICE_ACCEPT_STOP | ||||
|  | ||||
|     svcStatus.dwServiceSpecificExitCode = 0 | ||||
|  | ||||
|     if not advapi32.SetServiceStatus(handle, ctypes.byref(svcStatus)): | ||||
|         raise OSError('SetServiceStatus failed: %r' % ctypes.FormatError()) | ||||
|  | ||||
|  | ||||
| def win_service_main(service_name, real_main, argc, argv_raw): | ||||
|     try: | ||||
|         #args = [argv_raw[i].value for i in range(argc)] | ||||
|         stop_event = threading.Event() | ||||
|         handler = HandlerEx(functools.partial(stop_event, win_service_handler)) | ||||
|         h = advapi32.RegisterServiceCtrlHandlerExW(service_name, handler, None) | ||||
|         if not h: | ||||
|             raise OSError('Handler registration failed: %s' % | ||||
|                           ctypes.FormatError()) | ||||
|  | ||||
|         TODO | ||||
|     except Exception as e: | ||||
|         tb = traceback.format_exc() | ||||
|         msg = str(e) + '\n' + tb | ||||
|         win_service_report_event(service_name, msg, is_error=True) | ||||
|         raise | ||||
|  | ||||
|  | ||||
| def win_service_start(service_name, real_main): | ||||
|     try: | ||||
|         cb = START_CALLBACK( | ||||
|             functools.partial(win_service_main, service_name, real_main)) | ||||
|         dispatch_table = _ctypes_array(SERVICE_TABLE_ENTRY, [ | ||||
|             SERVICE_TABLE_ENTRY( | ||||
|                 service_name, | ||||
|                 cb | ||||
|             ), | ||||
|             SERVICE_TABLE_ENTRY(None, ctypes.cast(None, START_CALLBACK)) | ||||
|         ]) | ||||
|  | ||||
|         if not advapi32.StartServiceCtrlDispatcherW(dispatch_table): | ||||
|             raise OSError('ctypes start failed: %s' % ctypes.FormatError()) | ||||
|     except Exception as e: | ||||
|         tb = traceback.format_exc() | ||||
|         msg = str(e) + '\n' + tb | ||||
|         win_service_report_event(service_name, msg, is_error=True) | ||||
|         raise | ||||
|  | ||||
|  | ||||
| def main(args=None): | ||||
|     parser = argparse.ArgumentParser() | ||||
|     parser.add_argument('-i', '--install', | ||||
|                         action='store_const', dest='action', const='install', | ||||
|                         help='Launch at Windows startup') | ||||
|     parser.add_argument('-u', '--uninstall', | ||||
|                         action='store_const', dest='action', const='uninstall', | ||||
|                         help='Remove Windows service') | ||||
|     parser.add_argument('-s', '--service', | ||||
|                         action='store_const', dest='action', const='service', | ||||
|                         help='Run as a Windows service') | ||||
|     parser.add_argument('-b', '--bind', metavar='<host:port>', | ||||
|                         action='store', default='localhost:8142', | ||||
|                         help='Bind to host:port (default %default)') | ||||
|     options = parser.parse_args(args=args) | ||||
|  | ||||
|     if options.action == 'install': | ||||
|         fn = os.path.abspath(__file__).replace('v:', '\\\\vboxsrv\\vbox') | ||||
|         cmdline = '%s %s -s -b %s' % (sys.executable, fn, options.bind) | ||||
|         win_install_service(SVCNAME, cmdline) | ||||
|         return | ||||
|  | ||||
|     if options.action == 'uninstall': | ||||
|         win_uninstall_service(SVCNAME) | ||||
|         return | ||||
|  | ||||
|     if options.action == 'service': | ||||
|         win_service_start(SVCNAME, main) | ||||
|         return | ||||
|  | ||||
|     host, port_str = options.bind.split(':') | ||||
|     port = int(port_str) | ||||
|  | ||||
|     print('Listening on %s:%d' % (host, port)) | ||||
|     srv = BuildHTTPServer((host, port), BuildHTTPRequestHandler) | ||||
|     thr = threading.Thread(target=srv.serve_forever) | ||||
|     thr.start() | ||||
|     input('Press ENTER to shut down') | ||||
|     srv.shutdown() | ||||
|     thr.join() | ||||
|  | ||||
|  | ||||
| def rmtree(path): | ||||
|     for name in os.listdir(path): | ||||
|         fname = os.path.join(path, name) | ||||
|         if os.path.isdir(fname): | ||||
|             rmtree(fname) | ||||
|         else: | ||||
|             os.chmod(fname, 0o666) | ||||
|             os.remove(fname) | ||||
|     os.rmdir(path) | ||||
|  | ||||
| #============================================================================== | ||||
|  | ||||
| class BuildError(Exception): | ||||
|     def __init__(self, output, code=500): | ||||
|         self.output = output | ||||
|         self.code = code | ||||
|  | ||||
|     def __str__(self): | ||||
|         return self.output | ||||
|  | ||||
|  | ||||
| class HTTPError(BuildError): | ||||
|     pass | ||||
|  | ||||
|  | ||||
| class PythonBuilder(object): | ||||
|     def __init__(self, **kwargs): | ||||
|         pythonVersion = kwargs.pop('python', '2.7') | ||||
|         try: | ||||
|             key = _winreg.OpenKey(_winreg.HKEY_LOCAL_MACHINE, r'SOFTWARE\Python\PythonCore\%s\InstallPath' % pythonVersion) | ||||
|             try: | ||||
|                 self.pythonPath, _ = _winreg.QueryValueEx(key, '') | ||||
|             finally: | ||||
|                 _winreg.CloseKey(key) | ||||
|         except Exception: | ||||
|             raise BuildError('No such Python version: %s' % pythonVersion) | ||||
|  | ||||
|         super(PythonBuilder, self).__init__(**kwargs) | ||||
|  | ||||
|  | ||||
| class GITInfoBuilder(object): | ||||
|     def __init__(self, **kwargs): | ||||
|         try: | ||||
|             self.user, self.repoName = kwargs['path'][:2] | ||||
|             self.rev = kwargs.pop('rev') | ||||
|         except ValueError: | ||||
|             raise BuildError('Invalid path') | ||||
|         except KeyError as e: | ||||
|             raise BuildError('Missing mandatory parameter "%s"' % e.args[0]) | ||||
|  | ||||
|         path = os.path.join(os.environ['APPDATA'], 'Build archive', self.repoName, self.user) | ||||
|         if not os.path.exists(path): | ||||
|             os.makedirs(path) | ||||
|         self.basePath = tempfile.mkdtemp(dir=path) | ||||
|         self.buildPath = os.path.join(self.basePath, 'build') | ||||
|  | ||||
|         super(GITInfoBuilder, self).__init__(**kwargs) | ||||
|  | ||||
|  | ||||
| class GITBuilder(GITInfoBuilder): | ||||
|     def build(self): | ||||
|         try: | ||||
|             subprocess.check_output(['git', 'clone', 'git://github.com/%s/%s.git' % (self.user, self.repoName), self.buildPath]) | ||||
|             subprocess.check_output(['git', 'checkout', self.rev], cwd=self.buildPath) | ||||
|         except subprocess.CalledProcessError as e: | ||||
|             raise BuildError(e.output) | ||||
|  | ||||
|         super(GITBuilder, self).build() | ||||
|  | ||||
|  | ||||
| class YoutubeDLBuilder(object): | ||||
|     authorizedUsers = ['fraca7', 'phihag', 'rg3', 'FiloSottile'] | ||||
|  | ||||
|     def __init__(self, **kwargs): | ||||
|         if self.repoName != 'youtube-dl': | ||||
|             raise BuildError('Invalid repository "%s"' % self.repoName) | ||||
|         if self.user not in self.authorizedUsers: | ||||
|             raise HTTPError('Unauthorized user "%s"' % self.user, 401) | ||||
|  | ||||
|         super(YoutubeDLBuilder, self).__init__(**kwargs) | ||||
|  | ||||
|     def build(self): | ||||
|         try: | ||||
|             subprocess.check_output([os.path.join(self.pythonPath, 'python.exe'), 'setup.py', 'py2exe'], | ||||
|                                     cwd=self.buildPath) | ||||
|         except subprocess.CalledProcessError as e: | ||||
|             raise BuildError(e.output) | ||||
|  | ||||
|         super(YoutubeDLBuilder, self).build() | ||||
|  | ||||
|  | ||||
| class DownloadBuilder(object): | ||||
|     def __init__(self, **kwargs): | ||||
|         self.handler = kwargs.pop('handler') | ||||
|         self.srcPath = os.path.join(self.buildPath, *tuple(kwargs['path'][2:])) | ||||
|         self.srcPath = os.path.abspath(os.path.normpath(self.srcPath)) | ||||
|         if not self.srcPath.startswith(self.buildPath): | ||||
|             raise HTTPError(self.srcPath, 401) | ||||
|  | ||||
|         super(DownloadBuilder, self).__init__(**kwargs) | ||||
|  | ||||
|     def build(self): | ||||
|         if not os.path.exists(self.srcPath): | ||||
|             raise HTTPError('No such file', 404) | ||||
|         if os.path.isdir(self.srcPath): | ||||
|             raise HTTPError('Is a directory: %s' % self.srcPath, 401) | ||||
|  | ||||
|         self.handler.send_response(200) | ||||
|         self.handler.send_header('Content-Type', 'application/octet-stream') | ||||
|         self.handler.send_header('Content-Disposition', 'attachment; filename=%s' % os.path.split(self.srcPath)[-1]) | ||||
|         self.handler.send_header('Content-Length', str(os.stat(self.srcPath).st_size)) | ||||
|         self.handler.end_headers() | ||||
|  | ||||
|         with open(self.srcPath, 'rb') as src: | ||||
|             shutil.copyfileobj(src, self.handler.wfile) | ||||
|  | ||||
|         super(DownloadBuilder, self).build() | ||||
|  | ||||
|  | ||||
| class CleanupTempDir(object): | ||||
|     def build(self): | ||||
|         try: | ||||
|             rmtree(self.basePath) | ||||
|         except Exception as e: | ||||
|             print('WARNING deleting "%s": %s' % (self.basePath, e)) | ||||
|  | ||||
|         super(CleanupTempDir, self).build() | ||||
|  | ||||
|  | ||||
| class Null(object): | ||||
|     def __init__(self, **kwargs): | ||||
|         pass | ||||
|  | ||||
|     def start(self): | ||||
|         pass | ||||
|  | ||||
|     def close(self): | ||||
|         pass | ||||
|  | ||||
|     def build(self): | ||||
|         pass | ||||
|  | ||||
|  | ||||
| class Builder(PythonBuilder, GITBuilder, YoutubeDLBuilder, DownloadBuilder, CleanupTempDir, Null): | ||||
|     pass | ||||
|  | ||||
|  | ||||
| class BuildHTTPRequestHandler(BaseHTTPRequestHandler): | ||||
|     actionDict = { 'build': Builder, 'download': Builder } # They're the same, no more caching. | ||||
|  | ||||
|     def do_GET(self): | ||||
|         path = urlparse.urlparse(self.path) | ||||
|         paramDict = dict([(key, value[0]) for key, value in urlparse.parse_qs(path.query).items()]) | ||||
|         action, _, path = path.path.strip('/').partition('/') | ||||
|         if path: | ||||
|             path = path.split('/') | ||||
|             if action in self.actionDict: | ||||
|                 try: | ||||
|                     builder = self.actionDict[action](path=path, handler=self, **paramDict) | ||||
|                     builder.start() | ||||
|                     try: | ||||
|                         builder.build() | ||||
|                     finally: | ||||
|                         builder.close() | ||||
|                 except BuildError as e: | ||||
|                     self.send_response(e.code) | ||||
|                     msg = unicode(e).encode('UTF-8') | ||||
|                     self.send_header('Content-Type', 'text/plain; charset=UTF-8') | ||||
|                     self.send_header('Content-Length', len(msg)) | ||||
|                     self.end_headers() | ||||
|                     self.wfile.write(msg) | ||||
|                 except HTTPError as e: | ||||
|                     self.send_response(e.code, str(e)) | ||||
|             else: | ||||
|                 self.send_response(500, 'Unknown build method "%s"' % action) | ||||
|         else: | ||||
|             self.send_response(500, 'Malformed URL') | ||||
|  | ||||
| #============================================================================== | ||||
|  | ||||
| if __name__ == '__main__': | ||||
|     main() | ||||
| @@ -3,31 +3,40 @@ | ||||
| import json | ||||
| import sys | ||||
| import hashlib | ||||
| import urllib.request | ||||
| import os.path | ||||
|  | ||||
|  | ||||
| if len(sys.argv) <= 1: | ||||
| 	print('Specify the version number as parameter') | ||||
| 	sys.exit() | ||||
|     print('Specify the version number as parameter') | ||||
|     sys.exit() | ||||
| version = sys.argv[1] | ||||
|  | ||||
| with open('update/LATEST_VERSION', 'w') as f: | ||||
| 	f.write(version) | ||||
|     f.write(version) | ||||
|  | ||||
| versions_info = json.load(open('update/versions.json')) | ||||
| if 'signature' in versions_info: | ||||
| 	del versions_info['signature'] | ||||
|     del versions_info['signature'] | ||||
|  | ||||
| new_version = {} | ||||
|  | ||||
| filenames = {'bin': 'youtube-dl', 'exe': 'youtube-dl.exe', 'tar': 'youtube-dl-%s.tar.gz' % version} | ||||
| filenames = { | ||||
|     'bin': 'youtube-dl', | ||||
|     'exe': 'youtube-dl.exe', | ||||
|     'tar': 'youtube-dl-%s.tar.gz' % version} | ||||
| build_dir = os.path.join('..', '..', 'build', version) | ||||
| for key, filename in filenames.items(): | ||||
| 	print('Downloading and checksumming %s...' %filename) | ||||
| 	url = 'http://youtube-dl.org/downloads/%s/%s' % (version, filename) | ||||
| 	data = urllib.request.urlopen(url).read() | ||||
| 	sha256sum = hashlib.sha256(data).hexdigest() | ||||
| 	new_version[key] = (url, sha256sum) | ||||
|     url = 'https://yt-dl.org/downloads/%s/%s' % (version, filename) | ||||
|     fn = os.path.join(build_dir, filename) | ||||
|     with open(fn, 'rb') as f: | ||||
|         data = f.read() | ||||
|     if not data: | ||||
|         raise ValueError('File %s is empty!' % fn) | ||||
|     sha256sum = hashlib.sha256(data).hexdigest() | ||||
|     new_version[key] = (url, sha256sum) | ||||
|  | ||||
| versions_info['versions'][version] = new_version | ||||
| versions_info['latest'] = version | ||||
|  | ||||
| json.dump(versions_info, open('update/versions.json', 'w'), indent=4, sort_keys=True) | ||||
| with open('update/versions.json', 'w') as jsonf: | ||||
|     json.dump(versions_info, jsonf, indent=4, sort_keys=True) | ||||
|   | ||||
| @@ -22,7 +22,7 @@ entry_template=textwrap.dedent(""" | ||||
| 									<atom:link href="http://rg3.github.io/youtube-dl" /> | ||||
| 									<atom:content type="xhtml"> | ||||
| 										<div xmlns="http://www.w3.org/1999/xhtml"> | ||||
| 											Downloads available at <a href="http://youtube-dl.org/downloads/@VERSION@/">http://youtube-dl.org/downloads/@VERSION@/</a> | ||||
| 											Downloads available at <a href="https://yt-dl.org/downloads/@VERSION@/">https://yt-dl.org/downloads/@VERSION@/</a> | ||||
| 										</div> | ||||
| 									</atom:content> | ||||
| 									<atom:author> | ||||
| @@ -54,4 +54,3 @@ atom_template = atom_template.replace('@ENTRIES@', entries_str) | ||||
| with open('update/releases.atom','w',encoding='utf-8') as atom_file: | ||||
| 	atom_file.write(atom_template) | ||||
|  | ||||
|  | ||||
|   | ||||
							
								
								
									
										33
									
								
								devscripts/gh-pages/update-sites.py
									
									
									
									
									
										Executable file
									
								
							
							
						
						
									
										33
									
								
								devscripts/gh-pages/update-sites.py
									
									
									
									
									
										Executable file
									
								
							| @@ -0,0 +1,33 @@ | ||||
| #!/usr/bin/env python3 | ||||
|  | ||||
| import sys | ||||
| import os | ||||
| import textwrap | ||||
|  | ||||
| # We must be able to import youtube_dl | ||||
| sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))) | ||||
|  | ||||
| import youtube_dl | ||||
|  | ||||
| def main(): | ||||
|     with open('supportedsites.html.in', 'r', encoding='utf-8') as tmplf: | ||||
|         template = tmplf.read() | ||||
|  | ||||
|     ie_htmls = [] | ||||
|     for ie in sorted(youtube_dl.gen_extractors(), key=lambda i: i.IE_NAME.lower()): | ||||
|         ie_html = '<b>{}</b>'.format(ie.IE_NAME) | ||||
|         try: | ||||
|             ie_html += ': {}'.format(ie.IE_DESC) | ||||
|         except AttributeError: | ||||
|             pass | ||||
|         if ie.working() == False: | ||||
|             ie_html += ' (Currently broken)' | ||||
|         ie_htmls.append('<li>{}</li>'.format(ie_html)) | ||||
|  | ||||
|     template = template.replace('@SITES@', textwrap.indent('\n'.join(ie_htmls), '\t')) | ||||
|  | ||||
|     with open('supportedsites.html', 'w', encoding='utf-8') as sitesf: | ||||
|         sitesf.write(template) | ||||
|  | ||||
| if __name__ == '__main__': | ||||
|     main() | ||||
| @@ -55,8 +55,8 @@ git push origin "$version" | ||||
| /bin/echo -e "\n### OK, now it is time to build the binaries..." | ||||
| REV=$(git rev-parse HEAD) | ||||
| make youtube-dl youtube-dl.tar.gz | ||||
| wget "http://jeromelaheurte.net:8142/download/rg3/youtube-dl/youtube-dl.exe?rev=$REV" -O youtube-dl.exe || \ | ||||
| 	wget "http://jeromelaheurte.net:8142/build/rg3/youtube-dl/youtube-dl.exe?rev=$REV" -O youtube-dl.exe | ||||
| read -p "VM running? (y/n) " -n 1 | ||||
| wget "http://localhost:8142/build/rg3/youtube-dl/youtube-dl.exe?rev=$REV" -O youtube-dl.exe | ||||
| mkdir -p "build/$version" | ||||
| mv youtube-dl youtube-dl.exe "build/$version" | ||||
| mv youtube-dl.tar.gz "build/$version/youtube-dl-$version.tar.gz" | ||||
| @@ -67,7 +67,7 @@ RELEASE_FILES="youtube-dl youtube-dl.exe youtube-dl-$version.tar.gz" | ||||
| (cd build/$version/ && sha512sum $RELEASE_FILES > SHA2-512SUMS) | ||||
| git checkout HEAD -- youtube-dl youtube-dl.exe | ||||
|  | ||||
| /bin/echo -e "\n### Signing and uploading the new binaries to youtube-dl.org..." | ||||
| /bin/echo -e "\n### Signing and uploading the new binaries to yt-dl.org ..." | ||||
| for f in $RELEASE_FILES; do gpg --detach-sig "build/$version/$f"; done | ||||
| scp -r "build/$version" ytdl@yt-dl.org:html/tmp/ | ||||
| ssh ytdl@yt-dl.org "mv html/tmp/$version html/downloads/" | ||||
| @@ -85,6 +85,7 @@ ROOT=$(pwd) | ||||
|     "$ROOT/devscripts/gh-pages/sign-versions.py" < "$ROOT/updates_key.pem" | ||||
|     "$ROOT/devscripts/gh-pages/generate-download.py" | ||||
|     "$ROOT/devscripts/gh-pages/update-copyright.py" | ||||
|     "$ROOT/devscripts/gh-pages/update-sites.py" | ||||
|     git add *.html *.html.in update | ||||
|     git commit -m "release $version" | ||||
|     git show HEAD | ||||
|   | ||||
| @@ -1,92 +0,0 @@ | ||||
| #!/usr/bin/env python | ||||
|  | ||||
| # Generate youtube signature algorithm from test cases | ||||
|  | ||||
| import sys | ||||
|  | ||||
| tests = [ | ||||
|     # 92 - vflQw-fB4 2013/07/17 | ||||
|     ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[]}|:;?/>.<'`~\"", | ||||
|      "mrtyuioplkjhgfdsazxcvbnq1234567890QWERTY}IOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[]\"|:;"), | ||||
|     # 90 | ||||
|     ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[]}|:;?/>.<'`", | ||||
|      "mrtyuioplkjhgfdsazxcvbne1234567890QWER[YUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={`]}|"), | ||||
|     # 88 | ||||
|     ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[]}|:;?/>.<", | ||||
|      "J:|}][{=+-_)(*&;%$#@>MNBVCXZASDFGH^KLPOIUYTREWQ0987654321mnbvcxzasdfghrklpoiuytej"), | ||||
|     # 87 - vflART1Nf 2013/07/24 | ||||
|     ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$^&*()_-+={[]}|:;?/>.<", | ||||
|      "tyuioplkjhgfdsazxcv<nm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$^&*()_-+={[]}|:;?/>"), | ||||
|     # 86 - vfl_ymO4Z 2013/06/27 | ||||
|     ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[|};?/>.<", | ||||
|      "ertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!/#$%^&*()_-+={[|};?@"), | ||||
|     # 85 - vflSAFCP9 2013/07/19 | ||||
|     ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[};?/>.<", | ||||
|      "ertyuiqplkjhgfdsazx$vbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#<%^&*()_-+={[};?/c"), | ||||
|     # 84 | ||||
|     ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[};?>.<", | ||||
|      "<.>?;}[{=+-_)(*&^%$#@!MNBVCXZASDFGHJKLPOIUYTREWe098765432rmnbvcxzasdfghjklpoiuyt1"), | ||||
|     # 83 - vflcaqGO8 2013/07/11 | ||||
|     ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!#$%^&*()_+={[};?/>.<", | ||||
|      "urty8ioplkjhgfdsazxcvbqm1234567S90QWERTYUIOPLKJHGFDnAZXCVBNM!#$%^&*()_+={[};?/>.<"), | ||||
|     # 82 | ||||
|     ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKHGFDSAZXCVBNM!@#$%^&*(-+={[};?/>.<", | ||||
|      "Q>/?;}[{=+-(*<^%$#@!MNBVCXZASDFGHKLPOIUY8REWT0q&7654321mnbvcxzasdfghjklpoiuytrew9"), | ||||
|     # 81 | ||||
|     ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKHGFDSAZXCVBNM!@#$%^&*(-+={[};?/>.", | ||||
|      "urty8ioplkjhgfdsazxcvbqm1234567e90QWERTYUIOPLKHGFDSnZXCVBNM!@#$%^&*(-+={[};?/>."), | ||||
| ] | ||||
|  | ||||
| def find_matching(wrong, right): | ||||
|     idxs = [wrong.index(c) for c in right] | ||||
|     return compress(idxs) | ||||
|     return ('s[%d]' % i for i in idxs) | ||||
|  | ||||
| def compress(idxs): | ||||
|     def _genslice(start, end, step): | ||||
|         starts = '' if start == 0 else str(start) | ||||
|         ends = ':%d' % (end+step) | ||||
|         steps = '' if step == 1 else (':%d' % step) | ||||
|         return 's[%s%s%s]' % (starts, ends, steps) | ||||
|  | ||||
|     step = None | ||||
|     for i, prev in zip(idxs[1:], idxs[:-1]): | ||||
|         if step is not None: | ||||
|             if i - prev == step: | ||||
|                 continue | ||||
|             yield _genslice(start, prev, step) | ||||
|             step = None | ||||
|             continue | ||||
|         if i - prev in [-1, 1]: | ||||
|             step = i - prev | ||||
|             start = prev | ||||
|             continue | ||||
|         else: | ||||
|             yield 's[%d]' % prev | ||||
|     if step is None: | ||||
|         yield 's[%d]' % i | ||||
|     else: | ||||
|         yield _genslice(start, i, step) | ||||
|  | ||||
| def _assert_compress(inp, exp): | ||||
|     res = list(compress(inp)) | ||||
|     if res != exp: | ||||
|         print('Got %r, expected %r' % (res, exp)) | ||||
|         assert res == exp | ||||
| _assert_compress([0,2,4,6], ['s[0]', 's[2]', 's[4]', 's[6]']) | ||||
| _assert_compress([0,1,2,4,6,7], ['s[:3]', 's[4]', 's[6:8]']) | ||||
| _assert_compress([8,0,1,2,4,7,6,9], ['s[8]', 's[:3]', 's[4]', 's[7:5:-1]', 's[9]']) | ||||
|  | ||||
| def gen(wrong, right, indent): | ||||
|     code = ' + '.join(find_matching(wrong, right)) | ||||
|     return 'if len(s) == %d:\n%s    return %s\n' % (len(wrong), indent, code) | ||||
|  | ||||
| def genall(tests): | ||||
|     indent = ' ' * 8 | ||||
|     return indent + (indent + 'el').join(gen(wrong, right, indent) for wrong,right in tests) | ||||
|  | ||||
| def main(): | ||||
|     print(genall(tests)) | ||||
|  | ||||
| if __name__ == '__main__': | ||||
|     main() | ||||
| @@ -1,6 +1,9 @@ | ||||
| import errno | ||||
| import io | ||||
| import json | ||||
| import os.path | ||||
| import re | ||||
| import types | ||||
|  | ||||
| import youtube_dl.extractor | ||||
| from youtube_dl import YoutubeDL, YoutubeDLHandler | ||||
| @@ -20,19 +23,41 @@ PARAMETERS_FILE = os.path.join(os.path.dirname(os.path.abspath(__file__)), "para | ||||
| with io.open(PARAMETERS_FILE, encoding='utf-8') as pf: | ||||
|     parameters = json.load(pf) | ||||
|  | ||||
|  | ||||
| def try_rm(filename): | ||||
|     """ Remove a file if it exists """ | ||||
|     try: | ||||
|         os.remove(filename) | ||||
|     except OSError as ose: | ||||
|         if ose.errno != errno.ENOENT: | ||||
|             raise | ||||
|  | ||||
|  | ||||
| class FakeYDL(YoutubeDL): | ||||
|     def __init__(self): | ||||
|         self.result = [] | ||||
|         # Different instances of the downloader can't share the same dictionary | ||||
|         # some test set the "sublang" parameter, which would break the md5 checks. | ||||
|         self.params = dict(parameters) | ||||
|     def to_screen(self, s): | ||||
|         params = dict(parameters) | ||||
|         super(FakeYDL, self).__init__(params) | ||||
|         self.result = [] | ||||
|          | ||||
|     def to_screen(self, s, skip_eol=None): | ||||
|         print(s) | ||||
|  | ||||
|     def trouble(self, s, tb=None): | ||||
|         raise Exception(s) | ||||
|  | ||||
|     def download(self, x): | ||||
|         self.result.append(x) | ||||
|  | ||||
|     def expect_warning(self, regex): | ||||
|         # Silence an expected warning matching a regex | ||||
|         old_report_warning = self.report_warning | ||||
|         def report_warning(self, message): | ||||
|             if re.match(regex, message): return | ||||
|             old_report_warning(message) | ||||
|         self.report_warning = types.MethodType(report_warning, self) | ||||
|  | ||||
| def get_testcases(): | ||||
|     for ie in youtube_dl.extractor.gen_extractors(): | ||||
|         t = getattr(ie, '_TEST', None) | ||||
|   | ||||
| @@ -38,7 +38,6 @@ | ||||
|     "writedescription": false,  | ||||
|     "writeinfojson": true,  | ||||
|     "writesubtitles": false, | ||||
|     "onlysubtitles": false, | ||||
|     "allsubtitles": false, | ||||
|     "listssubtitles": false | ||||
| } | ||||
|   | ||||
							
								
								
									
										53
									
								
								test/test_age_restriction.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										53
									
								
								test/test_age_restriction.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,53 @@ | ||||
| #!/usr/bin/env python | ||||
|  | ||||
| import sys | ||||
| import unittest | ||||
|  | ||||
| # Allow direct execution | ||||
| import os | ||||
| sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) | ||||
|  | ||||
| from youtube_dl import YoutubeDL | ||||
| from helper import try_rm | ||||
|  | ||||
|  | ||||
| def _download_restricted(url, filename, age): | ||||
|     """ Returns true iff the file has been downloaded """ | ||||
|  | ||||
|     params = { | ||||
|         'age_limit': age, | ||||
|         'skip_download': True, | ||||
|         'writeinfojson': True, | ||||
|         "outtmpl": "%(id)s.%(ext)s", | ||||
|     } | ||||
|     ydl = YoutubeDL(params) | ||||
|     ydl.add_default_info_extractors() | ||||
|     json_filename = filename + '.info.json' | ||||
|     try_rm(json_filename) | ||||
|     ydl.download([url]) | ||||
|     res = os.path.exists(json_filename) | ||||
|     try_rm(json_filename) | ||||
|     return res | ||||
|  | ||||
|  | ||||
| class TestAgeRestriction(unittest.TestCase): | ||||
|     def _assert_restricted(self, url, filename, age, old_age=None): | ||||
|         self.assertTrue(_download_restricted(url, filename, old_age)) | ||||
|         self.assertFalse(_download_restricted(url, filename, age)) | ||||
|  | ||||
|     def test_youtube(self): | ||||
|         self._assert_restricted('07FYdnEawAQ', '07FYdnEawAQ.mp4', 10) | ||||
|  | ||||
|     def test_youporn(self): | ||||
|         self._assert_restricted( | ||||
|             'http://www.youporn.com/watch/505835/sex-ed-is-it-safe-to-masturbate-daily/', | ||||
|             '505835.mp4', 2, old_age=25) | ||||
|  | ||||
|     def test_pornotube(self): | ||||
|         self._assert_restricted( | ||||
|             'http://pornotube.com/c/173/m/1689755/Marilyn-Monroe-Bathing', | ||||
|             '1689755.flv', 13) | ||||
|  | ||||
|  | ||||
| if __name__ == '__main__': | ||||
|     unittest.main() | ||||
| @@ -11,24 +11,50 @@ from youtube_dl.extractor import YoutubeIE, YoutubePlaylistIE, YoutubeChannelIE, | ||||
| from helper import get_testcases | ||||
|  | ||||
| class TestAllURLsMatching(unittest.TestCase): | ||||
|     def setUp(self): | ||||
|         self.ies = gen_extractors() | ||||
|  | ||||
|     def matching_ies(self, url): | ||||
|         return [ie.IE_NAME for ie in self.ies if ie.suitable(url) and ie.IE_NAME != 'generic'] | ||||
|  | ||||
|     def assertMatch(self, url, ie_list): | ||||
|         self.assertEqual(self.matching_ies(url), ie_list) | ||||
|  | ||||
|     def test_youtube_playlist_matching(self): | ||||
|         self.assertTrue(YoutubePlaylistIE.suitable(u'ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8')) | ||||
|         self.assertTrue(YoutubePlaylistIE.suitable(u'UUBABnxM4Ar9ten8Mdjj1j0Q')) #585 | ||||
|         self.assertTrue(YoutubePlaylistIE.suitable(u'PL63F0C78739B09958')) | ||||
|         self.assertTrue(YoutubePlaylistIE.suitable(u'https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q')) | ||||
|         self.assertTrue(YoutubePlaylistIE.suitable(u'https://www.youtube.com/course?list=ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8')) | ||||
|         self.assertTrue(YoutubePlaylistIE.suitable(u'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC')) | ||||
|         self.assertTrue(YoutubePlaylistIE.suitable(u'https://www.youtube.com/watch?v=AV6J6_AeFEQ&playnext=1&list=PL4023E734DA416012')) #668 | ||||
|         self.assertFalse(YoutubePlaylistIE.suitable(u'PLtS2H6bU1M')) | ||||
|         assertPlaylist = lambda url: self.assertMatch(url, ['youtube:playlist']) | ||||
|         assertPlaylist(u'ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8') | ||||
|         assertPlaylist(u'UUBABnxM4Ar9ten8Mdjj1j0Q') #585 | ||||
|         assertPlaylist(u'PL63F0C78739B09958') | ||||
|         assertPlaylist(u'https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q') | ||||
|         assertPlaylist(u'https://www.youtube.com/course?list=ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8') | ||||
|         assertPlaylist(u'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC') | ||||
|         assertPlaylist(u'https://www.youtube.com/watch?v=AV6J6_AeFEQ&playnext=1&list=PL4023E734DA416012') #668 | ||||
|         self.assertFalse('youtube:playlist' in self.matching_ies(u'PLtS2H6bU1M')) | ||||
|  | ||||
|     def test_youtube_matching(self): | ||||
|         self.assertTrue(YoutubeIE.suitable(u'PLtS2H6bU1M')) | ||||
|         self.assertFalse(YoutubeIE.suitable(u'https://www.youtube.com/watch?v=AV6J6_AeFEQ&playnext=1&list=PL4023E734DA416012')) #668 | ||||
|         self.assertMatch('http://youtu.be/BaW_jenozKc', ['youtube']) | ||||
|         self.assertMatch('http://www.youtube.com/v/BaW_jenozKc', ['youtube']) | ||||
|         self.assertMatch('https://youtube.googleapis.com/v/BaW_jenozKc', ['youtube']) | ||||
|  | ||||
|     def test_youtube_channel_matching(self): | ||||
|         self.assertTrue(YoutubeChannelIE.suitable('https://www.youtube.com/channel/HCtnHdj3df7iM')) | ||||
|         self.assertTrue(YoutubeChannelIE.suitable('https://www.youtube.com/channel/HCtnHdj3df7iM?feature=gb_ch_rec')) | ||||
|         self.assertTrue(YoutubeChannelIE.suitable('https://www.youtube.com/channel/HCtnHdj3df7iM/videos')) | ||||
|         assertChannel = lambda url: self.assertMatch(url, ['youtube:channel']) | ||||
|         assertChannel('https://www.youtube.com/channel/HCtnHdj3df7iM') | ||||
|         assertChannel('https://www.youtube.com/channel/HCtnHdj3df7iM?feature=gb_ch_rec') | ||||
|         assertChannel('https://www.youtube.com/channel/HCtnHdj3df7iM/videos') | ||||
|  | ||||
|     def test_youtube_user_matching(self): | ||||
|         self.assertMatch('www.youtube.com/NASAgovVideo/videos', ['youtube:user']) | ||||
|  | ||||
|     def test_youtube_feeds(self): | ||||
|         self.assertMatch('https://www.youtube.com/feed/watch_later', ['youtube:watch_later']) | ||||
|         self.assertMatch('https://www.youtube.com/feed/subscriptions', ['youtube:subscriptions']) | ||||
|         self.assertMatch('https://www.youtube.com/feed/recommended', ['youtube:recommended']) | ||||
|         self.assertMatch('https://www.youtube.com/my_favorites', ['youtube:favorites']) | ||||
|  | ||||
|     def test_youtube_show_matching(self): | ||||
|         self.assertMatch('http://www.youtube.com/show/airdisasters', ['youtube:show']) | ||||
|  | ||||
|     def test_justin_tv_channelid_matching(self): | ||||
|         self.assertTrue(JustinTVIE.suitable(u"justin.tv/vanillatv")) | ||||
| @@ -47,9 +73,13 @@ class TestAllURLsMatching(unittest.TestCase): | ||||
|         self.assertTrue(JustinTVIE.suitable(u"http://www.twitch.tv/tsm_theoddone/c/2349361")) | ||||
|  | ||||
|     def test_youtube_extract(self): | ||||
|         self.assertEqual(YoutubeIE()._extract_id('http://www.youtube.com/watch?&v=BaW_jenozKc'), 'BaW_jenozKc') | ||||
|         self.assertEqual(YoutubeIE()._extract_id('https://www.youtube.com/watch?&v=BaW_jenozKc'), 'BaW_jenozKc') | ||||
|         self.assertEqual(YoutubeIE()._extract_id('https://www.youtube.com/watch?feature=player_embedded&v=BaW_jenozKc'), 'BaW_jenozKc') | ||||
|         assertExtractId = lambda url, id: self.assertEqual(YoutubeIE()._extract_id(url), id) | ||||
|         assertExtractId('http://www.youtube.com/watch?&v=BaW_jenozKc', 'BaW_jenozKc') | ||||
|         assertExtractId('https://www.youtube.com/watch?&v=BaW_jenozKc', 'BaW_jenozKc') | ||||
|         assertExtractId('https://www.youtube.com/watch?feature=player_embedded&v=BaW_jenozKc', 'BaW_jenozKc') | ||||
|         assertExtractId('https://www.youtube.com/watch_popup?v=BaW_jenozKc', 'BaW_jenozKc') | ||||
|         assertExtractId('http://www.youtube.com/watch?v=BaW_jenozKcsharePLED17F32AD9753930', 'BaW_jenozKc') | ||||
|         assertExtractId('BaW_jenozKc', 'BaW_jenozKc') | ||||
|  | ||||
|     def test_no_duplicates(self): | ||||
|         ies = gen_extractors() | ||||
| @@ -62,15 +92,12 @@ class TestAllURLsMatching(unittest.TestCase): | ||||
|                     self.assertFalse(ie.suitable(url), '%s should not match URL %r' % (type(ie).__name__, url)) | ||||
|  | ||||
|     def test_keywords(self): | ||||
|         ies = gen_extractors() | ||||
|         matching_ies = lambda url: [ie.IE_NAME for ie in ies | ||||
|                                     if ie.suitable(url) and ie.IE_NAME != 'generic'] | ||||
|         self.assertEqual(matching_ies(':ytsubs'), ['youtube:subscriptions']) | ||||
|         self.assertEqual(matching_ies(':ytsubscriptions'), ['youtube:subscriptions']) | ||||
|         self.assertEqual(matching_ies(':thedailyshow'), ['ComedyCentral']) | ||||
|         self.assertEqual(matching_ies(':tds'), ['ComedyCentral']) | ||||
|         self.assertEqual(matching_ies(':colbertreport'), ['ComedyCentral']) | ||||
|         self.assertEqual(matching_ies(':cr'), ['ComedyCentral']) | ||||
|         self.assertMatch(':ytsubs', ['youtube:subscriptions']) | ||||
|         self.assertMatch(':ytsubscriptions', ['youtube:subscriptions']) | ||||
|         self.assertMatch(':thedailyshow', ['ComedyCentral']) | ||||
|         self.assertMatch(':tds', ['ComedyCentral']) | ||||
|         self.assertMatch(':colbertreport', ['ComedyCentral']) | ||||
|         self.assertMatch(':cr', ['ComedyCentral']) | ||||
|  | ||||
|  | ||||
| if __name__ == '__main__': | ||||
|   | ||||
							
								
								
									
										72
									
								
								test/test_dailymotion_subtitles.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										72
									
								
								test/test_dailymotion_subtitles.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,72 @@ | ||||
| #!/usr/bin/env python | ||||
|  | ||||
| import sys | ||||
| import unittest | ||||
| import hashlib | ||||
|  | ||||
| # Allow direct execution | ||||
| import os | ||||
| sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) | ||||
|  | ||||
| from youtube_dl.extractor import DailymotionIE | ||||
| from youtube_dl.utils import * | ||||
| from helper import FakeYDL | ||||
|  | ||||
| md5 = lambda s: hashlib.md5(s.encode('utf-8')).hexdigest() | ||||
|  | ||||
| class TestDailymotionSubtitles(unittest.TestCase): | ||||
|     def setUp(self): | ||||
|         self.DL = FakeYDL() | ||||
|         self.url = 'http://www.dailymotion.com/video/xczg00' | ||||
|     def getInfoDict(self): | ||||
|         IE = DailymotionIE(self.DL) | ||||
|         info_dict = IE.extract(self.url) | ||||
|         return info_dict | ||||
|     def getSubtitles(self): | ||||
|         info_dict = self.getInfoDict() | ||||
|         return info_dict[0]['subtitles'] | ||||
|     def test_no_writesubtitles(self): | ||||
|         subtitles = self.getSubtitles() | ||||
|         self.assertEqual(subtitles, None) | ||||
|     def test_subtitles(self): | ||||
|         self.DL.params['writesubtitles'] = True | ||||
|         subtitles = self.getSubtitles() | ||||
|         self.assertEqual(md5(subtitles['en']), '976553874490cba125086bbfea3ff76f') | ||||
|     def test_subtitles_lang(self): | ||||
|         self.DL.params['writesubtitles'] = True | ||||
|         self.DL.params['subtitleslangs'] = ['fr'] | ||||
|         subtitles = self.getSubtitles() | ||||
|         self.assertEqual(md5(subtitles['fr']), '594564ec7d588942e384e920e5341792') | ||||
|     def test_allsubtitles(self): | ||||
|         self.DL.params['writesubtitles'] = True | ||||
|         self.DL.params['allsubtitles'] = True | ||||
|         subtitles = self.getSubtitles() | ||||
|         self.assertEqual(len(subtitles.keys()), 5) | ||||
|     def test_list_subtitles(self): | ||||
|         self.DL.expect_warning(u'Automatic Captions not supported by this server') | ||||
|         self.DL.params['listsubtitles'] = True | ||||
|         info_dict = self.getInfoDict() | ||||
|         self.assertEqual(info_dict, None) | ||||
|     def test_automatic_captions(self): | ||||
|         self.DL.expect_warning(u'Automatic Captions not supported by this server') | ||||
|         self.DL.params['writeautomaticsub'] = True | ||||
|         self.DL.params['subtitleslang'] = ['en'] | ||||
|         subtitles = self.getSubtitles() | ||||
|         self.assertTrue(len(subtitles.keys()) == 0) | ||||
|     def test_nosubtitles(self): | ||||
|         self.DL.expect_warning(u'video doesn\'t have subtitles') | ||||
|         self.url = 'http://www.dailymotion.com/video/x12u166_le-zapping-tele-star-du-08-aout-2013_tv' | ||||
|         self.DL.params['writesubtitles'] = True | ||||
|         self.DL.params['allsubtitles'] = True | ||||
|         subtitles = self.getSubtitles() | ||||
|         self.assertEqual(len(subtitles), 0) | ||||
|     def test_multiple_langs(self): | ||||
|         self.DL.params['writesubtitles'] = True | ||||
|         langs = ['es', 'fr', 'de'] | ||||
|         self.DL.params['subtitleslangs'] = langs | ||||
|         subtitles = self.getSubtitles() | ||||
|         for lang in langs: | ||||
|             self.assertTrue(subtitles.get(lang) is not None, u'Subtitles for \'%s\' not extracted' % lang) | ||||
|  | ||||
| if __name__ == '__main__': | ||||
|     unittest.main() | ||||
| @@ -1,6 +1,5 @@ | ||||
| #!/usr/bin/env python | ||||
|  | ||||
| import errno | ||||
| import hashlib | ||||
| import io | ||||
| import os | ||||
| @@ -28,14 +27,6 @@ opener = compat_urllib_request.build_opener(proxy_handler, cookie_processor, You | ||||
| compat_urllib_request.install_opener(opener) | ||||
| socket.setdefaulttimeout(10) | ||||
|  | ||||
| def _try_rm(filename): | ||||
|     """ Remove a file if it exists """ | ||||
|     try: | ||||
|         os.remove(filename) | ||||
|     except OSError as ose: | ||||
|         if ose.errno != errno.ENOENT: | ||||
|             raise | ||||
|  | ||||
| md5 = lambda s: hashlib.md5(s.encode('utf-8')).hexdigest() | ||||
|  | ||||
| class YoutubeDL(youtube_dl.YoutubeDL): | ||||
| @@ -54,7 +45,7 @@ def _file_md5(fn): | ||||
|     with open(fn, 'rb') as f: | ||||
|         return hashlib.md5(f.read()).hexdigest() | ||||
|  | ||||
| from helper import get_testcases | ||||
| from helper import get_testcases, try_rm | ||||
| defs = get_testcases() | ||||
|  | ||||
| with io.open(PARAMETERS_FILE, encoding='utf-8') as pf: | ||||
| @@ -97,9 +88,9 @@ def generator(test_case): | ||||
|  | ||||
|         test_cases = test_case.get('playlist', [test_case]) | ||||
|         for tc in test_cases: | ||||
|             _try_rm(tc['file']) | ||||
|             _try_rm(tc['file'] + '.part') | ||||
|             _try_rm(tc['file'] + '.info.json') | ||||
|             try_rm(tc['file']) | ||||
|             try_rm(tc['file'] + '.part') | ||||
|             try_rm(tc['file'] + '.info.json') | ||||
|         try: | ||||
|             for retry in range(1, RETRIES + 1): | ||||
|                 try: | ||||
| @@ -127,12 +118,11 @@ def generator(test_case): | ||||
|                     info_dict = json.load(infof) | ||||
|                 for (info_field, expected) in tc.get('info_dict', {}).items(): | ||||
|                     if isinstance(expected, compat_str) and expected.startswith('md5:'): | ||||
|                         self.assertEqual(expected, 'md5:' + md5(info_dict.get(info_field))) | ||||
|                         got = 'md5:' + md5(info_dict.get(info_field)) | ||||
|                     else: | ||||
|                         got = info_dict.get(info_field) | ||||
|                         self.assertEqual( | ||||
|                             expected, got, | ||||
|                             u'invalid value for field %s, expected %r, got %r' % (info_field, expected, got)) | ||||
|                     self.assertEqual(expected, got, | ||||
|                         u'invalid value for field %s, expected %r, got %r' % (info_field, expected, got)) | ||||
|  | ||||
|                 # If checkable fields are missing from the test case, print the info_dict | ||||
|                 test_info_dict = dict((key, value if not isinstance(value, compat_str) or len(value) < 250 else 'md5:' + md5(value)) | ||||
| @@ -146,9 +136,9 @@ def generator(test_case): | ||||
|                     self.assertTrue(key in info_dict.keys() and info_dict[key]) | ||||
|         finally: | ||||
|             for tc in test_cases: | ||||
|                 _try_rm(tc['file']) | ||||
|                 _try_rm(tc['file'] + '.part') | ||||
|                 _try_rm(tc['file'] + '.info.json') | ||||
|                 try_rm(tc['file']) | ||||
|                 try_rm(tc['file'] + '.part') | ||||
|                 try_rm(tc['file'] + '.info.json') | ||||
|  | ||||
|     return test_template | ||||
|  | ||||
|   | ||||
							
								
								
									
										78
									
								
								test/test_playlists.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										78
									
								
								test/test_playlists.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,78 @@ | ||||
| #!/usr/bin/env python | ||||
| # encoding: utf-8 | ||||
|  | ||||
| import sys | ||||
| import unittest | ||||
| import json | ||||
|  | ||||
| # Allow direct execution | ||||
| import os | ||||
| sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) | ||||
|  | ||||
| from youtube_dl.extractor import ( | ||||
|     DailymotionPlaylistIE, | ||||
|     DailymotionUserIE, | ||||
|     VimeoChannelIE, | ||||
|     UstreamChannelIE, | ||||
|     SoundcloudUserIE, | ||||
|     LivestreamIE, | ||||
| ) | ||||
| from youtube_dl.utils import * | ||||
|  | ||||
| from helper import FakeYDL | ||||
|  | ||||
| class TestPlaylists(unittest.TestCase): | ||||
|     def assertIsPlaylist(self, info): | ||||
|         """Make sure the info has '_type' set to 'playlist'""" | ||||
|         self.assertEqual(info['_type'], 'playlist') | ||||
|  | ||||
|     def test_dailymotion_playlist(self): | ||||
|         dl = FakeYDL() | ||||
|         ie = DailymotionPlaylistIE(dl) | ||||
|         result = ie.extract('http://www.dailymotion.com/playlist/xv4bw_nqtv_sport/1#video=xl8v3q') | ||||
|         self.assertIsPlaylist(result) | ||||
|         self.assertEqual(result['title'], u'SPORT') | ||||
|         self.assertTrue(len(result['entries']) > 20) | ||||
|  | ||||
|     def test_dailymotion_user(self): | ||||
|         dl = FakeYDL() | ||||
|         ie = DailymotionUserIE(dl) | ||||
|         result = ie.extract('http://www.dailymotion.com/user/generation-quoi/') | ||||
|         self.assertIsPlaylist(result) | ||||
|         self.assertEqual(result['title'], u'Génération Quoi') | ||||
|         self.assertTrue(len(result['entries']) >= 26) | ||||
|  | ||||
|     def test_vimeo_channel(self): | ||||
|         dl = FakeYDL() | ||||
|         ie = VimeoChannelIE(dl) | ||||
|         result = ie.extract('http://vimeo.com/channels/tributes') | ||||
|         self.assertIsPlaylist(result) | ||||
|         self.assertEqual(result['title'], u'Vimeo Tributes') | ||||
|         self.assertTrue(len(result['entries']) > 24) | ||||
|  | ||||
|     def test_ustream_channel(self): | ||||
|         dl = FakeYDL() | ||||
|         ie = UstreamChannelIE(dl) | ||||
|         result = ie.extract('http://www.ustream.tv/channel/young-americans-for-liberty') | ||||
|         self.assertIsPlaylist(result) | ||||
|         self.assertEqual(result['id'], u'5124905') | ||||
|         self.assertTrue(len(result['entries']) >= 11) | ||||
|  | ||||
|     def test_soundcloud_user(self): | ||||
|         dl = FakeYDL() | ||||
|         ie = SoundcloudUserIE(dl) | ||||
|         result = ie.extract('https://soundcloud.com/the-concept-band') | ||||
|         self.assertIsPlaylist(result) | ||||
|         self.assertEqual(result['id'], u'9615865') | ||||
|         self.assertTrue(len(result['entries']) >= 12) | ||||
|  | ||||
|     def test_livestream_event(self): | ||||
|         dl = FakeYDL() | ||||
|         ie = LivestreamIE(dl) | ||||
|         result = ie.extract('http://new.livestream.com/tedx/cityenglish') | ||||
|         self.assertIsPlaylist(result) | ||||
|         self.assertEqual(result['title'], u'TEDCity2.0 (English)') | ||||
|         self.assertTrue(len(result['entries']) >= 4) | ||||
|  | ||||
| if __name__ == '__main__': | ||||
|     unittest.main() | ||||
| @@ -11,13 +11,16 @@ import os | ||||
| sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) | ||||
|  | ||||
| #from youtube_dl.utils import htmlentity_transform | ||||
| from youtube_dl.utils import timeconvert | ||||
| from youtube_dl.utils import sanitize_filename | ||||
| from youtube_dl.utils import unescapeHTML | ||||
| from youtube_dl.utils import orderedSet | ||||
| from youtube_dl.utils import DateRange | ||||
| from youtube_dl.utils import unified_strdate | ||||
| from youtube_dl.utils import find_xpath_attr | ||||
| from youtube_dl.utils import ( | ||||
|     timeconvert, | ||||
|     sanitize_filename, | ||||
|     unescapeHTML, | ||||
|     orderedSet, | ||||
|     DateRange, | ||||
|     unified_strdate, | ||||
|     find_xpath_attr, | ||||
|     get_meta_content, | ||||
| ) | ||||
|  | ||||
| if sys.version_info < (3, 0): | ||||
|     _compat_str = lambda b: b.decode('unicode-escape') | ||||
| @@ -127,5 +130,16 @@ class TestUtil(unittest.TestCase): | ||||
|         self.assertEqual(find_xpath_attr(doc, './/node', 'x', 'a'), doc[1]) | ||||
|         self.assertEqual(find_xpath_attr(doc, './/node', 'y', 'c'), doc[2]) | ||||
|  | ||||
|     def test_meta_parser(self): | ||||
|         testhtml = u''' | ||||
|         <head> | ||||
|             <meta name="description" content="foo & bar"> | ||||
|             <meta content='Plato' name='author'/> | ||||
|         </head> | ||||
|         ''' | ||||
|         get_meta = lambda name: get_meta_content(name, testhtml) | ||||
|         self.assertEqual(get_meta('description'), u'foo & bar') | ||||
|         self.assertEqual(get_meta('author'), 'Plato') | ||||
|  | ||||
| if __name__ == '__main__': | ||||
|     unittest.main() | ||||
|   | ||||
| @@ -27,6 +27,14 @@ class TestYoutubeLists(unittest.TestCase): | ||||
|         ytie_results = [YoutubeIE()._extract_id(url['url']) for url in result['entries']] | ||||
|         self.assertEqual(ytie_results, [ 'bV9L5Ht9LgY', 'FXxLjLQi3Fg', 'tU3Bgo5qJZE']) | ||||
|  | ||||
|     def test_youtube_playlist_noplaylist(self): | ||||
|         dl = FakeYDL() | ||||
|         dl.params['noplaylist'] = True | ||||
|         ie = YoutubePlaylistIE(dl) | ||||
|         result = ie.extract('https://www.youtube.com/watch?v=FXxLjLQi3Fg&list=PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re') | ||||
|         self.assertEqual(result['_type'], 'url') | ||||
|         self.assertEqual(YoutubeIE()._extract_id(result['url']), 'FXxLjLQi3Fg') | ||||
|  | ||||
|     def test_issue_673(self): | ||||
|         dl = FakeYDL() | ||||
|         ie = YoutubePlaylistIE(dl) | ||||
|   | ||||
| @@ -1,67 +0,0 @@ | ||||
| #!/usr/bin/env python | ||||
|  | ||||
| import unittest | ||||
| import sys | ||||
|  | ||||
| # Allow direct execution | ||||
| import os | ||||
| sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) | ||||
|  | ||||
| from youtube_dl.extractor.youtube import YoutubeIE | ||||
| from helper import FakeYDL | ||||
|  | ||||
| sig = YoutubeIE(FakeYDL())._decrypt_signature | ||||
|  | ||||
| class TestYoutubeSig(unittest.TestCase): | ||||
|     def test_92(self): | ||||
|         wrong = "F9F9B6E6FD47029957AB911A964CC20D95A181A5D37A2DBEFD67D403DB0E8BE4F4910053E4E8A79.0B70B.0B80B8" | ||||
|         right = "69B6E6FD47029957AB911A9F4CC20D95A181A5D3.A2DBEFD67D403DB0E8BE4F4910053E4E8A7980B7" | ||||
|         self.assertEqual(sig(wrong), right) | ||||
|  | ||||
|     def test_90(self): | ||||
|         wrong = "qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[]}|:;?/>.<'`" | ||||
|         right = "mrtyuioplkjhgfdsazxcvbne1234567890QWER[YUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={`]}|" | ||||
|         self.assertEqual(sig(wrong), right) | ||||
|  | ||||
|     def test_88(self): | ||||
|         wrong = "qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[]}|:;?/>.<" | ||||
|         right = "J:|}][{=+-_)(*&;%$#@>MNBVCXZASDFGH^KLPOIUYTREWQ0987654321mnbvcxzasdfghrklpoiuytej" | ||||
|         self.assertEqual(sig(wrong), right) | ||||
|  | ||||
|     def test_87(self): | ||||
|         wrong = "qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$^&*()_-+={[]}|:;?/>.<" | ||||
|         right = "tyuioplkjhgfdsazxcv<nm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$^&*()_-+={[]}|:;?/>" | ||||
|         self.assertEqual(sig(wrong), right) | ||||
|  | ||||
|     def test_86(self): | ||||
|         wrong = "qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[|};?/>.<" | ||||
|         right = "ertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!/#$%^&*()_-+={[|};?@" | ||||
|         self.assertEqual(sig(wrong), right) | ||||
|  | ||||
|     def test_85(self): | ||||
|         wrong = "qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[};?/>.<" | ||||
|         right = "ertyuiqplkjhgfdsazx$vbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#<%^&*()_-+={[};?/c" | ||||
|         self.assertEqual(sig(wrong), right) | ||||
|  | ||||
|     def test_84(self): | ||||
|         wrong = "qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[};?>.<" | ||||
|         right = "<.>?;}[{=+-_)(*&^%$#@!MNBVCXZASDFGHJKLPOIUYTREWe098765432rmnbvcxzasdfghjklpoiuyt1" | ||||
|         self.assertEqual(sig(wrong), right) | ||||
|  | ||||
|     def test_83(self): | ||||
|         wrong = "qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!#$%^&*()_+={[};?/>.<" | ||||
|         right = "urty8ioplkjhgfdsazxcvbqm1234567S90QWERTYUIOPLKJHGFDnAZXCVBNM!#$%^&*()_+={[};?/>.<" | ||||
|         self.assertEqual(sig(wrong), right) | ||||
|  | ||||
|     def test_82(self): | ||||
|         wrong = "qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKHGFDSAZXCVBNM!@#$%^&*(-+={[};?/>.<" | ||||
|         right = "Q>/?;}[{=+-(*<^%$#@!MNBVCXZASDFGHKLPOIUY8REWT0q&7654321mnbvcxzasdfghjklpoiuytrew9" | ||||
|         self.assertEqual(sig(wrong), right) | ||||
|  | ||||
|     def test_81(self): | ||||
|         wrong = "qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKHGFDSAZXCVBNM!@#$%^&*(-+={[};?/>." | ||||
|         right = "urty8ioplkjhgfdsazxcvbqm1234567e90QWERTYUIOPLKHGFDSnZXCVBNM!@#$%^&*(-+={[};?/>." | ||||
|         self.assertEqual(sig(wrong), right) | ||||
|  | ||||
| if __name__ == '__main__': | ||||
|     unittest.main() | ||||
							
								
								
									
										80
									
								
								test/test_youtube_signature.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										80
									
								
								test/test_youtube_signature.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,80 @@ | ||||
| #!/usr/bin/env python | ||||
|  | ||||
| import io | ||||
| import re | ||||
| import string | ||||
| import sys | ||||
| import unittest | ||||
|  | ||||
| # Allow direct execution | ||||
| import os | ||||
| sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) | ||||
|  | ||||
| from youtube_dl.extractor import YoutubeIE | ||||
| from youtube_dl.utils import compat_str, compat_urlretrieve | ||||
|  | ||||
| _TESTS = [ | ||||
|     ( | ||||
|         u'https://s.ytimg.com/yts/jsbin/html5player-vflHOr_nV.js', | ||||
|         u'js', | ||||
|         86, | ||||
|         u'>=<;:/.-[+*)(\'&%$#"!ZYX0VUTSRQPONMLKJIHGFEDCBA\\yxwvutsrqponmlkjihgfedcba987654321', | ||||
|     ), | ||||
|     ( | ||||
|         u'https://s.ytimg.com/yts/jsbin/html5player-vfldJ8xgI.js', | ||||
|         u'js', | ||||
|         85, | ||||
|         u'3456789a0cdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRS[UVWXYZ!"#$%&\'()*+,-./:;<=>?@', | ||||
|     ), | ||||
|     ( | ||||
|         u'https://s.ytimg.com/yts/swfbin/watch_as3-vflg5GhxU.swf', | ||||
|         u'swf', | ||||
|         82, | ||||
|         u':/.-,+*)=\'&%$#"!ZYX0VUTSRQPONMLKJIHGFEDCBAzyxw>utsrqponmlkjihgfedcba987654321' | ||||
|     ), | ||||
| ] | ||||
|  | ||||
|  | ||||
| class TestSignature(unittest.TestCase): | ||||
|     def setUp(self): | ||||
|         TEST_DIR = os.path.dirname(os.path.abspath(__file__)) | ||||
|         self.TESTDATA_DIR = os.path.join(TEST_DIR, 'testdata') | ||||
|         if not os.path.exists(self.TESTDATA_DIR): | ||||
|             os.mkdir(self.TESTDATA_DIR) | ||||
|  | ||||
|  | ||||
| def make_tfunc(url, stype, sig_length, expected_sig): | ||||
|     basename = url.rpartition('/')[2] | ||||
|     m = re.match(r'.*-([a-zA-Z0-9_-]+)\.[a-z]+$', basename) | ||||
|     assert m, '%r should follow URL format' % basename | ||||
|     test_id = m.group(1) | ||||
|  | ||||
|     def test_func(self): | ||||
|         fn = os.path.join(self.TESTDATA_DIR, basename) | ||||
|  | ||||
|         if not os.path.exists(fn): | ||||
|             compat_urlretrieve(url, fn) | ||||
|  | ||||
|         ie = YoutubeIE() | ||||
|         if stype == 'js': | ||||
|             with io.open(fn, encoding='utf-8') as testf: | ||||
|                 jscode = testf.read() | ||||
|             func = ie._parse_sig_js(jscode) | ||||
|         else: | ||||
|             assert stype == 'swf' | ||||
|             with open(fn, 'rb') as testf: | ||||
|                 swfcode = testf.read() | ||||
|             func = ie._parse_sig_swf(swfcode) | ||||
|         src_sig = compat_str(string.printable[:sig_length]) | ||||
|         got_sig = func(src_sig) | ||||
|         self.assertEqual(got_sig, expected_sig) | ||||
|  | ||||
|     test_func.__name__ = str('test_signature_' + stype + '_' + test_id) | ||||
|     setattr(TestSignature, test_func.__name__, test_func) | ||||
|  | ||||
| for test_spec in _TESTS: | ||||
|     make_tfunc(*test_spec) | ||||
|  | ||||
|  | ||||
| if __name__ == '__main__': | ||||
|     unittest.main() | ||||
| @@ -2,8 +2,6 @@ | ||||
|  | ||||
| import sys | ||||
| import unittest | ||||
| import json | ||||
| import io | ||||
| import hashlib | ||||
|  | ||||
| # Allow direct execution | ||||
| @@ -18,78 +16,69 @@ md5 = lambda s: hashlib.md5(s.encode('utf-8')).hexdigest() | ||||
|  | ||||
| class TestYoutubeSubtitles(unittest.TestCase): | ||||
|     def setUp(self): | ||||
|         DL = FakeYDL() | ||||
|         DL.params['allsubtitles'] = False | ||||
|         DL.params['writesubtitles'] = False | ||||
|         DL.params['subtitlesformat'] = 'srt' | ||||
|         DL.params['listsubtitles'] = False | ||||
|     def test_youtube_no_subtitles(self): | ||||
|         DL = FakeYDL() | ||||
|         DL.params['writesubtitles'] = False | ||||
|         IE = YoutubeIE(DL) | ||||
|         info_dict = IE.extract('QRS8MkLhQmM') | ||||
|         subtitles = info_dict[0]['subtitles'] | ||||
|         self.DL = FakeYDL() | ||||
|         self.url = 'QRS8MkLhQmM' | ||||
|     def getInfoDict(self): | ||||
|         IE = YoutubeIE(self.DL) | ||||
|         info_dict = IE.extract(self.url) | ||||
|         return info_dict | ||||
|     def getSubtitles(self): | ||||
|         info_dict = self.getInfoDict() | ||||
|         return info_dict[0]['subtitles']         | ||||
|     def test_youtube_no_writesubtitles(self): | ||||
|         self.DL.params['writesubtitles'] = False | ||||
|         subtitles = self.getSubtitles() | ||||
|         self.assertEqual(subtitles, None) | ||||
|     def test_youtube_subtitles(self): | ||||
|         DL = FakeYDL() | ||||
|         DL.params['writesubtitles'] = True | ||||
|         IE = YoutubeIE(DL) | ||||
|         info_dict = IE.extract('QRS8MkLhQmM') | ||||
|         sub = info_dict[0]['subtitles'][0] | ||||
|         self.assertEqual(md5(sub[2]), '4cd9278a35ba2305f47354ee13472260') | ||||
|     def test_youtube_subtitles_it(self): | ||||
|         DL = FakeYDL() | ||||
|         DL.params['writesubtitles'] = True | ||||
|         DL.params['subtitleslang'] = 'it' | ||||
|         IE = YoutubeIE(DL) | ||||
|         info_dict = IE.extract('QRS8MkLhQmM') | ||||
|         sub = info_dict[0]['subtitles'][0] | ||||
|         self.assertEqual(md5(sub[2]), '164a51f16f260476a05b50fe4c2f161d') | ||||
|     def test_youtube_onlysubtitles(self): | ||||
|         DL = FakeYDL() | ||||
|         DL.params['writesubtitles'] = True | ||||
|         DL.params['onlysubtitles'] = True | ||||
|         IE = YoutubeIE(DL) | ||||
|         info_dict = IE.extract('QRS8MkLhQmM') | ||||
|         sub = info_dict[0]['subtitles'][0] | ||||
|         self.assertEqual(md5(sub[2]), '4cd9278a35ba2305f47354ee13472260') | ||||
|         self.DL.params['writesubtitles'] = True | ||||
|         subtitles = self.getSubtitles() | ||||
|         self.assertEqual(md5(subtitles['en']), '4cd9278a35ba2305f47354ee13472260') | ||||
|     def test_youtube_subtitles_lang(self): | ||||
|         self.DL.params['writesubtitles'] = True | ||||
|         self.DL.params['subtitleslangs'] = ['it'] | ||||
|         subtitles = self.getSubtitles() | ||||
|         self.assertEqual(md5(subtitles['it']), '164a51f16f260476a05b50fe4c2f161d') | ||||
|     def test_youtube_allsubtitles(self): | ||||
|         DL = FakeYDL() | ||||
|         DL.params['allsubtitles'] = True | ||||
|         IE = YoutubeIE(DL) | ||||
|         info_dict = IE.extract('QRS8MkLhQmM') | ||||
|         subtitles = info_dict[0]['subtitles'] | ||||
|         self.assertEqual(len(subtitles), 13) | ||||
|         self.DL.params['writesubtitles'] = True | ||||
|         self.DL.params['allsubtitles'] = True | ||||
|         subtitles = self.getSubtitles() | ||||
|         self.assertEqual(len(subtitles.keys()), 13) | ||||
|     def test_youtube_subtitles_sbv_format(self): | ||||
|         DL = FakeYDL() | ||||
|         DL.params['writesubtitles'] = True | ||||
|         DL.params['subtitlesformat'] = 'sbv' | ||||
|         IE = YoutubeIE(DL) | ||||
|         info_dict = IE.extract('QRS8MkLhQmM') | ||||
|         sub = info_dict[0]['subtitles'][0] | ||||
|         self.assertEqual(md5(sub[2]), '13aeaa0c245a8bed9a451cb643e3ad8b') | ||||
|         self.DL.params['writesubtitles'] = True | ||||
|         self.DL.params['subtitlesformat'] = 'sbv' | ||||
|         subtitles = self.getSubtitles() | ||||
|         self.assertEqual(md5(subtitles['en']), '13aeaa0c245a8bed9a451cb643e3ad8b') | ||||
|     def test_youtube_subtitles_vtt_format(self): | ||||
|         DL = FakeYDL() | ||||
|         DL.params['writesubtitles'] = True | ||||
|         DL.params['subtitlesformat'] = 'vtt' | ||||
|         IE = YoutubeIE(DL) | ||||
|         info_dict = IE.extract('QRS8MkLhQmM') | ||||
|         sub = info_dict[0]['subtitles'][0] | ||||
|         self.assertEqual(md5(sub[2]), '356cdc577fde0c6783b9b822e7206ff7') | ||||
|         self.DL.params['writesubtitles'] = True | ||||
|         self.DL.params['subtitlesformat'] = 'vtt' | ||||
|         subtitles = self.getSubtitles() | ||||
|         self.assertEqual(md5(subtitles['en']), '356cdc577fde0c6783b9b822e7206ff7') | ||||
|     def test_youtube_list_subtitles(self): | ||||
|         DL = FakeYDL() | ||||
|         DL.params['listsubtitles'] = True | ||||
|         IE = YoutubeIE(DL) | ||||
|         info_dict = IE.extract('QRS8MkLhQmM') | ||||
|         self.DL.expect_warning(u'Video doesn\'t have automatic captions') | ||||
|         self.DL.params['listsubtitles'] = True | ||||
|         info_dict = self.getInfoDict() | ||||
|         self.assertEqual(info_dict, None) | ||||
|     def test_youtube_automatic_captions(self): | ||||
|         DL = FakeYDL() | ||||
|         DL.params['writeautomaticsub'] = True | ||||
|         DL.params['subtitleslang'] = 'it' | ||||
|         IE = YoutubeIE(DL) | ||||
|         info_dict = IE.extract('8YoUxe5ncPo') | ||||
|         sub = info_dict[0]['subtitles'][0] | ||||
|         self.assertTrue(sub[2] is not None) | ||||
|         self.url = '8YoUxe5ncPo' | ||||
|         self.DL.params['writeautomaticsub'] = True | ||||
|         self.DL.params['subtitleslangs'] = ['it'] | ||||
|         subtitles = self.getSubtitles() | ||||
|         self.assertTrue(subtitles['it'] is not None) | ||||
|     def test_youtube_nosubtitles(self): | ||||
|         self.DL.expect_warning(u'video doesn\'t have subtitles') | ||||
|         self.url = 'sAjKT8FhjI8' | ||||
|         self.DL.params['writesubtitles'] = True | ||||
|         self.DL.params['allsubtitles'] = True | ||||
|         subtitles = self.getSubtitles() | ||||
|         self.assertEqual(len(subtitles), 0) | ||||
|     def test_youtube_multiple_langs(self): | ||||
|         self.url = 'QRS8MkLhQmM' | ||||
|         self.DL.params['writesubtitles'] = True | ||||
|         langs = ['it', 'fr', 'de'] | ||||
|         self.DL.params['subtitleslangs'] = langs | ||||
|         subtitles = self.getSubtitles() | ||||
|         for lang in langs: | ||||
|             self.assertTrue(subtitles.get(lang) is not None, u'Subtitles for \'%s\' not extracted' % lang) | ||||
|  | ||||
| if __name__ == '__main__': | ||||
|     unittest.main() | ||||
|   | ||||
							
								
								
									
										5
									
								
								tox.ini
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										5
									
								
								tox.ini
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,5 @@ | ||||
| [tox] | ||||
| envlist = py26,py27,py33 | ||||
| [testenv] | ||||
| deps = nose | ||||
| commands = nosetests --with-coverage --cover-package=youtube_dl --cover-html --verbose test | ||||
| @@ -63,32 +63,57 @@ class FileDownloader(object): | ||||
|         converted = float(bytes) / float(1024 ** exponent) | ||||
|         return '%.2f%s' % (converted, suffix) | ||||
|  | ||||
|     @staticmethod | ||||
|     def format_seconds(seconds): | ||||
|         (mins, secs) = divmod(seconds, 60) | ||||
|         (hours, mins) = divmod(mins, 60) | ||||
|         if hours > 99: | ||||
|             return '--:--:--' | ||||
|         if hours == 0: | ||||
|             return '%02d:%02d' % (mins, secs) | ||||
|         else: | ||||
|             return '%02d:%02d:%02d' % (hours, mins, secs) | ||||
|  | ||||
|     @staticmethod | ||||
|     def calc_percent(byte_counter, data_len): | ||||
|         if data_len is None: | ||||
|             return None | ||||
|         return float(byte_counter) / float(data_len) * 100.0 | ||||
|  | ||||
|     @staticmethod | ||||
|     def format_percent(percent): | ||||
|         if percent is None: | ||||
|             return '---.-%' | ||||
|         return '%6s' % ('%3.1f%%' % (float(byte_counter) / float(data_len) * 100.0)) | ||||
|         return '%6s' % ('%3.1f%%' % percent) | ||||
|  | ||||
|     @staticmethod | ||||
|     def calc_eta(start, now, total, current): | ||||
|         if total is None: | ||||
|             return '--:--' | ||||
|             return None | ||||
|         dif = now - start | ||||
|         if current == 0 or dif < 0.001: # One millisecond | ||||
|             return '--:--' | ||||
|             return None | ||||
|         rate = float(current) / dif | ||||
|         eta = int((float(total) - float(current)) / rate) | ||||
|         (eta_mins, eta_secs) = divmod(eta, 60) | ||||
|         if eta_mins > 99: | ||||
|         return int((float(total) - float(current)) / rate) | ||||
|  | ||||
|     @staticmethod | ||||
|     def format_eta(eta): | ||||
|         if eta is None: | ||||
|             return '--:--' | ||||
|         return '%02d:%02d' % (eta_mins, eta_secs) | ||||
|         return FileDownloader.format_seconds(eta) | ||||
|  | ||||
|     @staticmethod | ||||
|     def calc_speed(start, now, bytes): | ||||
|         dif = now - start | ||||
|         if bytes == 0 or dif < 0.001: # One millisecond | ||||
|             return None | ||||
|         return float(bytes) / dif | ||||
|  | ||||
|     @staticmethod | ||||
|     def format_speed(speed): | ||||
|         if speed is None: | ||||
|             return '%10s' % '---b/s' | ||||
|         return '%10s' % ('%s/s' % FileDownloader.format_bytes(float(bytes) / dif)) | ||||
|         return '%10s' % ('%s/s' % FileDownloader.format_bytes(speed)) | ||||
|  | ||||
|     @staticmethod | ||||
|     def best_block_size(elapsed_time, bytes): | ||||
| @@ -197,11 +222,14 @@ class FileDownloader(object): | ||||
|         """Report destination filename.""" | ||||
|         self.to_screen(u'[download] Destination: ' + filename) | ||||
|  | ||||
|     def report_progress(self, percent_str, data_len_str, speed_str, eta_str): | ||||
|     def report_progress(self, percent, data_len_str, speed, eta): | ||||
|         """Report download progress.""" | ||||
|         if self.params.get('noprogress', False): | ||||
|             return | ||||
|         clear_line = (u'\x1b[K' if sys.stderr.isatty() and os.name != 'nt' else u'') | ||||
|         eta_str = self.format_eta(eta) | ||||
|         percent_str = self.format_percent(percent) | ||||
|         speed_str = self.format_speed(speed) | ||||
|         if self.params.get('progress_with_newline', False): | ||||
|             self.to_screen(u'[download] %s of %s at %s ETA %s' % | ||||
|                 (percent_str, data_len_str, speed_str, eta_str)) | ||||
| @@ -230,12 +258,14 @@ class FileDownloader(object): | ||||
|         """Report it was impossible to resume download.""" | ||||
|         self.to_screen(u'[download] Unable to resume') | ||||
|  | ||||
|     def report_finish(self): | ||||
|     def report_finish(self, data_len_str, tot_time): | ||||
|         """Report download finished.""" | ||||
|         if self.params.get('noprogress', False): | ||||
|             self.to_screen(u'[download] Download completed') | ||||
|         else: | ||||
|             self.to_screen(u'') | ||||
|             clear_line = (u'\x1b[K' if sys.stderr.isatty() and os.name != 'nt' else u'') | ||||
|             self.to_screen(u'\r%s[download] 100%% of %s in %s' % | ||||
|                 (clear_line, data_len_str, self.format_seconds(tot_time))) | ||||
|  | ||||
|     def _download_with_rtmpdump(self, filename, url, player_url, page_url, play_path, tc_url): | ||||
|         self.report_destination(filename) | ||||
| @@ -329,6 +359,35 @@ class FileDownloader(object): | ||||
|             self.report_error(u'mplayer exited with code %d' % retval) | ||||
|             return False | ||||
|  | ||||
|     def _download_m3u8_with_ffmpeg(self, filename, url): | ||||
|         self.report_destination(filename) | ||||
|         tmpfilename = self.temp_name(filename) | ||||
|  | ||||
|         args = ['ffmpeg', '-y', '-i', url, '-f', 'mp4', tmpfilename] | ||||
|         # Check for ffmpeg first | ||||
|         try: | ||||
|             subprocess.call(['ffmpeg', '-h'], stdout=(open(os.path.devnull, 'w')), stderr=subprocess.STDOUT) | ||||
|         except (OSError, IOError): | ||||
|             self.report_error(u'm3u8 download detected but "%s" could not be run' % args[0] ) | ||||
|             return False | ||||
|  | ||||
|         retval = subprocess.call(args) | ||||
|         if retval == 0: | ||||
|             fsize = os.path.getsize(encodeFilename(tmpfilename)) | ||||
|             self.to_screen(u'\r[%s] %s bytes' % (args[0], fsize)) | ||||
|             self.try_rename(tmpfilename, filename) | ||||
|             self._hook_progress({ | ||||
|                 'downloaded_bytes': fsize, | ||||
|                 'total_bytes': fsize, | ||||
|                 'filename': filename, | ||||
|                 'status': 'finished', | ||||
|             }) | ||||
|             return True | ||||
|         else: | ||||
|             self.to_stderr(u"\n") | ||||
|             self.report_error(u'ffmpeg exited with code %d' % retval) | ||||
|             return False | ||||
|  | ||||
|  | ||||
|     def _do_download(self, filename, info_dict): | ||||
|         url = info_dict['url'] | ||||
| @@ -339,6 +398,7 @@ class FileDownloader(object): | ||||
|             self._hook_progress({ | ||||
|                 'filename': filename, | ||||
|                 'status': 'finished', | ||||
|                 'total_bytes': os.path.getsize(encodeFilename(filename)), | ||||
|             }) | ||||
|             return True | ||||
|  | ||||
| @@ -354,6 +414,10 @@ class FileDownloader(object): | ||||
|         if url.startswith('mms') or url.startswith('rtsp'): | ||||
|             return self._download_with_mplayer(filename, url) | ||||
|  | ||||
|         # m3u8 manifest are downloaded with ffmpeg | ||||
|         if determine_ext(url) == u'm3u8': | ||||
|             return self._download_m3u8_with_ffmpeg(filename, url) | ||||
|  | ||||
|         tmpfilename = self.temp_name(filename) | ||||
|         stream = None | ||||
|  | ||||
| @@ -481,13 +545,14 @@ class FileDownloader(object): | ||||
|                 block_size = self.best_block_size(after - before, len(data_block)) | ||||
|  | ||||
|             # Progress message | ||||
|             speed_str = self.calc_speed(start, time.time(), byte_counter - resume_len) | ||||
|             speed = self.calc_speed(start, time.time(), byte_counter - resume_len) | ||||
|             if data_len is None: | ||||
|                 self.report_progress('Unknown %', data_len_str, speed_str, 'Unknown ETA') | ||||
|                 eta = None | ||||
|             else: | ||||
|                 percent_str = self.calc_percent(byte_counter, data_len) | ||||
|                 eta_str = self.calc_eta(start, time.time(), data_len - resume_len, byte_counter - resume_len) | ||||
|                 self.report_progress(percent_str, data_len_str, speed_str, eta_str) | ||||
|                 percent = self.calc_percent(byte_counter, data_len) | ||||
|                 eta = self.calc_eta(start, time.time(), data_len - resume_len, byte_counter - resume_len) | ||||
|                 self.report_progress(percent, data_len_str, speed, eta) | ||||
|  | ||||
|             self._hook_progress({ | ||||
|                 'downloaded_bytes': byte_counter, | ||||
| @@ -495,6 +560,8 @@ class FileDownloader(object): | ||||
|                 'tmpfilename': tmpfilename, | ||||
|                 'filename': filename, | ||||
|                 'status': 'downloading', | ||||
|                 'eta': eta, | ||||
|                 'speed': speed, | ||||
|             }) | ||||
|  | ||||
|             # Apply rate limit | ||||
| @@ -505,7 +572,7 @@ class FileDownloader(object): | ||||
|             self.report_error(u'Did not get any data blocks') | ||||
|             return False | ||||
|         stream.close() | ||||
|         self.report_finish() | ||||
|         self.report_finish(data_len_str, (time.time() - start)) | ||||
|         if data_len is not None and byte_counter != data_len: | ||||
|             raise ContentTooShortError(byte_counter, int(data_len)) | ||||
|         self.try_rename(tmpfilename, filename) | ||||
| @@ -537,6 +604,8 @@ class FileDownloader(object): | ||||
|         * downloaded_bytes: Bytes on disks | ||||
|         * total_bytes: Total bytes, None if unknown | ||||
|         * tmpfilename: The filename we're currently writing to | ||||
|         * eta: The estimated time in seconds, None if unknown | ||||
|         * speed: The download speed in bytes/second, None if unknown | ||||
|  | ||||
|         Hooks are guaranteed to be called at least once (with status "finished") | ||||
|         if the download is successful. | ||||
|   | ||||
| @@ -71,12 +71,17 @@ class FFmpegPostProcessor(PostProcessor): | ||||
|         programs = ['avprobe', 'avconv', 'ffmpeg', 'ffprobe'] | ||||
|         return dict((program, executable(program)) for program in programs) | ||||
|  | ||||
|     def run_ffmpeg(self, path, out_path, opts): | ||||
|     def run_ffmpeg_multiple_files(self, input_paths, out_path, opts): | ||||
|         if not self._exes['ffmpeg'] and not self._exes['avconv']: | ||||
|             raise FFmpegPostProcessorError(u'ffmpeg or avconv not found. Please install one.') | ||||
|         cmd = ([self._exes['avconv'] or self._exes['ffmpeg'], '-y', '-i', encodeFilename(path)] | ||||
|  | ||||
|         files_cmd = [] | ||||
|         for path in input_paths: | ||||
|             files_cmd.extend(['-i', encodeFilename(path)]) | ||||
|         cmd = ([self._exes['avconv'] or self._exes['ffmpeg'], '-y'] + files_cmd | ||||
|                + opts + | ||||
|                [encodeFilename(self._ffmpeg_filename_argument(out_path))]) | ||||
|  | ||||
|         p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE) | ||||
|         stdout,stderr = p.communicate() | ||||
|         if p.returncode != 0: | ||||
| @@ -84,6 +89,9 @@ class FFmpegPostProcessor(PostProcessor): | ||||
|             msg = stderr.strip().split('\n')[-1] | ||||
|             raise FFmpegPostProcessorError(msg) | ||||
|  | ||||
|     def run_ffmpeg(self, path, out_path, opts): | ||||
|         self.run_ffmpeg_multiple_files([path], out_path, opts) | ||||
|  | ||||
|     def _ffmpeg_filename_argument(self, fn): | ||||
|         # ffmpeg broke --, see https://ffmpeg.org/trac/ffmpeg/ticket/2127 for details | ||||
|         if fn.startswith(u'-'): | ||||
| @@ -100,7 +108,8 @@ class FFmpegExtractAudioPP(FFmpegPostProcessor): | ||||
|         self._nopostoverwrites = nopostoverwrites | ||||
|  | ||||
|     def get_audio_codec(self, path): | ||||
|         if not self._exes['ffprobe'] and not self._exes['avprobe']: return None | ||||
|         if not self._exes['ffprobe'] and not self._exes['avprobe']: | ||||
|             raise PostProcessingError(u'ffprobe or avprobe not found. Please install one.') | ||||
|         try: | ||||
|             cmd = [self._exes['avprobe'] or self._exes['ffprobe'], '-show_streams', encodeFilename(self._ffmpeg_filename_argument(path))] | ||||
|             handle = subprocess.Popen(cmd, stderr=compat_subprocess_get_DEVNULL(), stdout=subprocess.PIPE) | ||||
| @@ -128,7 +137,7 @@ class FFmpegExtractAudioPP(FFmpegPostProcessor): | ||||
|         try: | ||||
|             FFmpegPostProcessor.run_ffmpeg(self, path, out_path, opts) | ||||
|         except FFmpegPostProcessorError as err: | ||||
|             raise AudioConversionError(err.message) | ||||
|             raise AudioConversionError(err.msg) | ||||
|  | ||||
|     def run(self, information): | ||||
|         path = information['filepath'] | ||||
| @@ -198,7 +207,7 @@ class FFmpegExtractAudioPP(FFmpegPostProcessor): | ||||
|         except: | ||||
|             etype,e,tb = sys.exc_info() | ||||
|             if isinstance(e, AudioConversionError): | ||||
|                 msg = u'audio conversion failed: ' + e.message | ||||
|                 msg = u'audio conversion failed: ' + e.msg | ||||
|             else: | ||||
|                 msg = u'error running ' + (self._exes['avconv'] and 'avconv' or 'ffmpeg') | ||||
|             raise PostProcessingError(msg) | ||||
| @@ -208,7 +217,7 @@ class FFmpegExtractAudioPP(FFmpegPostProcessor): | ||||
|             try: | ||||
|                 os.utime(encodeFilename(new_path), (time.time(), information['filetime'])) | ||||
|             except: | ||||
|                 self._downloader.to_stderr(u'WARNING: Cannot update utime of audio file') | ||||
|                 self._downloader.report_warning(u'Cannot update utime of audio file') | ||||
|  | ||||
|         information['filepath'] = new_path | ||||
|         return self._nopostoverwrites,information | ||||
| @@ -231,3 +240,230 @@ class FFmpegVideoConvertor(FFmpegPostProcessor): | ||||
|         information['format'] = self._preferedformat | ||||
|         information['ext'] = self._preferedformat | ||||
|         return False,information | ||||
|  | ||||
|  | ||||
| class FFmpegEmbedSubtitlePP(FFmpegPostProcessor): | ||||
|     # See http://www.loc.gov/standards/iso639-2/ISO-639-2_utf-8.txt | ||||
|     _lang_map = { | ||||
|         'aa': 'aar', | ||||
|         'ab': 'abk', | ||||
|         'ae': 'ave', | ||||
|         'af': 'afr', | ||||
|         'ak': 'aka', | ||||
|         'am': 'amh', | ||||
|         'an': 'arg', | ||||
|         'ar': 'ara', | ||||
|         'as': 'asm', | ||||
|         'av': 'ava', | ||||
|         'ay': 'aym', | ||||
|         'az': 'aze', | ||||
|         'ba': 'bak', | ||||
|         'be': 'bel', | ||||
|         'bg': 'bul', | ||||
|         'bh': 'bih', | ||||
|         'bi': 'bis', | ||||
|         'bm': 'bam', | ||||
|         'bn': 'ben', | ||||
|         'bo': 'bod', | ||||
|         'br': 'bre', | ||||
|         'bs': 'bos', | ||||
|         'ca': 'cat', | ||||
|         'ce': 'che', | ||||
|         'ch': 'cha', | ||||
|         'co': 'cos', | ||||
|         'cr': 'cre', | ||||
|         'cs': 'ces', | ||||
|         'cu': 'chu', | ||||
|         'cv': 'chv', | ||||
|         'cy': 'cym', | ||||
|         'da': 'dan', | ||||
|         'de': 'deu', | ||||
|         'dv': 'div', | ||||
|         'dz': 'dzo', | ||||
|         'ee': 'ewe', | ||||
|         'el': 'ell', | ||||
|         'en': 'eng', | ||||
|         'eo': 'epo', | ||||
|         'es': 'spa', | ||||
|         'et': 'est', | ||||
|         'eu': 'eus', | ||||
|         'fa': 'fas', | ||||
|         'ff': 'ful', | ||||
|         'fi': 'fin', | ||||
|         'fj': 'fij', | ||||
|         'fo': 'fao', | ||||
|         'fr': 'fra', | ||||
|         'fy': 'fry', | ||||
|         'ga': 'gle', | ||||
|         'gd': 'gla', | ||||
|         'gl': 'glg', | ||||
|         'gn': 'grn', | ||||
|         'gu': 'guj', | ||||
|         'gv': 'glv', | ||||
|         'ha': 'hau', | ||||
|         'he': 'heb', | ||||
|         'hi': 'hin', | ||||
|         'ho': 'hmo', | ||||
|         'hr': 'hrv', | ||||
|         'ht': 'hat', | ||||
|         'hu': 'hun', | ||||
|         'hy': 'hye', | ||||
|         'hz': 'her', | ||||
|         'ia': 'ina', | ||||
|         'id': 'ind', | ||||
|         'ie': 'ile', | ||||
|         'ig': 'ibo', | ||||
|         'ii': 'iii', | ||||
|         'ik': 'ipk', | ||||
|         'io': 'ido', | ||||
|         'is': 'isl', | ||||
|         'it': 'ita', | ||||
|         'iu': 'iku', | ||||
|         'ja': 'jpn', | ||||
|         'jv': 'jav', | ||||
|         'ka': 'kat', | ||||
|         'kg': 'kon', | ||||
|         'ki': 'kik', | ||||
|         'kj': 'kua', | ||||
|         'kk': 'kaz', | ||||
|         'kl': 'kal', | ||||
|         'km': 'khm', | ||||
|         'kn': 'kan', | ||||
|         'ko': 'kor', | ||||
|         'kr': 'kau', | ||||
|         'ks': 'kas', | ||||
|         'ku': 'kur', | ||||
|         'kv': 'kom', | ||||
|         'kw': 'cor', | ||||
|         'ky': 'kir', | ||||
|         'la': 'lat', | ||||
|         'lb': 'ltz', | ||||
|         'lg': 'lug', | ||||
|         'li': 'lim', | ||||
|         'ln': 'lin', | ||||
|         'lo': 'lao', | ||||
|         'lt': 'lit', | ||||
|         'lu': 'lub', | ||||
|         'lv': 'lav', | ||||
|         'mg': 'mlg', | ||||
|         'mh': 'mah', | ||||
|         'mi': 'mri', | ||||
|         'mk': 'mkd', | ||||
|         'ml': 'mal', | ||||
|         'mn': 'mon', | ||||
|         'mr': 'mar', | ||||
|         'ms': 'msa', | ||||
|         'mt': 'mlt', | ||||
|         'my': 'mya', | ||||
|         'na': 'nau', | ||||
|         'nb': 'nob', | ||||
|         'nd': 'nde', | ||||
|         'ne': 'nep', | ||||
|         'ng': 'ndo', | ||||
|         'nl': 'nld', | ||||
|         'nn': 'nno', | ||||
|         'no': 'nor', | ||||
|         'nr': 'nbl', | ||||
|         'nv': 'nav', | ||||
|         'ny': 'nya', | ||||
|         'oc': 'oci', | ||||
|         'oj': 'oji', | ||||
|         'om': 'orm', | ||||
|         'or': 'ori', | ||||
|         'os': 'oss', | ||||
|         'pa': 'pan', | ||||
|         'pi': 'pli', | ||||
|         'pl': 'pol', | ||||
|         'ps': 'pus', | ||||
|         'pt': 'por', | ||||
|         'qu': 'que', | ||||
|         'rm': 'roh', | ||||
|         'rn': 'run', | ||||
|         'ro': 'ron', | ||||
|         'ru': 'rus', | ||||
|         'rw': 'kin', | ||||
|         'sa': 'san', | ||||
|         'sc': 'srd', | ||||
|         'sd': 'snd', | ||||
|         'se': 'sme', | ||||
|         'sg': 'sag', | ||||
|         'si': 'sin', | ||||
|         'sk': 'slk', | ||||
|         'sl': 'slv', | ||||
|         'sm': 'smo', | ||||
|         'sn': 'sna', | ||||
|         'so': 'som', | ||||
|         'sq': 'sqi', | ||||
|         'sr': 'srp', | ||||
|         'ss': 'ssw', | ||||
|         'st': 'sot', | ||||
|         'su': 'sun', | ||||
|         'sv': 'swe', | ||||
|         'sw': 'swa', | ||||
|         'ta': 'tam', | ||||
|         'te': 'tel', | ||||
|         'tg': 'tgk', | ||||
|         'th': 'tha', | ||||
|         'ti': 'tir', | ||||
|         'tk': 'tuk', | ||||
|         'tl': 'tgl', | ||||
|         'tn': 'tsn', | ||||
|         'to': 'ton', | ||||
|         'tr': 'tur', | ||||
|         'ts': 'tso', | ||||
|         'tt': 'tat', | ||||
|         'tw': 'twi', | ||||
|         'ty': 'tah', | ||||
|         'ug': 'uig', | ||||
|         'uk': 'ukr', | ||||
|         'ur': 'urd', | ||||
|         'uz': 'uzb', | ||||
|         've': 'ven', | ||||
|         'vi': 'vie', | ||||
|         'vo': 'vol', | ||||
|         'wa': 'wln', | ||||
|         'wo': 'wol', | ||||
|         'xh': 'xho', | ||||
|         'yi': 'yid', | ||||
|         'yo': 'yor', | ||||
|         'za': 'zha', | ||||
|         'zh': 'zho', | ||||
|         'zu': 'zul', | ||||
|     } | ||||
|  | ||||
|     def __init__(self, downloader=None, subtitlesformat='srt'): | ||||
|         super(FFmpegEmbedSubtitlePP, self).__init__(downloader) | ||||
|         self._subformat = subtitlesformat | ||||
|  | ||||
|     @classmethod | ||||
|     def _conver_lang_code(cls, code): | ||||
|         """Convert language code from ISO 639-1 to ISO 639-2/T""" | ||||
|         return cls._lang_map.get(code[:2]) | ||||
|  | ||||
|     def run(self, information): | ||||
|         if information['ext'] != u'mp4': | ||||
|             self._downloader.to_screen(u'[ffmpeg] Subtitles can only be embedded in mp4 files') | ||||
|             return True, information | ||||
|         if not information.get('subtitles'): | ||||
|             self._downloader.to_screen(u'[ffmpeg] There aren\'t any subtitles to embed')  | ||||
|             return True, information | ||||
|  | ||||
|         sub_langs = [key for key in information['subtitles']] | ||||
|         filename = information['filepath'] | ||||
|         input_files = [filename] + [subtitles_filename(filename, lang, self._subformat) for lang in sub_langs] | ||||
|  | ||||
|         opts = ['-map', '0:0', '-map', '0:1', '-c:v', 'copy', '-c:a', 'copy'] | ||||
|         for (i, lang) in enumerate(sub_langs): | ||||
|             opts.extend(['-map', '%d:0' % (i+1), '-c:s:%d' % i, 'mov_text']) | ||||
|             lang_code = self._conver_lang_code(lang) | ||||
|             if lang_code is not None: | ||||
|                 opts.extend(['-metadata:s:s:%d' % i, 'language=%s' % lang_code]) | ||||
|         opts.extend(['-f', 'mp4']) | ||||
|  | ||||
|         temp_filename = filename + u'.temp' | ||||
|         self._downloader.to_screen(u'[ffmpeg] Embedding subtitles in \'%s\'' % filename) | ||||
|         self.run_ffmpeg_multiple_files(input_files, temp_filename, opts) | ||||
|         os.remove(encodeFilename(filename)) | ||||
|         os.rename(encodeFilename(temp_filename), encodeFilename(filename)) | ||||
|  | ||||
|         return True, information | ||||
|   | ||||
| @@ -3,6 +3,7 @@ | ||||
|  | ||||
| from __future__ import absolute_import | ||||
|  | ||||
| import errno | ||||
| import io | ||||
| import os | ||||
| import re | ||||
| @@ -74,12 +75,21 @@ class YoutubeDL(object): | ||||
|     writesubtitles:    Write the video subtitles to a file | ||||
|     writeautomaticsub: Write the automatic subtitles to a file | ||||
|     allsubtitles:      Downloads all the subtitles of the video | ||||
|                        (requires writesubtitles or writeautomaticsub) | ||||
|     listsubtitles:     Lists all available subtitles for the video | ||||
|     subtitlesformat:   Subtitle format [srt/sbv/vtt] (default=srt) | ||||
|     subtitleslang:     Language of the subtitles to download | ||||
|     subtitleslangs:    List of languages of the subtitles to download | ||||
|     keepvideo:         Keep the video file after post-processing | ||||
|     daterange:         A DateRange object, download only if the upload_date is in the range. | ||||
|     skip_download:     Skip the actual download of the video file | ||||
|     cachedir:          Location of the cache files in the filesystem. | ||||
|                        None to disable filesystem cache. | ||||
|     noplaylist:        Download single video instead of a playlist if in doubt. | ||||
|     age_limit:         An integer representing the user's age in years. | ||||
|                        Unsuitable videos for the given age are skipped. | ||||
|     downloadarchive:   File name of a file where all downloads are recorded. | ||||
|                        Videos already present in the file are not downloaded | ||||
|                        again. | ||||
|      | ||||
|     The following parameters are not used by YoutubeDL itself, they are used by | ||||
|     the FileDownloader: | ||||
| @@ -97,11 +107,23 @@ class YoutubeDL(object): | ||||
|     def __init__(self, params): | ||||
|         """Create a FileDownloader object with the given options.""" | ||||
|         self._ies = [] | ||||
|         self._ies_instances = {} | ||||
|         self._pps = [] | ||||
|         self._progress_hooks = [] | ||||
|         self._download_retcode = 0 | ||||
|         self._num_downloads = 0 | ||||
|         self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)] | ||||
|  | ||||
|         if (sys.version_info >= (3,) and sys.platform != 'win32' and | ||||
|                 sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968'] | ||||
|                 and not params['restrictfilenames']): | ||||
|             # On Python 3, the Unicode filesystem API will throw errors (#1474) | ||||
|             self.report_warning( | ||||
|                 u'Assuming --restrict-filenames isnce file system encoding ' | ||||
|                 u'cannot encode all charactes. ' | ||||
|                 u'Set the LC_ALL environment variable to fix this.') | ||||
|             params['restrictfilenames'] = True | ||||
|  | ||||
|         self.params = params | ||||
|         self.fd = FileDownloader(self, self.params) | ||||
|  | ||||
| @@ -111,8 +133,21 @@ class YoutubeDL(object): | ||||
|     def add_info_extractor(self, ie): | ||||
|         """Add an InfoExtractor object to the end of the list.""" | ||||
|         self._ies.append(ie) | ||||
|         self._ies_instances[ie.ie_key()] = ie | ||||
|         ie.set_downloader(self) | ||||
|  | ||||
|     def get_info_extractor(self, ie_key): | ||||
|         """ | ||||
|         Get an instance of an IE with name ie_key, it will try to get one from | ||||
|         the _ies list, if there's no instance it will create a new one and add | ||||
|         it to the extractor list. | ||||
|         """ | ||||
|         ie = self._ies_instances.get(ie_key) | ||||
|         if ie is None: | ||||
|             ie = get_info_extractor(ie_key)() | ||||
|             self.add_info_extractor(ie) | ||||
|         return ie | ||||
|  | ||||
|     def add_default_info_extractors(self): | ||||
|         """ | ||||
|         Add the InfoExtractors returned by gen_extractors to the end of the list | ||||
| @@ -127,14 +162,10 @@ class YoutubeDL(object): | ||||
|  | ||||
|     def to_screen(self, message, skip_eol=False): | ||||
|         """Print message to stdout if not in quiet mode.""" | ||||
|         assert type(message) == type(u'') | ||||
|         if not self.params.get('quiet', False): | ||||
|             terminator = [u'\n', u''][skip_eol] | ||||
|             output = message + terminator | ||||
|             if 'b' in getattr(self._screen_file, 'mode', '') or sys.version_info[0] < 3: # Python 2 lies about the mode of sys.stdout/sys.stderr | ||||
|                 output = output.encode(preferredencoding(), 'ignore') | ||||
|             self._screen_file.write(output) | ||||
|             self._screen_file.flush() | ||||
|             write_string(output, self._screen_file) | ||||
|  | ||||
|     def to_stderr(self, message): | ||||
|         """Print message to stderr.""" | ||||
| @@ -264,7 +295,7 @@ class YoutubeDL(object): | ||||
|             self.report_error(u'Erroneous output template') | ||||
|             return None | ||||
|         except ValueError as err: | ||||
|             self.report_error(u'Insufficient system charset ' + repr(preferredencoding())) | ||||
|             self.report_error(u'Error in output template: ' + str(err) + u' (encoding: ' + repr(preferredencoding()) + ')') | ||||
|             return None | ||||
|  | ||||
|     def _match_entry(self, info_dict): | ||||
| @@ -284,6 +315,13 @@ class YoutubeDL(object): | ||||
|             dateRange = self.params.get('daterange', DateRange()) | ||||
|             if date not in dateRange: | ||||
|                 return u'[download] %s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange) | ||||
|         age_limit = self.params.get('age_limit') | ||||
|         if age_limit is not None: | ||||
|             if age_limit < info_dict.get('age_limit', 0): | ||||
|                 return u'Skipping "' + title + '" because it is age restricted' | ||||
|         if self.in_download_archive(info_dict): | ||||
|             return (u'%(title)s has already been recorded in archive' | ||||
|                     % info_dict) | ||||
|         return None | ||||
|          | ||||
|     def extract_info(self, url, download=True, ie_key=None, extra_info={}): | ||||
| @@ -294,9 +332,7 @@ class YoutubeDL(object): | ||||
|          ''' | ||||
|          | ||||
|         if ie_key: | ||||
|             ie = get_info_extractor(ie_key)() | ||||
|             ie.set_downloader(self) | ||||
|             ies = [ie] | ||||
|             ies = [self.get_info_extractor(ie_key)] | ||||
|         else: | ||||
|             ies = self._ies | ||||
|  | ||||
| @@ -448,7 +484,8 @@ class YoutubeDL(object): | ||||
|         if self.params.get('forceid', False): | ||||
|             compat_print(info_dict['id']) | ||||
|         if self.params.get('forceurl', False): | ||||
|             compat_print(info_dict['url']) | ||||
|             # For RTMP URLs, also include the playpath | ||||
|             compat_print(info_dict['url'] + info_dict.get('play_path', u'')) | ||||
|         if self.params.get('forcethumbnail', False) and 'thumbnail' in info_dict: | ||||
|             compat_print(info_dict['thumbnail']) | ||||
|         if self.params.get('forcedescription', False) and 'description' in info_dict: | ||||
| @@ -479,45 +516,33 @@ class YoutubeDL(object): | ||||
|                 self.report_writedescription(descfn) | ||||
|                 with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile: | ||||
|                     descfile.write(info_dict['description']) | ||||
|             except (KeyError, TypeError): | ||||
|                 self.report_warning(u'There\'s no description to write.') | ||||
|             except (OSError, IOError): | ||||
|                 self.report_error(u'Cannot write description file ' + descfn) | ||||
|                 return | ||||
|  | ||||
|         if (self.params.get('writesubtitles', False) or self.params.get('writeautomaticsub')) and 'subtitles' in info_dict and info_dict['subtitles']: | ||||
|         subtitles_are_requested = any([self.params.get('writesubtitles', False), | ||||
|                                        self.params.get('writeautomaticsub')]) | ||||
|  | ||||
|         if  subtitles_are_requested and 'subtitles' in info_dict and info_dict['subtitles']: | ||||
|             # subtitles download errors are already managed as troubles in relevant IE | ||||
|             # that way it will silently go on when used with unsupporting IE | ||||
|             subtitle = info_dict['subtitles'][0] | ||||
|             (sub_error, sub_lang, sub) = subtitle | ||||
|             subtitles = info_dict['subtitles'] | ||||
|             sub_format = self.params.get('subtitlesformat') | ||||
|             if sub_error: | ||||
|                 self.report_warning("Some error while getting the subtitles") | ||||
|             else: | ||||
|             for sub_lang in subtitles.keys(): | ||||
|                 sub = subtitles[sub_lang] | ||||
|                 if sub is None: | ||||
|                     continue | ||||
|                 try: | ||||
|                     sub_filename = filename.rsplit('.', 1)[0] + u'.' + sub_lang + u'.' + sub_format | ||||
|                     sub_filename = subtitles_filename(filename, sub_lang, sub_format) | ||||
|                     self.report_writesubtitles(sub_filename) | ||||
|                     with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8') as subfile: | ||||
|                         subfile.write(sub) | ||||
|                             subfile.write(sub) | ||||
|                 except (OSError, IOError): | ||||
|                     self.report_error(u'Cannot write subtitles file ' + descfn) | ||||
|                     return | ||||
|  | ||||
|         if self.params.get('allsubtitles', False) and 'subtitles' in info_dict and info_dict['subtitles']: | ||||
|             subtitles = info_dict['subtitles'] | ||||
|             sub_format = self.params.get('subtitlesformat') | ||||
|             for subtitle in subtitles: | ||||
|                 (sub_error, sub_lang, sub) = subtitle | ||||
|                 if sub_error: | ||||
|                     self.report_warning("Some error while getting the subtitles") | ||||
|                 else: | ||||
|                     try: | ||||
|                         sub_filename = filename.rsplit('.', 1)[0] + u'.' + sub_lang + u'.' + sub_format | ||||
|                         self.report_writesubtitles(sub_filename) | ||||
|                         with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8') as subfile: | ||||
|                                 subfile.write(sub) | ||||
|                     except (OSError, IOError): | ||||
|                         self.report_error(u'Cannot write subtitles file ' + descfn) | ||||
|                         return | ||||
|  | ||||
|         if self.params.get('writeinfojson', False): | ||||
|             infofn = filename + u'.info.json' | ||||
|             self.report_writeinfojson(infofn) | ||||
| @@ -534,11 +559,15 @@ class YoutubeDL(object): | ||||
|                 thumb_filename = filename.rpartition('.')[0] + u'.' + thumb_format | ||||
|                 self.to_screen(u'[%s] %s: Downloading thumbnail ...' % | ||||
|                                (info_dict['extractor'], info_dict['id'])) | ||||
|                 uf = compat_urllib_request.urlopen(info_dict['thumbnail']) | ||||
|                 with open(thumb_filename, 'wb') as thumbf: | ||||
|                     shutil.copyfileobj(uf, thumbf) | ||||
|                 self.to_screen(u'[%s] %s: Writing thumbnail to: %s' % | ||||
|                                (info_dict['extractor'], info_dict['id'], thumb_filename)) | ||||
|                 try: | ||||
|                     uf = compat_urllib_request.urlopen(info_dict['thumbnail']) | ||||
|                     with open(thumb_filename, 'wb') as thumbf: | ||||
|                         shutil.copyfileobj(uf, thumbf) | ||||
|                     self.to_screen(u'[%s] %s: Writing thumbnail to: %s' % | ||||
|                         (info_dict['extractor'], info_dict['id'], thumb_filename)) | ||||
|                 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err: | ||||
|                     self.report_warning(u'Unable to download thumbnail "%s": %s' % | ||||
|                         (info_dict['thumbnail'], compat_str(err))) | ||||
|  | ||||
|         if not self.params.get('skip_download', False): | ||||
|             if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(filename)): | ||||
| @@ -546,11 +575,11 @@ class YoutubeDL(object): | ||||
|             else: | ||||
|                 try: | ||||
|                     success = self.fd._do_download(filename, info_dict) | ||||
|                 except (OSError, IOError) as err: | ||||
|                     raise UnavailableVideoError() | ||||
|                 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err: | ||||
|                     self.report_error(u'unable to download video data: %s' % str(err)) | ||||
|                     return | ||||
|                 except (OSError, IOError) as err: | ||||
|                     raise UnavailableVideoError(err) | ||||
|                 except (ContentTooShortError, ) as err: | ||||
|                     self.report_error(u'content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded)) | ||||
|                     return | ||||
| @@ -562,6 +591,8 @@ class YoutubeDL(object): | ||||
|                     self.report_error(u'postprocessing: %s' % str(err)) | ||||
|                     return | ||||
|  | ||||
|         self.record_download_archive(info_dict) | ||||
|  | ||||
|     def download(self, url_list): | ||||
|         """Download a given list of URLs.""" | ||||
|         if len(url_list) > 1 and self.fixed_template(): | ||||
| @@ -594,10 +625,33 @@ class YoutubeDL(object): | ||||
|                         # No clear decision yet, let IE decide | ||||
|                         keep_video = keep_video_wish | ||||
|             except PostProcessingError as e: | ||||
|                 self.to_stderr(u'ERROR: ' + e.msg) | ||||
|                 self.report_error(e.msg) | ||||
|         if keep_video is False and not self.params.get('keepvideo', False): | ||||
|             try: | ||||
|                 self.to_screen(u'Deleting original file %s (pass -k to keep)' % filename) | ||||
|                 os.remove(encodeFilename(filename)) | ||||
|             except (IOError, OSError): | ||||
|                 self.report_warning(u'Unable to remove downloaded video file') | ||||
|  | ||||
|     def in_download_archive(self, info_dict): | ||||
|         fn = self.params.get('download_archive') | ||||
|         if fn is None: | ||||
|             return False | ||||
|         vid_id = info_dict['extractor'] + u' ' + info_dict['id'] | ||||
|         try: | ||||
|             with locked_file(fn, 'r', encoding='utf-8') as archive_file: | ||||
|                 for line in archive_file: | ||||
|                     if line.strip() == vid_id: | ||||
|                         return True | ||||
|         except IOError as ioe: | ||||
|             if ioe.errno != errno.ENOENT: | ||||
|                 raise | ||||
|         return False | ||||
|  | ||||
|     def record_download_archive(self, info_dict): | ||||
|         fn = self.params.get('download_archive') | ||||
|         if fn is None: | ||||
|             return | ||||
|         vid_id = info_dict['extractor'] + u' ' + info_dict['id'] | ||||
|         with locked_file(fn, 'a', encoding='utf-8') as archive_file: | ||||
|             archive_file.write(vid_id + u'\n') | ||||
|   | ||||
| @@ -27,6 +27,10 @@ __authors__  = ( | ||||
|     'Johny Mo Swag', | ||||
|     'Axel Noack', | ||||
|     'Albert Kim', | ||||
|     'Pierre Rudloff', | ||||
|     'Huarong Huo', | ||||
|     'Ismael Mejía', | ||||
|     'Steffan \'Ruirize\' James', | ||||
| ) | ||||
|  | ||||
| __license__ = 'Public Domain' | ||||
| @@ -44,6 +48,7 @@ import sys | ||||
| import warnings | ||||
| import platform | ||||
|  | ||||
|  | ||||
| from .utils import * | ||||
| from .update import update_self | ||||
| from .version import __version__ | ||||
| @@ -82,6 +87,9 @@ def parseOpts(overrideArguments=None): | ||||
|  | ||||
|         return "".join(opts) | ||||
|  | ||||
|     def _comma_separated_values_options_callback(option, opt_str, value, parser): | ||||
|         setattr(parser.values, option.dest, value.split(',')) | ||||
|  | ||||
|     def _find_term_columns(): | ||||
|         columns = os.environ.get('COLUMNS', None) | ||||
|         if columns: | ||||
| @@ -95,6 +103,16 @@ def parseOpts(overrideArguments=None): | ||||
|             pass | ||||
|         return None | ||||
|  | ||||
|     def _hide_login_info(opts): | ||||
|         opts = list(opts) | ||||
|         for private_opt in ['-p', '--password', '-u', '--username']: | ||||
|             try: | ||||
|                 i = opts.index(private_opt) | ||||
|                 opts[i+1] = '<PRIVATE>' | ||||
|             except ValueError: | ||||
|                 pass | ||||
|         return opts | ||||
|  | ||||
|     max_width = 80 | ||||
|     max_help_position = 80 | ||||
|  | ||||
| @@ -119,6 +137,7 @@ def parseOpts(overrideArguments=None): | ||||
|     selection      = optparse.OptionGroup(parser, 'Video Selection') | ||||
|     authentication = optparse.OptionGroup(parser, 'Authentication Options') | ||||
|     video_format   = optparse.OptionGroup(parser, 'Video Format Options') | ||||
|     subtitles      = optparse.OptionGroup(parser, 'Subtitle Options') | ||||
|     downloader     = optparse.OptionGroup(parser, 'Download Options') | ||||
|     postproc       = optparse.OptionGroup(parser, 'Post-processing Options') | ||||
|     filesystem     = optparse.OptionGroup(parser, 'Filesystem Options') | ||||
| @@ -131,7 +150,7 @@ def parseOpts(overrideArguments=None): | ||||
|     general.add_option('-U', '--update', | ||||
|             action='store_true', dest='update_self', help='update this program to latest version. Make sure that you have sufficient permissions (run with sudo if needed)') | ||||
|     general.add_option('-i', '--ignore-errors', | ||||
|             action='store_true', dest='ignoreerrors', help='continue on download errors', default=False) | ||||
|             action='store_true', dest='ignoreerrors', help='continue on download errors, for example to to skip unavailable videos in a playlist', default=False) | ||||
|     general.add_option('--dump-user-agent', | ||||
|             action='store_true', dest='dump_user_agent', | ||||
|             help='display the current browser identification', default=False) | ||||
| @@ -148,6 +167,12 @@ def parseOpts(overrideArguments=None): | ||||
|             help='Output descriptions of all supported extractors', default=False) | ||||
|     general.add_option('--proxy', dest='proxy', default=None, help='Use the specified HTTP/HTTPS proxy', metavar='URL') | ||||
|     general.add_option('--no-check-certificate', action='store_true', dest='no_check_certificate', default=False, help='Suppress HTTPS certificate validation.') | ||||
|     general.add_option( | ||||
|         '--cache-dir', dest='cachedir', default=get_cachedir(), | ||||
|         help='Location in the filesystem where youtube-dl can store downloaded information permanently. By default $XDG_CACHE_HOME/youtube-dl or ~/.cache/youtube-dl .') | ||||
|     general.add_option( | ||||
|         '--no-cache-dir', action='store_const', const=None, dest='cachedir', | ||||
|         help='Disable filesystem caching') | ||||
|  | ||||
|  | ||||
|     selection.add_option('--playlist-start', | ||||
| @@ -162,6 +187,13 @@ def parseOpts(overrideArguments=None): | ||||
|     selection.add_option('--date', metavar='DATE', dest='date', help='download only videos uploaded in this date', default=None) | ||||
|     selection.add_option('--datebefore', metavar='DATE', dest='datebefore', help='download only videos uploaded before this date', default=None) | ||||
|     selection.add_option('--dateafter', metavar='DATE', dest='dateafter', help='download only videos uploaded after this date', default=None) | ||||
|     selection.add_option('--no-playlist', action='store_true', dest='noplaylist', help='download only the currently playing video', default=False) | ||||
|     selection.add_option('--age-limit', metavar='YEARS', dest='age_limit', | ||||
|                          help='download only videos suitable for the given age', | ||||
|                          default=None, type=int) | ||||
|     selection.add_option('--download-archive', metavar='FILE', | ||||
|                          dest='download_archive', | ||||
|                          help='Download only videos not present in the archive file. Record all downloaded videos in it.') | ||||
|  | ||||
|  | ||||
|     authentication.add_option('-u', '--username', | ||||
| @@ -176,7 +208,7 @@ def parseOpts(overrideArguments=None): | ||||
|  | ||||
|     video_format.add_option('-f', '--format', | ||||
|             action='store', dest='format', metavar='FORMAT', | ||||
|             help='video format code, specifiy the order of preference using slashes: "-f 22/17/18"') | ||||
|             help='video format code, specifiy the order of preference using slashes: "-f 22/17/18". "-f mp4" and "-f flv" are also supported') | ||||
|     video_format.add_option('--all-formats', | ||||
|             action='store_const', dest='format', help='download all available video formats', const='all') | ||||
|     video_format.add_option('--prefer-free-formats', | ||||
| @@ -185,27 +217,26 @@ def parseOpts(overrideArguments=None): | ||||
|             action='store', dest='format_limit', metavar='FORMAT', help='highest quality format to download') | ||||
|     video_format.add_option('-F', '--list-formats', | ||||
|             action='store_true', dest='listformats', help='list all available formats (currently youtube only)') | ||||
|     video_format.add_option('--write-sub', '--write-srt', | ||||
|  | ||||
|     subtitles.add_option('--write-sub', '--write-srt', | ||||
|             action='store_true', dest='writesubtitles', | ||||
|             help='write subtitle file (currently youtube only)', default=False) | ||||
|     video_format.add_option('--write-auto-sub', '--write-automatic-sub', | ||||
|             help='write subtitle file', default=False) | ||||
|     subtitles.add_option('--write-auto-sub', '--write-automatic-sub', | ||||
|             action='store_true', dest='writeautomaticsub', | ||||
|             help='write automatic subtitle file (currently youtube only)', default=False) | ||||
|     video_format.add_option('--only-sub', | ||||
|             action='store_true', dest='skip_download', | ||||
|             help='[deprecated] alias of --skip-download', default=False) | ||||
|     video_format.add_option('--all-subs', | ||||
|             help='write automatic subtitle file (youtube only)', default=False) | ||||
|     subtitles.add_option('--all-subs', | ||||
|             action='store_true', dest='allsubtitles', | ||||
|             help='downloads all the available subtitles of the video (currently youtube only)', default=False) | ||||
|     video_format.add_option('--list-subs', | ||||
|             help='downloads all the available subtitles of the video', default=False) | ||||
|     subtitles.add_option('--list-subs', | ||||
|             action='store_true', dest='listsubtitles', | ||||
|             help='lists all available subtitles for the video (currently youtube only)', default=False) | ||||
|     video_format.add_option('--sub-format', | ||||
|             help='lists all available subtitles for the video', default=False) | ||||
|     subtitles.add_option('--sub-format', | ||||
|             action='store', dest='subtitlesformat', metavar='FORMAT', | ||||
|             help='subtitle format [srt/sbv/vtt] (default=srt) (currently youtube only)', default='srt') | ||||
|     video_format.add_option('--sub-lang', '--srt-lang', | ||||
|             action='store', dest='subtitleslang', metavar='LANG', | ||||
|             help='language of the subtitles to download (optional) use IETF language tags like \'en\'') | ||||
|             help='subtitle format (default=srt) ([sbv/vtt] youtube only)', default='srt') | ||||
|     subtitles.add_option('--sub-lang', '--sub-langs', '--srt-lang', | ||||
|             action='callback', dest='subtitleslangs', metavar='LANGS', type='str', | ||||
|             default=[], callback=_comma_separated_values_options_callback, | ||||
|             help='languages of the subtitles to download (optional) separated by commas, use IETF language tags like \'en,pt\'') | ||||
|  | ||||
|     downloader.add_option('-r', '--rate-limit', | ||||
|             dest='ratelimit', metavar='LIMIT', help='maximum download rate (e.g. 50k or 44.6m)') | ||||
| @@ -254,6 +285,10 @@ def parseOpts(overrideArguments=None): | ||||
|     verbosity.add_option('--dump-intermediate-pages', | ||||
|             action='store_true', dest='dump_intermediate_pages', default=False, | ||||
|             help='print downloaded pages to debug problems(very verbose)') | ||||
|     verbosity.add_option('--youtube-print-sig-code', | ||||
|             action='store_true', dest='youtube_print_sig_code', default=False, | ||||
|             help=optparse.SUPPRESS_HELP) | ||||
|  | ||||
|  | ||||
|     filesystem.add_option('-t', '--title', | ||||
|             action='store_true', dest='usetitle', help='use title in file name (default)', default=False) | ||||
| @@ -320,6 +355,8 @@ def parseOpts(overrideArguments=None): | ||||
|             help='keeps the video file on disk after the post-processing; the video is erased by default') | ||||
|     postproc.add_option('--no-post-overwrites', action='store_true', dest='nopostoverwrites', default=False, | ||||
|             help='do not overwrite post-processed files; the post-processed files are overwritten by default') | ||||
|     postproc.add_option('--embed-subs', action='store_true', dest='embedsubtitles', default=False, | ||||
|             help='embed subtitles in the video (only for mp4 videos)') | ||||
|  | ||||
|  | ||||
|     parser.add_option_group(general) | ||||
| @@ -328,28 +365,33 @@ def parseOpts(overrideArguments=None): | ||||
|     parser.add_option_group(filesystem) | ||||
|     parser.add_option_group(verbosity) | ||||
|     parser.add_option_group(video_format) | ||||
|     parser.add_option_group(subtitles) | ||||
|     parser.add_option_group(authentication) | ||||
|     parser.add_option_group(postproc) | ||||
|  | ||||
|     if overrideArguments is not None: | ||||
|         opts, args = parser.parse_args(overrideArguments) | ||||
|         if opts.verbose: | ||||
|             sys.stderr.write(u'[debug] Override config: ' + repr(overrideArguments) + '\n') | ||||
|             write_string(u'[debug] Override config: ' + repr(overrideArguments) + '\n') | ||||
|     else: | ||||
|         xdg_config_home = os.environ.get('XDG_CONFIG_HOME') | ||||
|         if xdg_config_home: | ||||
|             userConfFile = os.path.join(xdg_config_home, 'youtube-dl.conf') | ||||
|             userConfFile = os.path.join(xdg_config_home, 'youtube-dl', 'config') | ||||
|             if not os.path.isfile(userConfFile): | ||||
|                 userConfFile = os.path.join(xdg_config_home, 'youtube-dl.conf') | ||||
|         else: | ||||
|             userConfFile = os.path.join(os.path.expanduser('~'), '.config', 'youtube-dl.conf') | ||||
|             userConfFile = os.path.join(os.path.expanduser('~'), '.config', 'youtube-dl', 'config') | ||||
|             if not os.path.isfile(userConfFile): | ||||
|                 userConfFile = os.path.join(os.path.expanduser('~'), '.config', 'youtube-dl.conf') | ||||
|         systemConf = _readOptions('/etc/youtube-dl.conf') | ||||
|         userConf = _readOptions(userConfFile) | ||||
|         commandLineConf = sys.argv[1:]  | ||||
|         commandLineConf = sys.argv[1:] | ||||
|         argv = systemConf + userConf + commandLineConf | ||||
|         opts, args = parser.parse_args(argv) | ||||
|         if opts.verbose: | ||||
|             sys.stderr.write(u'[debug] System config: ' + repr(systemConf) + '\n') | ||||
|             sys.stderr.write(u'[debug] User config: ' + repr(userConf) + '\n') | ||||
|             sys.stderr.write(u'[debug] Command-line args: ' + repr(commandLineConf) + '\n') | ||||
|             write_string(u'[debug] System config: ' + repr(_hide_login_info(systemConf)) + '\n') | ||||
|             write_string(u'[debug] User config: ' + repr(_hide_login_info(userConf)) + '\n') | ||||
|             write_string(u'[debug] Command-line args: ' + repr(_hide_login_info(commandLineConf)) + '\n') | ||||
|  | ||||
|     return parser, opts, args | ||||
|  | ||||
| @@ -372,12 +414,12 @@ def _real_main(argv=None): | ||||
|         except (IOError, OSError) as err: | ||||
|             if opts.verbose: | ||||
|                 traceback.print_exc() | ||||
|             sys.stderr.write(u'ERROR: unable to open cookie file\n') | ||||
|             write_string(u'ERROR: unable to open cookie file\n') | ||||
|             sys.exit(101) | ||||
|     # Set user agent | ||||
|     if opts.user_agent is not None: | ||||
|         std_headers['User-Agent'] = opts.user_agent | ||||
|      | ||||
|  | ||||
|     # Set referer | ||||
|     if opts.referer is not None: | ||||
|         std_headers['Referer'] = opts.referer | ||||
| @@ -398,6 +440,8 @@ def _real_main(argv=None): | ||||
|             batchurls = batchfd.readlines() | ||||
|             batchurls = [x.strip() for x in batchurls] | ||||
|             batchurls = [x for x in batchurls if len(x) > 0 and not re.search(r'^[#/;]', x)] | ||||
|             if opts.verbose: | ||||
|                 write_string(u'[debug] Batch file urls: ' + repr(batchurls) + u'\n') | ||||
|         except IOError: | ||||
|             sys.exit(u'ERROR: batch file could not be read') | ||||
|     all_urls = batchurls + args | ||||
| @@ -418,6 +462,10 @@ def _real_main(argv=None): | ||||
|     proxy_handler = compat_urllib_request.ProxyHandler(proxies) | ||||
|     https_handler = make_HTTPS_handler(opts) | ||||
|     opener = compat_urllib_request.build_opener(https_handler, proxy_handler, cookie_processor, YoutubeDLHandler()) | ||||
|     # Delete the default user-agent header, which would otherwise apply in | ||||
|     # cases where our custom HTTP handler doesn't come into play | ||||
|     # (See https://github.com/rg3/youtube-dl/issues/1309 for details) | ||||
|     opener.addheaders =[] | ||||
|     compat_urllib_request.install_opener(opener) | ||||
|     socket.setdefaulttimeout(300) # 5 minutes should be enough (famous last words) | ||||
|  | ||||
| @@ -436,6 +484,8 @@ def _real_main(argv=None): | ||||
|             if not ie._WORKING: | ||||
|                 continue | ||||
|             desc = getattr(ie, 'IE_DESC', ie.IE_NAME) | ||||
|             if desc is False: | ||||
|                 continue | ||||
|             if hasattr(ie, 'SEARCH_KEY'): | ||||
|                 _SEARCHES = (u'cute kittens', u'slithering pythons', u'falling cat', u'angry poodle', u'purple fish', u'running tortoise') | ||||
|                 _COUNTS = (u'', u'5', u'10', u'all') | ||||
| @@ -507,6 +557,11 @@ def _real_main(argv=None): | ||||
|     else: | ||||
|         date = DateRange(opts.dateafter, opts.datebefore) | ||||
|  | ||||
|     # --all-sub automatically sets --write-sub if --write-auto-sub is not given | ||||
|     # this was the old behaviour if only --all-sub was given. | ||||
|     if opts.allsubtitles and (opts.writeautomaticsub == False): | ||||
|         opts.writesubtitles = True | ||||
|  | ||||
|     if sys.version_info < (3,): | ||||
|         # In Python 2, sys.argv is a bytestring (also note http://bugs.python.org/issue2128 for Windows systems) | ||||
|         if opts.outtmpl is not None: | ||||
| @@ -519,6 +574,10 @@ def _real_main(argv=None): | ||||
|             or (opts.useid and u'%(id)s.%(ext)s') | ||||
|             or (opts.autonumber and u'%(autonumber)s-%(id)s.%(ext)s') | ||||
|             or u'%(title)s-%(id)s.%(ext)s') | ||||
|     if '%(ext)s' not in outtmpl and opts.extractaudio: | ||||
|         parser.error(u'Cannot download a video and extract audio into the same' | ||||
|                      u' file! Use "%%(ext)s" instead of %r' % | ||||
|                      determine_ext(outtmpl, u'')) | ||||
|  | ||||
|     # YoutubeDL | ||||
|     ydl = YoutubeDL({ | ||||
| @@ -553,6 +612,7 @@ def _real_main(argv=None): | ||||
|         'progress_with_newline': opts.progress_with_newline, | ||||
|         'playliststart': opts.playliststart, | ||||
|         'playlistend': opts.playlistend, | ||||
|         'noplaylist': opts.noplaylist, | ||||
|         'logtostderr': opts.outtmpl == '-', | ||||
|         'consoletitle': opts.consoletitle, | ||||
|         'nopart': opts.nopart, | ||||
| @@ -565,7 +625,7 @@ def _real_main(argv=None): | ||||
|         'allsubtitles': opts.allsubtitles, | ||||
|         'listsubtitles': opts.listsubtitles, | ||||
|         'subtitlesformat': opts.subtitlesformat, | ||||
|         'subtitleslang': opts.subtitleslang, | ||||
|         'subtitleslangs': opts.subtitleslangs, | ||||
|         'matchtitle': decodeOption(opts.matchtitle), | ||||
|         'rejecttitle': decodeOption(opts.rejecttitle), | ||||
|         'max_downloads': opts.max_downloads, | ||||
| @@ -577,10 +637,14 @@ def _real_main(argv=None): | ||||
|         'min_filesize': opts.min_filesize, | ||||
|         'max_filesize': opts.max_filesize, | ||||
|         'daterange': date, | ||||
|         'cachedir': opts.cachedir, | ||||
|         'youtube_print_sig_code': opts.youtube_print_sig_code, | ||||
|         'age_limit': opts.age_limit, | ||||
|         'download_archive': opts.download_archive, | ||||
|         }) | ||||
|  | ||||
|     if opts.verbose: | ||||
|         sys.stderr.write(u'[debug] youtube-dl version ' + __version__ + u'\n') | ||||
|         write_string(u'[debug] youtube-dl version ' + __version__ + u'\n') | ||||
|         try: | ||||
|             sp = subprocess.Popen( | ||||
|                 ['git', 'rev-parse', '--short', 'HEAD'], | ||||
| @@ -589,14 +653,14 @@ def _real_main(argv=None): | ||||
|             out, err = sp.communicate() | ||||
|             out = out.decode().strip() | ||||
|             if re.match('[0-9a-f]+', out): | ||||
|                 sys.stderr.write(u'[debug] Git HEAD: ' + out + u'\n') | ||||
|                 write_string(u'[debug] Git HEAD: ' + out + u'\n') | ||||
|         except: | ||||
|             try: | ||||
|                 sys.exc_clear() | ||||
|             except: | ||||
|                 pass | ||||
|         sys.stderr.write(u'[debug] Python version %s - %s' %(platform.python_version(), platform.platform()) + u'\n') | ||||
|         sys.stderr.write(u'[debug] Proxy map: ' + str(proxy_handler.proxies) + u'\n') | ||||
|         write_string(u'[debug] Python version %s - %s' %(platform.python_version(), platform_name()) + u'\n') | ||||
|         write_string(u'[debug] Proxy map: ' + str(proxy_handler.proxies) + u'\n') | ||||
|  | ||||
|     ydl.add_default_info_extractors() | ||||
|  | ||||
| @@ -605,10 +669,12 @@ def _real_main(argv=None): | ||||
|         ydl.add_post_processor(FFmpegExtractAudioPP(preferredcodec=opts.audioformat, preferredquality=opts.audioquality, nopostoverwrites=opts.nopostoverwrites)) | ||||
|     if opts.recodevideo: | ||||
|         ydl.add_post_processor(FFmpegVideoConvertor(preferedformat=opts.recodevideo)) | ||||
|     if opts.embedsubtitles: | ||||
|         ydl.add_post_processor(FFmpegEmbedSubtitlePP(subtitlesformat=opts.subtitlesformat)) | ||||
|  | ||||
|     # Update version | ||||
|     if opts.update_self: | ||||
|         update_self(ydl.to_screen, opts.verbose, sys.argv[0]) | ||||
|         update_self(ydl.to_screen, opts.verbose) | ||||
|  | ||||
|     # Maybe do nothing | ||||
|     if len(all_urls) < 1: | ||||
|   | ||||
							
								
								
									
										202
									
								
								youtube_dl/aes.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										202
									
								
								youtube_dl/aes.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,202 @@ | ||||
| __all__ = ['aes_encrypt', 'key_expansion', 'aes_ctr_decrypt', 'aes_decrypt_text'] | ||||
|  | ||||
| import base64 | ||||
| from math import ceil | ||||
|  | ||||
| from .utils import bytes_to_intlist, intlist_to_bytes | ||||
|  | ||||
# Size of one cipher block (and of one round key slice) in bytes
BLOCK_SIZE_BYTES = 16
|  | ||||
def aes_ctr_decrypt(data, key, counter):
    """
    Decrypt with aes in counter mode

    Each counter block is encrypted with the expanded key and XORed onto the
    matching block of 'data'; the last block may be shorter than 16 bytes.

    @param {int[]} data        cipher
    @param {int[]} key         16/24/32-Byte cipher key
    @param {instance} counter  Instance whose next_value function (@returns {int[]}  16-Byte block)
                               returns the next counter block
    @returns {int[]}           decrypted data
    """
    expanded_key = key_expansion(key)
    total_length = len(data)
    block_count = int(ceil(float(total_length) / BLOCK_SIZE_BYTES))

    plain = []
    for index in range(block_count):
        counter_block = counter.next_value()
        start = index * BLOCK_SIZE_BYTES
        chunk = data[start:start + BLOCK_SIZE_BYTES]
        # Zero-fill a short final block; the surplus is trimmed below
        chunk = chunk + [0] * (BLOCK_SIZE_BYTES - len(chunk))

        keystream = aes_encrypt(counter_block, expanded_key)
        plain.extend(xor(chunk, keystream))

    return plain[:total_length]
|  | ||||
def key_expansion(data):
    """
    Generate key schedule

    The input key is copied, never modified.

    @param {int[]} data  16/24/32-Byte cipher key
    @returns {int[]}     176/208/240-Byte expanded key
    """
    schedule = data[:]  # work on a copy
    key_length = len(schedule)
    target_length = (key_length // 4 + 7) * BLOCK_SIZE_BYTES
    # Plain words appended at the end of each round, depending on key size
    trailing_words = {32: 3, 24: 2}.get(key_length, 0)

    def append_word(word):
        # XOR 'word' with the word one key length back and append the result
        schedule.extend(xor(word, schedule[-key_length:4 - key_length]))

    rcon_iteration = 1
    while len(schedule) < target_length:
        append_word(key_schedule_core(schedule[-4:], rcon_iteration))
        rcon_iteration += 1

        for _ in range(3):
            append_word(schedule[-4:])

        if key_length == 32:
            # 256-bit keys: one extra substituted word per round
            append_word(sub_bytes(schedule[-4:]))

        for _ in range(trailing_words):
            append_word(schedule[-4:])

    # The last round may overshoot; cut to the exact schedule size
    return schedule[:target_length]
|  | ||||
def aes_encrypt(data, expanded_key):
    """
    Encrypt one block with aes

    The number of rounds is derived from the length of the expanded key.

    @param {int[]} data          16-Byte state
    @param {int[]} expanded_key  176/208/240-Byte expanded key
    @returns {int[]}             16-Byte cipher
    """
    last_round = len(expanded_key) // BLOCK_SIZE_BYTES - 1

    # Initial round key addition
    state = xor(data, expanded_key[:BLOCK_SIZE_BYTES])
    for round_number in range(1, last_round + 1):
        state = shift_rows(sub_bytes(state))
        # The final round skips the column mixing step
        if round_number < last_round:
            state = mix_columns(state)
        offset = round_number * BLOCK_SIZE_BYTES
        state = xor(state, expanded_key[offset:offset + BLOCK_SIZE_BYTES])

    return state
|  | ||||
def aes_decrypt_text(data, password, key_size_bytes):
    """
    Decrypt text
    - The first 8 Bytes of decoded 'data' are the 8 high Bytes of the counter
    - The cipher key is retrieved by encrypting the first 16 Byte of 'password'
      with the first 'key_size_bytes' Bytes from 'password' (if necessary filled with 0's);
      the resulting 16-Byte block is repeated and cut to 'key_size_bytes' Bytes
    - Mode of operation is 'counter'

    @param {str} data                    Base64 encoded string
    @param {str,unicode} password        Password (will be encoded with utf-8)
    @param {int} key_size_bytes          Possible values: 16 for 128-Bit, 24 for 192-Bit or 32 for 256-Bit
    @returns {bytes}                     Decrypted data (result of intlist_to_bytes)
    """
    NONCE_LENGTH_BYTES = 8

    data = bytes_to_intlist(base64.b64decode(data))
    password = bytes_to_intlist(password.encode('utf-8'))

    key = password[:key_size_bytes] + [0]*(key_size_bytes - len(password))
    encrypted_block = aes_encrypt(key[:BLOCK_SIZE_BYTES], key_expansion(key))
    # Stretch the 16-Byte block to the requested key size. Multiplying by
    # key_size_bytes // BLOCK_SIZE_BYTES would yield only 16 Bytes for a
    # 24-Byte key (i.e. silently AES-128); repeat-and-cut gives 16/24/32.
    key = (encrypted_block * 2)[:key_size_bytes]

    nonce = data[:NONCE_LENGTH_BYTES]
    cipher = data[NONCE_LENGTH_BYTES:]

    class Counter:
        # Counter block: nonce in the high Bytes, running count in the low Bytes
        __value = nonce + [0]*(BLOCK_SIZE_BYTES - NONCE_LENGTH_BYTES)

        def next_value(self):
            # Return the current block, then advance for the next call
            temp = self.__value
            self.__value = inc(self.__value)
            return temp

    decrypted_data = aes_ctr_decrypt(cipher, key, Counter())
    plaintext = intlist_to_bytes(decrypted_data)

    return plaintext
|  | ||||
# Round constants, indexed by the rcon_iteration passed to key_schedule_core
# (key_expansion starts at iteration 1, so index 0 is not used by it)
RCON = (0x8d, 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80, 0x1b, 0x36)
# Byte substitution table used by sub_bytes: SBOX[x] replaces byte value x
SBOX = (0x63, 0x7C, 0x77, 0x7B, 0xF2, 0x6B, 0x6F, 0xC5, 0x30, 0x01, 0x67, 0x2B, 0xFE, 0xD7, 0xAB, 0x76,
        0xCA, 0x82, 0xC9, 0x7D, 0xFA, 0x59, 0x47, 0xF0, 0xAD, 0xD4, 0xA2, 0xAF, 0x9C, 0xA4, 0x72, 0xC0,
        0xB7, 0xFD, 0x93, 0x26, 0x36, 0x3F, 0xF7, 0xCC, 0x34, 0xA5, 0xE5, 0xF1, 0x71, 0xD8, 0x31, 0x15,
        0x04, 0xC7, 0x23, 0xC3, 0x18, 0x96, 0x05, 0x9A, 0x07, 0x12, 0x80, 0xE2, 0xEB, 0x27, 0xB2, 0x75,
        0x09, 0x83, 0x2C, 0x1A, 0x1B, 0x6E, 0x5A, 0xA0, 0x52, 0x3B, 0xD6, 0xB3, 0x29, 0xE3, 0x2F, 0x84,
        0x53, 0xD1, 0x00, 0xED, 0x20, 0xFC, 0xB1, 0x5B, 0x6A, 0xCB, 0xBE, 0x39, 0x4A, 0x4C, 0x58, 0xCF,
        0xD0, 0xEF, 0xAA, 0xFB, 0x43, 0x4D, 0x33, 0x85, 0x45, 0xF9, 0x02, 0x7F, 0x50, 0x3C, 0x9F, 0xA8,
        0x51, 0xA3, 0x40, 0x8F, 0x92, 0x9D, 0x38, 0xF5, 0xBC, 0xB6, 0xDA, 0x21, 0x10, 0xFF, 0xF3, 0xD2,
        0xCD, 0x0C, 0x13, 0xEC, 0x5F, 0x97, 0x44, 0x17, 0xC4, 0xA7, 0x7E, 0x3D, 0x64, 0x5D, 0x19, 0x73,
        0x60, 0x81, 0x4F, 0xDC, 0x22, 0x2A, 0x90, 0x88, 0x46, 0xEE, 0xB8, 0x14, 0xDE, 0x5E, 0x0B, 0xDB,
        0xE0, 0x32, 0x3A, 0x0A, 0x49, 0x06, 0x24, 0x5C, 0xC2, 0xD3, 0xAC, 0x62, 0x91, 0x95, 0xE4, 0x79,
        0xE7, 0xC8, 0x37, 0x6D, 0x8D, 0xD5, 0x4E, 0xA9, 0x6C, 0x56, 0xF4, 0xEA, 0x65, 0x7A, 0xAE, 0x08,
        0xBA, 0x78, 0x25, 0x2E, 0x1C, 0xA6, 0xB4, 0xC6, 0xE8, 0xDD, 0x74, 0x1F, 0x4B, 0xBD, 0x8B, 0x8A,
        0x70, 0x3E, 0xB5, 0x66, 0x48, 0x03, 0xF6, 0x0E, 0x61, 0x35, 0x57, 0xB9, 0x86, 0xC1, 0x1D, 0x9E,
        0xE1, 0xF8, 0x98, 0x11, 0x69, 0xD9, 0x8E, 0x94, 0x9B, 0x1E, 0x87, 0xE9, 0xCE, 0x55, 0x28, 0xDF,
        0x8C, 0xA1, 0x89, 0x0D, 0xBF, 0xE6, 0x42, 0x68, 0x41, 0x99, 0x2D, 0x0F, 0xB0, 0x54, 0xBB, 0x16)
# Coefficients read by mix_column: entry [row][column] is the factor applied
# to input byte 'column' when computing output byte 'row'
MIX_COLUMN_MATRIX = ((2,3,1,1),
                     (1,2,3,1),
                     (1,1,2,3),
                     (3,1,1,2))
|  | ||||
def sub_bytes(data):
    """
    Substitute every byte through the SBOX table

    @param {int[]} data  byte values (0-255)
    @returns {int[]}     substituted bytes, same length as 'data'
    """
    substituted = []
    for value in data:
        substituted.append(SBOX[value])
    return substituted
|  | ||||
def rotate(data):
    """
    Rotate a list left by one position (first element moves to the end)

    @param {int[]} data  non-empty list
    @returns {int[]}     new rotated list; 'data' is left untouched
    """
    tail = data[1:]
    head = data[0]
    return tail + [head]
|  | ||||
def key_schedule_core(data, rcon_iteration):
    """
    Transform one word for the key schedule: rotate, substitute, then
    XOR the round constant into the first byte

    @param {int[]} data          4-Byte word
    @param {int} rcon_iteration  index into RCON
    @returns {int[]}             transformed word (new list)
    """
    word = sub_bytes(rotate(data))
    word[0] ^= RCON[rcon_iteration]
    return word
|  | ||||
def xor(data1, data2):
    """
    XOR two int lists element by element

    The result is as long as the shorter of the two inputs.

    @param {int[]} data1
    @param {int[]} data2
    @returns {int[]}  pairwise XOR
    """
    combined = []
    for left, right in zip(data1, data2):
        combined.append(left ^ right)
    return combined
|  | ||||
def mix_column(data):
    """
    Mix one 4-Byte column using the coefficients in MIX_COLUMN_MATRIX

    @param {int[]} data  4-Byte column
    @returns {int[]}     4-Byte mixed column
    """
    mixed_column = []
    for coefficients in MIX_COLUMN_MATRIX:
        accumulator = 0
        for column in range(4):
            value = data[column]
            factor = coefficients[column]
            term = value
            if factor in (2, 3):
                # Times two: shift left, reduce with 0x1b on overflow
                term <<= 1
                if term > 0xff:
                    term = (term & 0xff) ^ 0x1b
                # Times three is times two plus the value itself
                if factor == 3:
                    term ^= value
            accumulator ^= term & 0xff
        mixed_column.append(accumulator)
    return mixed_column
|  | ||||
def mix_columns(data):
    """
    Apply mix_column to each of the four 4-Byte columns of the state

    @param {int[]} data  16-Byte state
    @returns {int[]}     16-Byte mixed state
    """
    mixed_state = []
    for offset in range(0, 16, 4):
        mixed_state.extend(mix_column(data[offset:offset + 4]))
    return mixed_state
|  | ||||
def shift_rows(data):
    """
    Shift the rows of the state: output byte (column, row) is taken from
    input column (column + row) mod 4 of the same row

    @param {int[]} data  16-Byte state, stored column by column
    @returns {int[]}     16-Byte shifted state
    """
    return [
        data[((column + row) % 4) * 4 + row]
        for column in range(4)
        for row in range(4)
    ]
|  | ||||
def inc(data):
    """
    Increment a big-endian byte list by one, wrapping to all zeros on overflow

    @param {int[]} data  byte values (0-255), most significant first
    @returns {int[]}     incremented copy; 'data' is left untouched
    """
    result = data[:]  # copy
    position = len(result) - 1
    while position >= 0:
        if result[position] != 255:
            result[position] = result[position] + 1
            break
        # Byte overflows: reset it and carry into the next higher byte
        result[position] = 0
        position -= 1
    return result
| @@ -1,27 +1,50 @@ | ||||
| from .appletrailers import AppleTrailersIE | ||||
| from .addanime import AddAnimeIE | ||||
| from .archiveorg import ArchiveOrgIE | ||||
| from .ard import ARDIE | ||||
| from .arte import ArteTvIE | ||||
| from .auengine import AUEngineIE | ||||
| from .bandcamp import BandcampIE | ||||
| from .bliptv import BlipTVIE, BlipTVUserIE | ||||
| from .bloomberg import BloombergIE | ||||
| from .breakcom import BreakIE | ||||
| from .brightcove import BrightcoveIE | ||||
| from .c56 import C56IE | ||||
| from .canalplus import CanalplusIE | ||||
| from .canalc2 import Canalc2IE | ||||
| from .cnn import CNNIE | ||||
| from .collegehumor import CollegeHumorIE | ||||
| from .comedycentral import ComedyCentralIE | ||||
| from .condenast import CondeNastIE | ||||
| from .criterion import CriterionIE | ||||
| from .cspan import CSpanIE | ||||
| from .dailymotion import DailymotionIE | ||||
| from .dailymotion import ( | ||||
|     DailymotionIE, | ||||
|     DailymotionPlaylistIE, | ||||
|     DailymotionUserIE, | ||||
| ) | ||||
| from .daum import DaumIE | ||||
| from .depositfiles import DepositFilesIE | ||||
| from .dotsub import DotsubIE | ||||
| from .dreisat import DreiSatIE | ||||
| from .defense import DefenseGouvFrIE | ||||
| from .ebaumsworld import EbaumsWorldIE | ||||
| from .ehow import EHowIE | ||||
| from .eighttracks import EightTracksIE | ||||
| from .escapist import EscapistIE | ||||
| from .exfm import ExfmIE | ||||
| from .facebook import FacebookIE | ||||
| from .fktv import ( | ||||
|     FKTVIE, | ||||
|     FKTVPosteckeIE, | ||||
| ) | ||||
| from .flickr import FlickrIE | ||||
| from .francetv import ( | ||||
|     PluzzIE, | ||||
|     FranceTvInfoIE, | ||||
|     France2IE, | ||||
|     GenerationQuoiIE | ||||
| ) | ||||
| from .freesound import FreesoundIE | ||||
| from .funnyordie import FunnyOrDieIE | ||||
| from .gamespot import GameSpotIE | ||||
| @@ -29,6 +52,7 @@ from .gametrailers import GametrailersIE | ||||
| from .generic import GenericIE | ||||
| from .googleplus import GooglePlusIE | ||||
| from .googlesearch import GoogleSearchIE | ||||
| from .hark import HarkIE | ||||
| from .hotnewhiphop import HotNewHipHopIE | ||||
| from .howcast import HowcastIE | ||||
| from .hypem import HypemIE | ||||
| @@ -36,24 +60,43 @@ from .ign import IGNIE, OneUPIE | ||||
| from .ina import InaIE | ||||
| from .infoq import InfoQIE | ||||
| from .instagram import InstagramIE | ||||
| from .jeuxvideo import JeuxVideoIE | ||||
| from .jukebox import JukeboxIE | ||||
| from .justintv import JustinTVIE | ||||
| from .kankan import KankanIE | ||||
| from .kickstarter import KickStarterIE | ||||
| from .keek import KeekIE | ||||
| from .liveleak import LiveLeakIE | ||||
| from .livestream import LivestreamIE | ||||
| from .metacafe import MetacafeIE | ||||
| from .metacritic import MetacriticIE | ||||
| from .mit import TechTVMITIE, MITIE | ||||
| from .mixcloud import MixcloudIE | ||||
| from .mtv import MTVIE | ||||
| from .muzu import MuzuTVIE | ||||
| from .myspass import MySpassIE | ||||
| from .myvideo import MyVideoIE | ||||
| from .naver import NaverIE | ||||
| from .nba import NBAIE | ||||
| from .nbc import NBCNewsIE | ||||
| from .newgrounds import NewgroundsIE | ||||
| from .ooyala import OoyalaIE | ||||
| from .orf import ORFIE | ||||
| from .pbs import PBSIE | ||||
| from .photobucket import PhotobucketIE | ||||
| from .pornotube import PornotubeIE | ||||
| from .rbmaradio import RBMARadioIE | ||||
| from .redtube import RedTubeIE | ||||
| from .ringtv import RingTVIE | ||||
| from .ro220 import Ro220IE | ||||
| from .roxwel import RoxwelIE | ||||
| from .rtlnow import RTLnowIE | ||||
| from .sina import SinaIE | ||||
| from .soundcloud import SoundcloudIE, SoundcloudSetIE | ||||
| from .slashdot import SlashdotIE | ||||
| from .slideshare import SlideshareIE | ||||
| from .sohu import SohuIE | ||||
| from .soundcloud import SoundcloudIE, SoundcloudSetIE, SoundcloudUserIE | ||||
| from .southparkstudios import SouthParkStudiosIE | ||||
| from .spiegel import SpiegelIE | ||||
| from .stanfordoc import StanfordOpenClassroomIE | ||||
| from .statigram import StatigramIE | ||||
| @@ -63,16 +106,21 @@ from .ted import TEDIE | ||||
| from .tf1 import TF1IE | ||||
| from .thisav import ThisAVIE | ||||
| from .traileraddict import TrailerAddictIE | ||||
| from .trilulilu import TriluliluIE | ||||
| from .tudou import TudouIE | ||||
| from .tumblr import TumblrIE | ||||
| from .tutv import TutvIE | ||||
| from .ustream import UstreamIE | ||||
| from .unistra import UnistraIE | ||||
| from .ustream import UstreamIE, UstreamChannelIE | ||||
| from .vbox7 import Vbox7IE | ||||
| from .veehd import VeeHDIE | ||||
| from .veoh import VeohIE | ||||
| from .vevo import VevoIE | ||||
| from .vimeo import VimeoIE | ||||
| from .vice import ViceIE | ||||
| from .viddler import ViddlerIE | ||||
| from .videofyme import VideofyMeIE | ||||
| from .vimeo import VimeoIE, VimeoChannelIE | ||||
| from .vine import VineIE | ||||
| from .c56 import C56IE | ||||
| from .wat import WatIE | ||||
| from .weibo import WeiboIE | ||||
| from .wimp import WimpIE | ||||
| @@ -93,6 +141,8 @@ from .youtube import ( | ||||
|     YoutubeShowIE, | ||||
|     YoutubeSubscriptionsIE, | ||||
|     YoutubeRecommendedIE, | ||||
|     YoutubeTruncatedURLIE, | ||||
|     YoutubeWatchLaterIE, | ||||
|     YoutubeFavouritesIE, | ||||
| ) | ||||
| from .zdf import ZDFIE | ||||
| @@ -105,12 +155,14 @@ _ALL_CLASSES = [ | ||||
| ] | ||||
| _ALL_CLASSES.append(GenericIE) | ||||
|  | ||||
|  | ||||
def gen_extractors():
    """Build one instance of each class registered in _ALL_CLASSES.

    The returned list keeps the registration order, which is significant:
    the first extractor that matches a URL is the one that handles it.
    """
    extractors = []
    for ie_class in _ALL_CLASSES:
        extractors.append(ie_class())
    return extractors
|  | ||||
|  | ||||
def get_info_extractor(ie_name):
    """Return the info extractor class registered for ie_name.

    The class is looked up in this module's global namespace under the
    key ie_name + 'IE'; a missing name surfaces as KeyError.
    """
    class_name = ie_name + 'IE'
    module_namespace = globals()
    return module_namespace[class_name]
|   | ||||
							
								
								
									
										75
									
								
								youtube_dl/extractor/addanime.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										75
									
								
								youtube_dl/extractor/addanime.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,75 @@ | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     compat_HTTPError, | ||||
|     compat_str, | ||||
|     compat_urllib_parse, | ||||
|     compat_urllib_parse_urlparse, | ||||
|  | ||||
|     ExtractorError, | ||||
| ) | ||||
|  | ||||
|  | ||||
class AddAnimeIE(InfoExtractor):
    """Information extractor for add-anime.net ``watch_video.php`` pages.

    The site may answer the first request with an HTTP error whose body is a
    challenge form; in that case the extractor solves the arithmetic task
    embedded in the page, submits the answer and then retries the download.
    """

    # The dot in "watch_video.php" is escaped so it only matches a literal dot.
    _VALID_URL = r'^http://(?:\w+\.)?add-anime\.net/watch_video\.php\?(?:.*?)v=(?P<video_id>[\w_]+)(?:.*)'
    IE_NAME = u'AddAnime'
    _TEST = {
        u'url': u'http://www.add-anime.net/watch_video.php?v=24MR3YO5SAS9',
        u'file': u'24MR3YO5SAS9.flv',
        u'md5': u'1036a0e0cd307b95bd8a8c3a5c8cfaf1',
        u'info_dict': {
            u"description": u"One Piece 606",
            u"title": u"One Piece 606"
        }
    }

    def _real_extract(self, url):
        """Return the info dict for the video at ``url``.

        Raises ExtractorError when the page cannot be downloaded for a reason
        other than an HTTP error, or when the challenge page does not contain
        the expected form fields / arithmetic task.
        """
        # URL parsing cannot raise ExtractorError, so it stays outside the
        # try block; only the download itself is guarded.
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('video_id')

        try:
            webpage = self._download_webpage(url, video_id)
        except ExtractorError as ee:
            if not isinstance(ee.cause, compat_HTTPError):
                raise

            # The body of the HTTP error is a challenge page.
            # NOTE(review): the field names (jschl_vc / jschl_answer) look like
            # a JavaScript anti-bot challenge -- confirm against the live site.
            redir_webpage = ee.cause.read().decode('utf-8')
            action = self._search_regex(
                r'<form id="challenge-form" action="([^"]+)"',
                redir_webpage, u'Redirect form')
            vc = self._search_regex(
                r'<input type="hidden" name="jschl_vc" value="([^"]+)"/>',
                redir_webpage, u'redirect vc value')
            av = re.search(
                r'a\.value = ([0-9]+)[+]([0-9]+)[*]([0-9]+);',
                redir_webpage)
            if av is None:
                raise ExtractorError(u'Cannot find redirect math task')
            # Evaluate "x + y * z" with the usual operator precedence.
            av_res = int(av.group(1)) + int(av.group(2)) * int(av.group(3))

            parsed_url = compat_urllib_parse_urlparse(url)
            # The expected answer is the arithmetic result plus the length of
            # the host name.
            av_val = av_res + len(parsed_url.netloc)
            confirm_url = (
                parsed_url.scheme + u'://' + parsed_url.netloc +
                action + '?' +
                compat_urllib_parse.urlencode({
                    'jschl_vc': vc, 'jschl_answer': compat_str(av_val)}))
            self._download_webpage(
                confirm_url, video_id,
                note=u'Confirming after redirect')
            # Retry the original page now that the challenge has been answered.
            webpage = self._download_webpage(url, video_id)

        video_url = self._search_regex(r"var normal_video_file = '(.*?)';",
                                       webpage, u'video file URL')
        video_title = self._og_search_title(webpage)
        video_description = self._og_search_description(webpage)

        return {
            '_type': 'video',
            'id':  video_id,
            'url': video_url,
            'ext': 'flv',
            'title': video_title,
            'description': video_description
        }
							
								
								
									
										138
									
								
								youtube_dl/extractor/appletrailers.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										138
									
								
								youtube_dl/extractor/appletrailers.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,138 @@ | ||||
| import re | ||||
| import xml.etree.ElementTree | ||||
| import json | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     compat_urlparse, | ||||
|     determine_ext, | ||||
| ) | ||||
|  | ||||
|  | ||||
class AppleTrailersIE(InfoExtractor):
    """Information extractor for movie pages on trailers.apple.com.

    A movie page lists several trailers; the extractor returns them as a
    playlist with one video entry per trailer.
    """

    _VALID_URL = r'https?://(?:www\.)?trailers\.apple\.com/trailers/(?P<company>[^/]+)/(?P<movie>[^/]+)'
    _TEST = {
        u"url": u"http://trailers.apple.com/trailers/wb/manofsteel/",
        u"playlist": [
            {
                u"file": u"manofsteel-trailer4.mov",
                u"md5": u"d97a8e575432dbcb81b7c3acb741f8a8",
                u"info_dict": {
                    u"duration": 111,
                    u"title": u"Trailer 4",
                    u"upload_date": u"20130523",
                    u"uploader_id": u"wb",
                },
            },
            {
                u"file": u"manofsteel-trailer3.mov",
                u"md5": u"b8017b7131b721fb4e8d6f49e1df908c",
                u"info_dict": {
                    u"duration": 182,
                    u"title": u"Trailer 3",
                    u"upload_date": u"20130417",
                    u"uploader_id": u"wb",
                },
            },
            {
                u"file": u"manofsteel-trailer.mov",
                u"md5": u"d0f1e1150989b9924679b441f3404d48",
                u"info_dict": {
                    u"duration": 148,
                    u"title": u"Trailer",
                    u"upload_date": u"20121212",
                    u"uploader_id": u"wb",
                },
            },
            {
                u"file": u"manofsteel-teaser.mov",
                u"md5": u"5fe08795b943eb2e757fa95cb6def1cb",
                u"info_dict": {
                    u"duration": 93,
                    u"title": u"Teaser",
                    u"upload_date": u"20120721",
                    u"uploader_id": u"wb",
                },
            }
        ]
    }

    # Captures the JSON argument of the iTunes.playURL(...) call found in the
    # onClick attribute of each trailer link.
    _JSON_RE = r'iTunes\.playURL\((.*?)\);'

    def _real_extract(self, url):
        """Return a playlist dict with one video entry per listed trailer."""
        mobj = re.match(self._VALID_URL, url)
        movie = mobj.group('movie')
        uploader_id = mobj.group('company')

        playlist_url = compat_urlparse.urljoin(url, u'includes/playlists/itunes.inc')
        playlist_snippet = self._download_webpage(playlist_url, movie)
        # The snippet is HTML, not XML: drop the scripts and self-close the
        # <img> tags so that ElementTree can parse it.
        playlist_cleaned = re.sub(r'(?s)<script[^<]*?>.*?</script>', u'', playlist_snippet)
        playlist_cleaned = re.sub(r'<img ([^<]*?)>', r'<img \1/>', playlist_cleaned)

        # The ' in the onClick attributes are not escaped, so the snippet
        # could not be parsed with xml.etree.ElementTree.fromstring
        # (example: http://trailers.apple.com/trailers/wb/gravity/).
        # Replace them with the numeric character reference; the XML parser
        # turns it back into ' when the attribute is read.
        def _clean_json(m):
            return u'iTunes.playURL(%s);' % m.group(1).replace('\'', '&#39;')
        playlist_cleaned = re.sub(self._JSON_RE, _clean_json, playlist_cleaned)
        playlist_html = u'<html>' + playlist_cleaned + u'</html>'

        doc = xml.etree.ElementTree.fromstring(playlist_html)
        playlist = []
        for li in doc.findall('./div/ul/li'):
            on_click = li.find('.//a').attrib['onClick']
            trailer_info_json = self._search_regex(self._JSON_RE,
                on_click, u'trailer info')
            trailer_info = json.loads(trailer_info_json)
            title = trailer_info['title']
            # e.g. movie "manofsteel" + title "Trailer 4" -> "manofsteel-trailer4"
            video_id = movie + '-' + re.sub(r'[^a-zA-Z0-9]', '', title).lower()
            thumbnail = li.find('.//img').attrib['src']
            upload_date = trailer_info['posted'].replace('-', '')

            # Runtime is given as "minutes:seconds"; convert it to seconds.
            runtime = trailer_info['runtime']
            m = re.search(r'(?P<minutes>[0-9]+):(?P<seconds>[0-9]{1,2})', runtime)
            duration = None
            if m:
                duration = 60 * int(m.group('minutes')) + int(m.group('seconds'))

            first_url = trailer_info['url']
            trailer_id = first_url.split('/')[-1].rpartition('_')[0].lower()
            settings_json_url = compat_urlparse.urljoin(url, 'includes/settings/%s.json' % trailer_id)
            settings_json = self._download_webpage(settings_json_url, trailer_id, u'Downloading settings json')
            settings = json.loads(settings_json)

            formats = []
            for size in settings['metadata']['sizes']:
                # The src is a file pointing to the real video file
                format_url = re.sub(r'_(\d*p\.mov)', r'_h\1', size['src'])
                formats.append({
                    'url': format_url,
                    'ext': determine_ext(format_url),
                    'format': size['type'],
                    'width': size['width'],
                    'height': int(size['height']),
                })
            # Ascending order, so the last entry has the largest (height, width).
            formats = sorted(formats, key=lambda f: (f['height'], f['width']))

            info = {
                '_type': 'video',
                'id': video_id,
                'title': title,
                'formats': formats,
                'duration': duration,
                'thumbnail': thumbnail,
                'upload_date': upload_date,
                'uploader_id': uploader_id,
                'user_agent': 'QuickTime compatible (youtube-dl)',
            }
            # TODO: Remove when #980 has been merged
            info['url'] = formats[-1]['url']
            info['ext'] = formats[-1]['ext']

            playlist.append(info)

        return {
            '_type': 'playlist',
            'id': movie,
            'entries': playlist,
        }
| @@ -46,6 +46,8 @@ class ArchiveOrgIE(InfoExtractor): | ||||
|             for fn,fdata in data['files'].items() | ||||
|             if 'Video' in fdata['format']] | ||||
|         formats.sort(key=lambda fdata: fdata['file_size']) | ||||
|         for f in formats: | ||||
|             f['ext'] = determine_ext(f['url']) | ||||
|  | ||||
|         info = { | ||||
|             '_type': 'video', | ||||
| @@ -61,7 +63,6 @@ class ArchiveOrgIE(InfoExtractor): | ||||
|             info['thumbnail'] = thumbnail | ||||
|  | ||||
|         # TODO: Remove when #980 has been merged | ||||
|         info['url'] = formats[-1]['url'] | ||||
|         info['ext'] = determine_ext(formats[-1]['url']) | ||||
|         info.update(formats[-1]) | ||||
|  | ||||
|         return info | ||||
|         return info | ||||
|   | ||||
| @@ -17,13 +17,14 @@ class ArteTvIE(InfoExtractor): | ||||
|     """ | ||||
|     _EMISSION_URL = r'(?:http://)?www\.arte.tv/guide/(?P<lang>fr|de)/(?:(?:sendungen|emissions)/)?(?P<id>.*?)/(?P<name>.*?)(\?.*)?' | ||||
|     _VIDEOS_URL = r'(?:http://)?videos.arte.tv/(?P<lang>fr|de)/.*-(?P<id>.*?).html' | ||||
|     _LIVEWEB_URL = r'(?:http://)?liveweb.arte.tv/(?P<lang>fr|de)/(?P<subpage>.+?)/(?P<name>.+)' | ||||
|     _LIVE_URL = r'index-[0-9]+\.html$' | ||||
|  | ||||
|     IE_NAME = u'arte.tv' | ||||
|  | ||||
|     @classmethod | ||||
|     def suitable(cls, url): | ||||
|         return any(re.match(regex, url) for regex in (cls._EMISSION_URL, cls._VIDEOS_URL)) | ||||
|         return any(re.match(regex, url) for regex in (cls._EMISSION_URL, cls._VIDEOS_URL, cls._LIVEWEB_URL)) | ||||
|  | ||||
|     # TODO implement Live Stream | ||||
|     # from ..utils import compat_urllib_parse | ||||
| @@ -68,6 +69,12 @@ class ArteTvIE(InfoExtractor): | ||||
|             lang = mobj.group('lang') | ||||
|             return self._extract_video(url, id, lang) | ||||
|  | ||||
|         mobj = re.match(self._LIVEWEB_URL, url) | ||||
|         if mobj is not None: | ||||
|             name = mobj.group('name') | ||||
|             lang = mobj.group('lang') | ||||
|             return self._extract_liveweb(url, name, lang) | ||||
|  | ||||
|         if re.search(self._LIVE_URL, video_id) is not None: | ||||
|             raise ExtractorError(u'Arte live streams are not yet supported, sorry') | ||||
|             # self.extractLiveStream(url) | ||||
| @@ -85,7 +92,7 @@ class ArteTvIE(InfoExtractor): | ||||
|  | ||||
|         info_dict = {'id': player_info['VID'], | ||||
|                      'title': player_info['VTI'], | ||||
|                      'description': player_info['VDE'], | ||||
|                      'description': player_info.get('VDE'), | ||||
|                      'upload_date': unified_strdate(player_info['VDA'].split(' ')[0]), | ||||
|                      'thumbnail': player_info['programImage'], | ||||
|                      'ext': 'flv', | ||||
| @@ -98,12 +105,14 @@ class ArteTvIE(InfoExtractor): | ||||
|                 l = 'F' | ||||
|             elif lang == 'de': | ||||
|                 l = 'A' | ||||
|             regexes = [r'VO?%s' % l, r'V%s-ST.' % l] | ||||
|             regexes = [r'VO?%s' % l, r'VO?.-ST%s' % l] | ||||
|             return any(re.match(r, f['versionCode']) for r in regexes) | ||||
|         # Some formats may not be in the same language as the url | ||||
|         formats = filter(_match_lang, formats) | ||||
|         # We order the formats by quality | ||||
|         formats = sorted(formats, key=lambda f: int(f['height'])) | ||||
|         # Prefer videos without subtitles in the same language | ||||
|         formats = sorted(formats, key=lambda f: re.match(r'VO(F|A)-STM\1', f['versionCode']) is None) | ||||
|         # Pick the best quality | ||||
|         format_info = formats[-1] | ||||
|         if format_info['mediaType'] == u'rtmp': | ||||
| @@ -144,3 +153,22 @@ class ArteTvIE(InfoExtractor): | ||||
|                 'url': video_url, | ||||
|                 'ext': 'flv', | ||||
|                 } | ||||
|  | ||||
|     def _extract_liveweb(self, url, name, lang): | ||||
|         """Extract form http://liveweb.arte.tv/""" | ||||
|         webpage = self._download_webpage(url, name) | ||||
|         video_id = self._search_regex(r'eventId=(\d+?)("|&)', webpage, u'event id') | ||||
|         config_xml = self._download_webpage('http://download.liveweb.arte.tv/o21/liveweb/events/event-%s.xml' % video_id, | ||||
|                                             video_id, u'Downloading information') | ||||
|         config_doc = xml.etree.ElementTree.fromstring(config_xml.encode('utf-8')) | ||||
|         event_doc = config_doc.find('event') | ||||
|         url_node = event_doc.find('video').find('urlHd') | ||||
|         if url_node is None: | ||||
|             url_node = video_doc.find('urlSd') | ||||
|  | ||||
|         return {'id': video_id, | ||||
|                 'title': event_doc.find('name%s' % lang.capitalize()).text, | ||||
|                 'url': url_node.text.replace('MP4', 'mp4'), | ||||
|                 'ext': 'flv', | ||||
|                 'thumbnail': self._og_search_thumbnail(webpage), | ||||
|                 } | ||||
|   | ||||
| @@ -115,7 +115,7 @@ class BlipTVIE(InfoExtractor): | ||||
|                 ext = umobj.group(1) | ||||
|  | ||||
|                 info = { | ||||
|                     'id': data['item_id'], | ||||
|                     'id': compat_str(data['item_id']), | ||||
|                     'url': video_url, | ||||
|                     'uploader': data['display_name'], | ||||
|                     'upload_date': upload_date, | ||||
|   | ||||
							
								
								
									
										27
									
								
								youtube_dl/extractor/bloomberg.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										27
									
								
								youtube_dl/extractor/bloomberg.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,27 @@ | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
|  | ||||
|  | ||||
class BloombergIE(InfoExtractor):
    """Information extractor for video pages on www.bloomberg.com.

    The page itself only embeds the video: the URL advertised in its Open
    Graph video metadata is handed over to the Ooyala extractor.
    """

    # The dot before "html" is escaped so that only a literal ".html" matches.
    _VALID_URL = r'https?://www\.bloomberg\.com/video/(?P<name>.+?)\.html'

    _TEST = {
        u'url': u'http://www.bloomberg.com/video/shah-s-presentation-on-foreign-exchange-strategies-qurhIVlJSB6hzkVi229d8g.html',
        u'file': u'12bzhqZTqQHmmlA8I-i0NpzJgcG5NNYX.mp4',
        u'info_dict': {
            u'title': u'Shah\'s Presentation on Foreign-Exchange Strategies',
            u'description': u'md5:abc86e5236f9f0e4866c59ad36736686',
        },
        u'params': {
            # Requires ffmpeg (m3u8 manifest)
            u'skip_download': True,
        },
    }

    def _real_extract(self, url):
        """Return a url_result that delegates the page's video to Ooyala."""
        mobj = re.match(self._VALID_URL, url)
        name = mobj.group('name')
        webpage = self._download_webpage(url, name)
        ooyala_url = self._og_search_video_url(webpage)
        return self.url_result(ooyala_url, ie='Ooyala')
| @@ -1,3 +1,5 @@ | ||||
| # encoding: utf-8 | ||||
|  | ||||
| import re | ||||
| import json | ||||
| import xml.etree.ElementTree | ||||
| @@ -7,15 +9,39 @@ from ..utils import ( | ||||
|     compat_urllib_parse, | ||||
|     find_xpath_attr, | ||||
|     compat_urlparse, | ||||
|  | ||||
|     ExtractorError, | ||||
| ) | ||||
|  | ||||
| class BrightcoveIE(InfoExtractor): | ||||
|     _VALID_URL = r'https?://.*brightcove\.com/(services|viewer).*\?(?P<query>.*)' | ||||
|     _FEDERATED_URL_TEMPLATE = 'http://c.brightcove.com/services/viewer/htmlFederated?%s' | ||||
|     _PLAYLIST_URL_TEMPLATE = 'http://c.brightcove.com/services/json/experience/runtime/?command=get_programming_for_experience&playerKey=%s' | ||||
|      | ||||
|     # There is a test for Brightcove in GenericIE, that way we test both the download | ||||
|     # and the detection of videos, and we don't have to find a URL that is always valid | ||||
|  | ||||
|     _TESTS = [ | ||||
|         { | ||||
|             # From http://www.8tv.cat/8aldia/videos/xavier-sala-i-martin-aquesta-tarda-a-8-al-dia/ | ||||
|             u'url': u'http://c.brightcove.com/services/viewer/htmlFederated?playerID=1654948606001&flashID=myExperience&%40videoPlayer=2371591881001', | ||||
|             u'file': u'2371591881001.mp4', | ||||
|             u'md5': u'9e80619e0a94663f0bdc849b4566af19', | ||||
|             u'note': u'Test Brightcove downloads and detection in GenericIE', | ||||
|             u'info_dict': { | ||||
|                 u'title': u'Xavier Sala i Martín: “Un banc que no presta és un banc zombi que no serveix per a res”', | ||||
|                 u'uploader': u'8TV', | ||||
|                 u'description': u'md5:a950cc4285c43e44d763d036710cd9cd', | ||||
|             } | ||||
|         }, | ||||
|         { | ||||
|             # From http://medianetwork.oracle.com/video/player/1785452137001 | ||||
|             u'url': u'http://c.brightcove.com/services/viewer/htmlFederated?playerID=1217746023001&flashID=myPlayer&%40videoPlayer=1785452137001', | ||||
|             u'file': u'1785452137001.flv', | ||||
|             u'info_dict': { | ||||
|                 u'title': u'JVMLS 2012: Arrays 2.0 - Opportunities and Challenges', | ||||
|                 u'description': u'John Rose speaks at the JVM Language Summit, August 1, 2012.', | ||||
|                 u'uploader': u'Oracle', | ||||
|             }, | ||||
|         }, | ||||
|     ] | ||||
|  | ||||
|     @classmethod | ||||
|     def _build_brighcove_url(cls, object_str): | ||||
| @@ -23,6 +49,11 @@ class BrightcoveIE(InfoExtractor): | ||||
|         Build a Brightcove url from a xml string containing | ||||
|         <object class="BrightcoveExperience">{params}</object> | ||||
|         """ | ||||
|  | ||||
|         # Fix up some stupid HTML, see https://github.com/rg3/youtube-dl/issues/1553 | ||||
|         object_str = re.sub(r'(<param name="[^"]+" value="[^"]+")>', | ||||
|                             lambda m: m.group(1) + '/>', object_str) | ||||
|  | ||||
|         object_doc = xml.etree.ElementTree.fromstring(object_str) | ||||
|         assert u'BrightcoveExperience' in object_doc.attrib['class'] | ||||
|         params = {'flashID': object_doc.attrib['id'], | ||||
| @@ -72,15 +103,27 @@ class BrightcoveIE(InfoExtractor): | ||||
|                                     playlist_title=playlist_info['mediaCollectionDTO']['displayName']) | ||||
|  | ||||
|     def _extract_video_info(self, video_info): | ||||
|         renditions = video_info['renditions'] | ||||
|         renditions = sorted(renditions, key=lambda r: r['size']) | ||||
|         best_format = renditions[-1] | ||||
|         info = { | ||||
|             'id': video_info['id'], | ||||
|             'title': video_info['displayName'], | ||||
|             'description': video_info.get('shortDescription'), | ||||
|             'thumbnail': video_info.get('videoStillURL') or video_info.get('thumbnailURL'), | ||||
|             'uploader': video_info.get('publisherName'), | ||||
|         } | ||||
|  | ||||
|         return {'id': video_info['id'], | ||||
|                 'title': video_info['displayName'], | ||||
|                 'url': best_format['defaultURL'],  | ||||
|         renditions = video_info.get('renditions') | ||||
|         if renditions: | ||||
|             renditions = sorted(renditions, key=lambda r: r['size']) | ||||
|             best_format = renditions[-1] | ||||
|             info.update({ | ||||
|                 'url': best_format['defaultURL'], | ||||
|                 'ext': 'mp4', | ||||
|                 'description': video_info.get('shortDescription'), | ||||
|                 'thumbnail': video_info.get('videoStillURL') or video_info.get('thumbnailURL'), | ||||
|                 'uploader': video_info.get('publisherName'), | ||||
|                 } | ||||
|             }) | ||||
|         elif video_info.get('FLVFullLengthURL') is not None: | ||||
|             info.update({ | ||||
|                 'url': video_info['FLVFullLengthURL'], | ||||
|                 'ext': 'flv', | ||||
|             }) | ||||
|         else: | ||||
|             raise ExtractorError(u'Unable to extract video url for %s' % info['id']) | ||||
|         return info | ||||
|   | ||||
| @@ -12,8 +12,8 @@ class C56IE(InfoExtractor): | ||||
|  | ||||
|     _TEST ={ | ||||
|         u'url': u'http://www.56.com/u39/v_OTM0NDA3MTY.html', | ||||
|         u'file': u'93440716.mp4', | ||||
|         u'md5': u'9dc07b5c8e978112a6441f9e75d2b59e', | ||||
|         u'file': u'93440716.flv', | ||||
|         u'md5': u'e59995ac63d0457783ea05f93f12a866', | ||||
|         u'info_dict': { | ||||
|             u'title': u'网事知多少 第32期:车怒', | ||||
|         }, | ||||
|   | ||||
							
								
								
									
										35
									
								
								youtube_dl/extractor/canalc2.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										35
									
								
								youtube_dl/extractor/canalc2.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,35 @@ | ||||
| # coding: utf-8 | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
|  | ||||
|  | ||||
class Canalc2IE(InfoExtractor):
    """Information extractor for canalc2.tv ``video.asp`` pages."""

    IE_NAME = 'canalc2.tv'
    _VALID_URL = r'http://.*?\.canalc2\.tv/video\.asp\?idVideo=(\d+)&voir=oui'

    _TEST = {
        u'url': u'http://www.canalc2.tv/video.asp?idVideo=12163&voir=oui',
        u'file': u'12163.mp4',
        u'md5': u'060158428b650f896c542dfbb3d6487f',
        u'info_dict': {
            u'title': u'Terrasses du Numérique'
        }
    }

    def _real_extract(self, url):
        """Return the info dict (id, ext, url, title) for the page at url."""
        # The numeric idVideo query parameter doubles as the video id.
        url_match = re.match(self._VALID_URL, url)
        video_id = url_match.group(1)
        webpage = self._download_webpage(url, video_id)

        # The player setup call names the media file relative to the
        # streaming host.
        relative_path = self._search_regex(
            r"so\.addVariable\('file','(.*?)'\);",
            webpage, 'file name')
        page_title = self._html_search_regex(
            r'class="evenement8">(.*?)</a>', webpage, u'title')

        info = {}
        info['id'] = video_id
        info['ext'] = 'mp4'
        info['url'] = 'http://vod-flash.u-strasbg.fr:8080/' + relative_path
        info['title'] = page_title
        return info
| @@ -1,3 +1,4 @@ | ||||
| # encoding: utf-8 | ||||
| import re | ||||
| import xml.etree.ElementTree | ||||
|  | ||||
| @@ -5,24 +6,29 @@ from .common import InfoExtractor | ||||
| from ..utils import unified_strdate | ||||
|  | ||||
| class CanalplusIE(InfoExtractor): | ||||
|     _VALID_URL = r'https?://www\.canalplus\.fr/.*?\?vid=(?P<id>\d+)' | ||||
|     _VALID_URL = r'https?://(www\.canalplus\.fr/.*?/(?P<path>.*)|player\.canalplus\.fr/#/(?P<id>\d+))' | ||||
|     _VIDEO_INFO_TEMPLATE = 'http://service.canal-plus.com/video/rest/getVideosLiees/cplus/%s' | ||||
|     IE_NAME = u'canalplus.fr' | ||||
|  | ||||
|     _TEST = { | ||||
|         u'url': u'http://www.canalplus.fr/c-divertissement/pid3351-c-le-petit-journal.html?vid=889861', | ||||
|         u'file': u'889861.flv', | ||||
|         u'md5': u'590a888158b5f0d6832f84001fbf3e99', | ||||
|         u'url': u'http://www.canalplus.fr/c-infos-documentaires/pid1830-c-zapping.html?vid=922470', | ||||
|         u'file': u'922470.flv', | ||||
|         u'info_dict': { | ||||
|             u'title': u'Le Petit Journal 20/06/13 - La guerre des drone', | ||||
|             u'upload_date': u'20130620', | ||||
|             u'title': u'Zapping - 26/08/13', | ||||
|             u'description': u'Le meilleur de toutes les chaînes, tous les jours.\nEmission du 26 août 2013', | ||||
|             u'upload_date': u'20130826', | ||||
|         }, | ||||
|         u'params': { | ||||
|             u'skip_download': True, | ||||
|         }, | ||||
|         u'skip': u'Requires rtmpdump' | ||||
|     } | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         video_id = mobj.group('id') | ||||
|         if video_id is None: | ||||
|             webpage = self._download_webpage(url, mobj.group('path')) | ||||
|             video_id = self._search_regex(r'videoId = "(\d+)";', webpage, u'video id') | ||||
|         info_url = self._VIDEO_INFO_TEMPLATE % video_id | ||||
|         info_page = self._download_webpage(info_url,video_id,  | ||||
|                                            u'Downloading video info') | ||||
| @@ -43,4 +49,6 @@ class CanalplusIE(InfoExtractor): | ||||
|                 'ext': 'flv', | ||||
|                 'upload_date': unified_strdate(infos.find('PUBLICATION/DATE').text), | ||||
|                 'thumbnail': media.find('IMAGES/GRAND').text, | ||||
|                 'description': infos.find('DESCRIPTION').text, | ||||
|                 'view_count': int(infos.find('NB_VUES').text), | ||||
|                 } | ||||
|   | ||||
							
								
								
									
										58
									
								
								youtube_dl/extractor/cnn.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										58
									
								
								youtube_dl/extractor/cnn.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,58 @@ | ||||
| import re | ||||
| import xml.etree.ElementTree | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import determine_ext | ||||
|  | ||||
|  | ||||
class CNNIE(InfoExtractor):
    """Information extractor for videos published under cnn.com/video.

    The video path found in the page URL is used to download an XML
    description from cnn.com; the headline, description, thumbnails and
    the list of available video files are all read from that document.
    """

    _VALID_URL = r'''(?x)https?://(edition\.)?cnn\.com/video/(data/.+?|\?)/
        (?P<path>.+?/(?P<title>[^/]+?)(?:\.cnn|(?=&)))'''

    _TESTS = [{
        u'url': u'http://edition.cnn.com/video/?/video/sports/2013/06/09/nadal-1-on-1.cnn',
        u'file': u'sports_2013_06_09_nadal-1-on-1.cnn.mp4',
        u'md5': u'3e6121ea48df7e2259fe73a0628605c4',
        u'info_dict': {
            u'title': u'Nadal wins 8th French Open title',
            u'description': u'World Sport\'s Amanda Davies chats with 2013 French Open champion Rafael Nadal.',
        },
    },
    {
        u"url": u"http://edition.cnn.com/video/?/video/us/2013/08/21/sot-student-gives-epic-speech.georgia-institute-of-technology&utm_source=feedburner&utm_medium=feed&utm_campaign=Feed%3A+rss%2Fcnn_topstories+%28RSS%3A+Top+Stories%29",
        u"file": u"us_2013_08_21_sot-student-gives-epic-speech.georgia-institute-of-technology.mp4",
        u"md5": u"b5cc60c60a3477d185af8f19a2a26f4e",
        u"info_dict": {
            u"title": "Student's epic speech stuns new freshmen",
            u"description": "A Georgia Tech student welcomes the incoming freshmen with an epic speech backed by music from \"2001: A Space Odyssey.\""
        }
    }]

    def _real_extract(self, url):
        """Return the info dictionary for the CNN video at *url*.

        Among the files whose ``bitrate`` attribute has the form
        ``WIDTHxHEIGHT[_RATEk]``, the one with the greatest
        (width, height, rate) is selected.

        Raises ExtractorError when the XML lists no such file.
        """
        # Imported here because the module-level import only brings in
        # determine_ext.
        from ..utils import ExtractorError

        mobj = re.match(self._VALID_URL, url)
        path = mobj.group('path')
        page_title = mobj.group('title')
        info_url = u'http://cnn.com/video/data/3.0/%s/index.xml' % path
        info_xml = self._download_webpage(info_url, page_title)
        info = xml.etree.ElementTree.fromstring(info_xml.encode('utf-8'))

        formats = []
        for f in info.findall('files/file'):
            mf = re.match(r'(\d+)x(\d+)(?:_(.*)k)?', f.attrib['bitrate'])
            if mf is not None:
                formats.append((int(mf.group(1)), int(mf.group(2)), int(mf.group(3) or 0), f.text))
        if not formats:
            # Without a single usable file there is nothing to download;
            # fail with the extractor's own error type, not IndexError.
            raise ExtractorError(u'Unable to find any video format')
        video_path = max(formats)[-1]
        video_url = 'http://ht.cdn.turner.com/cnn/big%s' % video_path

        # Sorted by (height, width), so the last entry is the largest image.
        thumbnails = sorted(
            ((int(t.attrib['height']), int(t.attrib['width'])), t.text)
            for t in info.findall('images/image'))
        thumbs_dict = [{'resolution': res, 'url': t_url} for (res, t_url) in thumbnails]

        return {'id': info.attrib['id'],
                'title': info.find('headline').text,
                'url': video_url,
                'ext': determine_ext(video_url),
                # The thumbnail is optional: a video without images must
                # still be extractable.
                'thumbnail': thumbnails[-1][1] if thumbnails else None,
                'thumbnails': thumbs_dict,
                'description': info.find('description').text,
                }
| @@ -4,15 +4,16 @@ import xml.etree.ElementTree | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     compat_urllib_parse_urlparse, | ||||
|     determine_ext, | ||||
|  | ||||
|     ExtractorError, | ||||
| ) | ||||
|  | ||||
|  | ||||
| class CollegeHumorIE(InfoExtractor): | ||||
|     _VALID_URL = r'^(?:https?://)?(?:www\.)?collegehumor\.com/(video|embed)/(?P<videoid>[0-9]+)/(?P<shorttitle>.*)$' | ||||
|     _VALID_URL = r'^(?:https?://)?(?:www\.)?collegehumor\.com/(video|embed|e)/(?P<videoid>[0-9]+)/?(?P<shorttitle>.*)$' | ||||
|  | ||||
|     _TEST = { | ||||
|     _TESTS = [{ | ||||
|         u'url': u'http://www.collegehumor.com/video/6902724/comic-con-cosplay-catastrophe', | ||||
|         u'file': u'6902724.mp4', | ||||
|         u'md5': u'1264c12ad95dca142a9f0bf7968105a0', | ||||
| @@ -20,7 +21,16 @@ class CollegeHumorIE(InfoExtractor): | ||||
|             u'title': u'Comic-Con Cosplay Catastrophe', | ||||
|             u'description': u'Fans get creative this year at San Diego.  Too creative.  And yes, that\'s really Joss Whedon.', | ||||
|         }, | ||||
|     } | ||||
|     }, | ||||
|     { | ||||
|         u'url': u'http://www.collegehumor.com/video/3505939/font-conference', | ||||
|         u'file': u'3505939.mp4', | ||||
|         u'md5': u'c51ca16b82bb456a4397987791a835f5', | ||||
|         u'info_dict': { | ||||
|             u'title': u'Font Conference', | ||||
|             u'description': u'This video wasn\'t long enough, so we made it double-spaced.', | ||||
|         }, | ||||
|     }] | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
| @@ -49,25 +59,29 @@ class CollegeHumorIE(InfoExtractor): | ||||
|             info['description'] = videoNode.findall('./description')[0].text | ||||
|             info['title'] = videoNode.findall('./caption')[0].text | ||||
|             info['thumbnail'] = videoNode.findall('./thumbnail')[0].text | ||||
|             manifest_url = videoNode.findall('./file')[0].text | ||||
|             next_url = videoNode.findall('./file')[0].text | ||||
|         except IndexError: | ||||
|             raise ExtractorError(u'Invalid metadata XML file') | ||||
|  | ||||
|         manifest_url += '?hdcore=2.10.3' | ||||
|         manifestXml = self._download_webpage(manifest_url, video_id, | ||||
|                                              u'Downloading XML manifest', | ||||
|                                              u'Unable to download video info XML') | ||||
|         if next_url.endswith(u'manifest.f4m'): | ||||
|             manifest_url = next_url + '?hdcore=2.10.3' | ||||
|             manifestXml = self._download_webpage(manifest_url, video_id, | ||||
|                                          u'Downloading XML manifest', | ||||
|                                          u'Unable to download video info XML') | ||||
|  | ||||
|         adoc = xml.etree.ElementTree.fromstring(manifestXml) | ||||
|         try: | ||||
|             media_node = adoc.findall('./{http://ns.adobe.com/f4m/1.0}media')[0] | ||||
|             node_id = media_node.attrib['url'] | ||||
|             video_id = adoc.findall('./{http://ns.adobe.com/f4m/1.0}id')[0].text | ||||
|         except IndexError as err: | ||||
|             raise ExtractorError(u'Invalid manifest file') | ||||
|             adoc = xml.etree.ElementTree.fromstring(manifestXml) | ||||
|             try: | ||||
|                 media_node = adoc.findall('./{http://ns.adobe.com/f4m/1.0}media')[0] | ||||
|                 node_id = media_node.attrib['url'] | ||||
|                 video_id = adoc.findall('./{http://ns.adobe.com/f4m/1.0}id')[0].text | ||||
|             except IndexError as err: | ||||
|                 raise ExtractorError(u'Invalid manifest file') | ||||
|             url_pr = compat_urllib_parse_urlparse(info['thumbnail']) | ||||
|             info['url'] = url_pr.scheme + '://' + url_pr.netloc + video_id[:-2].replace('.csmil','').replace(',','') | ||||
|             info['ext'] = 'mp4' | ||||
|         else: | ||||
|             # Old-style direct links | ||||
|             info['url'] = next_url | ||||
|             info['ext'] = determine_ext(info['url']) | ||||
|  | ||||
|         url_pr = compat_urllib_parse_urlparse(info['thumbnail']) | ||||
|  | ||||
|         info['url'] = url_pr.scheme + '://' + url_pr.netloc + video_id[:-2].replace('.csmil','').replace(',','') | ||||
|         info['ext'] = 'mp4' | ||||
|         return [info] | ||||
|         return info | ||||
|   | ||||
| @@ -51,12 +51,12 @@ class ComedyCentralIE(InfoExtractor): | ||||
|         '400': 'mp4', | ||||
|     } | ||||
|     _video_dimensions = { | ||||
|         '3500': '1280x720', | ||||
|         '2200': '960x540', | ||||
|         '1700': '768x432', | ||||
|         '1200': '640x360', | ||||
|         '750': '512x288', | ||||
|         '400': '384x216', | ||||
|         '3500': (1280, 720), | ||||
|         '2200': (960, 540), | ||||
|         '1700': (768, 432), | ||||
|         '1200': (640, 360), | ||||
|         '750': (512, 288), | ||||
|         '400': (384, 216), | ||||
|     } | ||||
|  | ||||
|     @classmethod | ||||
| @@ -64,11 +64,13 @@ class ComedyCentralIE(InfoExtractor): | ||||
|         """Receives a URL and returns True if suitable for this IE.""" | ||||
|         return re.match(cls._VALID_URL, url, re.VERBOSE) is not None | ||||
|  | ||||
|     def _print_formats(self, formats): | ||||
|         print('Available formats:') | ||||
|         for x in formats: | ||||
|             print('%s\t:\t%s\t[%s]' %(x, self._video_extensions.get(x, 'mp4'), self._video_dimensions.get(x, '???'))) | ||||
|  | ||||
|     @staticmethod | ||||
|     def _transform_rtmp_url(rtmp_video_url): | ||||
|         m = re.match(r'^rtmpe?://.*?/(?P<finalid>gsp.comedystor/.*)$', rtmp_video_url) | ||||
|         if not m: | ||||
|             raise ExtractorError(u'Cannot transform RTMP url') | ||||
|         base = 'http://mtvnmobile.vo.llnwd.net/kip0/_pxn=1+_pxI0=Ripod-h264+_pxL0=undefined+_pxM0=+_pxK=18639+_pxE=mp4/44620/mtvnorigin/' | ||||
|         return base + m.group('finalid') | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url, re.VERBOSE) | ||||
| @@ -155,40 +157,31 @@ class ComedyCentralIE(InfoExtractor): | ||||
|                 self._downloader.report_error(u'unable to download ' + mediaId + ': No videos found') | ||||
|                 continue | ||||
|  | ||||
|             if self._downloader.params.get('listformats', None): | ||||
|                 self._print_formats([i[0] for i in turls]) | ||||
|                 return | ||||
|  | ||||
|             # For now, just pick the highest bitrate | ||||
|             format,rtmp_video_url = turls[-1] | ||||
|  | ||||
|             # Get the format arg from the arg stream | ||||
|             req_format = self._downloader.params.get('format', None) | ||||
|  | ||||
|             # Select format if we can find one | ||||
|             for f,v in turls: | ||||
|                 if f == req_format: | ||||
|                     format, rtmp_video_url = f, v | ||||
|                     break | ||||
|  | ||||
|             m = re.match(r'^rtmpe?://.*?/(?P<finalid>gsp.comedystor/.*)$', rtmp_video_url) | ||||
|             if not m: | ||||
|                 raise ExtractorError(u'Cannot transform RTMP url') | ||||
|             base = 'http://mtvnmobile.vo.llnwd.net/kip0/_pxn=1+_pxI0=Ripod-h264+_pxL0=undefined+_pxM0=+_pxK=18639+_pxE=mp4/44620/mtvnorigin/' | ||||
|             video_url = base + m.group('finalid') | ||||
|             formats = [] | ||||
|             for format, rtmp_video_url in turls: | ||||
|                 w, h = self._video_dimensions.get(format, (None, None)) | ||||
|                 formats.append({ | ||||
|                     'url': self._transform_rtmp_url(rtmp_video_url), | ||||
|                     'ext': self._video_extensions.get(format, 'mp4'), | ||||
|                     'format_id': format, | ||||
|                     'height': h, | ||||
|                     'width': w, | ||||
|                 }) | ||||
|  | ||||
|             effTitle = showId + u'-' + epTitle + u' part ' + compat_str(partNum+1) | ||||
|             info = { | ||||
|                 'id': shortMediaId, | ||||
|                 'url': video_url, | ||||
|                 'formats': formats, | ||||
|                 'uploader': showId, | ||||
|                 'upload_date': officialDate, | ||||
|                 'title': effTitle, | ||||
|                 'ext': 'mp4', | ||||
|                 'format': format, | ||||
|                 'thumbnail': None, | ||||
|                 'description': compat_str(officialTitle), | ||||
|             } | ||||
|  | ||||
|             # TODO: Remove when #980 has been merged | ||||
|             info.update(info['formats'][-1]) | ||||
|  | ||||
|             results.append(info) | ||||
|  | ||||
|         return results | ||||
|   | ||||
| @@ -35,6 +35,8 @@ class InfoExtractor(object): | ||||
|     title:          Video title, unescaped. | ||||
|     ext:            Video filename extension. | ||||
|  | ||||
|     Instead of url and ext, formats can also be specified. | ||||
|  | ||||
|     The following fields are optional: | ||||
|  | ||||
|     format:         The video format, defaults to ext (used for --get-format) | ||||
| @@ -47,12 +49,25 @@ class InfoExtractor(object): | ||||
|     uploader_id:    Nickname or id of the video uploader. | ||||
|     location:       Physical location of the video. | ||||
|     player_url:     SWF Player URL (used for rtmpdump). | ||||
|     subtitles:      The subtitle file contents. | ||||
|     subtitles:      The subtitle file contents as a dictionary in the format | ||||
|                     {language: subtitles}. | ||||
|     view_count:     How many users have watched the video on the platform. | ||||
|     urlhandle:      [internal] The urlHandle to be used to download the file, | ||||
|                     like returned by urllib.request.urlopen | ||||
|     age_limit:      Age restriction for the video, as an integer (years) | ||||
|     formats:        A list of dictionaries for each format available, it must | ||||
|                     be ordered from worst to best quality. Potential fields: | ||||
|                     * url       Mandatory. The URL of the video file | ||||
|                     * ext       Will be calculated from url if missing | ||||
|                     * format    A human-readable description of the format | ||||
|                                 ("mp4 container with h264/opus"). | ||||
|                                 Calculated from width and height if missing. | ||||
|                     * format_id A short description of the format | ||||
|                                 ("mp4_h264_opus" or "19") | ||||
|                     * width     Width of the video, if known | ||||
|                     * height    Height of the video, if known | ||||
|  | ||||
|     The fields should all be Unicode strings. | ||||
|     Unless mentioned otherwise, the fields should be Unicode strings. | ||||
|  | ||||
|     Subclasses of this one should re-define the _real_initialize() and | ||||
|     _real_extract() methods and define a _VALID_URL regexp. | ||||
| @@ -77,7 +92,13 @@ class InfoExtractor(object): | ||||
|     @classmethod | ||||
|     def suitable(cls, url): | ||||
|         """Receives a URL and returns True if suitable for this IE.""" | ||||
|         return re.match(cls._VALID_URL, url) is not None | ||||
|  | ||||
|         # This does not use has/getattr intentionally - we want to know whether | ||||
|         # we have cached the regexp for *this* class, whereas getattr would also | ||||
|         # match the superclass | ||||
|         if '_VALID_URL_RE' not in cls.__dict__: | ||||
|             cls._VALID_URL_RE = re.compile(cls._VALID_URL) | ||||
|         return cls._VALID_URL_RE.match(url) is not None | ||||
|  | ||||
|     @classmethod | ||||
|     def working(cls): | ||||
| @@ -107,6 +128,11 @@ class InfoExtractor(object): | ||||
|         """Real extraction process. Redefine in subclasses.""" | ||||
|         pass | ||||
|  | ||||
|     @classmethod | ||||
|     def ie_key(cls): | ||||
|         """A string for getting the InfoExtractor with get_info_extractor""" | ||||
|         return cls.__name__[:-2] | ||||
|  | ||||
|     @property | ||||
|     def IE_NAME(self): | ||||
|         return type(self).__name__[:-2] | ||||
| @@ -122,7 +148,7 @@ class InfoExtractor(object): | ||||
|         except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err: | ||||
|             if errnote is None: | ||||
|                 errnote = u'Unable to download webpage' | ||||
|             raise ExtractorError(u'%s: %s' % (errnote, compat_str(err)), sys.exc_info()[2]) | ||||
|             raise ExtractorError(u'%s: %s' % (errnote, compat_str(err)), sys.exc_info()[2], cause=err) | ||||
|  | ||||
|     def _download_webpage_handle(self, url_or_request, video_id, note=None, errnote=None): | ||||
|         """ Returns a tuple (page content as string, URL handle) """ | ||||
| @@ -133,12 +159,17 @@ class InfoExtractor(object): | ||||
|  | ||||
|         urlh = self._request_webpage(url_or_request, video_id, note, errnote) | ||||
|         content_type = urlh.headers.get('Content-Type', '') | ||||
|         webpage_bytes = urlh.read() | ||||
|         m = re.match(r'[a-zA-Z0-9_.-]+/[a-zA-Z0-9_.-]+\s*;\s*charset=(.+)', content_type) | ||||
|         if m: | ||||
|             encoding = m.group(1) | ||||
|         else: | ||||
|             encoding = 'utf-8' | ||||
|         webpage_bytes = urlh.read() | ||||
|             m = re.search(br'<meta[^>]+charset=[\'"]?([^\'")]+)[ /\'">]', | ||||
|                           webpage_bytes[:1024]) | ||||
|             if m: | ||||
|                 encoding = m.group(1).decode('ascii') | ||||
|             else: | ||||
|                 encoding = 'utf-8' | ||||
|         if self._downloader.params.get('dump_intermediate_pages', False): | ||||
|             try: | ||||
|                 url = url_or_request.get_full_url() | ||||
| @@ -288,6 +319,15 @@ class InfoExtractor(object): | ||||
|                                         self._og_regex('video')], | ||||
|                                        html, name, **kargs) | ||||
|  | ||||
|     def _rta_search(self, html): | ||||
|         # See http://www.rtalabel.org/index.php?content=howtofaq#single | ||||
|         if re.search(r'(?ix)<meta\s+name="rating"\s+' | ||||
|                      r'     content="RTA-5042-1996-1400-1577-RTA"', | ||||
|                      html): | ||||
|             return 18 | ||||
|         return 0 | ||||
|  | ||||
|  | ||||
| class SearchInfoExtractor(InfoExtractor): | ||||
|     """ | ||||
|     Base class for paged search queries extractors. | ||||
|   | ||||
| @@ -1,27 +1,58 @@ | ||||
| import re | ||||
| import json | ||||
| import itertools | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from .subtitles import SubtitlesInfoExtractor | ||||
|  | ||||
| from ..utils import ( | ||||
|     compat_urllib_request, | ||||
|     compat_str, | ||||
|     get_element_by_attribute, | ||||
|     get_element_by_id, | ||||
|     orderedSet, | ||||
|  | ||||
|     ExtractorError, | ||||
| ) | ||||
|  | ||||
| class DailymotionIE(InfoExtractor): | ||||
class DailymotionBaseInfoExtractor(InfoExtractor):
    """Common base class of the Dailymotion extractors."""

    @staticmethod
    def _build_request(url):
        """Return a request for *url* whose cookie disables the family filter."""
        unfiltered = compat_urllib_request.Request(url)
        unfiltered.add_header('Cookie', 'family_filter=off')
        return unfiltered
|  | ||||
| class DailymotionIE(DailymotionBaseInfoExtractor, SubtitlesInfoExtractor): | ||||
|     """Information Extractor for Dailymotion""" | ||||
|  | ||||
|     _VALID_URL = r'(?i)(?:https?://)?(?:www\.)?dailymotion\.[a-z]{2,3}/video/([^/]+)' | ||||
|     _VALID_URL = r'(?i)(?:https?://)?(?:www\.)?dailymotion\.[a-z]{2,3}/(?:embed/)?video/([^/]+)' | ||||
|     IE_NAME = u'dailymotion' | ||||
|     _TEST = { | ||||
|         u'url': u'http://www.dailymotion.com/video/x33vw9_tutoriel-de-youtubeur-dl-des-video_tech', | ||||
|         u'file': u'x33vw9.mp4', | ||||
|         u'md5': u'392c4b85a60a90dc4792da41ce3144eb', | ||||
|         u'info_dict': { | ||||
|             u"uploader": u"Alex and Van .",  | ||||
|             u"title": u"Tutoriel de Youtubeur\"DL DES VIDEO DE YOUTUBE\"" | ||||
|         } | ||||
|     } | ||||
|     _TESTS = [ | ||||
|         { | ||||
|             u'url': u'http://www.dailymotion.com/video/x33vw9_tutoriel-de-youtubeur-dl-des-video_tech', | ||||
|             u'file': u'x33vw9.mp4', | ||||
|             u'md5': u'392c4b85a60a90dc4792da41ce3144eb', | ||||
|             u'info_dict': { | ||||
|                 u"uploader": u"Amphora Alex and Van .",  | ||||
|                 u"title": u"Tutoriel de Youtubeur\"DL DES VIDEO DE YOUTUBE\"" | ||||
|             } | ||||
|         }, | ||||
|         # Vevo video | ||||
|         { | ||||
|             u'url': u'http://www.dailymotion.com/video/x149uew_katy-perry-roar-official_musi', | ||||
|             u'file': u'USUV71301934.mp4', | ||||
|             u'info_dict': { | ||||
|                 u'title': u'Roar (Official)', | ||||
|                 u'uploader': u'Katy Perry', | ||||
|                 u'upload_date': u'20130905', | ||||
|             }, | ||||
|             u'params': { | ||||
|                 u'skip_download': True, | ||||
|             }, | ||||
|             u'skip': u'VEVO is only available in some countries', | ||||
|         }, | ||||
|     ] | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         # Extract id and simplified title from URL | ||||
| @@ -30,15 +61,24 @@ class DailymotionIE(InfoExtractor): | ||||
|         video_id = mobj.group(1).split('_')[0].split('?')[0] | ||||
|  | ||||
|         video_extension = 'mp4' | ||||
|         url = 'http://www.dailymotion.com/video/%s' % video_id | ||||
|  | ||||
|         # Retrieve video webpage to extract further information | ||||
|         request = compat_urllib_request.Request(url) | ||||
|         request.add_header('Cookie', 'family_filter=off') | ||||
|         request = self._build_request(url) | ||||
|         webpage = self._download_webpage(request, video_id) | ||||
|  | ||||
|         # Extract URL, uploader and title from webpage | ||||
|         self.report_extraction(video_id) | ||||
|  | ||||
|         # It may just embed a vevo video: | ||||
|         m_vevo = re.search( | ||||
|             r'<link rel="video_src" href="[^"]*?vevo.com[^"]*?videoId=(?P<id>[\w]*)', | ||||
|             webpage) | ||||
|         if m_vevo is not None: | ||||
|             vevo_id = m_vevo.group('id') | ||||
|             self.to_screen(u'Vevo video detected: %s' % vevo_id) | ||||
|             return self.url_result(u'vevo:%s' % vevo_id, ie='Vevo') | ||||
|  | ||||
|         video_uploader = self._search_regex([r'(?im)<span class="owner[^\"]+?">[^<]+?<a [^>]+?>([^<]+?)</a>', | ||||
|                                              # Looking for official user | ||||
|                                              r'<(?:span|a) .*?rel="author".*?>([^<]+?)</'], | ||||
| @@ -52,8 +92,12 @@ class DailymotionIE(InfoExtractor): | ||||
|         embed_url = 'http://www.dailymotion.com/embed/video/%s' % video_id | ||||
|         embed_page = self._download_webpage(embed_url, video_id, | ||||
|                                             u'Downloading embed page') | ||||
|         info = self._search_regex(r'var info = ({.*?}),', embed_page, 'video info') | ||||
|         info = self._search_regex(r'var info = ({.*?}),$', embed_page, | ||||
|             'video info', flags=re.MULTILINE) | ||||
|         info = json.loads(info) | ||||
|         if info.get('error') is not None: | ||||
|             msg = 'Couldn\'t get video, Dailymotion says: %s' % info['error']['title'] | ||||
|             raise ExtractorError(msg, expected=True) | ||||
|  | ||||
|         # TODO: support choosing qualities | ||||
|  | ||||
| @@ -68,6 +112,12 @@ class DailymotionIE(InfoExtractor): | ||||
|             raise ExtractorError(u'Unable to extract video URL') | ||||
|         video_url = info[max_quality] | ||||
|  | ||||
|         # subtitles | ||||
|         video_subtitles = self.extract_subtitles(video_id) | ||||
|         if self._downloader.params.get('listsubtitles', False): | ||||
|             self._list_available_subtitles(video_id) | ||||
|             return | ||||
|  | ||||
|         return [{ | ||||
|             'id':       video_id, | ||||
|             'url':      video_url, | ||||
| @@ -75,5 +125,76 @@ class DailymotionIE(InfoExtractor): | ||||
|             'upload_date':  video_upload_date, | ||||
|             'title':    self._og_search_title(webpage), | ||||
|             'ext':      video_extension, | ||||
|             'subtitles':    video_subtitles, | ||||
|             'thumbnail': info['thumbnail_url'] | ||||
|         }] | ||||
|  | ||||
|     def _get_available_subtitles(self, video_id): | ||||
|         try: | ||||
|             sub_list = self._download_webpage( | ||||
|                 'https://api.dailymotion.com/video/%s/subtitles?fields=id,language,url' % video_id, | ||||
|                 video_id, note=False) | ||||
|         except ExtractorError as err: | ||||
|             self._downloader.report_warning(u'unable to download video subtitles: %s' % compat_str(err)) | ||||
|             return {} | ||||
|         info = json.loads(sub_list) | ||||
|         if (info['total'] > 0): | ||||
|             sub_lang_list = dict((l['language'], l['url']) for l in info['list']) | ||||
|             return sub_lang_list | ||||
|         self._downloader.report_warning(u'video doesn\'t have subtitles') | ||||
|         return {} | ||||
|  | ||||
|  | ||||
class DailymotionPlaylistIE(DailymotionBaseInfoExtractor):
    """Extractor for Dailymotion playlist pages.

    The listing is walked page by page until a page no longer matches
    _MORE_PAGES_INDICATOR.
    """

    IE_NAME = u'dailymotion:playlist'
    _VALID_URL = r'(?:https?://)?(?:www\.)?dailymotion\.[a-z]{2,3}/playlist/(?P<id>.+?)/'
    _MORE_PAGES_INDICATOR = r'<div class="next">.*?<a.*?href="/playlist/.+?".*?>.*?</a>.*?</div>'
    _PAGE_TEMPLATE = 'https://www.dailymotion.com/playlist/%s/%s'

    def _extract_entries(self, id):
        """Return url_result entries for every distinct video listed for *id*.

        Pages are built from _PAGE_TEMPLATE and requested with the family
        filter disabled; duplicates are dropped while keeping first-seen
        order.
        """
        collected = []
        pagenum = 1
        while True:
            page_request = self._build_request(self._PAGE_TEMPLATE % (id, pagenum))
            webpage = self._download_webpage(
                page_request, id, u'Downloading page %s' % pagenum)

            playlist_el = get_element_by_attribute(u'class', u'video_list', webpage)
            collected += re.findall(r'data-id="(.+?)"', playlist_el)

            has_next = re.search(self._MORE_PAGES_INDICATOR, webpage, re.DOTALL)
            if has_next is None:
                break
            pagenum += 1

        entries = []
        for video_id in orderedSet(collected):
            entries.append(
                self.url_result('http://www.dailymotion.com/video/%s' % video_id, 'Dailymotion'))
        return entries

    def _real_extract(self, url):
        """Return the playlist dictionary for the playlist page at *url*."""
        playlist_id = re.match(self._VALID_URL, url).group('id')
        webpage = self._download_webpage(url, playlist_id)

        playlist_title = get_element_by_id(u'playlist_name', webpage)
        playlist_entries = self._extract_entries(playlist_id)
        return {
            '_type': 'playlist',
            'id': playlist_id,
            'title': playlist_title,
            'entries': playlist_entries,
        }
|  | ||||
|  | ||||
class DailymotionUserIE(DailymotionPlaylistIE):
    """Extractor for Dailymotion user pages, returned as a playlist.

    Reuses the paging logic of DailymotionPlaylistIE with user-specific
    page template and next-page indicator.
    """

    IE_NAME = u'dailymotion:user'
    _VALID_URL = r'(?:https?://)?(?:www\.)?dailymotion\.[a-z]{2,3}/user/(?P<user>[^/]+)'
    _MORE_PAGES_INDICATOR = r'<div class="next">.*?<a.*?href="/user/.+?".*?>.*?</a>.*?</div>'
    _PAGE_TEMPLATE = 'http://www.dailymotion.com/user/%s/%s'

    def _real_extract(self, url):
        """Return the playlist dictionary holding the videos of the user at *url*."""
        user = re.match(self._VALID_URL, url).group('user')
        webpage = self._download_webpage(url, user)

        # The link labelled with the user's path carries the display name.
        label_pattern = r'<a class="label" href="/%s".*?>(.*?)</' % re.escape(user)
        full_user = self._html_search_regex(
            label_pattern, webpage, u'user', flags=re.DOTALL)

        user_entries = self._extract_entries(user)
        return {
            '_type': 'playlist',
            'id': user,
            'title': full_user,
            'entries': user_entries,
        }
|   | ||||
							
								
								
									
										74
									
								
								youtube_dl/extractor/daum.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										74
									
								
								youtube_dl/extractor/daum.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,74 @@ | ||||
| # encoding: utf-8 | ||||
| import re | ||||
| import xml.etree.ElementTree | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     compat_urllib_parse, | ||||
|     determine_ext, | ||||
| ) | ||||
|  | ||||
|  | ||||
class DaumIE(InfoExtractor):
    """Information extractor for tvpot.daum.net clips.

    The clip page yields a "full id" (the ``vid`` parameter of its
    video_src link), which is then used to query one XML document for
    the metadata, one for the list of profiles, and one more per profile
    for the actual video URL.
    """

    _VALID_URL = r'https?://tvpot\.daum\.net/.*?clipid=(?P<id>\d+)'
    IE_NAME = u'daum.net'

    _TEST = {
        u'url': u'http://tvpot.daum.net/clip/ClipView.do?clipid=52554690',
        u'file': u'52554690.mp4',
        u'info_dict': {
            u'title': u'DOTA 2GETHER 시즌2 6회 - 2부',
            u'description': u'DOTA 2GETHER 시즌2 6회 - 2부',
            u'upload_date': u'20130831',
            u'duration': 3868,
        },
    }

    def _real_extract(self, url):
        """Return the info dictionary for the clip at *url*.

        Formats are listed in the order in which the profiles appear in
        the formats XML; the last one is also merged into the top level
        of the result.

        Raises ExtractorError when the formats XML lists no profile.
        """
        # Imported here because the module-level import list does not
        # include ExtractorError.
        from ..utils import ExtractorError

        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')
        canonical_url = 'http://tvpot.daum.net/v/%s' % video_id
        webpage = self._download_webpage(canonical_url, video_id)
        full_id = self._search_regex(r'<link rel="video_src" href=".+?vid=(.+?)"',
            webpage, u'full id')
        query = compat_urllib_parse.urlencode({'vid': full_id})
        info_xml = self._download_webpage(
            'http://tvpot.daum.net/clip/ClipInfoXml.do?' + query, video_id,
            u'Downloading video info')
        urls_xml = self._download_webpage(
            'http://videofarm.daum.net/controller/api/open/v1_2/MovieData.apixml?' + query,
            video_id, u'Downloading video formats info')
        info_doc = xml.etree.ElementTree.fromstring(info_xml.encode('utf-8'))
        urls_doc = xml.etree.ElementTree.fromstring(urls_xml.encode('utf-8'))

        self.to_screen(u'%s: Getting video urls' % video_id)
        formats = []
        for format_el in urls_doc.findall('result/output_list/output_list'):
            profile = format_el.attrib['profile']
            format_query = compat_urllib_parse.urlencode({
                'vid': full_id,
                'profile': profile,
            })
            # One extra request per profile: the listing only names the
            # profiles, the location API returns the URL.
            url_xml = self._download_webpage(
                'http://videofarm.daum.net/controller/api/open/v1_2/MovieLocation.apixml?' + format_query,
                video_id, note=False)
            url_doc = xml.etree.ElementTree.fromstring(url_xml.encode('utf-8'))
            format_url = url_doc.find('result/url').text
            formats.append({
                'url': format_url,
                'ext': determine_ext(format_url),
                'format_id': profile,
            })

        if not formats:
            # formats[-1] below would otherwise fail with a bare IndexError.
            raise ExtractorError(u'%s: no video formats found' % video_id)

        info = {
            'id': video_id,
            'title': info_doc.find('TITLE').text,
            'formats': formats,
            'thumbnail': self._og_search_thumbnail(webpage),
            'description': info_doc.find('CONTENTS').text,
            'duration': int(info_doc.find('DURATION').text),
            # Only the first eight characters of REGDTTM are kept.
            'upload_date': info_doc.find('REGDTTM').text[:8],
        }
        # TODO: Remove when #980 has been merged
        info.update(formats[-1])
        return info
							
								
								
									
										39
									
								
								youtube_dl/extractor/defense.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										39
									
								
								youtube_dl/extractor/defense.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,39 @@ | ||||
| import re | ||||
| import json | ||||
|  | ||||
| from .common import InfoExtractor | ||||
|  | ||||
|  | ||||
class DefenseGouvFrIE(InfoExtractor):
    """Information extractor for the web TV pages of defense.gouv.fr.

    The page exposes a numeric video id in its flash variables; the id is
    used to download a JSON document whose first rendition supplies the
    video URL.
    """

    # Named IE_NAME (not _IE_NAME) so that it actually overrides the
    # name derived from the class name.
    IE_NAME = 'defense.gouv.fr'
    _VALID_URL = (r'http://.*?\.defense\.gouv\.fr/layout/set/'
        r'ligthboxvideo/base-de-medias/webtv/(.*)')

    _TEST = {
        u'url': (u'http://www.defense.gouv.fr/layout/set/ligthboxvideo/'
        u'base-de-medias/webtv/attaque-chimique-syrienne-du-21-aout-2013-1'),
        u'file': u'11213.mp4',
        u'md5': u'75bba6124da7e63d2d60b5244ec9430c',
        "info_dict": {
            "title": "attaque-chimique-syrienne-du-21-aout-2013-1"
        }
    }

    def _real_extract(self, url):
        """Return the info dictionary for the video page at *url*.

        The title is the trailing part of the URL captured by _VALID_URL.
        """
        title = re.match(self._VALID_URL, url).group(1)
        webpage = self._download_webpage(url, title)
        video_id = self._search_regex(
            r"flashvars.pvg_id=\"(\d+)\";",
            webpage, 'ID')

        json_url = ('http://static.videos.gouv.fr/brightcovehub/export/json/'
            + video_id)
        info = self._download_webpage(json_url, title,
                                      'Downloading JSON config')
        # The first listed rendition is used as is.
        video_url = json.loads(info)['renditions'][0]['url']

        return {'id': video_id,
                'ext': 'mp4',
                'url': video_url,
                'title': title,
                }
| @@ -54,6 +54,7 @@ class DreiSatIE(InfoExtractor): | ||||
|             'width': int(fe.find('./width').text), | ||||
|             'height': int(fe.find('./height').text), | ||||
|             'url': fe.find('./url').text, | ||||
|             'ext': determine_ext(fe.find('./url').text), | ||||
|             'filesize': int(fe.find('./filesize').text), | ||||
|             'video_bitrate': int(fe.find('./videoBitrate').text), | ||||
|             '3sat_qualityname': fe.find('./quality').text, | ||||
| @@ -79,7 +80,6 @@ class DreiSatIE(InfoExtractor): | ||||
|         } | ||||
|  | ||||
|         # TODO: Remove when #980 has been merged | ||||
|         info['url'] = formats[-1]['url'] | ||||
|         info['ext'] = determine_ext(formats[-1]['url']) | ||||
|         info.update(formats[-1]) | ||||
|  | ||||
|         return info | ||||
|         return info | ||||
|   | ||||
							
								
								
									
										37
									
								
								youtube_dl/extractor/ebaumsworld.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										37
									
								
								youtube_dl/extractor/ebaumsworld.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,37 @@ | ||||
| import re | ||||
| import xml.etree.ElementTree | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import determine_ext | ||||
|  | ||||
|  | ||||
| class EbaumsWorldIE(InfoExtractor): | ||||
|     _VALID_URL = r'https?://www\.ebaumsworld\.com/video/watch/(?P<id>\d+)' | ||||
|  | ||||
|     _TEST = { | ||||
|         u'url': u'http://www.ebaumsworld.com/video/watch/83367677/', | ||||
|         u'file': u'83367677.mp4', | ||||
|         u'info_dict': { | ||||
|             u'title': u'A Giant Python Opens The Door', | ||||
|             u'description': u'This is how nightmares start...', | ||||
|             u'uploader': u'jihadpizza', | ||||
|         }, | ||||
|     } | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         video_id = mobj.group('id') | ||||
|         config_xml = self._download_webpage( | ||||
|             'http://www.ebaumsworld.com/video/player/%s' % video_id, video_id) | ||||
|         config = xml.etree.ElementTree.fromstring(config_xml.encode('utf-8')) | ||||
|         video_url = config.find('file').text | ||||
|  | ||||
|         return { | ||||
|             'id': video_id, | ||||
|             'title': config.find('title').text, | ||||
|             'url': video_url, | ||||
|             'ext': determine_ext(video_url), | ||||
|             'description': config.find('description').text, | ||||
|             'thumbnail': config.find('image').text, | ||||
|             'uploader': config.find('username').text, | ||||
|         } | ||||
| @@ -106,8 +106,8 @@ class FacebookIE(InfoExtractor): | ||||
|         video_duration = int(video_data['video_duration']) | ||||
|         thumbnail = video_data['thumbnail_src'] | ||||
|  | ||||
|         video_title = self._html_search_regex('<h2 class="uiHeaderTitle">([^<]+)</h2>', | ||||
|             webpage, u'title') | ||||
|         video_title = self._html_search_regex( | ||||
|             r'<h2 class="uiHeaderTitle">([^<]*)</h2>', webpage, u'title') | ||||
|  | ||||
|         info = { | ||||
|             'id': video_id, | ||||
|   | ||||
							
								
								
									
										79
									
								
								youtube_dl/extractor/fktv.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										79
									
								
								youtube_dl/extractor/fktv.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,79 @@ | ||||
| import re | ||||
| import random | ||||
| import json | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     determine_ext, | ||||
|     get_element_by_id, | ||||
|     clean_html, | ||||
| ) | ||||
|  | ||||
|  | ||||
| class FKTVIE(InfoExtractor): | ||||
|     IE_NAME = u'fernsehkritik.tv' | ||||
|     _VALID_URL = r'(?:http://)?(?:www\.)?fernsehkritik.tv/folge-(?P<ep>[0-9]+)(?:/.*)?' | ||||
|  | ||||
|     _TEST = { | ||||
|         u'url': u'http://fernsehkritik.tv/folge-1', | ||||
|         u'file': u'00011.flv', | ||||
|         u'info_dict': { | ||||
|             u'title': u'Folge 1 vom 10. April 2007', | ||||
|             u'description': u'md5:fb4818139c7cfe6907d4b83412a6864f', | ||||
|         }, | ||||
|     } | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         episode = int(mobj.group('ep')) | ||||
|  | ||||
|         server = random.randint(2, 4) | ||||
|         video_thumbnail = 'http://fernsehkritik.tv/images/magazin/folge%d.jpg' % episode | ||||
|         start_webpage = self._download_webpage('http://fernsehkritik.tv/folge-%d/Start' % episode, | ||||
|             episode) | ||||
|         playlist = self._search_regex(r'playlist = (\[.*?\]);', start_webpage, | ||||
|             u'playlist', flags=re.DOTALL) | ||||
|         files = json.loads(re.sub('{[^{}]*?}', '{}', playlist)) | ||||
|         # TODO: return a single multipart video | ||||
|         videos = [] | ||||
|         for i, _ in enumerate(files, 1): | ||||
|             video_id = '%04d%d' % (episode, i) | ||||
|             video_url = 'http://dl%d.fernsehkritik.tv/fernsehkritik%d%s.flv' % (server, episode, '' if i == 1 else '-%d' % i) | ||||
|             video_title = 'Fernsehkritik %d.%d' % (episode, i) | ||||
|             videos.append({ | ||||
|                 'id': video_id, | ||||
|                 'url': video_url, | ||||
|                 'ext': determine_ext(video_url), | ||||
|                 'title': clean_html(get_element_by_id('eptitle', start_webpage)), | ||||
|                 'description': clean_html(get_element_by_id('contentlist', start_webpage)), | ||||
|                 'thumbnail': video_thumbnail | ||||
|             }) | ||||
|         return videos | ||||
|  | ||||
|  | ||||
| class FKTVPosteckeIE(InfoExtractor): | ||||
|     IE_NAME = u'fernsehkritik.tv:postecke' | ||||
|     _VALID_URL = r'(?:http://)?(?:www\.)?fernsehkritik.tv/inline-video/postecke.php\?(.*&)?ep=(?P<ep>[0-9]+)(&|$)' | ||||
|     _TEST = { | ||||
|         u'url': u'http://fernsehkritik.tv/inline-video/postecke.php?iframe=true&width=625&height=440&ep=120', | ||||
|         u'file': u'0120.flv', | ||||
|         u'md5': u'262f0adbac80317412f7e57b4808e5c4', | ||||
|         u'info_dict': { | ||||
|             u"title": u"Postecke 120" | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         episode = int(mobj.group('ep')) | ||||
|  | ||||
|         server = random.randint(2, 4) | ||||
|         video_id = '%04d' % episode | ||||
|         video_url = 'http://dl%d.fernsehkritik.tv/postecke/postecke%d.flv' % (server, episode) | ||||
|         video_title = 'Postecke %d' % episode | ||||
|         return { | ||||
|             'id':       video_id, | ||||
|             'url':      video_url, | ||||
|             'ext':      determine_ext(video_url), | ||||
|             'title':    video_title, | ||||
|         } | ||||
| @@ -9,7 +9,7 @@ from ..utils import ( | ||||
|  | ||||
| class FlickrIE(InfoExtractor): | ||||
|     """Information Extractor for Flickr videos""" | ||||
|     _VALID_URL = r'(?:https?://)?(?:www\.)?flickr\.com/photos/(?P<uploader_id>[\w\-_@]+)/(?P<id>\d+).*' | ||||
|     _VALID_URL = r'(?:https?://)?(?:www\.|secure\.)?flickr\.com/photos/(?P<uploader_id>[\w\-_@]+)/(?P<id>\d+).*' | ||||
|     _TEST = { | ||||
|         u'url': u'http://www.flickr.com/photos/forestwander-nature-pictures/5645318632/in/photostream/', | ||||
|         u'file': u'5645318632.mp4', | ||||
|   | ||||
							
								
								
									
										129
									
								
								youtube_dl/extractor/francetv.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										129
									
								
								youtube_dl/extractor/francetv.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,129 @@ | ||||
| # encoding: utf-8 | ||||
| import re | ||||
| import xml.etree.ElementTree | ||||
| import json | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     compat_urlparse, | ||||
| ) | ||||
|  | ||||
|  | ||||
| class FranceTVBaseInfoExtractor(InfoExtractor): | ||||
|     def _extract_video(self, video_id): | ||||
|         xml_desc = self._download_webpage( | ||||
|             'http://www.francetvinfo.fr/appftv/webservices/video/' | ||||
|             'getInfosOeuvre.php?id-diffusion=' | ||||
|             + video_id, video_id, 'Downloading XML config') | ||||
|         info = xml.etree.ElementTree.fromstring(xml_desc.encode('utf-8')) | ||||
|  | ||||
|         manifest_url = info.find('videos/video/url').text | ||||
|         video_url = manifest_url.replace('manifest.f4m', 'index_2_av.m3u8') | ||||
|         video_url = video_url.replace('/z/', '/i/') | ||||
|         thumbnail_path = info.find('image').text | ||||
|  | ||||
|         return {'id': video_id, | ||||
|                 'ext': 'mp4', | ||||
|                 'url': video_url, | ||||
|                 'title': info.find('titre').text, | ||||
|                 'thumbnail': compat_urlparse.urljoin('http://pluzz.francetv.fr', thumbnail_path), | ||||
|                 'description': info.find('synopsis').text, | ||||
|                 } | ||||
|  | ||||
|  | ||||
| class PluzzIE(FranceTVBaseInfoExtractor): | ||||
|     IE_NAME = u'pluzz.francetv.fr' | ||||
|     _VALID_URL = r'https?://pluzz\.francetv\.fr/videos/(.*?)\.html' | ||||
|  | ||||
|     # Can't use tests, videos expire in 7 days | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         title = re.match(self._VALID_URL, url).group(1) | ||||
|         webpage = self._download_webpage(url, title) | ||||
|         video_id = self._search_regex( | ||||
|             r'data-diffusion="(\d+)"', webpage, 'ID') | ||||
|         return self._extract_video(video_id) | ||||
|  | ||||
|  | ||||
| class FranceTvInfoIE(FranceTVBaseInfoExtractor): | ||||
|     IE_NAME = u'francetvinfo.fr' | ||||
|     _VALID_URL = r'https?://www\.francetvinfo\.fr/replay.*/(?P<title>.+).html' | ||||
|  | ||||
|     _TEST = { | ||||
|         u'url': u'http://www.francetvinfo.fr/replay-jt/france-3/soir-3/jt-grand-soir-3-lundi-26-aout-2013_393427.html', | ||||
|         u'file': u'84981923.mp4', | ||||
|         u'info_dict': { | ||||
|             u'title': u'Soir 3', | ||||
|         }, | ||||
|         u'params': { | ||||
|             u'skip_download': True, | ||||
|         }, | ||||
|     } | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         page_title = mobj.group('title') | ||||
|         webpage = self._download_webpage(url, page_title) | ||||
|         video_id = self._search_regex(r'id-video=(\d+?)"', webpage, u'video id') | ||||
|         return self._extract_video(video_id) | ||||
|  | ||||
|  | ||||
| class France2IE(FranceTVBaseInfoExtractor): | ||||
|     IE_NAME = u'france2.fr' | ||||
|     _VALID_URL = r'''(?x)https?://www\.france2\.fr/ | ||||
|         (?: | ||||
|             emissions/.*?/videos/(?P<id>\d+) | ||||
|         |   emission/(?P<key>[^/?]+) | ||||
|         )''' | ||||
|  | ||||
|     _TEST = { | ||||
|         u'url': u'http://www.france2.fr/emissions/13h15-le-samedi-le-dimanche/videos/75540104', | ||||
|         u'file': u'75540104.mp4', | ||||
|         u'info_dict': { | ||||
|             u'title': u'13h15, le samedi...', | ||||
|             u'description': u'md5:2e5b58ba7a2d3692b35c792be081a03d', | ||||
|         }, | ||||
|         u'params': { | ||||
|             u'skip_download': True, | ||||
|         }, | ||||
|     } | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         if mobj.group('key'): | ||||
|             webpage = self._download_webpage(url, mobj.group('key')) | ||||
|             video_id = self._html_search_regex( | ||||
|                 r'''(?x)<div\s+class="video-player">\s* | ||||
|                     <a\s+href="http://videos.francetv.fr/video/([0-9]+)"\s+ | ||||
|                     class="francetv-video-player">''', | ||||
|                 webpage, u'video ID') | ||||
|         else: | ||||
|             video_id = mobj.group('id') | ||||
|         return self._extract_video(video_id) | ||||
|  | ||||
|  | ||||
| class GenerationQuoiIE(InfoExtractor): | ||||
|     IE_NAME = u'france2.fr:generation-quoi' | ||||
|     _VALID_URL = r'https?://generation-quoi\.france2\.fr/portrait/(?P<name>.*)(\?|$)' | ||||
|  | ||||
|     _TEST = { | ||||
|         u'url': u'http://generation-quoi.france2.fr/portrait/garde-a-vous', | ||||
|         u'file': u'k7FJX8VBcvvLmX4wA5Q.mp4', | ||||
|         u'info_dict': { | ||||
|             u'title': u'Génération Quoi - Garde à Vous', | ||||
|             u'uploader': u'Génération Quoi', | ||||
|         }, | ||||
|         u'params': { | ||||
|             # It uses Dailymotion | ||||
|             u'skip_download': True, | ||||
|         }, | ||||
|     } | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         name = mobj.group('name') | ||||
|         info_url = compat_urlparse.urljoin(url, '/medias/video/%s.json' % name) | ||||
|         info_json = self._download_webpage(info_url, name) | ||||
|         info = json.loads(info_json) | ||||
|         return self.url_result('http://www.dailymotion.com/video/%s' % info['id'], | ||||
|             ie='Dailymotion') | ||||
| @@ -21,17 +21,15 @@ class FunnyOrDieIE(InfoExtractor): | ||||
|         video_id = mobj.group('id') | ||||
|         webpage = self._download_webpage(url, video_id) | ||||
|  | ||||
|         video_url = self._html_search_regex(r'<video[^>]*>\s*<source[^>]*>\s*<source src="(?P<url>[^"]+)"', | ||||
|         video_url = self._search_regex( | ||||
|             [r'type="video/mp4" src="(.*?)"', r'src="([^>]*?)" type=\'video/mp4\''], | ||||
|             webpage, u'video URL', flags=re.DOTALL) | ||||
|  | ||||
|         title = self._html_search_regex((r"<h1 class='player_page_h1'.*?>(?P<title>.*?)</h1>", | ||||
|             r'<title>(?P<title>[^<]+?)</title>'), webpage, 'title', flags=re.DOTALL) | ||||
|  | ||||
|         info = { | ||||
|             'id': video_id, | ||||
|             'url': video_url, | ||||
|             'ext': 'mp4', | ||||
|             'title': title, | ||||
|             'title': self._og_search_title(webpage), | ||||
|             'description': self._og_search_description(webpage), | ||||
|         } | ||||
|         return [info] | ||||
|   | ||||
| @@ -14,7 +14,7 @@ class GameSpotIE(InfoExtractor): | ||||
|         u"file": u"6410818.mp4", | ||||
|         u"md5": u"b2a30deaa8654fcccd43713a6b6a4825", | ||||
|         u"info_dict": { | ||||
|             u"title": u"Arma III - Community Guide: SITREP I", | ||||
|             u"title": u"Arma 3 - Community Guide: SITREP I", | ||||
|             u"upload_date": u"20130627",  | ||||
|         } | ||||
|     } | ||||
|   | ||||
| @@ -8,11 +8,13 @@ from ..utils import ( | ||||
|     compat_urllib_error, | ||||
|     compat_urllib_parse, | ||||
|     compat_urllib_request, | ||||
|     compat_urlparse, | ||||
|  | ||||
|     ExtractorError, | ||||
| ) | ||||
| from .brightcove import BrightcoveIE | ||||
|  | ||||
|  | ||||
| class GenericIE(InfoExtractor): | ||||
|     IE_DESC = u'Generic downloader that works on some sites' | ||||
|     _VALID_URL = r'.*' | ||||
| @@ -23,21 +25,10 @@ class GenericIE(InfoExtractor): | ||||
|             u'file': u'13601338388002.mp4', | ||||
|             u'md5': u'85b90ccc9d73b4acd9138d3af4c27f89', | ||||
|             u'info_dict': { | ||||
|                 u"uploader": u"www.hodiho.fr",  | ||||
|                 u"uploader": u"www.hodiho.fr", | ||||
|                 u"title": u"R\u00e9gis plante sa Jeep" | ||||
|             } | ||||
|         }, | ||||
|         { | ||||
|             u'url': u'http://www.8tv.cat/8aldia/videos/xavier-sala-i-martin-aquesta-tarda-a-8-al-dia/', | ||||
|             u'file': u'2371591881001.mp4', | ||||
|             u'md5': u'9e80619e0a94663f0bdc849b4566af19', | ||||
|             u'note': u'Test Brightcove downloads and detection in GenericIE', | ||||
|             u'info_dict': { | ||||
|                 u'title': u'Xavier Sala i Martín: “Un banc que no presta és un banc zombi que no serveix per a res”', | ||||
|                 u'uploader': u'8TV', | ||||
|                 u'description': u'md5:a950cc4285c43e44d763d036710cd9cd', | ||||
|             } | ||||
|         }, | ||||
|     ] | ||||
|  | ||||
|     def report_download_webpage(self, video_id): | ||||
| @@ -107,8 +98,18 @@ class GenericIE(InfoExtractor): | ||||
|         return new_url | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         new_url = self._test_redirect(url) | ||||
|         if new_url: return [self.url_result(new_url)] | ||||
|         parsed_url = compat_urlparse.urlparse(url) | ||||
|         if not parsed_url.scheme: | ||||
|             self._downloader.report_warning('The url doesn\'t specify the protocol, trying with http') | ||||
|             return self.url_result('http://' + url) | ||||
|  | ||||
|         try: | ||||
|             new_url = self._test_redirect(url) | ||||
|             if new_url: | ||||
|                 return [self.url_result(new_url)] | ||||
|         except compat_urllib_error.HTTPError: | ||||
|             # This may be a stupid server that doesn't like HEAD, our UA, or so | ||||
|             pass | ||||
|  | ||||
|         video_id = url.split('/')[-1] | ||||
|         try: | ||||
| @@ -116,10 +117,10 @@ class GenericIE(InfoExtractor): | ||||
|         except ValueError: | ||||
|             # since this is the last-resort InfoExtractor, if | ||||
|             # this error is thrown, it'll be thrown here | ||||
|             raise ExtractorError(u'Invalid URL: %s' % url) | ||||
|             raise ExtractorError(u'Failed to download URL: %s' % url) | ||||
|  | ||||
|         self.report_extraction(video_id) | ||||
|         # Look for BrigthCove: | ||||
|         # Look for BrightCove: | ||||
|         m_brightcove = re.search(r'<object.+?class=([\'"]).*?BrightcoveExperience.*?\1.+?</object>', webpage, re.DOTALL) | ||||
|         if m_brightcove is not None: | ||||
|             self.to_screen(u'Brightcove video detected.') | ||||
| @@ -145,15 +146,19 @@ class GenericIE(InfoExtractor): | ||||
|             if m_video_type is not None: | ||||
|                 mobj = re.search(r'<meta.*?property="og:video".*?content="(.*?)"', webpage) | ||||
|         if mobj is None: | ||||
|             raise ExtractorError(u'Invalid URL: %s' % url) | ||||
|             # HTML5 video | ||||
|             mobj = re.search(r'<video[^<]*(?:>.*?<source.*?)? src="([^"]+)"', webpage, flags=re.DOTALL) | ||||
|         if mobj is None: | ||||
|             raise ExtractorError(u'Unsupported URL: %s' % url) | ||||
|  | ||||
|         # It's possible that one of the regexes | ||||
|         # matched, but returned an empty group: | ||||
|         if mobj.group(1) is None: | ||||
|             raise ExtractorError(u'Invalid URL: %s' % url) | ||||
|             raise ExtractorError(u'Did not find a valid video URL at %s' % url) | ||||
|  | ||||
|         video_url = compat_urllib_parse.unquote(mobj.group(1)) | ||||
|         video_id = os.path.basename(video_url) | ||||
|         video_url = mobj.group(1) | ||||
|         video_url = compat_urlparse.urljoin(url, video_url) | ||||
|         video_id = compat_urllib_parse.unquote(os.path.basename(video_url)) | ||||
|  | ||||
|         # here's a fun little line of code for you: | ||||
|         video_extension = os.path.splitext(video_id)[1][1:] | ||||
|   | ||||
| @@ -40,7 +40,9 @@ class GooglePlusIE(InfoExtractor): | ||||
|         self.report_extraction(video_id) | ||||
|  | ||||
|         # Extract update date | ||||
|         upload_date = self._html_search_regex('title="Timestamp">(.*?)</a>', | ||||
|         upload_date = self._html_search_regex( | ||||
|             r'''(?x)<a.+?class="o-T-s\s[^"]+"\s+style="display:\s*none"\s*> | ||||
|                     ([0-9]{4}-[0-9]{2}-[0-9]{2})</a>''', | ||||
|             webpage, u'upload date', fatal=False) | ||||
|         if upload_date: | ||||
|             # Convert timestring to a format suitable for filename | ||||
| @@ -57,8 +59,8 @@ class GooglePlusIE(InfoExtractor): | ||||
|             webpage, 'title', default=u'NA') | ||||
|  | ||||
|         # Step 2, Simulate clicking the image box to launch video | ||||
|         DOMAIN = 'https://plus.google.com' | ||||
|         video_page = self._search_regex(r'<a href="((?:%s)?/photos/.*?)"' % re.escape(DOMAIN), | ||||
|         DOMAIN = 'https://plus.google.com/' | ||||
|         video_page = self._search_regex(r'<a href="((?:%s)?photos/.*?)"' % re.escape(DOMAIN), | ||||
|             webpage, u'video page URL') | ||||
|         if not video_page.startswith(DOMAIN): | ||||
|             video_page = DOMAIN + video_page | ||||
|   | ||||
							
								
								
									
										37
									
								
								youtube_dl/extractor/hark.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										37
									
								
								youtube_dl/extractor/hark.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,37 @@ | ||||
| # -*- coding: utf-8 -*- | ||||
|  | ||||
| import re | ||||
| import json | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import determine_ext | ||||
|  | ||||
| class HarkIE(InfoExtractor): | ||||
|     _VALID_URL = r'https?://www\.hark\.com/clips/(.+?)-.+' | ||||
|     _TEST = { | ||||
|         u'url': u'http://www.hark.com/clips/mmbzyhkgny-obama-beyond-the-afghan-theater-we-only-target-al-qaeda-on-may-23-2013', | ||||
|         u'file': u'mmbzyhkgny.mp3', | ||||
|         u'md5': u'6783a58491b47b92c7c1af5a77d4cbee', | ||||
|         u'info_dict': { | ||||
|             u'title': u"Obama: 'Beyond The Afghan Theater, We Only Target Al Qaeda' on May 23, 2013", | ||||
|             u'description': u'President Barack Obama addressed the nation live on May 23, 2013 in a speech aimed at addressing counter-terrorism policies including the use of drone strikes, detainees at Guantanamo Bay prison facility, and American citizens who are terrorists.', | ||||
|             u'duration': 11, | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         video_id = mobj.group(1) | ||||
|         json_url = "http://www.hark.com/clips/%s.json" %(video_id) | ||||
|         info_json = self._download_webpage(json_url, video_id) | ||||
|         info = json.loads(info_json) | ||||
|         final_url = info['url'] | ||||
|  | ||||
|         return {'id': video_id, | ||||
|                 'url' : final_url, | ||||
|                 'title': info['name'], | ||||
|                 'ext': determine_ext(final_url), | ||||
|                 'description': info['description'], | ||||
|                 'thumbnail': info['image_original'], | ||||
|                 'duration': info['duration'], | ||||
|                 } | ||||
| @@ -7,11 +7,11 @@ from .common import InfoExtractor | ||||
| class HotNewHipHopIE(InfoExtractor): | ||||
|     _VALID_URL = r'http://www\.hotnewhiphop.com/.*\.(?P<id>.*)\.html' | ||||
|     _TEST = { | ||||
|         u'url': u"http://www.hotnewhiphop.com/freddie-gibbs-lay-it-down-song.1435540.html'", | ||||
|         u'url': u"http://www.hotnewhiphop.com/freddie-gibbs-lay-it-down-song.1435540.html", | ||||
|         u'file': u'1435540.mp3', | ||||
|         u'md5': u'2c2cd2f76ef11a9b3b581e8b232f3d96', | ||||
|         u'info_dict': { | ||||
|             u"title": u"Freddie Gibbs Songs - Lay It Down" | ||||
|             u"title": u"Freddie Gibbs - Lay It Down" | ||||
|         } | ||||
|     } | ||||
|  | ||||
|   | ||||
| @@ -19,8 +19,7 @@ class HowcastIE(InfoExtractor): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|  | ||||
|         video_id = mobj.group('id') | ||||
|         webpage_url = 'http://www.howcast.com/videos/' + video_id | ||||
|         webpage = self._download_webpage(webpage_url, video_id) | ||||
|         webpage = self._download_webpage(url, video_id) | ||||
|  | ||||
|         self.report_extraction(video_id) | ||||
|  | ||||
|   | ||||
| @@ -13,7 +13,7 @@ class IGNIE(InfoExtractor): | ||||
|     Some videos of it.ign.com are also supported | ||||
|     """ | ||||
|  | ||||
|     _VALID_URL = r'https?://.+?\.ign\.com/(?:videos|show_videos)(/.+)?/(?P<name_or_id>.+)' | ||||
|     _VALID_URL = r'https?://.+?\.ign\.com/(?P<type>videos|show_videos|articles|(?:[^/]*/feature))(/.+)?/(?P<name_or_id>.+)' | ||||
|     IE_NAME = u'ign.com' | ||||
|  | ||||
|     _CONFIG_URL_TEMPLATE = 'http://www.ign.com/videos/configs/id/%s.config' | ||||
| @@ -21,15 +21,39 @@ class IGNIE(InfoExtractor): | ||||
|                        r'id="my_show_video">.*?<p>(.*?)</p>', | ||||
|                        ] | ||||
|  | ||||
|     _TEST = { | ||||
|         u'url': u'http://www.ign.com/videos/2013/06/05/the-last-of-us-review', | ||||
|         u'file': u'8f862beef863986b2785559b9e1aa599.mp4', | ||||
|         u'md5': u'eac8bdc1890980122c3b66f14bdd02e9', | ||||
|         u'info_dict': { | ||||
|             u'title': u'The Last of Us Review', | ||||
|             u'description': u'md5:c8946d4260a4d43a00d5ae8ed998870c', | ||||
|         } | ||||
|     } | ||||
|     _TESTS = [ | ||||
|         { | ||||
|             u'url': u'http://www.ign.com/videos/2013/06/05/the-last-of-us-review', | ||||
|             u'file': u'8f862beef863986b2785559b9e1aa599.mp4', | ||||
|             u'md5': u'eac8bdc1890980122c3b66f14bdd02e9', | ||||
|             u'info_dict': { | ||||
|                 u'title': u'The Last of Us Review', | ||||
|                 u'description': u'md5:c8946d4260a4d43a00d5ae8ed998870c', | ||||
|             } | ||||
|         }, | ||||
|         { | ||||
|             u'url': u'http://me.ign.com/en/feature/15775/100-little-things-in-gta-5-that-will-blow-your-mind', | ||||
|             u'playlist': [ | ||||
|                 { | ||||
|                     u'file': u'5ebbd138523268b93c9141af17bec937.mp4', | ||||
|                     u'info_dict': { | ||||
|                         u'title': u'GTA 5 Video Review', | ||||
|                         u'description': u'Rockstar drops the mic on this generation of games. Watch our review of the masterly Grand Theft Auto V.', | ||||
|                     }, | ||||
|                 }, | ||||
|                 { | ||||
|                     u'file': u'638672ee848ae4ff108df2a296418ee2.mp4', | ||||
|                     u'info_dict': { | ||||
|                         u'title': u'GTA 5\'s Twisted Beauty in Super Slow Motion', | ||||
|                         u'description': u'The twisted beauty of GTA 5 in stunning slow motion.', | ||||
|                     }, | ||||
|                 }, | ||||
|             ], | ||||
|             u'params': { | ||||
|                 u'skip_download': True, | ||||
|             }, | ||||
|         }, | ||||
|     ] | ||||
|  | ||||
|     def _find_video_id(self, webpage): | ||||
|         res_id = [r'data-video-id="(.+?)"', | ||||
| @@ -41,7 +65,18 @@ class IGNIE(InfoExtractor): | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         name_or_id = mobj.group('name_or_id') | ||||
|         page_type = mobj.group('type') | ||||
|         webpage = self._download_webpage(url, name_or_id) | ||||
|         if page_type == 'articles': | ||||
|             video_url = self._search_regex(r'var videoUrl = "(.+?)"', webpage, u'video url') | ||||
|             return self.url_result(video_url, ie='IGN') | ||||
|         elif page_type != 'video': | ||||
|             multiple_urls = re.findall( | ||||
|                 '<param name="flashvars" value="[^"]*?url=(https?://www\.ign\.com/videos/.*?)["&]', | ||||
|                 webpage) | ||||
|             if multiple_urls: | ||||
|                 return [self.url_result(u, ie='IGN') for u in multiple_urls] | ||||
|  | ||||
|         video_id = self._find_video_id(webpage) | ||||
|         result = self._get_video_info(video_id) | ||||
|         description = self._html_search_regex(self._DESCRIPTION_RE, | ||||
| @@ -68,7 +103,7 @@ class IGNIE(InfoExtractor): | ||||
| class OneUPIE(IGNIE): | ||||
|     """Extractor for 1up.com, it uses the ign videos system.""" | ||||
|  | ||||
|     _VALID_URL = r'https?://gamevideos.1up.com/video/id/(?P<name_or_id>.+)' | ||||
|     _VALID_URL = r'https?://gamevideos.1up.com/(?P<type>video)/id/(?P<name_or_id>.+)' | ||||
|     IE_NAME = '1up.com' | ||||
|  | ||||
|     _DESCRIPTION_RE = r'<div id="vid_summary">(.+?)</div>' | ||||
| @@ -83,6 +118,9 @@ class OneUPIE(IGNIE): | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     # Override IGN tests | ||||
|     _TESTS = [] | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         id = mobj.group('name_or_id') | ||||
|   | ||||
| @@ -5,7 +5,7 @@ from .common import InfoExtractor | ||||
|  | ||||
| class InaIE(InfoExtractor): | ||||
|     """Information Extractor for Ina.fr""" | ||||
|     _VALID_URL = r'(?:http://)?(?:www\.)?ina\.fr/video/(?P<id>I[0-9]+)/.*' | ||||
|     _VALID_URL = r'(?:http://)?(?:www\.)?ina\.fr/video/(?P<id>I?[A-F0-9]+)/.*' | ||||
|     _TEST = { | ||||
|         u'url': u'www.ina.fr/video/I12055569/francois-hollande-je-crois-que-c-est-clair-video.html', | ||||
|         u'file': u'I12055569.mp4', | ||||
|   | ||||
							
								
								
									
										52
									
								
								youtube_dl/extractor/jeuxvideo.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										52
									
								
								youtube_dl/extractor/jeuxvideo.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,52 @@ | ||||
| # coding: utf-8 | ||||
|  | ||||
| import json | ||||
| import re | ||||
| import xml.etree.ElementTree | ||||
|  | ||||
| from .common import InfoExtractor | ||||
|  | ||||
|  | ||||
| class JeuxVideoIE(InfoExtractor): | ||||
|     _VALID_URL = r'http://.*?\.jeuxvideo\.com/.*/(.*?)-\d+\.htm' | ||||
|  | ||||
|     _TEST = { | ||||
|         u'url': u'http://www.jeuxvideo.com/reportages-videos-jeux/0004/00046170/tearaway-playstation-vita-gc-2013-tearaway-nous-presente-ses-papiers-d-identite-00115182.htm', | ||||
|         u'file': u'5182.mp4', | ||||
|         u'md5': u'046e491afb32a8aaac1f44dd4ddd54ee', | ||||
|         u'info_dict': { | ||||
|             u'title': u'GC 2013 : Tearaway nous présente ses papiers d\'identité', | ||||
|             u'description': u'Lorsque les développeurs de LittleBigPlanet proposent un nouveau titre, on ne peut que s\'attendre à un résultat original et fort attrayant.\n', | ||||
|         }, | ||||
|     } | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         title = re.match(self._VALID_URL, url).group(1) | ||||
|         webpage = self._download_webpage(url, title) | ||||
|         xml_link = self._html_search_regex( | ||||
|             r'<param name="flashvars" value="config=(.*?)" />', | ||||
|             webpage, u'config URL') | ||||
|          | ||||
|         video_id = self._search_regex( | ||||
|             r'http://www\.jeuxvideo\.com/config/\w+/\d+/(.*?)/\d+_player\.xml', | ||||
|             xml_link, u'video ID') | ||||
|  | ||||
|         xml_config = self._download_webpage( | ||||
|             xml_link, title, u'Downloading XML config') | ||||
|         config = xml.etree.ElementTree.fromstring(xml_config.encode('utf-8')) | ||||
|         info_json = self._search_regex( | ||||
|             r'(?sm)<format\.json>(.*?)</format\.json>', | ||||
|             xml_config, u'JSON information') | ||||
|         info = json.loads(info_json)['versions'][0] | ||||
|          | ||||
|         video_url = 'http://video720.jeuxvideo.com/' + info['file'] | ||||
|  | ||||
|         return { | ||||
|             'id': video_id, | ||||
|             'title': config.find('titre_video').text, | ||||
|             'ext': 'mp4', | ||||
|             'url': video_url, | ||||
|             'description': self._og_search_description(webpage), | ||||
|             'thumbnail': config.find('image').text, | ||||
|         } | ||||
							
								
								
									
										39
									
								
								youtube_dl/extractor/kankan.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										39
									
								
								youtube_dl/extractor/kankan.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,39 @@ | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import determine_ext | ||||
|  | ||||
|  | ||||
| class KankanIE(InfoExtractor): | ||||
|     _VALID_URL = r'https?://(?:.*?\.)?kankan\.com/.+?/(?P<id>\d+)\.shtml' | ||||
|      | ||||
|     _TEST = { | ||||
|         u'url': u'http://yinyue.kankan.com/vod/48/48863.shtml', | ||||
|         u'file': u'48863.flv', | ||||
|         u'md5': u'29aca1e47ae68fc28804aca89f29507e', | ||||
|         u'info_dict': { | ||||
|             u'title': u'Ready To Go', | ||||
|         }, | ||||
|     } | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         video_id = mobj.group('id') | ||||
|         webpage = self._download_webpage(url, video_id) | ||||
|  | ||||
|         title = self._search_regex(r'(?:G_TITLE=|G_MOVIE_TITLE = )[\'"](.+?)[\'"]', webpage, u'video title') | ||||
|         surls = re.search(r'surls:\[\'.+?\'\]|lurl:\'.+?\.flv\'', webpage).group(0) | ||||
|         gcids = re.findall(r"http://.+?/.+?/(.+?)/", surls) | ||||
|         gcid = gcids[-1] | ||||
|  | ||||
|         video_info_page = self._download_webpage('http://p2s.cl.kankan.com/getCdnresource_flv?gcid=%s' % gcid, | ||||
|                                                  video_id, u'Downloading video url info') | ||||
|         ip = self._search_regex(r'ip:"(.+?)"', video_info_page, u'video url ip') | ||||
|         path = self._search_regex(r'path:"(.+?)"', video_info_page, u'video url path') | ||||
|         video_url = 'http://%s%s' % (ip, path) | ||||
|  | ||||
|         return {'id': video_id, | ||||
|                 'title': title, | ||||
|                 'url': video_url, | ||||
|                 'ext': determine_ext(video_url), | ||||
|                 } | ||||
| @@ -4,10 +4,10 @@ from .common import InfoExtractor | ||||
|  | ||||
|  | ||||
| class KeekIE(InfoExtractor): | ||||
|     _VALID_URL = r'http://(?:www\.)?keek\.com/(?:!|\w+/keeks/)(?P<videoID>\w+)' | ||||
|     _VALID_URL = r'https?://(?:www\.)?keek\.com/(?:!|\w+/keeks/)(?P<videoID>\w+)' | ||||
|     IE_NAME = u'keek' | ||||
|     _TEST = { | ||||
|         u'url': u'http://www.keek.com/ytdl/keeks/NODfbab', | ||||
|         u'url': u'https://www.keek.com/ytdl/keeks/NODfbab', | ||||
|         u'file': u'NODfbab.mp4', | ||||
|         u'md5': u'9b0636f8c0f7614afa4ea5e4c6e57e83', | ||||
|         u'info_dict': { | ||||
|   | ||||
							
								
								
									
										37
									
								
								youtube_dl/extractor/kickstarter.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										37
									
								
								youtube_dl/extractor/kickstarter.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,37 @@ | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
|  | ||||
|  | ||||
| class KickStarterIE(InfoExtractor): | ||||
|     _VALID_URL = r'https?://www\.kickstarter\.com/projects/(?P<id>\d*)/.*' | ||||
|     _TEST = { | ||||
|         u"url": u"https://www.kickstarter.com/projects/1404461844/intersection-the-story-of-josh-grant?ref=home_location", | ||||
|         u"file": u"1404461844.mp4", | ||||
|         u"md5": u"c81addca81327ffa66c642b5d8b08cab", | ||||
|         u"info_dict": { | ||||
|             u"title": u"Intersection: The Story of Josh Grant by Kyle Cowling", | ||||
|         }, | ||||
|     } | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         m = re.match(self._VALID_URL, url) | ||||
|         video_id = m.group('id') | ||||
|         webpage_src = self._download_webpage(url, video_id) | ||||
|  | ||||
|         video_url = self._search_regex(r'data-video="(.*?)">', | ||||
|             webpage_src, u'video URL') | ||||
|         if 'mp4' in video_url: | ||||
|             ext = 'mp4' | ||||
|         else: | ||||
|             ext = 'flv' | ||||
|         video_title = self._html_search_regex(r"<title>(.*?)</title>", | ||||
|             webpage_src, u'title').rpartition(u'\u2014 Kickstarter')[0].strip() | ||||
|  | ||||
|         results = [{ | ||||
|                     'id': video_id, | ||||
|                     'url': video_url, | ||||
|                     'title': video_title, | ||||
|                     'ext': ext, | ||||
|                     }] | ||||
|         return results | ||||
| @@ -2,7 +2,12 @@ import re | ||||
| import json | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import compat_urllib_parse_urlparse, compat_urlparse | ||||
| from ..utils import ( | ||||
|     compat_urllib_parse_urlparse, | ||||
|     compat_urlparse, | ||||
|     get_meta_content, | ||||
|     ExtractorError, | ||||
| ) | ||||
|  | ||||
|  | ||||
| class LivestreamIE(InfoExtractor): | ||||
| @@ -35,8 +40,11 @@ class LivestreamIE(InfoExtractor): | ||||
|  | ||||
|         if video_id is None: | ||||
|             # This is an event page: | ||||
|             api_url = self._search_regex(r'event_design_eventId: \'(.+?)\'', | ||||
|                                          webpage, 'api url') | ||||
|             player = get_meta_content('twitter:player', webpage) | ||||
|             if player is None: | ||||
|                 raise ExtractorError('Couldn\'t extract event api url') | ||||
|             api_url = player.replace('/player', '') | ||||
|             api_url = re.sub(r'^(https?://)(new\.)', r'\1api.\2', api_url) | ||||
|             info = json.loads(self._download_webpage(api_url, event_name, | ||||
|                                                      u'Downloading event info')) | ||||
|             videos = [self._extract_video_info(video_data['data']) | ||||
|   | ||||
| @@ -122,7 +122,7 @@ class MetacafeIE(InfoExtractor): | ||||
|         video_title = self._html_search_regex(r'(?im)<title>(.*) - Video</title>', webpage, u'title') | ||||
|         description = self._og_search_description(webpage) | ||||
|         video_uploader = self._html_search_regex( | ||||
|                 r'submitter=(.*?);|googletag\.pubads\(\)\.setTargeting\("channel","([^"]+)"\);', | ||||
|                 r'submitter=(.*?);|googletag\.pubads\(\)\.setTargeting\("(?:channel|submiter)","([^"]+)"\);', | ||||
|                 webpage, u'uploader nickname', fatal=False) | ||||
|  | ||||
|         return { | ||||
|   | ||||
							
								
								
									
										55
									
								
								youtube_dl/extractor/metacritic.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										55
									
								
								youtube_dl/extractor/metacritic.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,55 @@ | ||||
| import re | ||||
| import xml.etree.ElementTree | ||||
| import operator | ||||
|  | ||||
| from .common import InfoExtractor | ||||
|  | ||||
|  | ||||
| class MetacriticIE(InfoExtractor): | ||||
|     _VALID_URL = r'https?://www\.metacritic\.com/.+?/trailers/(?P<id>\d+)' | ||||
|  | ||||
|     _TEST = { | ||||
|         u'url': u'http://www.metacritic.com/game/playstation-4/infamous-second-son/trailers/3698222', | ||||
|         u'file': u'3698222.mp4', | ||||
|         u'info_dict': { | ||||
|             u'title': u'inFamous: Second Son - inSide Sucker Punch: Smoke & Mirrors', | ||||
|             u'description': u'Take a peak behind-the-scenes to see how Sucker Punch brings smoke into the universe of inFAMOUS Second Son on the PS4.', | ||||
|             u'duration': 221, | ||||
|         }, | ||||
|     } | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         video_id = mobj.group('id') | ||||
|         webpage = self._download_webpage(url, video_id) | ||||
|         # The xml is not well formatted, there are raw '&' | ||||
|         info_xml = self._download_webpage('http://www.metacritic.com/video_data?video=' + video_id, | ||||
|             video_id, u'Downloading info xml').replace('&', '&') | ||||
|         info = xml.etree.ElementTree.fromstring(info_xml.encode('utf-8')) | ||||
|  | ||||
|         clip = next(c for c in info.findall('playList/clip') if c.find('id').text == video_id) | ||||
|         formats = [] | ||||
|         for videoFile in clip.findall('httpURI/videoFile'): | ||||
|             rate_str = videoFile.find('rate').text | ||||
|             video_url = videoFile.find('filePath').text | ||||
|             formats.append({ | ||||
|                 'url': video_url, | ||||
|                 'ext': 'mp4', | ||||
|                 'format_id': rate_str, | ||||
|                 'rate': int(rate_str), | ||||
|             }) | ||||
|         formats.sort(key=operator.itemgetter('rate')) | ||||
|  | ||||
|         description = self._html_search_regex(r'<b>Description:</b>(.*?)</p>', | ||||
|             webpage, u'description', flags=re.DOTALL) | ||||
|  | ||||
|         info = { | ||||
|             'id': video_id, | ||||
|             'title': clip.find('title').text, | ||||
|             'formats': formats, | ||||
|             'description': description, | ||||
|             'duration': int(clip.find('duration').text), | ||||
|         } | ||||
|         # TODO: Remove when #980 has been merged | ||||
|         info.update(formats[-1]) | ||||
|         return info | ||||
							
								
								
									
										74
									
								
								youtube_dl/extractor/mit.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										74
									
								
								youtube_dl/extractor/mit.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,74 @@ | ||||
| import re | ||||
| import json | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     clean_html, | ||||
|     get_element_by_id, | ||||
| ) | ||||
|  | ||||
|  | ||||
| class TechTVMITIE(InfoExtractor): | ||||
|     IE_NAME = u'techtv.mit.edu' | ||||
|     _VALID_URL = r'https?://techtv\.mit\.edu/(videos|embeds)/(?P<id>\d+)' | ||||
|  | ||||
|     _TEST = { | ||||
|         u'url': u'http://techtv.mit.edu/videos/25418-mit-dna-learning-center-set', | ||||
|         u'file': u'25418.mp4', | ||||
|         u'md5': u'1f8cb3e170d41fd74add04d3c9330e5f', | ||||
|         u'info_dict': { | ||||
|             u'title': u'MIT DNA Learning Center Set', | ||||
|             u'description': u'md5:82313335e8a8a3f243351ba55bc1b474', | ||||
|         }, | ||||
|     } | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         video_id = mobj.group('id') | ||||
|         raw_page = self._download_webpage( | ||||
|             'http://techtv.mit.edu/videos/%s' % video_id, video_id) | ||||
|         clean_page = re.compile(u'<!--.*?-->', re.S).sub(u'', raw_page) | ||||
|  | ||||
|         base_url = self._search_regex(r'ipadUrl: \'(.+?cloudfront.net/)', | ||||
|             raw_page, u'base url') | ||||
|         formats_json = self._search_regex(r'bitrates: (\[.+?\])', raw_page, | ||||
|             u'video formats') | ||||
|         formats = json.loads(formats_json) | ||||
|         formats = sorted(formats, key=lambda f: f['bitrate']) | ||||
|  | ||||
|         title = get_element_by_id('edit-title', clean_page) | ||||
|         description = clean_html(get_element_by_id('edit-description', clean_page)) | ||||
|         thumbnail = self._search_regex(r'playlist:.*?url: \'(.+?)\'', | ||||
|             raw_page, u'thumbnail', flags=re.DOTALL) | ||||
|  | ||||
|         return {'id': video_id, | ||||
|                 'title': title, | ||||
|                 'url': base_url + formats[-1]['url'].replace('mp4:', ''), | ||||
|                 'ext': 'mp4', | ||||
|                 'description': description, | ||||
|                 'thumbnail': thumbnail, | ||||
|                 } | ||||
|  | ||||
|  | ||||
| class MITIE(TechTVMITIE): | ||||
|     IE_NAME = u'video.mit.edu' | ||||
|     _VALID_URL = r'https?://video\.mit\.edu/watch/(?P<title>[^/]+)' | ||||
|  | ||||
|     _TEST = { | ||||
|         u'url': u'http://video.mit.edu/watch/the-government-is-profiling-you-13222/', | ||||
|         u'file': u'21783.mp4', | ||||
|         u'md5': u'7db01d5ccc1895fc5010e9c9e13648da', | ||||
|         u'info_dict': { | ||||
|             u'title': u'The Government is Profiling You', | ||||
|             u'description': u'md5:ad5795fe1e1623b73620dbfd47df9afd', | ||||
|         }, | ||||
|     } | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         page_title = mobj.group('title') | ||||
|         webpage = self._download_webpage(url, page_title) | ||||
|         self.to_screen('%s: Extracting %s url' % (page_title, TechTVMITIE.IE_NAME)) | ||||
|         embed_url = self._search_regex(r'<iframe .*?src="(.+?)"', webpage, | ||||
|             u'embed url') | ||||
|         return self.url_result(embed_url, ie='TechTVMIT') | ||||
| @@ -5,34 +5,27 @@ import socket | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     compat_http_client, | ||||
|     compat_str, | ||||
|     compat_urllib_error, | ||||
|     compat_urllib_request, | ||||
|  | ||||
|     ExtractorError, | ||||
|     unified_strdate, | ||||
| ) | ||||
|  | ||||
|  | ||||
| class MixcloudIE(InfoExtractor): | ||||
|     _WORKING = False # New API, but it seems good http://www.mixcloud.com/developers/documentation/ | ||||
|     _VALID_URL = r'^(?:https?://)?(?:www\.)?mixcloud\.com/([\w\d-]+)/([\w\d-]+)' | ||||
|     IE_NAME = u'mixcloud' | ||||
|  | ||||
|     def report_download_json(self, file_id): | ||||
|         """Report JSON download.""" | ||||
|         self.to_screen(u'Downloading json') | ||||
|  | ||||
|     def get_urls(self, jsonData, fmt, bitrate='best'): | ||||
|         """Get urls from 'audio_formats' section in json""" | ||||
|         try: | ||||
|             bitrate_list = jsonData[fmt] | ||||
|             if bitrate is None or bitrate == 'best' or bitrate not in bitrate_list: | ||||
|                 bitrate = max(bitrate_list) # select highest | ||||
|  | ||||
|             url_list = jsonData[fmt][bitrate] | ||||
|         except TypeError: # we have no bitrate info. | ||||
|             url_list = jsonData[fmt] | ||||
|         return url_list | ||||
|     _TEST = { | ||||
|         u'url': u'http://www.mixcloud.com/dholbach/cryptkeeper/', | ||||
|         u'file': u'dholbach-cryptkeeper.mp3', | ||||
|         u'info_dict': { | ||||
|             u'title': u'Cryptkeeper', | ||||
|             u'description': u'After quite a long silence from myself, finally another Drum\'n\'Bass mix with my favourite current dance floor bangers.', | ||||
|             u'uploader': u'Daniel Holbach', | ||||
|             u'uploader_id': u'dholbach', | ||||
|             u'upload_date': u'20111115', | ||||
|         }, | ||||
|     } | ||||
|  | ||||
|     def check_urls(self, url_list): | ||||
|         """Returns 1st active url from list""" | ||||
| @@ -45,71 +38,32 @@ class MixcloudIE(InfoExtractor): | ||||
|  | ||||
|         return None | ||||
|  | ||||
|     def _print_formats(self, formats): | ||||
|         print('Available formats:') | ||||
|         for fmt in formats.keys(): | ||||
|             for b in formats[fmt]: | ||||
|                 try: | ||||
|                     ext = formats[fmt][b][0] | ||||
|                     print('%s\t%s\t[%s]' % (fmt, b, ext.split('.')[-1])) | ||||
|                 except TypeError: # we have no bitrate info | ||||
|                     ext = formats[fmt][0] | ||||
|                     print('%s\t%s\t[%s]' % (fmt, '??', ext.split('.')[-1])) | ||||
|                     break | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         if mobj is None: | ||||
|             raise ExtractorError(u'Invalid URL: %s' % url) | ||||
|         # extract uploader & filename from url | ||||
|         uploader = mobj.group(1).decode('utf-8') | ||||
|         file_id = uploader + "-" + mobj.group(2).decode('utf-8') | ||||
|  | ||||
|         # construct API request | ||||
|         file_url = 'http://www.mixcloud.com/api/1/cloudcast/' + '/'.join(url.split('/')[-3:-1]) + '.json' | ||||
|         # retrieve .json file with links to files | ||||
|         request = compat_urllib_request.Request(file_url) | ||||
|         try: | ||||
|             self.report_download_json(file_url) | ||||
|             jsonData = compat_urllib_request.urlopen(request).read() | ||||
|         except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err: | ||||
|             raise ExtractorError(u'Unable to retrieve file: %s' % compat_str(err)) | ||||
|         uploader = mobj.group(1) | ||||
|         cloudcast_name = mobj.group(2) | ||||
|         track_id = '-'.join((uploader, cloudcast_name)) | ||||
|         api_url = 'http://api.mixcloud.com/%s/%s/' % (uploader, cloudcast_name) | ||||
|         webpage = self._download_webpage(url, track_id) | ||||
|         json_data = self._download_webpage(api_url, track_id, | ||||
|             u'Downloading cloudcast info') | ||||
|         info = json.loads(json_data) | ||||
|  | ||||
|         # parse JSON | ||||
|         json_data = json.loads(jsonData) | ||||
|         player_url = json_data['player_swf_url'] | ||||
|         formats = dict(json_data['audio_formats']) | ||||
|         preview_url = self._search_regex(r'data-preview-url="(.+?)"', webpage, u'preview url') | ||||
|         song_url = preview_url.replace('/previews/', '/cloudcasts/originals/') | ||||
|         template_url = re.sub(r'(stream\d*)', 'stream%d', song_url) | ||||
|         final_song_url = self.check_urls(template_url % i for i in range(30)) | ||||
|  | ||||
|         req_format = self._downloader.params.get('format', None) | ||||
|  | ||||
|         if self._downloader.params.get('listformats', None): | ||||
|             self._print_formats(formats) | ||||
|             return | ||||
|  | ||||
|         if req_format is None or req_format == 'best': | ||||
|             for format_param in formats.keys(): | ||||
|                 url_list = self.get_urls(formats, format_param) | ||||
|                 # check urls | ||||
|                 file_url = self.check_urls(url_list) | ||||
|                 if file_url is not None: | ||||
|                     break # got it! | ||||
|         else: | ||||
|             if req_format not in formats: | ||||
|                 raise ExtractorError(u'Format is not available') | ||||
|  | ||||
|             url_list = self.get_urls(formats, req_format) | ||||
|             file_url = self.check_urls(url_list) | ||||
|             format_param = req_format | ||||
|  | ||||
|         return [{ | ||||
|             'id': file_id.decode('utf-8'), | ||||
|             'url': file_url.decode('utf-8'), | ||||
|             'uploader': uploader.decode('utf-8'), | ||||
|             'upload_date': None, | ||||
|             'title': json_data['name'], | ||||
|             'ext': file_url.split('.')[-1].decode('utf-8'), | ||||
|             'format': (format_param is None and u'NA' or format_param.decode('utf-8')), | ||||
|             'thumbnail': json_data['thumbnail_url'], | ||||
|             'description': json_data['description'], | ||||
|             'player_url': player_url.decode('utf-8'), | ||||
|         }] | ||||
|         return { | ||||
|             'id': track_id, | ||||
|             'title': info['name'], | ||||
|             'url': final_song_url, | ||||
|             'ext': 'mp3', | ||||
|             'description': info['description'], | ||||
|             'thumbnail': info['pictures'].get('extra_large'), | ||||
|             'uploader': info['user']['name'], | ||||
|             'uploader_id': info['user']['username'], | ||||
|             'upload_date': unified_strdate(info['created_time']), | ||||
|             'view_count': info['play_count'], | ||||
|         } | ||||
|   | ||||
| @@ -54,23 +54,26 @@ class MTVIE(InfoExtractor): | ||||
|     def _get_thumbnail_url(self, uri, itemdoc): | ||||
|         return 'http://mtv.mtvnimages.com/uri/' + uri | ||||
|  | ||||
|     def _extract_video_url(self, metadataXml): | ||||
|     def _extract_video_formats(self, metadataXml): | ||||
|         if '/error_country_block.swf' in metadataXml: | ||||
|             raise ExtractorError(u'This video is not available from your country.', expected=True) | ||||
|         mdoc = xml.etree.ElementTree.fromstring(metadataXml.encode('utf-8')) | ||||
|         renditions = mdoc.findall('.//rendition') | ||||
|  | ||||
|         # For now, always pick the highest quality. | ||||
|         rendition = renditions[-1] | ||||
|  | ||||
|         try: | ||||
|             _,_,ext = rendition.attrib['type'].partition('/') | ||||
|             format = ext + '-' + rendition.attrib['width'] + 'x' + rendition.attrib['height'] + '_' + rendition.attrib['bitrate'] | ||||
|             rtmp_video_url = rendition.find('./src').text | ||||
|         except KeyError: | ||||
|             raise ExtractorError('Invalid rendition field.') | ||||
|         video_url = self._transform_rtmp_url(rtmp_video_url) | ||||
|         return {'ext': ext, 'url': video_url, 'format': format} | ||||
|         formats = [] | ||||
|         for rendition in mdoc.findall('.//rendition'): | ||||
|             try: | ||||
|                 _, _, ext = rendition.attrib['type'].partition('/') | ||||
|                 rtmp_video_url = rendition.find('./src').text | ||||
|                 formats.append({'ext': ext, | ||||
|                                 'url': self._transform_rtmp_url(rtmp_video_url), | ||||
|                                 'format_id': rendition.get('bitrate'), | ||||
|                                 'width': int(rendition.get('width')), | ||||
|                                 'height': int(rendition.get('height')), | ||||
|                                 }) | ||||
|             except (KeyError, TypeError): | ||||
|                 raise ExtractorError('Invalid rendition field.') | ||||
|         return formats | ||||
|  | ||||
|     def _get_video_info(self, itemdoc): | ||||
|         uri = itemdoc.find('guid').text | ||||
| @@ -81,19 +84,25 @@ class MTVIE(InfoExtractor): | ||||
|             mediagen_url += '&acceptMethods=fms' | ||||
|         mediagen_page = self._download_webpage(mediagen_url, video_id, | ||||
|                                                u'Downloading video urls') | ||||
|         video_info = self._extract_video_url(mediagen_page) | ||||
|  | ||||
|         description_node = itemdoc.find('description') | ||||
|         if description_node is not None: | ||||
|             description = description_node.text | ||||
|         else: | ||||
|             description = None | ||||
|         video_info.update({'title': itemdoc.find('title').text, | ||||
|                            'id': video_id, | ||||
|                            'thumbnail': self._get_thumbnail_url(uri, itemdoc), | ||||
|                            'description': description, | ||||
|                            }) | ||||
|         return video_info | ||||
|  | ||||
|         info = { | ||||
|             'title': itemdoc.find('title').text, | ||||
|             'formats': self._extract_video_formats(mediagen_page), | ||||
|             'id': video_id, | ||||
|             'thumbnail': self._get_thumbnail_url(uri, itemdoc), | ||||
|             'description': description, | ||||
|         } | ||||
|  | ||||
|         # TODO: Remove when #980 has been merged | ||||
|         info.update(info['formats'][-1]) | ||||
|  | ||||
|         return info | ||||
|  | ||||
|     def _get_videos_info(self, uri): | ||||
|         video_id = self._id_from_uri(uri) | ||||
|   | ||||
							
								
								
									
										64
									
								
								youtube_dl/extractor/muzu.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										64
									
								
								youtube_dl/extractor/muzu.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,64 @@ | ||||
| import re | ||||
| import json | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     compat_urllib_parse, | ||||
|     determine_ext, | ||||
| ) | ||||
|  | ||||
|  | ||||
| class MuzuTVIE(InfoExtractor): | ||||
|     _VALID_URL = r'https?://www.muzu.tv/(.+?)/(.+?)/(?P<id>\d+)' | ||||
|     IE_NAME = u'muzu.tv' | ||||
|  | ||||
|     _TEST = { | ||||
|         u'url': u'http://www.muzu.tv/defected/marcashken-featuring-sos-cat-walk-original-mix-music-video/1981454/', | ||||
|         u'file': u'1981454.mp4', | ||||
|         u'md5': u'98f8b2c7bc50578d6a0364fff2bfb000', | ||||
|         u'info_dict': { | ||||
|             u'title': u'Cat Walk (Original Mix)', | ||||
|             u'description': u'md5:90e868994de201b2570e4e5854e19420', | ||||
|             u'uploader': u'MarcAshken featuring SOS', | ||||
|         }, | ||||
|     } | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         video_id = mobj.group('id') | ||||
|  | ||||
|         info_data = compat_urllib_parse.urlencode({'format': 'json', | ||||
|                                                    'url': url, | ||||
|                                                    }) | ||||
|         video_info_page = self._download_webpage('http://www.muzu.tv/api/oembed/?%s' % info_data, | ||||
|                                                  video_id, u'Downloading video info') | ||||
|         info = json.loads(video_info_page) | ||||
|  | ||||
|         player_info_page = self._download_webpage('http://player.muzu.tv/player/playerInit?ai=%s' % video_id, | ||||
|                                                   video_id, u'Downloading player info') | ||||
|         video_info = json.loads(player_info_page)['videos'][0] | ||||
|         for quality in ['1080' , '720', '480', '360']: | ||||
|             if video_info.get('v%s' % quality): | ||||
|                 break | ||||
|  | ||||
|         data = compat_urllib_parse.urlencode({'ai': video_id, | ||||
|                                               # Even if each time you watch a video the hash changes, | ||||
|                                               # it seems to work for different videos, and it will work | ||||
|                                               # even if you use any non empty string as a hash | ||||
|                                               'viewhash': 'VBNff6djeV4HV5TRPW5kOHub2k', | ||||
|                                               'device': 'web', | ||||
|                                               'qv': quality, | ||||
|                                               }) | ||||
|         video_url_page = self._download_webpage('http://player.muzu.tv/player/requestVideo?%s' % data, | ||||
|                                                 video_id, u'Downloading video url') | ||||
|         video_url_info = json.loads(video_url_page) | ||||
|         video_url = video_url_info['url'] | ||||
|  | ||||
|         return {'id': video_id, | ||||
|                 'title': info['title'], | ||||
|                 'url': video_url, | ||||
|                 'ext': determine_ext(video_url), | ||||
|                 'thumbnail': info['thumbnail_url'], | ||||
|                 'description': info['description'], | ||||
|                 'uploader': info['author_name'], | ||||
|                 } | ||||
| @@ -2,11 +2,13 @@ import binascii | ||||
| import base64 | ||||
| import hashlib | ||||
| import re | ||||
| import json | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     compat_ord, | ||||
|     compat_urllib_parse, | ||||
|     compat_urllib_request, | ||||
|  | ||||
|     ExtractorError, | ||||
| ) | ||||
| @@ -16,7 +18,7 @@ from ..utils import ( | ||||
| class MyVideoIE(InfoExtractor): | ||||
|     """Information Extractor for myvideo.de.""" | ||||
|  | ||||
|     _VALID_URL = r'(?:http://)?(?:www\.)?myvideo\.de/watch/([0-9]+)/([^?/]+).*' | ||||
|     _VALID_URL = r'(?:http://)?(?:www\.)?myvideo\.de/(?:[^/]+/)?watch/([0-9]+)/([^?/]+).*' | ||||
|     IE_NAME = u'myvideo' | ||||
|     _TEST = { | ||||
|         u'url': u'http://www.myvideo.de/watch/8229274/bowling_fail_or_win', | ||||
| @@ -85,6 +87,20 @@ class MyVideoIE(InfoExtractor): | ||||
|                 'ext':      video_ext, | ||||
|             }] | ||||
|  | ||||
|         mobj = re.search(r'data-video-service="/service/data/video/%s/config' % video_id, webpage) | ||||
|         if mobj is not None: | ||||
|             request = compat_urllib_request.Request('http://www.myvideo.de/service/data/video/%s/config' % video_id, '') | ||||
|             response = self._download_webpage(request, video_id, | ||||
|                                               u'Downloading video info') | ||||
|             info = json.loads(base64.b64decode(response).decode('utf-8')) | ||||
|             return {'id': video_id, | ||||
|                     'title': info['title'], | ||||
|                     'url': info['streaming_url'].replace('rtmpe', 'rtmpt'), | ||||
|                     'play_path': info['filename'], | ||||
|                     'ext': 'flv', | ||||
|                     'thumbnail': info['thumbnail'][0]['url'], | ||||
|                     } | ||||
|  | ||||
|         # try encxml | ||||
|         mobj = re.search('var flashvars={(.+?)}', webpage) | ||||
|         if mobj is None: | ||||
|   | ||||
							
								
								
									
										73
									
								
								youtube_dl/extractor/naver.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										73
									
								
								youtube_dl/extractor/naver.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,73 @@ | ||||
| # encoding: utf-8 | ||||
| import re | ||||
| import xml.etree.ElementTree | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     compat_urllib_parse, | ||||
|     ExtractorError, | ||||
| ) | ||||
|  | ||||
|  | ||||
| class NaverIE(InfoExtractor): | ||||
|     _VALID_URL = r'https?://tvcast\.naver\.com/v/(?P<id>\d+)' | ||||
|  | ||||
|     _TEST = { | ||||
|         u'url': u'http://tvcast.naver.com/v/81652', | ||||
|         u'file': u'81652.mp4', | ||||
|         u'info_dict': { | ||||
|             u'title': u'[9월 모의고사 해설강의][수학_김상희] 수학 A형 16~20번', | ||||
|             u'description': u'합격불변의 법칙 메가스터디 | 메가스터디 수학 김상희 선생님이 9월 모의고사 수학A형 16번에서 20번까지 해설강의를 공개합니다.', | ||||
|             u'upload_date': u'20130903', | ||||
|         }, | ||||
|     } | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         video_id = mobj.group(1) | ||||
|         webpage = self._download_webpage(url, video_id) | ||||
|         m_id = re.search(r'var rmcPlayer = new nhn.rmcnmv.RMCVideoPlayer\("(.+?)", "(.+?)"', | ||||
|             webpage) | ||||
|         if m_id is None: | ||||
|             raise ExtractorError(u'couldn\'t extract vid and key') | ||||
|         vid = m_id.group(1) | ||||
|         key = m_id.group(2) | ||||
|         query = compat_urllib_parse.urlencode({'vid': vid, 'inKey': key,}) | ||||
|         query_urls = compat_urllib_parse.urlencode({ | ||||
|             'masterVid': vid, | ||||
|             'protocol': 'p2p', | ||||
|             'inKey': key, | ||||
|         }) | ||||
|         info_xml = self._download_webpage( | ||||
|             'http://serviceapi.rmcnmv.naver.com/flash/videoInfo.nhn?' + query, | ||||
|             video_id, u'Downloading video info') | ||||
|         urls_xml = self._download_webpage( | ||||
|             'http://serviceapi.rmcnmv.naver.com/flash/playableEncodingOption.nhn?' + query_urls, | ||||
|             video_id, u'Downloading video formats info') | ||||
|         info = xml.etree.ElementTree.fromstring(info_xml.encode('utf-8')) | ||||
|         urls = xml.etree.ElementTree.fromstring(urls_xml.encode('utf-8')) | ||||
|  | ||||
|         formats = [] | ||||
|         for format_el in urls.findall('EncodingOptions/EncodingOption'): | ||||
|             domain = format_el.find('Domain').text | ||||
|             if domain.startswith('rtmp'): | ||||
|                 continue | ||||
|             formats.append({ | ||||
|                 'url': domain + format_el.find('uri').text, | ||||
|                 'ext': 'mp4', | ||||
|                 'width': int(format_el.find('width').text), | ||||
|                 'height': int(format_el.find('height').text), | ||||
|             }) | ||||
|  | ||||
|         info = { | ||||
|             'id': video_id, | ||||
|             'title': info.find('Subject').text, | ||||
|             'formats': formats, | ||||
|             'description': self._og_search_description(webpage), | ||||
|             'thumbnail': self._og_search_thumbnail(webpage), | ||||
|             'upload_date': info.find('WriteDate').text.replace('.', ''), | ||||
|             'view_count': int(info.find('PlayCount').text), | ||||
|         } | ||||
|         # TODO: Remove when #980 has been merged | ||||
|         info.update(formats[-1]) | ||||
|         return info | ||||
							
								
								
									
										33
									
								
								youtube_dl/extractor/nbc.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										33
									
								
								youtube_dl/extractor/nbc.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,33 @@ | ||||
| import re | ||||
| import xml.etree.ElementTree | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import find_xpath_attr, compat_str | ||||
|  | ||||
|  | ||||
| class NBCNewsIE(InfoExtractor): | ||||
|     _VALID_URL = r'https?://www\.nbcnews\.com/video/.+?/(?P<id>\d+)' | ||||
|  | ||||
|     _TEST = { | ||||
|         u'url': u'http://www.nbcnews.com/video/nbc-news/52753292', | ||||
|         u'file': u'52753292.flv', | ||||
|         u'md5': u'47abaac93c6eaf9ad37ee6c4463a5179', | ||||
|         u'info_dict': { | ||||
|             u'title': u'Crew emerges after four-month Mars food study', | ||||
|             u'description': u'md5:24e632ffac72b35f8b67a12d1b6ddfc1', | ||||
|         }, | ||||
|     } | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         video_id = mobj.group('id') | ||||
|         info_xml = self._download_webpage('http://www.nbcnews.com/id/%s/displaymode/1219' % video_id, video_id) | ||||
|         info = xml.etree.ElementTree.fromstring(info_xml.encode('utf-8')).find('video') | ||||
|  | ||||
|         return {'id': video_id, | ||||
|                 'title': info.find('headline').text, | ||||
|                 'ext': 'flv', | ||||
|                 'url': find_xpath_attr(info, 'media', 'type', 'flashVideo').text, | ||||
|                 'description': compat_str(info.find('caption').text), | ||||
|                 'thumbnail': find_xpath_attr(info, 'media', 'type', 'thumbnail').text, | ||||
|                 } | ||||
							
								
								
									
										38
									
								
								youtube_dl/extractor/newgrounds.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										38
									
								
								youtube_dl/extractor/newgrounds.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,38 @@ | ||||
| import json | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import determine_ext | ||||
|  | ||||
|  | ||||
| class NewgroundsIE(InfoExtractor): | ||||
|     _VALID_URL = r'(?:https?://)?(?:www\.)?newgrounds\.com/audio/listen/(?P<id>\d+)' | ||||
|     _TEST = { | ||||
|         u'url': u'http://www.newgrounds.com/audio/listen/549479', | ||||
|         u'file': u'549479.mp3', | ||||
|         u'md5': u'fe6033d297591288fa1c1f780386f07a', | ||||
|         u'info_dict': { | ||||
|             u"title": u"B7 - BusMode", | ||||
|             u"uploader": u"Burn7", | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         music_id = mobj.group('id') | ||||
|         webpage = self._download_webpage(url, music_id) | ||||
|          | ||||
|         title = self._html_search_regex(r',"name":"([^"]+)",', webpage, u'music title') | ||||
|         uploader = self._html_search_regex(r',"artist":"([^"]+)",', webpage, u'music uploader') | ||||
|          | ||||
|         music_url_json_string = self._html_search_regex(r'({"url":"[^"]+"),', webpage, u'music url') + '}' | ||||
|         music_url_json = json.loads(music_url_json_string) | ||||
|         music_url = music_url_json['url'] | ||||
|  | ||||
|         return { | ||||
|             'id':       music_id, | ||||
|             'title':    title, | ||||
|             'url':      music_url, | ||||
|             'uploader': uploader, | ||||
|             'ext':      determine_ext(music_url), | ||||
|         } | ||||
							
								
								
									
										58
									
								
								youtube_dl/extractor/ooyala.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										58
									
								
								youtube_dl/extractor/ooyala.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,58 @@ | ||||
| import re | ||||
| import json | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import unescapeHTML | ||||
|  | ||||
| class OoyalaIE(InfoExtractor): | ||||
|     _VALID_URL = r'https?://.+?\.ooyala\.com/.*?embedCode=(?P<id>.+?)(&|$)' | ||||
|  | ||||
|     _TEST = { | ||||
|         # From http://it.slashdot.org/story/13/04/25/178216/recovering-data-from-broken-hard-drives-and-ssds-video | ||||
|         u'url': u'http://player.ooyala.com/player.js?embedCode=pxczE2YjpfHfn1f3M-ykG_AmJRRn0PD8', | ||||
|         u'file': u'pxczE2YjpfHfn1f3M-ykG_AmJRRn0PD8.mp4', | ||||
|         u'md5': u'3f5cceb3a7bf461d6c29dc466cf8033c', | ||||
|         u'info_dict': { | ||||
|             u'title': u'Explaining Data Recovery from Hard Drives and SSDs', | ||||
|             u'description': u'How badly damaged does a drive have to be to defeat Russell and his crew? Apparently, smashed to bits.', | ||||
|         }, | ||||
|     } | ||||
|  | ||||
|     @staticmethod | ||||
|     def _url_for_embed_code(embed_code): | ||||
|         return 'http://player.ooyala.com/player.js?embedCode=%s' % embed_code | ||||
|  | ||||
|     def _extract_result(self, info, more_info): | ||||
|         return {'id': info['embedCode'], | ||||
|                 'ext': 'mp4', | ||||
|                 'title': unescapeHTML(info['title']), | ||||
|                 'url': info.get('ipad_url') or info['url'], | ||||
|                 'description': unescapeHTML(more_info['description']), | ||||
|                 'thumbnail': more_info['promo'], | ||||
|                 } | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         embedCode = mobj.group('id') | ||||
|         player_url = 'http://player.ooyala.com/player.js?embedCode=%s' % embedCode | ||||
|         player = self._download_webpage(player_url, embedCode) | ||||
|         mobile_url = self._search_regex(r'mobile_player_url="(.+?)&device="', | ||||
|                                         player, u'mobile player url') | ||||
|         mobile_player = self._download_webpage(mobile_url, embedCode) | ||||
|         videos_info = self._search_regex( | ||||
|             r'var streams=window.oo_testEnv\?\[\]:eval\("\((\[{.*?}\])\)"\);', | ||||
|             mobile_player, u'info').replace('\\"','"') | ||||
|         videos_more_info = self._search_regex(r'eval\("\(({.*?\\"promo\\".*?})\)"', mobile_player, u'more info').replace('\\"','"') | ||||
|         videos_info = json.loads(videos_info) | ||||
|         videos_more_info =json.loads(videos_more_info) | ||||
|  | ||||
|         if videos_more_info.get('lineup'): | ||||
|             videos = [self._extract_result(info, more_info) for (info, more_info) in zip(videos_info, videos_more_info['lineup'])] | ||||
|             return {'_type': 'playlist', | ||||
|                     'id': embedCode, | ||||
|                     'title': unescapeHTML(videos_more_info['title']), | ||||
|                     'entries': videos, | ||||
|                     } | ||||
|         else: | ||||
|             return self._extract_result(videos_info[0], videos_more_info) | ||||
|          | ||||
							
								
								
									
										54
									
								
								youtube_dl/extractor/orf.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										54
									
								
								youtube_dl/extractor/orf.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,54 @@ | ||||
| # coding: utf-8 | ||||
|  | ||||
| import re | ||||
| import xml.etree.ElementTree | ||||
| import json | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     compat_urlparse, | ||||
|     ExtractorError, | ||||
|     find_xpath_attr, | ||||
| ) | ||||
|  | ||||
| class ORFIE(InfoExtractor): | ||||
|     _VALID_URL = r'https?://tvthek.orf.at/(programs/.+?/episodes|topics/.+?)/(?P<id>\d+)' | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         playlist_id = mobj.group('id') | ||||
|         webpage = self._download_webpage(url, playlist_id) | ||||
|  | ||||
|         flash_xml = self._search_regex('ORF.flashXML = \'(.+?)\'', webpage, u'flash xml') | ||||
|         flash_xml = compat_urlparse.parse_qs('xml='+flash_xml)['xml'][0] | ||||
|         flash_config = xml.etree.ElementTree.fromstring(flash_xml.encode('utf-8')) | ||||
|         playlist_json = self._search_regex(r'playlist\': \'(\[.*?\])\'', webpage, u'playlist').replace(r'\"','"') | ||||
|         playlist = json.loads(playlist_json) | ||||
|  | ||||
|         videos = [] | ||||
|         ns = '{http://tempuri.org/XMLSchema.xsd}' | ||||
|         xpath = '%(ns)sPlaylist/%(ns)sItems/%(ns)sItem' % {'ns': ns} | ||||
|         webpage_description = self._og_search_description(webpage) | ||||
|         for (i, (item, info)) in enumerate(zip(flash_config.findall(xpath), playlist), 1): | ||||
|             # Get best quality url | ||||
|             rtmp_url = None | ||||
|             for q in ['Q6A', 'Q4A', 'Q1A']: | ||||
|                 video_url = find_xpath_attr(item, '%sVideoUrl' % ns, 'quality', q) | ||||
|                 if video_url is not None: | ||||
|                     rtmp_url = video_url.text | ||||
|                     break | ||||
|             if rtmp_url is None: | ||||
|                 raise ExtractorError(u'Couldn\'t get video url: %s' % info['id']) | ||||
|             description = self._html_search_regex( | ||||
|                 r'id="playlist_entry_%s".*?<p>(.*?)</p>' % i, webpage, | ||||
|                 u'description', default=webpage_description, flags=re.DOTALL) | ||||
|             videos.append({ | ||||
|                 '_type': 'video', | ||||
|                 'id': info['id'], | ||||
|                 'title': info['title'], | ||||
|                 'url': rtmp_url, | ||||
|                 'ext': 'flv', | ||||
|                 'description': description, | ||||
|                 }) | ||||
|  | ||||
|         return videos | ||||
							
								
								
									
										34
									
								
								youtube_dl/extractor/pbs.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										34
									
								
								youtube_dl/extractor/pbs.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,34 @@ | ||||
| import re | ||||
| import json | ||||
|  | ||||
| from .common import InfoExtractor | ||||
|  | ||||
|  | ||||
| class PBSIE(InfoExtractor): | ||||
|     _VALID_URL = r'https?://video.pbs.org/video/(?P<id>\d+)/?' | ||||
|  | ||||
|     _TEST = { | ||||
|         u'url': u'http://video.pbs.org/video/2365006249/', | ||||
|         u'file': u'2365006249.mp4', | ||||
|         u'md5': 'ce1888486f0908d555a8093cac9a7362', | ||||
|         u'info_dict': { | ||||
|             u'title': u'A More Perfect Union', | ||||
|             u'description': u'md5:ba0c207295339c8d6eced00b7c363c6a', | ||||
|             u'duration': 3190, | ||||
|         }, | ||||
|     } | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         video_id = mobj.group('id') | ||||
|         info_url = 'http://video.pbs.org/videoInfo/%s?format=json' % video_id | ||||
|         info_page = self._download_webpage(info_url, video_id) | ||||
|         info =json.loads(info_page) | ||||
|         return {'id': video_id, | ||||
|                 'title': info['title'], | ||||
|                 'url': info['alternate_encoding']['url'], | ||||
|                 'ext': 'mp4', | ||||
|                 'description': info['program'].get('description'), | ||||
|                 'thumbnail': info.get('image_url'), | ||||
|                 'duration': info.get('duration'), | ||||
|                 } | ||||
| @@ -38,6 +38,7 @@ class PornotubeIE(InfoExtractor): | ||||
|         VIDEO_UPLOADED_RE = r'<div class="video_added_by">Added (?P<date>[0-9\/]+) by' | ||||
|         upload_date = self._html_search_regex(VIDEO_UPLOADED_RE, webpage, u'upload date', fatal=False) | ||||
|         if upload_date: upload_date = unified_strdate(upload_date) | ||||
|         age_limit = self._rta_search(webpage) | ||||
|  | ||||
|         info = {'id': video_id, | ||||
|                 'url': video_url, | ||||
| @@ -45,6 +46,7 @@ class PornotubeIE(InfoExtractor): | ||||
|                 'upload_date': upload_date, | ||||
|                 'title': video_title, | ||||
|                 'ext': 'flv', | ||||
|                 'format': 'flv'} | ||||
|                 'format': 'flv', | ||||
|                 'age_limit': age_limit} | ||||
|  | ||||
|         return [info] | ||||
|   | ||||
| @@ -14,24 +14,30 @@ class RedTubeIE(InfoExtractor): | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     def _real_extract(self,url): | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|  | ||||
|         video_id = mobj.group('id') | ||||
|         video_extension = 'mp4'         | ||||
|         video_extension = 'mp4' | ||||
|         webpage = self._download_webpage(url, video_id) | ||||
|  | ||||
|         self.report_extraction(video_id) | ||||
|  | ||||
|         video_url = self._html_search_regex(r'<source src="(.+?)" type="video/mp4">', | ||||
|             webpage, u'video URL') | ||||
|         video_url = self._html_search_regex( | ||||
|             r'<source src="(.+?)" type="video/mp4">', webpage, u'video URL') | ||||
|  | ||||
|         video_title = self._html_search_regex('<h1 class="videoTitle slidePanelMovable">(.+?)</h1>', | ||||
|         video_title = self._html_search_regex( | ||||
|             r'<h1 class="videoTitle slidePanelMovable">(.+?)</h1>', | ||||
|             webpage, u'title') | ||||
|  | ||||
|         return [{ | ||||
|             'id':       video_id, | ||||
|             'url':      video_url, | ||||
|             'ext':      video_extension, | ||||
|             'title':    video_title, | ||||
|         }] | ||||
|         # No self-labeling, but they describe themselves as | ||||
|         # "Home of Videos Porno" | ||||
|         age_limit = 18 | ||||
|  | ||||
|         return { | ||||
|             'id':        video_id, | ||||
|             'url':       video_url, | ||||
|             'ext':       video_extension, | ||||
|             'title':     video_title, | ||||
|             'age_limit': age_limit, | ||||
|         } | ||||
|   | ||||
							
								
								
									
										42
									
								
								youtube_dl/extractor/ro220.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										42
									
								
								youtube_dl/extractor/ro220.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,42 @@ | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     clean_html, | ||||
|     compat_parse_qs, | ||||
| ) | ||||
|  | ||||
|  | ||||
| class Ro220IE(InfoExtractor): | ||||
|     IE_NAME = '220.ro' | ||||
|     _VALID_URL = r'(?x)(?:https?://)?(?:www\.)?220\.ro/(?P<category>[^/]+)/(?P<shorttitle>[^/]+)/(?P<video_id>[^/]+)' | ||||
|     _TEST = { | ||||
|         u"url": u"http://www.220.ro/sport/Luati-Le-Banii-Sez-4-Ep-1/LYV6doKo7f/", | ||||
|         u'file': u'LYV6doKo7f.mp4', | ||||
|         u'md5': u'03af18b73a07b4088753930db7a34add', | ||||
|         u'info_dict': { | ||||
|             u"title": u"Luati-le Banii sez 4 ep 1", | ||||
|             u"description": u"Iata-ne reveniti dupa o binemeritata vacanta. Va astept si pe Facebook cu pareri si comentarii.", | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         video_id = mobj.group('video_id') | ||||
|  | ||||
|         webpage = self._download_webpage(url, video_id) | ||||
|         flashVars_str = self._search_regex( | ||||
|             r'<param name="flashVars" value="([^"]+)"', | ||||
|             webpage, u'flashVars') | ||||
|         flashVars = compat_parse_qs(flashVars_str) | ||||
|  | ||||
|         info = { | ||||
|             '_type': 'video', | ||||
|             'id': video_id, | ||||
|             'ext': 'mp4', | ||||
|             'url': flashVars['videoURL'][0], | ||||
|             'title': flashVars['title'][0], | ||||
|             'description': clean_html(flashVars['desc'][0]), | ||||
|             'thumbnail': flashVars['preview'][0], | ||||
|         } | ||||
|         return info | ||||
							
								
								
									
										49
									
								
								youtube_dl/extractor/roxwel.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										49
									
								
								youtube_dl/extractor/roxwel.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,49 @@ | ||||
| import re | ||||
| import json | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import unified_strdate, determine_ext | ||||
|  | ||||
|  | ||||
| class RoxwelIE(InfoExtractor): | ||||
|     _VALID_URL = r'https?://www\.roxwel\.com/player/(?P<filename>.+?)(\.|\?|$)' | ||||
|  | ||||
|     _TEST = { | ||||
|         u'url': u'http://www.roxwel.com/player/passionpittakeawalklive.html', | ||||
|         u'file': u'passionpittakeawalklive.flv', | ||||
|         u'md5': u'd9dea8360a1e7d485d2206db7fe13035', | ||||
|         u'info_dict': { | ||||
|             u'title': u'Take A Walk (live)', | ||||
|             u'uploader': u'Passion Pit', | ||||
|             u'description': u'Passion Pit performs "Take A Walk\" live at The Backyard in Austin, Texas. ', | ||||
|         }, | ||||
|         u'skip': u'Requires rtmpdump', | ||||
|     } | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         filename = mobj.group('filename') | ||||
|         info_url = 'http://www.roxwel.com/api/videos/%s' % filename | ||||
|         info_page = self._download_webpage(info_url, filename, | ||||
|                                            u'Downloading video info') | ||||
|  | ||||
|         self.report_extraction(filename) | ||||
|         info = json.loads(info_page) | ||||
|         rtmp_rates = sorted([int(r.replace('flv_', '')) for r in info['media_rates'] if r.startswith('flv_')]) | ||||
|         best_rate = rtmp_rates[-1] | ||||
|         url_page_url = 'http://roxwel.com/pl_one_time.php?filename=%s&quality=%s' % (filename, best_rate) | ||||
|         rtmp_url = self._download_webpage(url_page_url, filename, u'Downloading video url') | ||||
|         ext = determine_ext(rtmp_url) | ||||
|         if ext == 'f4v': | ||||
|             rtmp_url = rtmp_url.replace(filename, 'mp4:%s' % filename) | ||||
|  | ||||
|         return {'id': filename, | ||||
|                 'title': info['title'], | ||||
|                 'url': rtmp_url, | ||||
|                 'ext': 'flv', | ||||
|                 'description': info['description'], | ||||
|                 'thumbnail': info.get('player_image_url') or info.get('image_url_large'), | ||||
|                 'uploader': info['artist'], | ||||
|                 'uploader_id': info['artistname'], | ||||
|                 'upload_date': unified_strdate(info['dbdate']), | ||||
|                 } | ||||
							
								
								
									
										156
									
								
								youtube_dl/extractor/rtlnow.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										156
									
								
								youtube_dl/extractor/rtlnow.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,156 @@ | ||||
| # encoding: utf-8 | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     clean_html, | ||||
|     ExtractorError, | ||||
| ) | ||||
|  | ||||
| class RTLnowIE(InfoExtractor): | ||||
|     """Information Extractor for RTL NOW, RTL2 NOW, RTL NITRO, SUPER RTL NOW, VOX NOW and n-tv NOW""" | ||||
|     _VALID_URL = r'(?:http://)?(?P<url>(?P<base_url>rtl-now\.rtl\.de/|rtl2now\.rtl2\.de/|(?:www\.)?voxnow\.de/|(?:www\.)?rtlnitronow\.de/|(?:www\.)?superrtlnow\.de/|(?:www\.)?n-tvnow\.de/)[a-zA-Z0-9-]+/[a-zA-Z0-9-]+\.php\?(?:container_id|film_id)=(?P<video_id>[0-9]+)&player=1(?:&season=[0-9]+)?(?:&.*)?)' | ||||
|     _TESTS = [{ | ||||
|         u'url': u'http://rtl-now.rtl.de/ahornallee/folge-1.php?film_id=90419&player=1&season=1', | ||||
|         u'file': u'90419.flv', | ||||
|         u'info_dict': { | ||||
|             u'upload_date': u'20070416',  | ||||
|             u'title': u'Ahornallee - Folge 1 - Der Einzug', | ||||
|             u'description': u'Folge 1 - Der Einzug', | ||||
|         }, | ||||
|         u'params': { | ||||
|             u'skip_download': True, | ||||
|         }, | ||||
|         u'skip': u'Only works from Germany', | ||||
|     }, | ||||
|     { | ||||
|         u'url': u'http://rtl2now.rtl2.de/aerger-im-revier/episode-15-teil-1.php?film_id=69756&player=1&season=2&index=5', | ||||
|         u'file': u'69756.flv', | ||||
|         u'info_dict': { | ||||
|             u'upload_date': u'20120519',  | ||||
|             u'title': u'Ärger im Revier - Ein junger Ladendieb, ein handfester Streit...', | ||||
|             u'description': u'Ärger im Revier - Ein junger Ladendieb, ein handfester Streit u.a.', | ||||
|             u'thumbnail': u'http://autoimg.static-fra.de/rtl2now/219850/1500x1500/image2.jpg', | ||||
|         }, | ||||
|         u'params': { | ||||
|             u'skip_download': True, | ||||
|         }, | ||||
|         u'skip': u'Only works from Germany', | ||||
|     }, | ||||
|     { | ||||
|         u'url': u'www.voxnow.de/voxtours/suedafrika-reporter-ii.php?film_id=13883&player=1&season=17', | ||||
|         u'file': u'13883.flv', | ||||
|         u'info_dict': { | ||||
|             u'upload_date': u'20090627',  | ||||
|             u'title': u'Voxtours - Südafrika-Reporter II', | ||||
|             u'description': u'Südafrika-Reporter II', | ||||
|         }, | ||||
|         u'params': { | ||||
|             u'skip_download': True, | ||||
|         }, | ||||
|     }, | ||||
|     { | ||||
|         u'url': u'http://superrtlnow.de/medicopter-117/angst.php?film_id=99205&player=1', | ||||
|         u'file': u'99205.flv', | ||||
|         u'info_dict': { | ||||
|             u'upload_date': u'20080928',  | ||||
|             u'title': u'Medicopter 117 - Angst!', | ||||
|             u'description': u'Angst!', | ||||
|             u'thumbnail': u'http://autoimg.static-fra.de/superrtlnow/287529/1500x1500/image2.jpg' | ||||
|         }, | ||||
|         u'params': { | ||||
|             u'skip_download': True, | ||||
|         }, | ||||
|     }, | ||||
|     { | ||||
|         u'url': u'http://www.rtlnitronow.de/recht-ordnung/lebensmittelkontrolle-erlangenordnungsamt-berlin.php?film_id=127367&player=1&season=1', | ||||
|         u'file': u'127367.flv', | ||||
|         u'info_dict': { | ||||
|             u'upload_date': u'20130926',  | ||||
|             u'title': u'Recht & Ordnung - Lebensmittelkontrolle Erlangen/Ordnungsamt...', | ||||
|             u'description': u'Lebensmittelkontrolle Erlangen/Ordnungsamt Berlin', | ||||
|             u'thumbnail': u'http://autoimg.static-fra.de/nitronow/344787/1500x1500/image2.jpg', | ||||
|         }, | ||||
|         u'params': { | ||||
|             u'skip_download': True, | ||||
|         }, | ||||
|     }, | ||||
|     { | ||||
|         u'url': u'http://www.n-tvnow.de/top-gear/episode-1-2013-01-01-00-00-00.php?film_id=124903&player=1&season=10', | ||||
|         u'file': u'124903.flv', | ||||
|         u'info_dict': { | ||||
|             u'upload_date': u'20130101',  | ||||
|             u'title': u'Top Gear vom 01.01.2013', | ||||
|             u'description': u'Episode 1', | ||||
|         }, | ||||
|         u'params': { | ||||
|             u'skip_download': True, | ||||
|         }, | ||||
|         u'skip': u'Only works from Germany', | ||||
|     }] | ||||
|  | ||||
|  | ||||
|     def _real_extract(self,url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|  | ||||
|         webpage_url = u'http://' + mobj.group('url') | ||||
|         video_page_url = u'http://' + mobj.group('base_url') | ||||
|         video_id = mobj.group(u'video_id') | ||||
|  | ||||
|         webpage = self._download_webpage(webpage_url, video_id) | ||||
|  | ||||
|         note_m = re.search(r'''(?sx) | ||||
|             <div[ ]style="margin-left:[ ]20px;[ ]font-size:[ ]13px;">(.*?) | ||||
|             <div[ ]id="playerteaser">''', webpage) | ||||
|         if note_m: | ||||
|             msg = clean_html(note_m.group(1)) | ||||
|             raise ExtractorError(msg) | ||||
|  | ||||
|         video_title = self._html_search_regex(r'<title>(?P<title>[^<]+?)( \| [^<]*)?</title>', | ||||
|             webpage, u'title') | ||||
|         playerdata_url = self._html_search_regex(r'\'playerdata\': \'(?P<playerdata_url>[^\']+)\'', | ||||
|             webpage, u'playerdata_url') | ||||
|  | ||||
|         playerdata = self._download_webpage(playerdata_url, video_id) | ||||
|         mobj = re.search(r'<title><!\[CDATA\[(?P<description>.+?)(?:\s+- (?:Sendung )?vom (?P<upload_date_d>[0-9]{2})\.(?P<upload_date_m>[0-9]{2})\.(?:(?P<upload_date_Y>[0-9]{4})|(?P<upload_date_y>[0-9]{2})) [0-9]{2}:[0-9]{2} Uhr)?\]\]></title>', playerdata) | ||||
|         if mobj: | ||||
|             video_description = mobj.group(u'description') | ||||
|             if mobj.group('upload_date_Y'): | ||||
|                 video_upload_date = mobj.group('upload_date_Y') | ||||
|             elif mobj.group('upload_date_y'): | ||||
|                 video_upload_date = u'20' + mobj.group('upload_date_y') | ||||
|             else: | ||||
|                 video_upload_date = None | ||||
|             if video_upload_date: | ||||
|                 video_upload_date += mobj.group('upload_date_m')+mobj.group('upload_date_d') | ||||
|         else: | ||||
|             video_description = None | ||||
|             video_upload_date = None | ||||
|             self._downloader.report_warning(u'Unable to extract description and upload date') | ||||
|  | ||||
|         # Thumbnail: not every video has an thumbnail | ||||
|         mobj = re.search(r'<meta property="og:image" content="(?P<thumbnail>[^"]+)">', webpage) | ||||
|         if mobj: | ||||
|             video_thumbnail = mobj.group(u'thumbnail') | ||||
|         else: | ||||
|             video_thumbnail = None | ||||
|  | ||||
|         mobj = re.search(r'<filename [^>]+><!\[CDATA\[(?P<url>rtmpe://(?:[^/]+/){2})(?P<play_path>[^\]]+)\]\]></filename>', playerdata) | ||||
|         if mobj is None: | ||||
|             raise ExtractorError(u'Unable to extract media URL') | ||||
|         video_url = mobj.group(u'url') | ||||
|         video_play_path = u'mp4:' + mobj.group(u'play_path') | ||||
|         video_player_url = video_page_url + u'includes/vodplayer.swf' | ||||
|  | ||||
|         return [{ | ||||
|             'id':          video_id, | ||||
|             'url':         video_url, | ||||
|             'play_path':   video_play_path, | ||||
|             'page_url':    video_page_url, | ||||
|             'player_url':  video_player_url, | ||||
|             'ext':         'flv', | ||||
|             'title':       video_title, | ||||
|             'description': video_description, | ||||
|             'upload_date': video_upload_date, | ||||
|             'thumbnail':   video_thumbnail, | ||||
|         }] | ||||
							
								
								
									
										23
									
								
								youtube_dl/extractor/slashdot.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										23
									
								
								youtube_dl/extractor/slashdot.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,23 @@ | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
|  | ||||
|  | ||||
| class SlashdotIE(InfoExtractor): | ||||
|     _VALID_URL = r'https?://tv.slashdot.org/video/\?embed=(?P<id>.*?)(&|$)' | ||||
|  | ||||
|     _TEST = { | ||||
|         u'url': u'http://tv.slashdot.org/video/?embed=JscHMzZDplD0p-yNLOzTfzC3Q3xzJaUz', | ||||
|         u'file': u'JscHMzZDplD0p-yNLOzTfzC3Q3xzJaUz.mp4', | ||||
|         u'md5': u'd2222e7a4a4c1541b3e0cf732fb26735', | ||||
|         u'info_dict': { | ||||
|             u'title': u' Meet the Stampede Supercomputing Cluster\'s Administrator', | ||||
|         }, | ||||
|     } | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         video_id = mobj.group('id') | ||||
|         webpage = self._download_webpage(url, video_id) | ||||
|         ooyala_url = self._search_regex(r'<script src="(.*?)"', webpage, 'ooyala url') | ||||
|         return self.url_result(ooyala_url, 'Ooyala') | ||||
							
								
								
									
										47
									
								
								youtube_dl/extractor/slideshare.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										47
									
								
								youtube_dl/extractor/slideshare.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,47 @@ | ||||
| import re | ||||
| import json | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     compat_urlparse, | ||||
|     ExtractorError, | ||||
| ) | ||||
|  | ||||
|  | ||||
| class SlideshareIE(InfoExtractor): | ||||
|     _VALID_URL = r'https?://www\.slideshare\.net/[^/]+?/(?P<title>.+?)($|\?)' | ||||
|  | ||||
|     _TEST = { | ||||
|         u'url': u'http://www.slideshare.net/Dataversity/keynote-presentation-managing-scale-and-complexity', | ||||
|         u'file': u'25665706.mp4', | ||||
|         u'info_dict': { | ||||
|             u'title': u'Managing Scale and Complexity', | ||||
|             u'description': u'This was a keynote presentation at the NoSQL Now! 2013 Conference & Expo (http://www.nosqlnow.com). This presentation was given by Adrian Cockcroft from Netflix', | ||||
|         }, | ||||
|     } | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         page_title = mobj.group('title') | ||||
|         webpage = self._download_webpage(url, page_title) | ||||
|         slideshare_obj = self._search_regex( | ||||
|             r'var slideshare_object =  ({.*?}); var user_info =', | ||||
|             webpage, u'slideshare object') | ||||
|         info = json.loads(slideshare_obj) | ||||
|         if info['slideshow']['type'] != u'video': | ||||
|             raise ExtractorError(u'Webpage type is "%s": only video extraction is supported for Slideshare' % info['slideshow']['type'], expected=True) | ||||
|  | ||||
|         doc = info['doc'] | ||||
|         bucket = info['jsplayer']['video_bucket'] | ||||
|         ext = info['jsplayer']['video_extension'] | ||||
|         video_url = compat_urlparse.urljoin(bucket, doc + '-SD.' + ext) | ||||
|  | ||||
|         return { | ||||
|             '_type': 'video', | ||||
|             'id': info['slideshow']['id'], | ||||
|             'title': info['slideshow']['title'], | ||||
|             'ext': ext, | ||||
|             'url': video_url, | ||||
|             'thumbnail': info['slideshow']['pin_image_url'], | ||||
|             'description': self._og_search_description(webpage), | ||||
|         } | ||||
							
								
								
									
										94
									
								
								youtube_dl/extractor/sohu.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										94
									
								
								youtube_dl/extractor/sohu.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,94 @@ | ||||
| # encoding: utf-8 | ||||
|  | ||||
| import json | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ExtractorError | ||||
|  | ||||
|  | ||||
| class SohuIE(InfoExtractor): | ||||
|     """Extractor for tv.sohu.com and my.tv.sohu.com video pages.""" | ||||
|     _VALID_URL = r'https?://(?P<mytv>my\.)?tv\.sohu\.com/.+?/(?(mytv)|n)(?P<id>\d+)\.shtml.*?' | ||||
|  | ||||
|     _TEST = { | ||||
|         u'url': u'http://tv.sohu.com/20130724/n382479172.shtml#super', | ||||
|         u'file': u'382479172.mp4', | ||||
|         u'md5': u'bde8d9a6ffd82c63a1eefaef4eeefec7', | ||||
|         u'info_dict': { | ||||
|             u'title': u'MV:Far East Movement《The Illest》', | ||||
|         }, | ||||
|     } | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         """Return the info dict for *url*. | ||||
|  | ||||
|         A video made of a single part is returned as one video dict; a | ||||
|         video split into several parts is returned as a playlist dict with | ||||
|         one entry per part. | ||||
|         """ | ||||
|  | ||||
|         def _fetch_data(vid_id, mytv=False): | ||||
|             """Download and parse the JSON description for *vid_id*. | ||||
|  | ||||
|             Uses ``video_id`` from the enclosing scope for progress output; | ||||
|             it is assigned below, before the first call to this helper. | ||||
|             """ | ||||
|             if mytv: | ||||
|                 base_data_url = 'http://my.tv.sohu.com/play/videonew.do?vid=' | ||||
|             else: | ||||
|                 base_data_url = u'http://hot.vrs.sohu.com/vrs_flash.action?vid=' | ||||
|             data_url = base_data_url + str(vid_id) | ||||
|             data_json = self._download_webpage( | ||||
|                 data_url, video_id, | ||||
|                 note=u'Downloading JSON data for ' + str(vid_id)) | ||||
|             return json.loads(data_json) | ||||
|  | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         video_id = mobj.group('id') | ||||
|         # The optional 'my.' host prefix selects the my.tv.sohu.com data endpoint. | ||||
|         mytv = mobj.group('mytv') is not None | ||||
|  | ||||
|         webpage = self._download_webpage(url, video_id) | ||||
|         raw_title = self._html_search_regex(r'(?s)<title>(.+?)</title>', | ||||
|                                             webpage, u'video title') | ||||
|         # Only the text before the first '-' of the page title is kept. | ||||
|         title = raw_title.partition('-')[0].strip() | ||||
|  | ||||
|         vid = self._html_search_regex(r'var vid ?= ?["\'](\d+)["\']', webpage, | ||||
|                                       u'video path') | ||||
|         data = _fetch_data(vid, mytv) | ||||
|  | ||||
|         # Collect the per-quality ids; a value of 0 means that quality is skipped. | ||||
|         QUALITIES = ('ori', 'super', 'high', 'nor') | ||||
|         vid_ids = [data['data'][q + 'Vid'] | ||||
|                    for q in QUALITIES | ||||
|                    if data['data'][q + 'Vid'] != 0] | ||||
|         if not vid_ids: | ||||
|             raise ExtractorError(u'No formats available for this video') | ||||
|  | ||||
|         # For now, we just pick the highest available quality | ||||
|         # NOTE(review): this takes the LAST available entry of QUALITIES; if | ||||
|         # the tuple is ordered best-to-worst (as the names suggest), that is | ||||
|         # the lowest quality rather than the highest - confirm the ordering. | ||||
|         vid_id = vid_ids[-1] | ||||
|  | ||||
|         # NOTE(review): vid is a string captured by a regex; if the JSON ids | ||||
|         # are integers this comparison is never true and the data is always | ||||
|         # fetched a second time - confirm. | ||||
|         format_data = data if vid == vid_id else _fetch_data(vid_id, mytv) | ||||
|         part_count = format_data['data']['totalBlocks'] | ||||
|         allot = format_data['allot'] | ||||
|         prot = format_data['prot'] | ||||
|         clipsURL = format_data['data']['clipsURL'] | ||||
|         su = format_data['data']['su'] | ||||
|  | ||||
|         playlist = [] | ||||
|         for i in range(part_count): | ||||
|             part_url = ('http://%s/?prot=%s&file=%s&new=%s' % | ||||
|                         (allot, prot, clipsURL[i], su[i])) | ||||
|             part_str = self._download_webpage( | ||||
|                 part_url, video_id, | ||||
|                 note=u'Downloading part %d of %d' % (i+1, part_count)) | ||||
|  | ||||
|             # The reply is a '|'-separated record; field 0 is used as the URL | ||||
|             # prefix and field 3 as the 'key' query value. | ||||
|             part_info = part_str.split('|') | ||||
|             video_url = '%s%s?key=%s' % (part_info[0], su[i], part_info[3]) | ||||
|  | ||||
|             video_info = { | ||||
|                 'id': '%s_part%02d' % (video_id, i + 1), | ||||
|                 'title': title, | ||||
|                 'url': video_url, | ||||
|                 'ext': 'mp4', | ||||
|             } | ||||
|             playlist.append(video_info) | ||||
|  | ||||
|         # A single part is returned as a plain video carrying the page's id. | ||||
|         if len(playlist) == 1: | ||||
|             info = playlist[0] | ||||
|             info['id'] = video_id | ||||
|         else: | ||||
|             info = { | ||||
|                 '_type': 'playlist', | ||||
|                 'entries': playlist, | ||||
|                 'id': video_id, | ||||
|             } | ||||
|  | ||||
|         return info | ||||
| @@ -1,9 +1,12 @@ | ||||
| import json | ||||
| import re | ||||
| import itertools | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     compat_str, | ||||
|     compat_urlparse, | ||||
|     compat_urllib_parse, | ||||
|  | ||||
|     ExtractorError, | ||||
|     unified_strdate, | ||||
| @@ -22,6 +25,7 @@ class SoundcloudIE(InfoExtractor): | ||||
|     _VALID_URL = r'''^(?:https?://)? | ||||
|                     (?:(?:(?:www\.)?soundcloud\.com/([\w\d-]+)/([\w\d-]+)/?(?:[?].*)?$) | ||||
|                        |(?:api\.soundcloud\.com/tracks/(?P<track_id>\d+)) | ||||
|                        |(?P<widget>w.soundcloud.com/player/?.*?url=.*) | ||||
|                     ) | ||||
|                     ''' | ||||
|     IE_NAME = u'soundcloud' | ||||
| @@ -51,10 +55,11 @@ class SoundcloudIE(InfoExtractor): | ||||
|     def _resolv_url(cls, url): | ||||
|         return 'http://api.soundcloud.com/resolve.json?url=' + url + '&client_id=' + cls._CLIENT_ID | ||||
|  | ||||
|     def _extract_info_dict(self, info, full_title=None): | ||||
|     def _extract_info_dict(self, info, full_title=None, quiet=False): | ||||
|         video_id = info['id'] | ||||
|         name = full_title or video_id | ||||
|         self.report_extraction(name) | ||||
|         if quiet == False: | ||||
|             self.report_extraction(name) | ||||
|  | ||||
|         thumbnail = info['artwork_url'] | ||||
|         if thumbnail is not None: | ||||
| @@ -79,6 +84,9 @@ class SoundcloudIE(InfoExtractor): | ||||
|         if track_id is not None: | ||||
|             info_json_url = 'http://api.soundcloud.com/tracks/' + track_id + '.json?client_id=' + self._CLIENT_ID | ||||
|             full_title = track_id | ||||
|         elif mobj.group('widget'): | ||||
|             query = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query) | ||||
|             return self.url_result(query['url'][0], ie='Soundcloud') | ||||
|         else: | ||||
|             # extract uploader (which is in the url) | ||||
|             uploader = mobj.group(1) | ||||
| @@ -193,3 +201,41 @@ class SoundcloudSetIE(SoundcloudIE): | ||||
|                 'id': info['id'], | ||||
|                 'title': info['title'], | ||||
|                 } | ||||
|  | ||||
|  | ||||
| class SoundcloudUserIE(SoundcloudIE): | ||||
|     _VALID_URL = r'https?://(www\.)?soundcloud.com/(?P<user>[^/]+)(/?(tracks/)?)?(\?.*)?$' | ||||
|     IE_NAME = u'soundcloud:user' | ||||
|  | ||||
|     # it's in tests/test_playlists.py | ||||
|     _TEST = None | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         uploader = mobj.group('user') | ||||
|  | ||||
|         url = 'http://soundcloud.com/%s/' % uploader | ||||
|         resolv_url = self._resolv_url(url) | ||||
|         user_json = self._download_webpage(resolv_url, uploader, | ||||
|             u'Downloading user info') | ||||
|         user = json.loads(user_json) | ||||
|  | ||||
|         tracks = [] | ||||
|         for i in itertools.count(): | ||||
|             data = compat_urllib_parse.urlencode({'offset': i*50, | ||||
|                                                   'client_id': self._CLIENT_ID, | ||||
|                                                   }) | ||||
|             tracks_url = 'http://api.soundcloud.com/users/%s/tracks.json?' % user['id'] + data | ||||
|             response = self._download_webpage(tracks_url, uploader,  | ||||
|                 u'Downloading tracks page %s' % (i+1)) | ||||
|             new_tracks = json.loads(response) | ||||
|             tracks.extend(self._extract_info_dict(track, quiet=True) for track in new_tracks) | ||||
|             if len(new_tracks) < 50: | ||||
|                 break | ||||
|  | ||||
|         return { | ||||
|             '_type': 'playlist', | ||||
|             'id': compat_str(user['id']), | ||||
|             'title': user['username'], | ||||
|             'entries': tracks, | ||||
|         } | ||||
|   | ||||
							
								
								
									
										38
									
								
								youtube_dl/extractor/southparkstudios.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										38
									
								
								youtube_dl/extractor/southparkstudios.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,38 @@ | ||||
| import re | ||||
|  | ||||
| from .mtv import MTVIE, _media_xml_tag | ||||
|  | ||||
|  | ||||
| class SouthParkStudiosIE(MTVIE): | ||||
|     IE_NAME = u'southparkstudios.com' | ||||
|     _VALID_URL = r'https?://www\.southparkstudios\.com/(clips|full-episodes)/(?P<id>.+?)(\?|#|$)' | ||||
|  | ||||
|     _FEED_URL = 'http://www.southparkstudios.com/feeds/video-player/mrss' | ||||
|  | ||||
|     _TEST = { | ||||
|         u'url': u'http://www.southparkstudios.com/clips/104437/bat-daded#tab=featured', | ||||
|         u'file': u'a7bff6c2-ed00-11e0-aca6-0026b9414f30.mp4', | ||||
|         u'info_dict': { | ||||
|             u'title': u'Bat Daded', | ||||
|             u'description': u'Randy disqualifies South Park by getting into a fight with Bat Dad.', | ||||
|         }, | ||||
|     } | ||||
|  | ||||
|     # Overwrite MTVIE properties we don't want | ||||
|     _TESTS = [] | ||||
|  | ||||
|     def _get_thumbnail_url(self, uri, itemdoc): | ||||
|         search_path = '%s/%s' % (_media_xml_tag('group'), _media_xml_tag('thumbnail')) | ||||
|         thumb_node = itemdoc.find(search_path) | ||||
|         if thumb_node is None: | ||||
|             return None | ||||
|         else: | ||||
|             return thumb_node.attrib['url'] | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         video_id = mobj.group('id') | ||||
|         webpage = self._download_webpage(url, video_id) | ||||
|         mgid = self._search_regex(r'swfobject.embedSWF\(".*?(mgid:.*?)"', | ||||
|                                   webpage, u'mgid') | ||||
|         return self._get_videos_info(mgid) | ||||
| @@ -5,13 +5,13 @@ from .common import InfoExtractor | ||||
| class StatigramIE(InfoExtractor): | ||||
|     _VALID_URL = r'(?:http://)?(?:www\.)?statigr\.am/p/([^/]+)' | ||||
|     _TEST = { | ||||
|         u'url': u'http://statigr.am/p/484091715184808010_284179915', | ||||
|         u'file': u'484091715184808010_284179915.mp4', | ||||
|         u'md5': u'deda4ff333abe2e118740321e992605b', | ||||
|         u'url': u'http://statigr.am/p/522207370455279102_24101272', | ||||
|         u'file': u'522207370455279102_24101272.mp4', | ||||
|         u'md5': u'6eb93b882a3ded7c378ee1d6884b1814', | ||||
|         u'info_dict': { | ||||
|             u"uploader_id": u"videoseconds",  | ||||
|             u"title": u"Instagram photo by @videoseconds" | ||||
|         } | ||||
|             u'uploader_id': u'aguynamedpatrick', | ||||
|             u'title': u'Instagram photo by @aguynamedpatrick (Patrick Janelle)', | ||||
|         }, | ||||
|     } | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|   | ||||
							
								
								
									
										91
									
								
								youtube_dl/extractor/subtitles.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										91
									
								
								youtube_dl/extractor/subtitles.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,91 @@ | ||||
| from .common import InfoExtractor | ||||
|  | ||||
| from ..utils import ( | ||||
|     compat_str, | ||||
|     ExtractorError, | ||||
| ) | ||||
|  | ||||
|  | ||||
| class SubtitlesInfoExtractor(InfoExtractor): | ||||
|     @property | ||||
|     def _have_to_download_any_subtitles(self): | ||||
|         return any([self._downloader.params.get('writesubtitles', False), | ||||
|                     self._downloader.params.get('writeautomaticsub')]) | ||||
|  | ||||
|     def _list_available_subtitles(self, video_id, webpage=None): | ||||
|         """ outputs the available subtitles for the video """ | ||||
|         sub_lang_list = self._get_available_subtitles(video_id) | ||||
|         auto_captions_list = self._get_available_automatic_caption(video_id, webpage) | ||||
|         sub_lang = ",".join(list(sub_lang_list.keys())) | ||||
|         self.to_screen(u'%s: Available subtitles for video: %s' % | ||||
|                        (video_id, sub_lang)) | ||||
|         auto_lang = ",".join(auto_captions_list.keys()) | ||||
|         self.to_screen(u'%s: Available automatic captions for video: %s' % | ||||
|                        (video_id, auto_lang)) | ||||
|  | ||||
|     def extract_subtitles(self, video_id, video_webpage=None): | ||||
|         """ | ||||
|         returns {sub_lang: sub} ,{} if subtitles not found or None if the | ||||
|         subtitles aren't requested. | ||||
|         """ | ||||
|         if not self._have_to_download_any_subtitles: | ||||
|             return None | ||||
|         available_subs_list = {} | ||||
|         if self._downloader.params.get('writeautomaticsub', False): | ||||
|             available_subs_list.update(self._get_available_automatic_caption(video_id, video_webpage)) | ||||
|         if self._downloader.params.get('writesubtitles', False): | ||||
|             available_subs_list.update(self._get_available_subtitles(video_id)) | ||||
|  | ||||
|         if not available_subs_list:  # error, it didn't get the available subtitles | ||||
|             return {} | ||||
|         if self._downloader.params.get('allsubtitles', False): | ||||
|             sub_lang_list = available_subs_list | ||||
|         else: | ||||
|             if self._downloader.params.get('subtitleslangs', False): | ||||
|                 requested_langs = self._downloader.params.get('subtitleslangs') | ||||
|             elif 'en' in available_subs_list: | ||||
|                 requested_langs = ['en'] | ||||
|             else: | ||||
|                 requested_langs = [list(available_subs_list.keys())[0]] | ||||
|  | ||||
|             sub_lang_list = {} | ||||
|             for sub_lang in requested_langs: | ||||
|                 if not sub_lang in available_subs_list: | ||||
|                     self._downloader.report_warning(u'no closed captions found in the specified language "%s"' % sub_lang) | ||||
|                     continue | ||||
|                 sub_lang_list[sub_lang] = available_subs_list[sub_lang] | ||||
|  | ||||
|         subtitles = {} | ||||
|         for sub_lang, url in sub_lang_list.items(): | ||||
|             subtitle = self._request_subtitle_url(sub_lang, url) | ||||
|             if subtitle: | ||||
|                 subtitles[sub_lang] = subtitle | ||||
|         return subtitles | ||||
|  | ||||
|     def _request_subtitle_url(self, sub_lang, url): | ||||
|         """ makes the http request for the subtitle """ | ||||
|         try: | ||||
|             sub = self._download_webpage(url, None, note=False) | ||||
|         except ExtractorError as err: | ||||
|             self._downloader.report_warning(u'unable to download video subtitles for %s: %s' % (sub_lang, compat_str(err))) | ||||
|             return | ||||
|         if not sub: | ||||
|             self._downloader.report_warning(u'Did not fetch video subtitles') | ||||
|             return | ||||
|         return sub | ||||
|  | ||||
|     def _get_available_subtitles(self, video_id): | ||||
|         """ | ||||
|         returns {sub_lang: url} or {} if not available | ||||
|         Must be redefined by the subclasses | ||||
|         """ | ||||
|         pass | ||||
|  | ||||
|     def _get_available_automatic_caption(self, video_id, webpage): | ||||
|         """ | ||||
|         returns {sub_lang: url} or {} if not available | ||||
|         Must be redefined by the subclasses that support automatic captions, | ||||
|         otherwise it will return {} | ||||
|         """ | ||||
|         self._downloader.report_warning(u'Automatic Captions not supported by this server') | ||||
|         return {} | ||||
| @@ -33,7 +33,7 @@ class TeamcocoIE(InfoExtractor): | ||||
|         data_url = 'http://teamcoco.com/cvp/2.0/%s.xml' % video_id | ||||
|         data = self._download_webpage(data_url, video_id, 'Downloading data webpage') | ||||
|  | ||||
|         video_url = self._html_search_regex(r'<file type="high".*?>(.*?)</file>', | ||||
|         video_url = self._html_search_regex(r'<file [^>]*type="high".*?>(.*?)</file>', | ||||
|             data, u'video URL') | ||||
|  | ||||
|         return [{ | ||||
|   | ||||
| @@ -77,12 +77,20 @@ class TEDIE(InfoExtractor): | ||||
|          | ||||
|         thumbnail = self._search_regex(r'</span>[\s.]*</div>[\s.]*<img src="(.*?)"', | ||||
|                                        webpage, 'thumbnail') | ||||
|         formats = [{ | ||||
|             'ext': 'mp4', | ||||
|             'url': stream['file'], | ||||
|             'format': stream['id'] | ||||
|             } for stream in info['htmlStreams']] | ||||
|         info = { | ||||
|                 'id': info['id'], | ||||
|                 'url': info['htmlStreams'][-1]['file'], | ||||
|                 'ext': 'mp4', | ||||
|                 'title': title, | ||||
|                 'thumbnail': thumbnail, | ||||
|                 'description': desc, | ||||
|                 } | ||||
|             'id': info['id'], | ||||
|             'title': title, | ||||
|             'thumbnail': thumbnail, | ||||
|             'description': desc, | ||||
|             'formats': formats, | ||||
|         } | ||||
|  | ||||
|         # TODO: Remove when #980 has been merged | ||||
|         info.update(info['formats'][-1]) | ||||
|  | ||||
|         return info | ||||
|   | ||||
| @@ -6,20 +6,17 @@ import re | ||||
| from .common import InfoExtractor | ||||
|  | ||||
| class TF1IE(InfoExtractor): | ||||
|     """ | ||||
|     TF1 uses the wat.tv player, currently it can only download videos with the | ||||
|     html5 player enabled, it cannot download HD videos. | ||||
|     """ | ||||
|     _WORKING = False | ||||
|     """TF1 uses the wat.tv player.""" | ||||
|     _VALID_URL = r'http://videos.tf1.fr/.*-(.*?).html' | ||||
|     _TEST = { | ||||
|         u'url': u'http://videos.tf1.fr/auto-moto/citroen-grand-c4-picasso-2013-presentation-officielle-8062060.html', | ||||
|         u'file': u'10635995.mp4', | ||||
|         u'md5': u'66789d3e91278d332f75e1feb7aea327', | ||||
|         u'md5': u'2e378cc28b9957607d5e88f274e637d8', | ||||
|         u'info_dict': { | ||||
|             u'title': u'Citroën Grand C4 Picasso 2013 : présentation officielle', | ||||
|             u'description': u'Vidéo officielle du nouveau Citroën Grand C4 Picasso, lancé à l\'automne 2013.', | ||||
|         } | ||||
|         }, | ||||
|         u'skip': u'Sometimes wat serves the whole file with the --test option', | ||||
|     } | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|   | ||||
							
								
								
									
										73
									
								
								youtube_dl/extractor/trilulilu.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										73
									
								
								youtube_dl/extractor/trilulilu.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,73 @@ | ||||
| import json | ||||
| import re | ||||
| import xml.etree.ElementTree | ||||
|  | ||||
| from .common import InfoExtractor | ||||
|  | ||||
|  | ||||
| class TriluliluIE(InfoExtractor): | ||||
|     _VALID_URL = r'(?x)(?:https?://)?(?:www\.)?trilulilu\.ro/video-(?P<category>[^/]+)/(?P<video_id>[^/]+)' | ||||
|     _TEST = { | ||||
|         u"url": u"http://www.trilulilu.ro/video-animatie/big-buck-bunny-1", | ||||
|         u'file': u"big-buck-bunny-1.mp4", | ||||
|         u'info_dict': { | ||||
|             u"title": u"Big Buck Bunny", | ||||
|             u"description": u":) pentru copilul din noi", | ||||
|         }, | ||||
|         # Server ignores Range headers (--test) | ||||
|         u"params": { | ||||
|             u"skip_download": True | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         video_id = mobj.group('video_id') | ||||
|  | ||||
|         webpage = self._download_webpage(url, video_id) | ||||
|  | ||||
|         title = self._og_search_title(webpage) | ||||
|         thumbnail = self._og_search_thumbnail(webpage) | ||||
|         description = self._og_search_description(webpage) | ||||
|  | ||||
|         log_str = self._search_regex( | ||||
|             r'block_flash_vars[ ]=[ ]({[^}]+})', webpage, u'log info') | ||||
|         log = json.loads(log_str) | ||||
|  | ||||
|         format_url = (u'http://fs%(server)s.trilulilu.ro/%(hash)s/' | ||||
|                       u'video-formats2' % log) | ||||
|         format_str = self._download_webpage( | ||||
|             format_url, video_id, | ||||
|             note=u'Downloading formats', | ||||
|             errnote=u'Error while downloading formats') | ||||
|  | ||||
|         format_doc = xml.etree.ElementTree.fromstring(format_str) | ||||
|   | ||||
|         video_url_template = ( | ||||
|             u'http://fs%(server)s.trilulilu.ro/stream.php?type=video' | ||||
|             u'&source=site&hash=%(hash)s&username=%(userid)s&' | ||||
|             u'key=ministhebest&format=%%s&sig=&exp=' % | ||||
|             log) | ||||
|         formats = [ | ||||
|             { | ||||
|                 'format': fnode.text, | ||||
|                 'url': video_url_template % fnode.text, | ||||
|                 'ext': fnode.text.partition('-')[0] | ||||
|             } | ||||
|  | ||||
|             for fnode in format_doc.findall('./formats/format') | ||||
|         ] | ||||
|  | ||||
|         info = { | ||||
|             '_type': 'video', | ||||
|             'id': video_id, | ||||
|             'formats': formats, | ||||
|             'title': title, | ||||
|             'description': description, | ||||
|             'thumbnail': thumbnail, | ||||
|         } | ||||
|  | ||||
|         # TODO: Remove when #980 has been merged | ||||
|         info.update(formats[-1]) | ||||
|  | ||||
|         return info | ||||
							
								
								
									
										32
									
								
								youtube_dl/extractor/unistra.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										32
									
								
								youtube_dl/extractor/unistra.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,32 @@ | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
|  | ||||
| class UnistraIE(InfoExtractor): | ||||
|     _VALID_URL = r'http://utv.unistra.fr/(?:index|video).php\?id_video\=(\d+)' | ||||
|  | ||||
|     _TEST = { | ||||
|         u'url': u'http://utv.unistra.fr/video.php?id_video=154', | ||||
|         u'file': u'154.mp4', | ||||
|         u'md5': u'736f605cfdc96724d55bb543ab3ced24', | ||||
|         u'info_dict': { | ||||
|             u'title': u'M!ss Yella', | ||||
|             u'description': u'md5:104892c71bd48e55d70b902736b81bbf', | ||||
|         }, | ||||
|     } | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         id = re.match(self._VALID_URL, url).group(1) | ||||
|         webpage = self._download_webpage(url, id) | ||||
|         file = re.search(r'file: "(.*?)",', webpage).group(1) | ||||
|         title = self._html_search_regex(r'<title>UTV - (.*?)</', webpage, u'title') | ||||
|  | ||||
|         video_url = 'http://vod-flash.u-strasbg.fr:8080/' + file | ||||
|  | ||||
|         return {'id': id, | ||||
|                 'title': title, | ||||
|                 'ext': 'mp4', | ||||
|                 'url': video_url, | ||||
|                 'description': self._html_search_regex(r'<meta name="Description" content="(.*?)"', webpage, u'description', flags=re.DOTALL), | ||||
|                 'thumbnail': self._search_regex(r'image: "(.*?)"', webpage, u'thumbnail'), | ||||
|                 } | ||||
| @@ -1,6 +1,11 @@ | ||||
| import json | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     compat_urlparse, | ||||
|     get_meta_content, | ||||
| ) | ||||
|  | ||||
|  | ||||
| class UstreamIE(InfoExtractor): | ||||
| @@ -43,3 +48,25 @@ class UstreamIE(InfoExtractor): | ||||
|                 'thumbnail': thumbnail, | ||||
|                } | ||||
|         return info | ||||
|  | ||||
| class UstreamChannelIE(InfoExtractor): | ||||
|     _VALID_URL = r'https?://www\.ustream\.tv/channel/(?P<slug>.+)' | ||||
|     IE_NAME = u'ustream:channel' | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         m = re.match(self._VALID_URL, url) | ||||
|         slug = m.group('slug') | ||||
|         webpage = self._download_webpage(url, slug) | ||||
|         channel_id = get_meta_content('ustream:channel_id', webpage) | ||||
|  | ||||
|         BASE = 'http://www.ustream.tv' | ||||
|         next_url = '/ajax/socialstream/videos/%s/1.json' % channel_id | ||||
|         video_ids = [] | ||||
|         while next_url: | ||||
|             reply = json.loads(self._download_webpage(compat_urlparse.urljoin(BASE, next_url), channel_id)) | ||||
|             video_ids.extend(re.findall(r'data-content-id="(\d.*)"', reply['data'])) | ||||
|             next_url = reply['nextUrl'] | ||||
|  | ||||
|         urls = ['http://www.ustream.tv/recorded/' + vid for vid in video_ids] | ||||
|         url_entries = [self.url_result(eurl, 'Ustream') for eurl in urls] | ||||
|         return self.playlist_result(url_entries, channel_id) | ||||
|   | ||||
							
								
								
									
										56
									
								
								youtube_dl/extractor/veehd.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										56
									
								
								youtube_dl/extractor/veehd.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,56 @@ | ||||
| import re | ||||
| import json | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     compat_urlparse, | ||||
|     get_element_by_id, | ||||
|     clean_html, | ||||
| ) | ||||
|  | ||||
| class VeeHDIE(InfoExtractor): | ||||
|     _VALID_URL = r'https?://veehd.com/video/(?P<id>\d+)' | ||||
|  | ||||
|     _TEST = { | ||||
|         u'url': u'http://veehd.com/video/4686958', | ||||
|         u'file': u'4686958.mp4', | ||||
|         u'info_dict': { | ||||
|             u'title': u'Time Lapse View from Space ( ISS)', | ||||
|             u'uploader_id': u'spotted', | ||||
|             u'description': u'md5:f0094c4cf3a72e22bc4e4239ef767ad7', | ||||
|         }, | ||||
|     } | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         video_id = mobj.group('id') | ||||
|  | ||||
|         webpage = self._download_webpage(url, video_id) | ||||
|         player_path = self._search_regex(r'\$\("#playeriframe"\).attr\({src : "(.+?)"', | ||||
|             webpage, u'player path') | ||||
|         player_url = compat_urlparse.urljoin(url, player_path) | ||||
|         player_page = self._download_webpage(player_url, video_id, | ||||
|             u'Downloading player page') | ||||
|         config_json = self._search_regex(r'value=\'config=({.+?})\'', | ||||
|             player_page, u'config json') | ||||
|         config = json.loads(config_json) | ||||
|  | ||||
|         video_url = compat_urlparse.unquote(config['clip']['url']) | ||||
|         title = clean_html(get_element_by_id('videoName', webpage).rpartition('|')[0]) | ||||
|         uploader_id = self._html_search_regex(r'<a href="/profile/\d+">(.+?)</a>', | ||||
|             webpage, u'uploader') | ||||
|         thumbnail = self._search_regex(r'<img id="veehdpreview" src="(.+?)"', | ||||
|             webpage, u'thumbnail') | ||||
|         description = self._html_search_regex(r'<td class="infodropdown".*?<div>(.*?)<ul', | ||||
|             webpage, u'description', flags=re.DOTALL) | ||||
|  | ||||
|         return { | ||||
|             '_type': 'video', | ||||
|             'id': video_id, | ||||
|             'title': title, | ||||
|             'url': video_url, | ||||
|             'ext': 'mp4', | ||||
|             'uploader_id': uploader_id, | ||||
|             'thumbnail': thumbnail, | ||||
|             'description': description, | ||||
|         } | ||||
| @@ -8,18 +8,18 @@ from ..utils import ( | ||||
|  | ||||
| class VevoIE(InfoExtractor): | ||||
|     """ | ||||
|     Accecps urls from vevo.com or in the format 'vevo:{id}' | ||||
|     Accepts urls from vevo.com or in the format 'vevo:{id}' | ||||
|     (currently used by MTVIE) | ||||
|     """ | ||||
|     _VALID_URL = r'((http://www.vevo.com/watch/.*?/.*?/)|(vevo:))(?P<id>.*)$' | ||||
|     _VALID_URL = r'((http://www.vevo.com/watch/.*?/.*?/)|(vevo:))(?P<id>.*?)(\?|$)' | ||||
|     _TEST = { | ||||
|         u'url': u'http://www.vevo.com/watch/hurts/somebody-to-die-for/GB1101300280', | ||||
|         u'file': u'GB1101300280.mp4', | ||||
|         u'md5': u'06bea460acb744eab74a9d7dcb4bfd61', | ||||
|         u'info_dict': { | ||||
|             u"upload_date": u"20130624",  | ||||
|             u"uploader": u"Hurts",  | ||||
|             u"title": u"Somebody To Die For" | ||||
|             u"upload_date": u"20130624", | ||||
|             u"uploader": u"Hurts", | ||||
|             u"title": u"Somebody to Die For" | ||||
|         } | ||||
|     } | ||||
|  | ||||
| @@ -35,12 +35,12 @@ class VevoIE(InfoExtractor): | ||||
|  | ||||
|         self.report_extraction(video_id) | ||||
|         video_info = json.loads(info_json) | ||||
|         m_urls = list(re.finditer(r'<video src="(?P<ext>.*?):(?P<url>.*?)"', links_webpage)) | ||||
|         m_urls = list(re.finditer(r'<video src="(?P<ext>.*?):/?(?P<url>.*?)"', links_webpage)) | ||||
|         if m_urls is None or len(m_urls) == 0: | ||||
|             raise ExtractorError(u'Unable to extract video url') | ||||
|         # They are sorted from worst to best quality | ||||
|         m_url = m_urls[-1] | ||||
|         video_url = base_url + m_url.group('url') | ||||
|         video_url = base_url + '/' + m_url.group('url') | ||||
|         ext = m_url.group('ext') | ||||
|  | ||||
|         return {'url': video_url, | ||||
|   | ||||
							
								
								
									
										38
									
								
								youtube_dl/extractor/vice.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										38
									
								
								youtube_dl/extractor/vice.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,38 @@ | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from .ooyala import OoyalaIE | ||||
| from ..utils import ExtractorError | ||||
|  | ||||
|  | ||||
| class ViceIE(InfoExtractor): | ||||
|     _VALID_URL = r'http://www.vice.com/.*?/(?P<name>.+)' | ||||
|  | ||||
|     _TEST = { | ||||
|         u'url': u'http://www.vice.com/Fringes/cowboy-capitalists-part-1', | ||||
|         u'file': u'43cW1mYzpia9IlestBjVpd23Yu3afAfp.mp4', | ||||
|         u'info_dict': { | ||||
|             u'title': u'VICE_COWBOYCAPITALISTS_PART01_v1_VICE_WM_1080p.mov', | ||||
|         }, | ||||
|         u'params': { | ||||
|             # Requires ffmpeg (m3u8 manifest) | ||||
|             u'skip_download': True, | ||||
|         }, | ||||
|     } | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         name = mobj.group('name') | ||||
|         webpage = self._download_webpage(url, name) | ||||
|         try: | ||||
|             ooyala_url = self._og_search_video_url(webpage) | ||||
|         except ExtractorError: | ||||
|             try: | ||||
|                 embed_code = self._search_regex( | ||||
|                     r'OO.Player.create\(\'ooyalaplayer\', \'(.+?)\'', webpage, | ||||
|                     u'ooyala embed code') | ||||
|                 ooyala_url = OoyalaIE._url_for_embed_code(embed_code) | ||||
|             except ExtractorError: | ||||
|                 raise ExtractorError(u'The page doesn\'t contain a video', expected=True) | ||||
|         return self.url_result(ooyala_url, ie='Ooyala') | ||||
|  | ||||
							
								
								
									
										64
									
								
								youtube_dl/extractor/viddler.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										64
									
								
								youtube_dl/extractor/viddler.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,64 @@ | ||||
| import json | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     determine_ext, | ||||
| ) | ||||
|  | ||||
|  | ||||
| class ViddlerIE(InfoExtractor): | ||||
|     _VALID_URL = r'(?P<domain>https?://(?:www\.)?viddler.com)/(?:v|embed|player)/(?P<id>[0-9]+)' | ||||
|     _TEST = { | ||||
|         u"url": u"http://www.viddler.com/v/43903784", | ||||
|         u'file': u'43903784.mp4', | ||||
|         u'md5': u'fbbaedf7813e514eb7ca30410f439ac9', | ||||
|         u'info_dict': { | ||||
|             u"title": u"Video Made Easy", | ||||
|             u"uploader": u"viddler", | ||||
|             u"duration": 100.89, | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         video_id = mobj.group('id') | ||||
|  | ||||
|         embed_url = mobj.group('domain') + u'/embed/' + video_id | ||||
|         webpage = self._download_webpage(embed_url, video_id) | ||||
|  | ||||
|         video_sources_code = self._search_regex( | ||||
|             r"(?ms)sources\s*:\s*(\{.*?\})", webpage, u'video URLs') | ||||
|         video_sources = json.loads(video_sources_code.replace("'", '"')) | ||||
|  | ||||
|         formats = [{ | ||||
|             'url': video_url, | ||||
|             'format': format_id, | ||||
|         } for video_url, format_id in video_sources.items()] | ||||
|  | ||||
|         title = self._html_search_regex( | ||||
|             r"title\s*:\s*'([^']*)'", webpage, u'title') | ||||
|         uploader = self._html_search_regex( | ||||
|             r"authorName\s*:\s*'([^']*)'", webpage, u'uploader', fatal=False) | ||||
|         duration_s = self._html_search_regex( | ||||
|             r"duration\s*:\s*([0-9.]*)", webpage, u'duration', fatal=False) | ||||
|         duration = float(duration_s) if duration_s else None | ||||
|         thumbnail = self._html_search_regex( | ||||
|             r"thumbnail\s*:\s*'([^']*)'", | ||||
|             webpage, u'thumbnail', fatal=False) | ||||
|  | ||||
|         info = { | ||||
|             '_type': 'video', | ||||
|             'id': video_id, | ||||
|             'title': title, | ||||
|             'thumbnail': thumbnail, | ||||
|             'uploader': uploader, | ||||
|             'duration': duration, | ||||
|             'formats': formats, | ||||
|         } | ||||
|  | ||||
|         # TODO: Remove when #980 has been merged | ||||
|         info['formats'][-1]['ext'] = determine_ext(info['formats'][-1]['url']) | ||||
|         info.update(info['formats'][-1]) | ||||
|  | ||||
|         return info | ||||
Some files were not shown because too many files have changed in this diff Show More
		Reference in New Issue
	
	Block a user